In [1]:
import numpy as np
from sklearn import linear_model

np.random.seed(6996)

# error term
epsilon_vec = np.random.normal(0, 1, 500).reshape(500,1)
# X_matrix or regressors or predictiors
X_mat = np.random.normal(0, 2, size = (500,500))
# Slope
slope_vec = np.random.uniform(1,5,500)
# Simulate Ys
Y_mat = 1 + np.cumsum(X_mat * slope_vec,axis=1)[:, 1:] + epsilon_vec
# each col of Y_mat representing one simulation vector: starting with 2 regressors, end with 500

In [9]:
reg = linear_model.LinearRegression().fit(X_mat[:, :40], Y_mat[:, 38])
reg.coef_

array([1.36638256, 1.71381778, 4.30857339, 4.02333555, 2.29722081,
       3.27457633, 3.32678737, 4.56420342, 1.54614027, 4.79530723,
       4.1302726 , 2.96639853, 4.17745142, 1.9722427 , 2.16563006,
       3.83780114, 3.01453045, 2.87295669, 4.75014475, 1.37495175,
       4.0565355 , 3.75476909, 3.2434535 , 1.16261201, 1.73704169,
       3.86547065, 1.7702381 , 3.2059882 , 1.58854431, 2.97545719,
       3.45923596, 1.85627963, 3.89199054, 3.67255803, 1.32996631,
       3.45023662, 3.50167451, 3.10411985, 3.36174135, 1.18849295])

In [11]:
reg.intercept_ + np.sum(reg.coef_ * X_mat[:, :40], axis=1)

array([ -42.8642978 ,   32.44503649,  -31.37153592,   37.9277449 ,
        -30.23949901,   49.39888178,   58.12567884,   24.54491598,
        -79.66789723,  -14.32770044,   34.83517909,  -44.72665076,
        -20.88576459,    6.69718321,   51.21947319,    1.52812135,
        -12.31493513,  -21.03782468,  -62.92464129,   19.88679775,
         55.38323218,    3.60483749,   29.1347789 ,  -64.24098329,
        -25.29461712,  -22.42175373,  -45.03315524,  -40.03860808,
         52.81518061,   34.38037207,  -31.8498408 ,   47.74003979,
         66.0841758 ,  -31.02530645,  -17.46805509,   65.73578385,
        -64.4078094 ,   37.44027923,   19.00383001,    8.53465184,
         46.12191565,  -29.34174967,   42.05500464, -121.62981454,
        -35.8347227 ,  -37.36259186,   56.84731484,   27.16373858,
        -77.72380179,  -89.38424008,   93.94935164,   -2.80337371,
         26.85049275,   52.34765947,   58.73644649,  -32.86716849,
        -22.83305764,   34.04196034,  -11.27978195,  -39.51722

In [20]:
reg = linear_model.Ridge(alpha=0.5).fit(X_mat[:, :40], Y_mat[:, 38])
reg.alpha_

AttributeError: 'Ridge' object has no attribute 'alpha_'

In [15]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(reg, X_mat[:, :40], Y_mat[:, 38], cv=5)


In [21]:
N_feature = 10
split_ratio = 0.7
train_size = int(len(X_mat)*split_ratio)
X_train = X_mat[0:train_size, 0:N_feature]
Y_train = Y_mat[0:train_size, N_feature-2]
X_test = X_mat[train_size:, 0:N_feature]
Y_test = Y_mat[train_size:, N_feature-2]


In [25]:
alphas = [5**i for i in range(-8, 2)]
regr = linear_model.RidgeCV(alphas=alphas, cv=5).fit(X_train, Y_train)

In [27]:
alphas

[2.56e-06, 1.28e-05, 6.4e-05, 0.00032, 0.0016, 0.008, 0.04, 0.2, 1, 5]

In [26]:
regr.alpha_

2.56e-06

In [32]:
reg = linear_model.LassoCV(cv=5).fit(X_train, Y_train)

In [33]:
reg.alpha_

0.019252075773247068

In [34]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


# Plot the data
#Define input array with angles from 60deg to 300deg converted to radians
x = np.array([i*np.pi/180 for i in range(60, 600, 1)])
np.random.seed(10)  # Setting seed for reproducability
y1 = np.sin(x) + np.random.normal(0, 0.15, len(x))
y2 = np.cos(x) + np.random.normal(0, 0.15, len(x))
data = pd.DataFrame(np.column_stack([x, y1, y2]), columns=['x', 'y1', 'y2'])

# Combine the data
y12 = np.append(y1, y2)
x12 = np.append(x, x)
y = np.append(np.repeat(1, len(x)), np.repeat(2, len(x)))
X = np.column_stack([x12, y12])

combined_data = pd.DataFrame(np.column_stack([X, y]), columns=['y', 'x', 'c'])
print(x12[1])

1.064650843716541
