In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

In [None]:
# Load the full dataset; later cells read feature columns X1..X8 and the response column Y from it.
data = pd.read_csv("data.csv")

In [None]:
# Select seaborn's "ticks" style and enable its colour-code shorthands.
sns.set(style="ticks",color_codes=True)
# Pairwise plot matrix over every column except the response 'Y'.
a = sns.pairplot(data.loc[:, data.columns != 'Y'])
# savefig writes matplotlib's current figure — presumably the pairplot grid created just above.
plt.savefig("2a.png")

In [None]:
# Response vector: the 'Y' column of the loaded table.
y_data = data["Y"]

# Predictor matrix: the eight X columns, standardised column by column
# (subtract the column mean, then divide by the column standard deviation).
x_data = data[["X1","X2","X3","X4","X5","X6","X7","X8"]]
x_data = x_data.sub(x_data.mean()).div(x_data.std())

# Sanity check: after scaling every column should report a standard deviation of 1.
print(x_data.std())

In [None]:
# Per-column sum of squares of the standardised predictors.
# DataFrame.sum() reduces down each column by default. Calling np.sum() on a DataFrame
# instead forwards axis=None, whose meaning pandas is changing (column-wise -> scalar),
# so the explicit method keeps this check stable across pandas versions.
print(x_data.pow(2).sum())

In [None]:
# Penalty strengths at which the ridge coefficient path is traced.
alphas = np.array([0.01,0.1,0.5,1,1.5,2,5,10,20,30,50,100,200,300])

# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and removed
# in 1.2; on newer releases this constructor raises TypeError — confirm the pinned version.
ridge = Ridge(normalize= True)

# Refit the same estimator at every strength and keep each fitted coefficient vector.
coefs = []
for a in alphas:
    ridge.set_params(alpha=a)
    ridge.fit(x_data,y_data)
    coefs.append(ridge.coef_)

# Debug output: (number of strengths, number of predictors) and the first coefficient vector.
print(np.shape(coefs))
print(coefs[0])

colors_list = ['red', 'brown','green', 'blue', 'orange','pink','purple','grey']
fig,ax = plt.subplots()

# One curve per predictor: its coefficient as a function of the penalty strength.
n_coefs = len(coefs[0])
for i in range(n_coefs):
    path_i = [fit_coefs[i] for fit_coefs in coefs]
    ax.plot(alphas, path_i, color=colors_list[i])

# Legend entries follow the predictor column order, which is also the curve order.
ax.legend(data[["X1","X2","X3","X4","X5","X6","X7","X8"]].columns, loc='upper right')
ax.set_xscale('log')
ax.set_xlabel('log(\u03BB)')
ax.set_ylabel('coefs')
ax.axis('tight')
ax.set_title("Ridge Regression")
ax.autoscale(enable=True)
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn import linear_model
# Grid of 501 evenly spaced penalty strengths from 0 to 50 for the ridge LOOCV sweep.
# NOTE(review): the grid starts at exactly 0, which has no position on the log-scaled
# x-axis used when the result is plotted — confirm that is intended.
alphas = np.linspace(0,50,501)
def get_test(data, n):
    """Build the leave-one-out hold-out sets for ``data``.

    Returns a list with one entry per element of ``data`` (as counted by
    ``len``); entry ``k`` is the single-element list ``[k]``, i.e. the
    position held out in fold ``k``.

    ``n`` is accepted for call compatibility but does not affect the result.
    """
    return [[row] for row in range(len(data))]

def loocv(xdata, ydata,n, alphas, model):
    """Leave-one-out cross-validation score of ``model`` for each penalty in ``alphas``.

    For every value in ``alphas`` the model is refitted ``n`` times, each time
    with one of the first ``n`` rows held out, and the squared prediction error
    on the held-out row is recorded. The score for that penalty is the sum of
    those errors divided by ``n``.

    Parameters
    ----------
    xdata : pandas.DataFrame
        Predictors, one row per observation.
    ydata : pandas.Series or pandas.DataFrame
        Responses, aligned with ``xdata`` row by row (by position).
    n : int
        Number of held-out folds; pass ``len(xdata)`` for a full LOOCV.
    alphas : sequence of float
        Penalty strengths to evaluate.
    model : object
        Estimator exposing ``set_params(alpha=...)``, ``fit(X, y)`` and
        ``predict(X)``. It is refitted in place, so on return it holds the
        fit from the last fold of the last penalty.

    Returns
    -------
    pandas.DataFrame
        One row per penalty, with the penalty column (keyed by the Greek
        letter lambda) and the 'Cross-Validation Score' column.

    Raises
    ------
    ValueError
        If ``n`` is not between 1 and ``len(xdata)``.
    """
    n_rows = len(xdata)
    if not 1 <= n <= n_rows:
        raise ValueError("n must be between 1 and len(xdata)")

    # All splitting is positional, so the function works for any index labels
    # and only ever touches the data passed in as arguments.
    positions = np.arange(n_rows)
    mse_arr = []
    for a in alphas:
        model.set_params(alpha=a)
        # The error accumulator is reset once per penalty, not once per fold,
        # so the score really is the average over all n folds.
        mse_sum = 0.0
        for i in range(n):
            train_pos = np.delete(positions, i)
            x_train = xdata.iloc[train_pos]
            y_train = ydata.iloc[train_pos]
            # Double brackets keep the held-out row two-dimensional for predict().
            x_test = xdata.iloc[[i]]
            y_test = ydata.iloc[[i]]

            model.fit(x_train, y_train)
            pred = model.predict(x_test)
            # Mean squared error of the single held-out observation.
            mse_sum += np.mean((np.asarray(y_test) - np.asarray(pred)) ** 2)
        mse_arr.append(mse_sum / n)
    result = {'\u03BB':alphas,'Cross-Validation Score':mse_arr}
    df = pd.DataFrame(data = result)
    return df
# Sweep the ridge penalty grid with leave-one-out CV and plot the resulting score curve.
ridge_res = loocv(x_data, y_data,len(x_data),alphas,ridge)
ridge_res.plot(x='\u03BB',y='Cross-Validation Score',kind='line')
plt.xscale('log')
plt.xlabel('log(\u03BB)')
# The y-axis shows the cross-validation score, not coefficients.
plt.ylabel('Cross-Validation Score')
plt.axis('tight')
plt.title('Ridge LOOCV')
plt.autoscale(enable=True)
plt.show()


In [None]:
# Baseline: leave-one-out CV error of an unpenalised linear regression on the same data.
x_text = get_test(x_data, len(x_data))
# fit() refits from scratch on every call, so one estimator instance serves all folds.
linear = linear_model.LinearRegression()
mse_sum = 0

for i in range(len(x_data)):
    # One-element list [i] of the held-out position; deliberately not called `id`,
    # which would rebind the builtin for the rest of the notebook session.
    held_out = x_text[i]

    # drop() removes by index label while iloc[] selects by position, so the
    # position is translated to its label first; both then refer to the same row.
    x_train = x_data.drop(x_data.index[held_out])
    y_train = y_data.drop(y_data.index[held_out])
    x_test = x_data.iloc[held_out]
    y_test = y_data.iloc[held_out]

    linear.fit(x_train, y_train)
    pred = linear.predict(x_test)
    mse_sum +=mean_squared_error(y_test, pred)

# Average of the per-fold squared errors.
mse = mse_sum/len(x_data)
print(mse)

In [None]:
# Penalty strengths at which the lasso coefficient path is traced.
alphas =  np.array([0.01,0.1,0.5,1,1.5,2,5,10,20,30,50,100,200,300])

# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and removed
# in 1.2; on newer releases this constructor raises TypeError — confirm the pinned version.
lasso = Lasso(normalize= True)

# Refit the same estimator at every strength and keep each fitted coefficient vector.
coefs = []
for a in alphas:
    lasso.set_params(alpha=a)
    lasso.fit(x_data,y_data)
    coefs.append(lasso.coef_)

colors_list = ['red', 'brown','green', 'blue', 'orange','pink','purple','grey']
fig,ax = plt.subplots()

# One curve per predictor: its coefficient as a function of the penalty strength.
n_coefs = len(coefs[0])
for i in range(n_coefs):
    path_i = [fit_coefs[i] for fit_coefs in coefs]
    ax.plot(alphas, path_i, color=colors_list[i])

# Legend entries follow the predictor column order, which is also the curve order.
ax.legend(data[["X1","X2","X3","X4","X5","X6","X7","X8"]].columns, loc='upper right')
ax.set_xscale('log')
ax.set_xlabel('log(\u03BB)')
ax.set_ylabel('coefs')
ax.axis('tight')
ax.set_title("Lasso Regression")
ax.autoscale(enable=True)
plt.show()

In [None]:
# Grid of 201 evenly spaced penalty strengths from 0 to 20 for the lasso LOOCV sweep.
# NOTE(review): the grid includes 0, which has no position on the log-scaled x-axis below,
# and scikit-learn's documentation advises against fitting Lasso with alpha=0 — confirm intended.
alphas = np.linspace(0,20,201)
lasso_res = loocv(x_data, y_data,len(x_data),alphas,lasso)
lasso_res.plot(x='\u03BB',y='Cross-Validation Score',kind='line')
plt.xscale('log')
plt.xlabel('log(\u03BB)')
# The y-axis shows the cross-validation score, not coefficients.
plt.ylabel('Cross-Validation Score')
plt.axis('tight')
plt.title('Lasso LOOCV')
plt.autoscale(enable=True)
plt.show()