In [1]:
#Linear Regression

import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
inputs = np.load("data/regression/inputs.npy")
labels = np.load("data/regression/labels.npy")
X_train, X_test, y_train, y_test = train_test_split(inputs, labels, test_size=0.2, random_state=42)

## LinearRegression
The model of choice is base LinearRegression

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

linearReg = LinearRegression().fit(X_train, y_train)
y_pred = linearReg.predict(X_test)
r2_score(y_pred, y_test)

0.9125921264582036

## RidgeRegression
The model of choice here is Ridge Regression

In [4]:
from sklearn.linear_model import Ridge

ridgeReg = Ridge(alpha=1.0)
ridgeRegFit = ridgeReg.fit(X_train, y_train)
y_pred_ridge = ridgeRegFit.predict(X_test)
r2_score(y_pred_ridge, y_test)

0.9129271221104671

## LassoRegression
The model of choice here is base Lasso Regression

In [5]:
from sklearn.linear_model import Lasso
lasso = Lasso(tol=10)
lassoFit = lasso.fit(X_train, y_train.ravel())
y_pred_lasso = lassoFit.predict(X_test)
r2_score(y_pred_lasso, y_test)

0.9070877636361698

In [6]:
from sklearn.model_selection import cross_val_score

print('Cross Validation Score:')
print(f'Linear: {cross_val_score(linearReg, inputs, labels)}')
print(f'Ridge: {cross_val_score(ridgeRegFit, inputs, labels)}')
print(f'Lasso: {cross_val_score(lassoFit, inputs, labels)}')

Cross Validation Score:
Linear: [0.8810947  0.88581652 0.88975659 0.92146822 0.88368981]
Ridge: [0.88137519 0.88583137 0.89018479 0.92147377 0.88438868]
Lasso: [0.8839591  0.87955345 0.89054068 0.90802675 0.88338246]


## Optimization

### GridSearchCV

In [7]:
import pandas as pd
from sklearn import metrics


def test(models, data, iterations = 1000):
    results = {}
    for i in models:
        r2_train = []
        r2_test = []
        for j in range(iterations):
            X_train, X_test, y_train, y_test = train_test_split(data['X'],
                                                                data['Y'],
                                                                test_size= 0.2)
            r2_test.append(metrics.r2_score(y_test,
                                            models[i].fit(X_train,
                                                         y_train).predict(X_test)))
            r2_train.append(metrics.r2_score(y_train,
                                             models[i].fit(X_train,
                                                          y_train).predict(X_train)))
        results[i] = [np.mean(r2_train), np.mean(r2_test)]
    return pd.DataFrame(results)

In [8]:
from sklearn.linear_model import Lasso

models = {'OLS': LinearRegression(),
         'Lasso': Lasso(tol=100),
         'Ridge': Ridge(tol=100),}

test(models, data={'X': inputs, 'Y':labels})

Unnamed: 0,OLS,Lasso,Ridge
0,0.898586,0.894403,0.898441
1,0.892087,0.889853,0.892749


In [9]:
from sklearn.model_selection import GridSearchCV

lasso_params = {'alpha':[0.02, 0.024, 0.025, 0.026, 0.03]}
ridge_params = {'alpha':[200, 230, 250,265, 270, 275, 290, 300, 500]}

models2 = {'OLS': LinearRegression(),
           'Lasso': GridSearchCV(Lasso(tol=100),
                               param_grid=lasso_params).fit(inputs, labels).best_estimator_,
           'Ridge': GridSearchCV(Ridge(tol=100),
                               param_grid=ridge_params).fit(inputs, labels).best_estimator_,}


In [10]:
test(models2, data={'X': inputs, 'Y':labels})

Unnamed: 0,OLS,Lasso,Ridge
0,0.898739,0.89378,0.89748
1,0.89138,0.889258,0.892673


### Builtin Cross Validation Estimator

In [11]:
from sklearn.linear_model import LassoCV
lasso = LassoCV(tol=0.001)
lassoFit = lasso.fit(X_train, y_train.ravel())
y_pred_lasso = lassoFit.predict(X_test)
r2_score(y_pred_lasso, y_test)

0.9111681248964668

In [12]:
from sklearn.linear_model import RidgeCV
ridge = RidgeCV()
ridgeFit = ridge.fit(X_train, y_train.ravel())
y_pred_ridge = ridgeFit.predict(X_test)
r2_score(y_pred_ridge, y_test)

0.9128766246831654

In [13]:

print('Cross Validation Score:')
print(f'RidgeCV: {cross_val_score(ridgeRegFit, inputs, labels.ravel())}')
print(f'LassoCV: {cross_val_score(lassoFit, inputs, labels.ravel())}')

Cross Validation Score:
RidgeCV: [0.88137519 0.88583137 0.89018479 0.92147377 0.88438868]
LassoCV: [0.88413828 0.88172001 0.89442328 0.9216698  0.88497   ]
