In [1]:
# Importing Libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso

In [2]:
# Ignoring Warnings
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the raw data set from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='data.csv')

In [4]:
# Remove the ID column; it is not used as a feature or a target
df = df.drop(columns='ID')

In [5]:
# Data Set Dimensions: (rows, columns) of the frame after the ID column was dropped
df.shape

(864863, 2)

In [6]:
# Number of missing values per column
df.isna().sum()

T_degC    10963
Salnty    47354
dtype: int64

In [7]:
# Drop every row that has a missing value in any column.
# `df` is rebound to the filtered frame instead of using inplace=True, so the
# frame object produced by the earlier cells is not mutated behind their back.
df = df.dropna(axis=0, how='any')

In [8]:
# Sanity check: missing values per column once the null rows are gone
df.isna().sum()

T_degC    0
Salnty    0
dtype: int64

In [9]:
# Dimensions (rows, columns) of the dataset after removing every row that contained a null
df.shape

(814247, 2)

In [10]:
# Renumber the rows 0..n-1, since dropping the null rows left gaps in the index.
# drop=True discards the old index instead of keeping it as a new column.
# `df` is rebound rather than modified with inplace=True, so the cell does not
# mutate a frame that earlier cells already displayed.
df = df.reset_index(drop=True)

In [11]:
# Preview the first five rows of the cleaned data set
df.head(5)

Unnamed: 0,T_degC,Salnty
0,10.5,33.44
1,10.46,33.44
2,10.46,33.437
3,10.45,33.42
4,10.45,33.421


In [12]:
# Feature array X (T_degC column) and target array y (Salnty column).
# Columns are selected by name rather than by position, so the split does not
# silently pick the wrong data if the column order of the CSV ever changes.
# to_numpy() is the accessor pandas recommends over .values.
X = df['T_degC'].to_numpy()  # 1-D here; reshaped to a column vector in the next cell
y = df['Salnty'].to_numpy()

In [13]:
# Turn X into a single-column 2-D array (one row per sample), the layout the estimators are fitted on
X = np.reshape(X, (-1, 1))

In [14]:
# Linear Regression: unfitted estimator with default settings; it is passed to
# cross_val_score in the next cell, which fits it once per fold
lin_reg = LinearRegression()

In [15]:
# 5-fold cross-validated score of the linear model.
# The scorer is 'neg_mean_squared_error', so every value is the NEGATED MSE
# (always <= 0; closer to 0 is better).
mse = cross_val_score(
    estimator=lin_reg,
    X=X,
    y=y,
    cv=5,
    scoring='neg_mean_squared_error',
)
mean_mse = mse.mean()  # average of the per-fold scores
mean_mse

-0.1748181221229268

In [16]:
# Lasso Regression: unfitted estimator with default settings; it serves as the
# base estimator whose alpha is tuned by the grid search in the next cell
lasso = Lasso()

In [17]:
# Hyper-parameter tuning for the Lasso model

# Candidate regularisation strengths (alpha) to evaluate
parameters = dict(
    alpha=[1e-15, 1e-10, 1e-8, 0.001, 0.01, 1, 5, 10, 20, 30, 35, 40, 50, 65, 80, 90, 100],
)

# Exhaustive 5-fold search over the grid, scored by negated MSE, with n_jobs=-1
lasso_regressor = GridSearchCV(
    lasso,
    parameters,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)

# Run the search on the full data; the fitted search object is the cell's displayed output
lasso_regressor.fit(X, y)

GridSearchCV(cv=5, estimator=Lasso(), n_jobs=-1,
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.001, 0.01, 1, 5, 10,
                                   20, 30, 35, 40, 50, 65, 80, 90, 100]},
             scoring='neg_mean_squared_error')

In [18]:
# Best alpha found by the search and its mean cross-validated score
# (negated MSE, so closer to 0 is better)
lasso_regressor.best_params_, lasso_regressor.best_score_

({'alpha': 0.01}, -0.17476260712980246)

In [19]:
# All Scores: the raw cv_results_ dict of the grid search; each entry is an array
# with one value per candidate alpha (fit/score timings are visible in the output)
lasso_regressor.cv_results_

{'mean_fit_time': array([1.75997295, 0.15045028, 0.15369263, 0.14940567, 0.14297466,
        0.13535333, 0.15339413, 0.13271418, 0.13021379, 0.13518443,
        0.13573403, 0.16693459, 0.13327603, 0.12789745, 0.1292469 ,
        0.13469334, 0.13189182]),
 'std_fit_time': array([3.01243745, 0.00889538, 0.00798242, 0.01059907, 0.00313816,
        0.0106145 , 0.01803683, 0.01129662, 0.00951872, 0.00958291,
        0.00890077, 0.02362811, 0.01609951, 0.00512614, 0.00663212,
        0.01212141, 0.00507562]),
 'mean_score_time': array([0.01158442, 0.0160625 , 0.0137238 , 0.01553345, 0.01060705,
        0.01672325, 0.01327662, 0.01440921, 0.01462255, 0.01314745,
        0.01306357, 0.02021084, 0.01148753, 0.01285319, 0.01249638,
        0.01271486, 0.01540651]),
 'std_score_time': array([0.00262658, 0.00333764, 0.00267285, 0.00218953, 0.00083312,
        0.00778847, 0.00172307, 0.00189645, 0.00223966, 0.00169897,
        0.00131549, 0.00536519, 0.00247399, 0.00104741, 0.0017369 ,
        0.00