In [1]:
# Importing Libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge

In [2]:
# Ignoring Warnings
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Importing Data
# Reads 'data.csv' (a path relative to the current working directory) into a DataFrame.
df = pd.read_csv('data.csv')

In [4]:
# Discard the 'ID' column so only the measurement columns remain
df = df.drop(columns='ID')

In [5]:
# Data Set Dimensions
# (rows, columns) of the frame before any null handling is applied
df.shape

(864863, 2)

In [6]:
# Number of missing values in each column
df.isna().sum()

T_degC    10963
Salnty    47354
dtype: int64

In [7]:
# Discard every row that has a missing value in any column
# (dropna defaults: axis=0, how='any'); rebinding avoids inplace mutation
df = df.dropna()

In [8]:
# Re-check the per-column missing-value counts after the row drop
df.isna().sum()

T_degC    0
Salnty    0
dtype: int64

In [9]:
# Dimensions of Dataset after removing all Null containing Rows
# Compare with the earlier shape to see how many rows the null drop removed
df.shape

(814247, 2)

In [10]:
# Renumber the rows from 0 and discard the old index, which has gaps after the row drop
df = df.reset_index(drop=True)

In [11]:
# Dataset
# Preview of the first rows of the cleaned frame (head() with its default row count)
df.head()

Unnamed: 0,T_degC,Salnty
0,10.5,33.44
1,10.46,33.44
2,10.46,33.437
3,10.45,33.42
4,10.45,33.421


In [12]:
# Feature array from the first column, target array from the second column
X = df.iloc[:, 0].values
y = df.iloc[:, 1].values

In [13]:
# Turn the 1-D feature array into a single-column 2-D array (n_samples, 1)
X = np.reshape(X, (-1, 1))

In [14]:
# Linear Regression
# Baseline estimator with scikit-learn's default settings; it is only instantiated
# here, not fitted.
lin_reg = LinearRegression()

In [15]:
# 5-fold cross-validated error of the linear model.
# The 'neg_mean_squared_error' scorer reports the MSE negated, so the values
# held in `mse` and `mean_mse` are <= 0 (closer to zero is better).
mse = cross_val_score(
    estimator=lin_reg,
    X=X,
    y=y,
    scoring='neg_mean_squared_error',
    cv=5,
)
mean_mse = mse.mean()
mean_mse

-0.1748181221229268

In [16]:
# Ridge Regression
# Unfitted Ridge estimator with default settings; it serves as the base estimator
# for the grid search over alpha.
ridge = Ridge()

In [17]:
# Hyper-Parameter Tuning for Ridge

# Candidate values for the regularisation strength `alpha`
parameters = dict(
    alpha=[
        1e-15, 1e-10, 1e-8, 1e-3, 1e-2,
        1, 5, 10, 20, 30, 35, 40,
        50, 65, 80, 90, 100,
    ],
)

# 5-fold grid search over the candidates, scored by negated MSE;
# n_jobs=-1 asks scikit-learn to use all available processors
ridge_regressor = GridSearchCV(
    ridge,
    parameters,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)

# Run the search on the full feature/target arrays
ridge_regressor.fit(X, y)

GridSearchCV(cv=5, estimator=Ridge(), n_jobs=-1,
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.001, 0.01, 1, 5, 10,
                                   20, 30, 35, 40, 50, 65, 80, 90, 100]},
             scoring='neg_mean_squared_error')

In [18]:
# Selected alpha and its mean cross-validated score (negated MSE)
# NOTE(review): the recorded output picks alpha=100, the largest value in the
# grid — consider widening the grid to confirm the optimum is bracketed.
ridge_regressor.best_params_, ridge_regressor.best_score_

({'alpha': 100}, -0.17481798588184377)

In [19]:
# All Scores
# Full cross-validation results of the grid search (fit/score timings and scores
# for each alpha candidate).
# NOTE(review): this displays the entire dict; wrapping it in a DataFrame would
# be easier to read.
ridge_regressor.cv_results_

{'mean_fit_time': array([0.0823051 , 0.09537344, 0.10454311, 0.08693705, 0.09922848,
        0.10411987, 0.09793496, 0.10348382, 0.08182235, 0.10781336,
        0.08409147, 0.09736767, 0.10905814, 0.09561691, 0.09388289,
        0.09170752, 0.09492865]),
 'std_fit_time': array([0.00629788, 0.01817962, 0.0109034 , 0.00911574, 0.01651858,
        0.01017171, 0.01374294, 0.01360631, 0.005833  , 0.00859432,
        0.0028393 , 0.01876403, 0.00332936, 0.00925701, 0.0135169 ,
        0.0097038 , 0.01474106]),
 'mean_score_time': array([0.00877147, 0.01174784, 0.01150756, 0.01227884, 0.01167102,
        0.00979028, 0.01280742, 0.0137681 , 0.01106868, 0.01545343,
        0.0102778 , 0.01415401, 0.01249862, 0.01558137, 0.01217694,
        0.01677361, 0.00846858]),
 'std_score_time': array([0.00113884, 0.00457239, 0.00434915, 0.00619575, 0.00234813,
        0.00147166, 0.00546454, 0.00276625, 0.00357376, 0.00375042,
        0.00209151, 0.00362235, 0.00341644, 0.00526228, 0.00264618,
        0.00