# Building Redge Regression From Scratch

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
teams = pd.read_csv("teams.csv")

In [10]:
# Split the dataset to train/test set, test set is compromised of 20%.
train,test = train_test_split(teams,test_size=0.2, random_state=1)

In [7]:
predictors = ["athletes", "events"]
target = ["medals"]

In [8]:
X = train[predictors].copy()
y = train[target].copy()

In [9]:
X

Unnamed: 0,athletes,events
1322,6,6
1872,119,80
953,4,4
1117,2,2
1993,43,25
...,...,...
1791,40,25
1096,36,23
1932,719,245
235,13,11


In [11]:
y

Unnamed: 0,medals
1322,0
1872,5
953,0
1117,0
1993,0
...,...
1791,1
1096,1
1932,264
235,0


In [13]:
# Standardize the dataset in order to work with it consistently.
x_mean = X.mean()
x_std = X.std()

In [14]:
X = (X - x_mean) / x_std

In [15]:
# Initialize the y intercept to 1.
X["intercept"] = 1

In [16]:
X = X[["intercept"] +predictors]

In [17]:
X.describe()

Unnamed: 0,intercept,athletes,events
count,1611.0,1611.0,1611.0
mean,1.0,-1.3386770000000001e-17,1.287852e-18
std,0.0,1.0,1.0
min,1.0,-0.5768883,-0.714393
25%,1.0,-0.5297371,-0.6123079
50%,1.0,-0.4197174,-0.4489717
75%,1.0,-0.02679027,0.183956
max,1.0,6.008571,4.634867


In [18]:
X.T

Unnamed: 0,1322,1872,953,1117,1993,385,1287,1831,0,1159,...,960,847,1669,715,905,1791,1096,1932,235,1061
intercept,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
athletes,-0.537596,0.35042,-0.553313,-0.56903,-0.246829,-0.482586,-0.537596,0.138239,-0.521879,-0.152527,...,-0.199678,-0.160386,-0.529737,-0.529737,-0.341132,-0.270405,-0.301839,5.065546,-0.482586,-0.19182
events,-0.612308,0.898552,-0.653142,-0.693976,-0.224384,-0.571474,-0.612308,0.102288,-0.571474,-0.163133,...,-0.285636,-0.101882,-0.612308,-0.591891,-0.367304,-0.224384,-0.265219,4.267361,-0.510223,0.041037


In [22]:
alpha = 2
I = np.identity(X.shape[1])
penalty = alpha * I
penalty

array([[2., 0., 0.],
       [0., 2., 0.],
       [0., 0., 2.]])

In [25]:
# Conventionally the first element of the penalty matrix is set to be 0.
# It canbe set to other numbers.
penalty[0][0] = 0

In [24]:
penalty

array([[0., 0., 0.],
       [0., 2., 0.],
       [0., 0., 2.]])

In [26]:
# Turn the ridge algorithm into code.
B = np.linalg.inv(X.T @ X + penalty) @ X.T @ y

In [27]:
B

Unnamed: 0,medals
0,10.691496
1,61.857734
2,-34.63292


In [28]:
B.index = ['intercept','athletes','events']

In [29]:
B

Unnamed: 0,medals
intercept,10.691496
athletes,61.857734
events,-34.63292


In [32]:
test_X = test[predictors]
test_X = (test_X - x_mean)/x_std
test_X['intercept'] = 1
test_X = test_X[['intercept'] + predictors]

predictions = test_X @ B

In [33]:
test_X

Unnamed: 0,intercept,athletes,events
309,1,-0.553313,-0.653142
285,1,0.594035,1.000637
919,1,-0.144668,0.102288
120,1,0.146098,0.531045
585,1,-0.301839,-0.122299
...,...,...,...
541,1,-0.380425,-0.408138
1863,1,-0.191820,0.143122
622,1,-0.058224,0.388126
1070,1,-0.569030,-0.693976


In [34]:
predictions

Unnamed: 0,medals
309,-0.914959
285,12.782156
919,-1.799893
120,1.337116
585,-3.744014
...,...
541,1.294285
1863,-6.130765
622,-6.352080
1070,-0.472980


In [35]:
def ridge_fit(train, predictors, target, alpha):
    X = train[predictors].copy()
    y = train[target].copy()
    
    x_mean = X.mean()
    x_std = X.std()
    
    X = (X - x_mean) / x_std
    X['intercept'] = 1
    X = X[['intercept'] + predictors]
    
    penalty = alpha * np.identity(X.shape[1])
    penalty[0][0] = 0
    
    B = np.linalg.inv(X.T @ X + penalty) @ X.T @ y
    B.index = ['intercept','athletes','events']
    return B, x_mean, x_std

In [45]:
def ridge_predict(test, predictors, x_mean, x_std, B):
    test_X = test[predictors]
    test_X = (test_X - x_mean) / x_std
    test_X['intercept'] = 1
    test_X = test_X[['intercept'] + predictors]
    
    predictions = test_X @ B
    return predictions

In [37]:
from sklearn.linear_model import Ridge
ridge = Ridge(alpha = alpha)

In [38]:
ridge.fit(X[predictors],y)

Ridge(alpha=2)

In [39]:
ridge.coef_

array([[ 61.85773366, -34.63292036]])

In [40]:
ridge.intercept_

array([10.69149597])

In [41]:
sklearn_predictions = ridge.predict(test_X[predictors])

In [42]:
predictions - sklearn_predictions

Unnamed: 0,medals
309,3.053113e-14
285,-1.243450e-13
919,-9.281464e-14
120,-1.332268e-13
585,-7.460699e-14
...,...
541,7.105427e-15
1863,-1.323386e-13
622,-1.705303e-13
1070,3.558265e-14


In [43]:
from sklearn.metrics import mean_absolute_error

In [51]:
# Try out different lambda(alpha) values.
mae = []
alphas = [10**i for i in range(-2, 4)]

for alpha in alphas:
    B, x_mean, x_std = ridge_fit(train, predictors, target, alpha)
    predictions = ridge_predict(test_X, predictors, x_mean, x_std,B)
    
    mae.append(mean_absolute_error(test[target],predictions))

In [52]:
mae

[12.435561847668394,
 12.432362611419403,
 12.402257619900604,
 13.35880207713496,
 16.752145518401402,
 14.854922695049884]

In [50]:
alphas

[0.01, 0.1, 1, 10, 100, 1000]