- analysis borrowed from Deepika Singh https://www.pluralsight.com/guides/linear-lasso-ridge-regression-scikit-learn

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.linear_model import Lasso, LassoCV
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv('unemployment.csv')
df.drop('date', axis=1, inplace=True)
print(df.shape)
df.describe()

(574, 5)


Unnamed: 0,pce,pop,psavert,uempmed,unemploy
count,574.0,574.0,574.0,574.0,574.0
mean,4820.092683,257159.652662,8.567247,8.608711,7771.310105
std,3556.803613,36682.398508,2.964179,4.106645,2641.95918
min,506.7,198712.0,2.2,4.0,2685.0
25%,1578.3,224896.0,6.4,6.0,6284.0
50%,3936.85,253060.0,8.4,7.5,7494.0
75%,7626.325,290290.75,11.1,9.1,8685.5
max,12193.8,320402.295,17.3,25.2,15352.0


In [3]:
X = df.drop('unemploy', axis=1).values
Y = df['unemploy'].values.reshape(-1, 1)

In [4]:
X.shape

(574, 4)

In [59]:
Y.shape

(574, 1)

In [29]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [30]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

In [49]:
lr = LinearRegression()
lr.fit(X_train, Y_train)
print("coefficients", lr.coef_)
print("sum of coefficients", sum(lr.coef_[0]))
print("r^2", lr.score(X_test, Y_test))

coefficients [[-1.62932100e+00  1.70513648e-01  1.87386301e+02  5.68841866e+02]]
sum of coefficients 754.7693594720749
r^2 0.8200050811787633


In [48]:
X_train_scaled, X_test_scaled, Y_train_scaled, Y_test_scaled = train_test_split(X_scaled, Y_scaled)

In [33]:
lr_scaled = LinearRegression()
lr_scaled.fit(X_train, Y_train)
print("coefficients", lr_scaled.coef_)
print("sum of coefficients", sum(lr_scaled.coef_[0]))
print("r^2", lr_scaled.score(X_test, Y_test))

coefficients [[-1.62932100e+00  1.70513648e-01  1.87386301e+02  5.68841866e+02]]
sum of coefficients [-1.62932100e+00  1.70513648e-01  1.87386301e+02  5.68841866e+02]
r^2 0.8200050811787633


In [60]:
rr = Ridge()
rr.fit(X_train_scaled, Y_train_scaled) 
print("coefficients", rr.coef_)
print("sum of coefficients", sum(rr.coef_[0]))
print("r^2", rr.score(X_test_scaled, Y_test_scaled))

coefficients [[-1.72498085  1.85242135  0.14671624  0.88442994]]
sum of coefficients 1.158586693546428
r^2 0.8364473662444374


In [61]:
rr = RidgeCV()
rr.fit(X_train_scaled, Y_train_scaled) 
print("coefficients", rr.coef_)
print("sum of coefficients", sum(rr.coef_[0]))
print("r^2", rr.score(X_test_scaled, Y_test_scaled))

coefficients [[-2.07825003  2.23944363  0.19273588  0.88887506]]
sum of coefficients 1.242804543820299
r^2 0.8412586076723398


In [62]:
rr.alpha_

0.1

In [63]:
lasso = Lasso()
lasso.fit(X_train_scaled, Y_train_scaled) 
print("coefficients", lasso.coef_)
print("sum of coefficients", sum(lasso.coef_))
print("r^2", lasso.score(X_test_scaled, Y_test_scaled))

coefficients [ 0.  0. -0.  0.]
sum of coefficients 0.0
r^2 -0.0046029209699520734


In [64]:
lasso = LassoCV(alphas=(0.1, 1, 10))
lasso.fit(X_train_scaled, Y_train_scaled) 
print("coefficients", lasso.coef_)
print("sum of coefficients", sum(lasso.coef_))
print("r^2", lasso.score(X_test_scaled, Y_test_scaled))
print(lasso.alpha_)

coefficients [ 0.          0.02632588 -0.          0.75595152]
sum of coefficients 0.7822773915857435
r^2 0.7264019145089883
0.1


  y = column_or_1d(y, warn=True)


In [65]:
enet = ElasticNet(max_iter=10000)
enet.fit(X_train_scaled, Y_train_scaled)
print("coefficients", enet.coef_)
print("sum of coefficients", sum(enet.coef_))
print("r^2", enet.score(X_test_scaled, Y_test_scaled))

coefficients [ 0.          0.         -0.          0.25600765]
sum of coefficients 0.2560076537430048
r^2 0.3707873531045631


In [66]:
enet = ElasticNetCV(alphas=(0.1, 1, 10))
enet.fit(X_train_scaled, Y_train_scaled)
print("coefficients", enet.coef_)
print("sum of coefficients", sum(enet.coef_))
print("r^2", enet.score(X_test_scaled, Y_test_scaled))
print(enet.alpha_)

coefficients [ 0.          0.0947263  -0.          0.72092967]
sum of coefficients 0.8156559739627128
r^2 0.7218191225676411
0.1


  y = column_or_1d(y, warn=True)


In [68]:
best_model = None
best_score = 0
best_alpha = None
for alpha in np.arange(0.01, 10, 0.01):
    enet = ElasticNet(alpha=alpha, max_iter=100000)
    enet.fit(X_train_scaled, Y_train_scaled)
    score = enet.score(X_test_scaled, Y_test_scaled)
    if score > best_score:
        best_model = enet
        best_score = score
        best_alpha = alpha
        
print("best alpha", best_alpha)
print("coefficients", best_model.coef_)
print("sum of coefficients", sum(best_model.coef_))
print("r^2", best_model.score(X_test_scaled, Y_test_scaled))

best alpha 0.01
coefficients [-1.08111819  1.1419931   0.05297741  0.87277679]
sum of coefficients 0.9866291097627051
r^2 0.8121010339358256
