- analysis borrowed from Deepika Singh https://www.pluralsight.com/guides/linear-lasso-ridge-regression-scikit-learn

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

In [2]:
df = pd.read_csv('unemployment.csv')
df.drop('date', axis=1, inplace=True)
print(df.shape)
df.describe()

(574, 5)


Unnamed: 0,pce,pop,psavert,uempmed,unemploy
count,574.0,574.0,574.0,574.0,574.0
mean,4820.092683,257159.652662,8.567247,8.608711,7771.310105
std,3556.803613,36682.398508,2.964179,4.106645,2641.95918
min,506.7,198712.0,2.2,4.0,2685.0
25%,1578.3,224896.0,6.4,6.0,6284.0
50%,3936.85,253060.0,8.4,7.5,7494.0
75%,7626.325,290290.75,11.1,9.1,8685.5
max,12193.8,320402.295,17.3,25.2,15352.0


In [3]:
X = df.drop('unemploy', axis=1)
Y = df['unemploy']

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

In [6]:
lr = LinearRegression()
lr.fit(X_train, Y_train)
print("coefficients", lr.coef_)
print("sum of coefficients", sum(lr.coef_))
print("r^2", lr.score(X_test, Y_test))

coefficients [-1.60601859e+00  1.66138478e-01  1.97114435e+02  5.81370257e+02]
sum of coefficients 777.0448120714989
r^2 0.8439103546480788


In [7]:
rr = Ridge(alpha=0.05)
rr.fit(X_train, Y_train) 
print("coefficients", rr.coef_)
print("sum of coefficients", sum(rr.coef_))
print("r^2", rr.score(X_test, Y_test))

coefficients [-1.60600021e+00  1.66137385e-01  1.97113584e+02  5.81361079e+02]
sum of coefficients 777.0348003744517
r^2 0.8439110106529504


In [19]:
lasso = Lasso(alpha=0.05)
lasso.fit(X_train, Y_train) 
print("coefficients", lasso.coef_)
print("sum of coefficients", sum(lasso.coef_))
print("r^2", lasso.score(X_test, Y_test))

coefficients [-1.60599380e+00  1.66134091e-01  1.97087138e+02  5.81372079e+02]
sum of coefficients 777.01935810472
r^2 0.8439113567838662


In [9]:
enet = ElasticNet(alpha=0.05, max_iter=10000)
enet.fit(X_train, Y_train)
print("coefficients", enet.coef_)
print("sum of coefficients", sum(enet.coef_))
print("r^2", enet.score(X_test, Y_test))

coefficients [-1.60206416e+00  1.65900784e-01  1.96909196e+02  5.79407527e+02]
sum of coefficients 774.8805605055481
r^2 0.8440470139811491


In [21]:
best_model = None
best_score = 0
best_alpha = None
for alpha in np.arange(0.01, 10, 0.01):
    enet = ElasticNet(alpha=alpha, max_iter=100000)
    enet.fit(X_train, Y_train)
    score = enet.score(X_test, Y_test)
    if score > best_score:
        best_model = enet
        best_score = score
        best_alpha = alpha
        
print("best alpha", best_alpha)
print("coefficients", best_model.coef_)
print("sum of coefficients", sum(best_model.coef_))
print("r^2", best_model.score(X_test, Y_test))

best alpha 0.8200000000000001
coefficients [-1.54363474e+00  1.62210516e-01  1.92272904e+02  5.51271905e+02]
sum of coefficients 742.1633848227409
r^2 0.8450201772116314
