# Penalized Regression Approach

Lasso Regression is demonstrated in this notebook

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn import linear_model

In [3]:
tickers = ['SPY', 'IEF', 'UUP', 'GLD']
data = get_pricing(symbols(tickers), start_date='2007-4-1', end_date='2009-8-1', 
                   fields='close_price', frequency='daily')
data.columns = [ticker.symbol for ticker in data.columns]
data.index.name = 'Date'

In [4]:
data.tail()

Unnamed: 0_level_0,SPY,IEF,UUP,GLD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2009-07-27 00:00:00+00:00,98.35,89.65,23.42,93.71
2009-07-28 00:00:00+00:00,98.06,89.86,23.49,92.1
2009-07-29 00:00:00+00:00,97.658,89.89,23.67,91.19
2009-07-30 00:00:00+00:00,98.71,90.28,23.62,91.6
2009-07-31 00:00:00+00:00,98.83,91.21,23.32,93.36


In [5]:
data[:-1].tail()

Unnamed: 0_level_0,SPY,IEF,UUP,GLD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2009-07-24 00:00:00+00:00,98.06,90.12,23.44,93.45
2009-07-27 00:00:00+00:00,98.35,89.65,23.42,93.71
2009-07-28 00:00:00+00:00,98.06,89.86,23.49,92.1
2009-07-29 00:00:00+00:00,97.658,89.89,23.67,91.19
2009-07-30 00:00:00+00:00,98.71,90.28,23.62,91.6


In [6]:
res_1m = []
for e in data.columns:
    res_1m.append(data[e].pct_change(20)[1:])
res_1m = pd.DataFrame(res_1m).T    
res_1m.columns = [e + '_1m' for e in data.columns]
res_1m = res_1m.dropna()

In [7]:
res_3m = []
for e in data.columns:
    res_3m.append(data[e].pct_change(60)[1:])
res_3m = pd.DataFrame(res_3m).T    
res_3m.columns = [e + '_3m' for e in data.columns]
res_3m = res_3m.dropna()

In [8]:
res_6m = []
for e in data.columns:
    res_6m.append(data[e].pct_change(120)[1:])
res_6m = pd.DataFrame(res_6m).T    
res_6m.columns = [e + '_6m' for e in data.columns]
res_6m = res_6m.dropna()

In [9]:
res_12m = []
for e in data.columns:
    res_12m.append(data[e].pct_change(240)[1:])
res_12m = pd.DataFrame(res_12m).T    
res_12m.columns = [e + '_12m' for e in data.columns]
res_12m = res_12m.dropna()

In [10]:
res = res_1m.join(res_3m).join(res_6m).join(res_12m)
res = res.dropna()
res.head()

Unnamed: 0_level_0,SPY_1m,IEF_1m,UUP_1m,GLD_1m,SPY_3m,IEF_3m,UUP_3m,GLD_3m,SPY_6m,IEF_6m,UUP_6m,GLD_6m,SPY_12m,IEF_12m,UUP_12m,GLD_12m
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2008-03-14 00:00:00+00:00,-0.042654,0.033119,-0.053538,0.100591,-0.104303,0.063305,-0.070765,0.26489,-0.146656,0.099234,-0.074197,0.365311,-0.078586,0.106803,-0.096029,0.499271
2008-03-17 00:00:00+00:00,-0.05302,0.035958,-0.058601,0.112395,-0.118225,0.065916,-0.069765,0.251356,-0.15157,0.103297,-0.081659,0.373964,-0.096848,0.113428,-0.10492,0.504856
2008-03-18 00:00:00+00:00,-0.012735,0.036334,-0.048955,0.053068,-0.079598,0.050849,-0.061635,0.216448,-0.116075,0.095501,-0.068587,0.332781,-0.059426,0.103823,-0.09334,0.443496
2008-03-19 00:00:00+00:00,-0.042248,0.047023,-0.048127,-0.000965,-0.105619,0.061826,-0.061953,0.184211,-0.139901,0.105528,-0.070475,0.292314,-0.083436,0.11689,-0.08925,0.393627
2008-03-20 00:00:00+00:00,-0.008129,0.036785,-0.030591,-0.036568,-0.099894,0.070646,-0.053906,0.121738,-0.123267,0.101645,-0.058584,0.236274,-0.063126,0.122919,-0.085043,0.348949


In [11]:
y = data['SPY'].pct_change()[1:][len(data['SPY'].pct_change()) - len(res):]
y.head()

Date
2008-03-17 00:00:00+00:00   -0.010456
2008-03-18 00:00:00+00:00    0.042381
2008-03-19 00:00:00+00:00   -0.022204
2008-03-20 00:00:00+00:00    0.023217
2008-03-24 00:00:00+00:00    0.015373
Freq: C, Name: SPY, dtype: float64

In [12]:
len(y)

348

In [13]:
y.head()

Date
2008-03-17 00:00:00+00:00   -0.010456
2008-03-18 00:00:00+00:00    0.042381
2008-03-19 00:00:00+00:00   -0.022204
2008-03-20 00:00:00+00:00    0.023217
2008-03-24 00:00:00+00:00    0.015373
Freq: C, Name: SPY, dtype: float64

In [14]:
y.tail()

Date
2009-07-27 00:00:00+00:00    0.002957
2009-07-28 00:00:00+00:00   -0.002949
2009-07-29 00:00:00+00:00   -0.004100
2009-07-30 00:00:00+00:00    0.010772
2009-07-31 00:00:00+00:00    0.001216
Freq: C, Name: SPY, dtype: float64

In [15]:
X = res.shift(1).dropna().copy()
X.head()

Unnamed: 0_level_0,SPY_1m,IEF_1m,UUP_1m,GLD_1m,SPY_3m,IEF_3m,UUP_3m,GLD_3m,SPY_6m,IEF_6m,UUP_6m,GLD_6m,SPY_12m,IEF_12m,UUP_12m,GLD_12m
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2008-03-17 00:00:00+00:00,-0.042654,0.033119,-0.053538,0.100591,-0.104303,0.063305,-0.070765,0.26489,-0.146656,0.099234,-0.074197,0.365311,-0.078586,0.106803,-0.096029,0.499271
2008-03-18 00:00:00+00:00,-0.05302,0.035958,-0.058601,0.112395,-0.118225,0.065916,-0.069765,0.251356,-0.15157,0.103297,-0.081659,0.373964,-0.096848,0.113428,-0.10492,0.504856
2008-03-19 00:00:00+00:00,-0.012735,0.036334,-0.048955,0.053068,-0.079598,0.050849,-0.061635,0.216448,-0.116075,0.095501,-0.068587,0.332781,-0.059426,0.103823,-0.09334,0.443496
2008-03-20 00:00:00+00:00,-0.042248,0.047023,-0.048127,-0.000965,-0.105619,0.061826,-0.061953,0.184211,-0.139901,0.105528,-0.070475,0.292314,-0.083436,0.11689,-0.08925,0.393627
2008-03-24 00:00:00+00:00,-0.008129,0.036785,-0.030591,-0.036568,-0.099894,0.070646,-0.053906,0.121738,-0.123267,0.101645,-0.058584,0.236274,-0.063126,0.122919,-0.085043,0.348949


In [16]:
len(X)

348

In [17]:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y)

In [18]:
reg = linear_model.Lasso(alpha = 0.001, normalize = True)
reg.fit(X_train, y_train)

Lasso(alpha=0.001, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=True, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [19]:
reg.coef_

array([-0.07336425,  0.        , -0.        , -0.13713848, -0.16293966,
       -0.        , -0.20379279, -0.01855312,  0.        , -0.03874942,
       -0.11470801, -0.00925334, -0.28321425,  0.10731897, -0.        ,
        0.12026823])

In [20]:
reg.intercept_

-3.2075857701468427e-17