# __Final Project__

---

<br>

__DATA 5610__ <br>
Author:      Tyler J. Brough <br>
Last Update: May 2, 2022 <br>

---

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from arch.bootstrap import SPA

In [2]:
%matplotlib inline

<br>
<br>

## __Step 0: Set Initial Conditions__

<br>

In [3]:
## Coefficient of relative risk aversion in the investor's power-utility loss
γ = 2.0

## Sample split: all observations, and the length of each moving training window
n_total, n_train = 384, 120

## Observations remaining after the first training window
n_predict = n_total - n_train

## Count of competing forecasting models (columns of the model-loss matrix)
M = 4

<br>
<br>

## __Step 1: Read Data__

<br>

In [4]:
#### 1. Load the pre-cleaned data set from the working directory
df = pd.read_csv(filepath_or_buffer='final_project_cleaned_data.csv')

In [5]:
## Dimensions of the loaded frame as (rows, columns)
df.shape

(384, 12)

In [6]:
## Fail fast if the file does not hold the expected number of rows
assert len(df) == n_total

In [7]:
## Preview the first rows of the data
df.head()

Unnamed: 0,date,dp,dy,ep,bm,ntis,tbl,ltr,tms,dfy,infl,ret
0,199001,-3.464718,-3.443527,-2.737775,0.390455,-0.012334,0.0763,-0.0006,0.0053,0.0096,0.001589,-0.067661
1,199002,-3.385516,-3.456816,-2.68412,0.414971,-0.013897,0.0764,-0.0343,0.0101,0.0095,0.010309,0.013381
2,199003,-3.386188,-3.377685,-2.710584,0.409173,-0.011729,0.0774,-0.0025,0.0102,0.0092,0.00471,0.026588
3,199004,-3.402375,-3.378409,-2.75284,0.471334,-0.010291,0.079,-0.0044,0.0099,0.0084,0.005469,-0.024504
4,199005,-3.36501,-3.392265,-2.731913,0.480284,-0.010149,0.0777,-0.0202,0.0147,0.0084,0.001554,0.097419


In [8]:
## Preview the last rows of the data
df.tail()

Unnamed: 0,date,dp,dy,ep,bm,ntis,tbl,ltr,tms,dfy,infl,ret
379,202108,-4.322196,-4.299703,-3.286608,0.187005,0.016079,0.0005,0.0305,0.0127,0.0067,0.004811,0.0306
380,202109,-4.342866,-4.314288,-3.282043,0.184756,0.014846,0.0005,-0.0035,0.0123,0.0069,0.002066,-0.046076
381,202110,-4.286281,-4.335018,-3.201224,0.193036,0.015598,0.0004,-0.025,0.0133,0.007,0.002716,0.07051
382,202111,-4.346731,-4.279873,-3.22626,0.182389,0.013368,0.0005,0.0051,0.0153,0.0067,0.008308,-0.007256
383,202112,-4.331997,-4.340366,-3.177747,0.189455,0.01564,0.0005,0.021,0.0151,0.0066,0.004913,0.043485


<br>
<br>

## __Step 2: Define the Loss Function__

<br>

In [9]:
## Define the isoelastic (power) utility loss function
def loss(w, a):
    """Isoelastic (power / CRRA) loss: the negative of the utility of wealth.

    Parameters
    ----------
    w : float or numpy.ndarray
        Gross wealth, e.g. ``1 + return``. Expected to be positive.
    a : float
        Coefficient of relative risk aversion.

    Returns
    -------
    float or numpy.ndarray
        ``-(w ** (1 - a)) / (1 - a)`` when ``a != 1``, and ``-log(w)`` when
        ``a == 1`` (the log-utility case, where the power formula would
        divide by zero). The result has the same shape as ``w``.
    """
    if a == 1.0:
        # Log utility replaces the power form, which is undefined at a == 1
        return -np.log(w)
    return (-(w ** (1.0 - a))) / (1.0 - a)

In [10]:
## Sanity check: with a = 2 the loss equals 1/w, so a wealth of 0.95 gives 1/0.95
loss(0.95, 2.0)

1.0526315789473684

In [11]:
## Sanity check: with a = 2, a wealth of 1.05 gives a loss of 1/1.05
loss(1.05, 2.0)

0.9523809523809523

<br>
<br>

## __Step 3: Calculate the Benchmark Losses__

<br>

In [12]:
## Realized returns for the evaluation sample (rows n_train + 1 onward)
r = df['ret'].to_numpy()[n_train + 1:]

## Benchmark signal is a constant +1 in every period
s = +1.0

## Benchmark gross wealth per period and the loss it implies
bm_ret = 1.0 + s * r
bm_losses = loss(bm_ret, γ)

In [13]:
## Number of benchmark losses (one per evaluated period)
bm_losses.shape

(263,)

In [14]:
## Peek at the first 25 benchmark losses
bm_losses[:25]

array([1.0177682 , 0.91034222, 1.03261515, 1.02281282, 0.97445087,
       1.01296083, 0.94109321, 1.05495025, 1.00413704, 1.08491405,
       0.99431253, 0.96864027, 1.10005192, 1.06804059, 0.9277944 ,
       0.99317687, 1.02512163, 1.00940462, 1.06773155, 1.0873832 ,
       0.98083449, 0.92696705, 0.99103019, 1.01453216, 1.01986805])

<br>
<br>

## __Step 4: Calculate the Model Losses__

### __(a) Initialize Data and Storage__

<br>

In [15]:
## Predictor matrix: the ten predictor columns, in this fixed order
X = df.loc[
    :,
    ['dp', 'dy', 'ep', 'bm', 'ntis', 'tbl', 'ltr', 'tms', 'dfy', 'infl'],
].to_numpy()

In [16]:
## Expect one row per observation and one column per predictor
X.shape

(384, 10)

In [17]:
## Target vector: the return column
Y = df.loc[:, 'ret'].to_numpy()

In [18]:
## Expect a 1-D array with one entry per observation
Y.shape

(384,)

In [19]:
## Storage for the losses: one row per evaluated period, one column per model
model_losses = np.zeros(shape=(n_predict - 1, M))

In [20]:
## Rows should match the length of bm_losses; columns should equal M
model_losses.shape

(263, 4)

<br>

### __(b) Kitchen Sink Regression Model__

<br>

In [21]:
## Kitchen-sink model: OLS of the return on every predictor column of X,
## re-estimated on a moving window of n_train observations.

## signals array: sign of the predicted return, one entry per evaluated period
s = np.zeros(n_predict - 1)

## model number (column of model_losses this model writes to)
m = 0

## Moving window predictions
for i in range(n_predict - 1):

    # window covers rows [ibeg, iend), i.e. always n_train observations
    ibeg = i
    iend = n_train + i

    # subset the data
    x = X[ibeg:iend, :]
    y = Y[ibeg:iend]

    # run the regression
    reg = LinearRegression().fit(x, y)

    # predict for row iend + 1, the same row of the data that r[i] comes from
    # NOTE(review): row iend is in neither the training window nor the
    # evaluated sample -- confirm that this one-row gap is intended
    x_pred = X[(iend + 1), :].reshape(1, -1)

    # predict() returns a length-1 array; index it explicitly, because storing
    # a 1-element array into s[i] relies on an implicit array-to-scalar
    # conversion that NumPy has deprecated
    s[i] = np.sign(reg.predict(x_pred)[0])

    # model loss: wealth from following the signal, scored by the loss function
    w = 1.0 + (s[i] * r[i])
    model_losses[i, m] = loss(w, γ)

<br>

### __(c) Lasso Regression__

<br>

In [22]:
## Lasso penalty parameters (one per Lasso model; model m uses α[m-1])
α = [0.0001, 0.0002, 0.001]

## Guard: the loop below indexes α[m-1] for m = 1, ..., M-1, so fail up front
## rather than with an IndexError part-way through the estimation
assert len(α) >= M - 1, "need one Lasso penalty for each of the M - 1 Lasso models"

## Loop over the remaining models (columns 1, ..., M-1 of model_losses)
for m in range(1, M):

    # signals array: sign of the predicted return, one entry per evaluated period
    s = np.zeros(n_predict - 1)

    # Moving window predictions
    for i in range(n_predict - 1):

        # window covers rows [ibeg, iend), i.e. always n_train observations
        ibeg = i
        iend = n_train + i

        # subset the data
        x = X[ibeg:iend, :]
        y = Y[ibeg:iend]

        # run the regression with this model's penalty
        reg = Lasso(alpha=α[m-1]).fit(x, y)

        # predict for row iend + 1, the same row of the data that r[i] comes from
        # NOTE(review): row iend is in neither the training window nor the
        # evaluated sample -- confirm that this one-row gap is intended
        x_pred = X[(iend + 1), :].reshape(1, -1)

        # predict() returns a length-1 array; index it explicitly, because
        # storing a 1-element array into s[i] relies on an implicit
        # array-to-scalar conversion that NumPy has deprecated
        s[i] = np.sign(reg.predict(x_pred)[0])

        # model loss: wealth from following the signal, scored by the loss function
        w = 1.0 + (s[i] * r[i])
        model_losses[i, m] = loss(w, γ)

<br>
<br>

## __Step 5: Run the SPA Test__

<br>

In [23]:
## Run the SPA test of the benchmark losses against the model losses.
## The p-values come from a bootstrap, so fix its seed to make them reproducible
## under Restart & Run All (values from an unseeded run will differ slightly).
spa = SPA(bm_losses, model_losses, reps=10_000, seed=5610)
spa.compute()
spa.pvalues

lower         0.6478
consistent    0.8463
upper         0.8463
dtype: float64

<br>


#### __NB:__ ___Make sure you know how to interpret these results!!!___