# Model Development

In [26]:
%load_ext autoreload
%autoreload 2
p = print

import os

import pandas as pd

from sklearn.linear_model import LinearRegression, Lasso, MultiTaskLassoCV, Ridge, RidgeCV, MultiTaskElasticNet, MultiTaskElasticNetCV, ElasticNet, ElasticNetCV
from sklearn.metrics import mean_absolute_error, mean_squared_error

from crypr.util import get_project_path

import pickle

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
# Import Data.
#
# Experiment settings. SYM, Tx, Ty and MAX_LAG also select which processed
# CSV files are read below (MAX_LAG fills the `flag` slot of the file name).
SYM = 'BTC'
Ty = 1
Tx = 72
MAX_LAG = 72
wavelet = 'haar_smooth'

data_dir = os.path.join(get_project_path(), 'data', 'processed')
models_dir = os.path.join(get_project_path(), 'models')

# The four split files are named from the same settings, so build the
# format arguments once and reuse them for every file name.
name_args = (SYM, Tx, Ty, MAX_LAG)

X_train = pd.read_csv(os.path.join(data_dir, 'X_train_{}_tx{}_ty{}_flag{}.csv'.format(*name_args)))
Y_train = pd.read_csv(os.path.join(data_dir, 'y_train_{}_tx{}_ty{}_flag{}.csv'.format(*name_args)))
X_test = pd.read_csv(os.path.join(data_dir, 'X_test_{}_tx{}_ty{}_flag{}.csv'.format(*name_args)))
Y_test = pd.read_csv(os.path.join(data_dir, 'y_test_{}_tx{}_ty{}_flag{}.csv'.format(*name_args)))

# Number of training columns divided by Tx, as a whole number.
N_FEATURES = X_train.shape[1] // Tx

# Sanity check: report the training matrix dimensions and preview the first rows.
p(X_train.shape)
X_train.head()

(5563, 1224)


Unnamed: 0,var1(t-72),var2(t-72),var3(t-72),var4(t-72),var5(t-72),var6(t-72),var7(t-72),var8(t-72),var9(t-72),var10(t-72),...,var8(t-1),var9(t-1),var10(t-1),var11(t-1),var12(t-1),var13(t-1),var14(t-1),var15(t-1),var16(t-1),var17(t-1)
0,13007702.37,1938.1,6677.9,6698.74,6674.83,6663.81,-0.045374,24430240.0,3635.216667,19890710.0,...,11275720.0,1713.281667,8984999.0,1362.789167,9462649.0,1433.0275,11766740.0,1789.942083,13479860.0,2042.300278
1,9378691.92,1399.77,6674.92,6688.86,6687.7,6671.52,0.192814,13536820.0,2016.126667,19900980.0,...,9744978.0,1481.57,8991674.0,1364.466667,8517821.0,1289.8175,11083820.0,1684.964167,13391040.0,2029.368611
2,8424503.46,1254.9,6687.78,6696.86,6693.25,6686.64,0.082988,11255090.0,1677.52,17924970.0,...,8338396.0,1266.54,8918546.0,1353.785,8440196.0,1278.243333,10842230.0,1647.623333,13349900.0,2023.472639
3,11276851.07,1681.92,6693.36,6697.32,6694.09,6673.0,0.01255,11233410.0,1674.92,17636220.0,...,8216904.0,1250.025,9514413.0,1445.685833,8599417.0,1303.322917,10888180.0,1654.636875,13412890.0,2033.546944
4,27926930.49,4202.29,6694.09,6697.69,6598.38,6562.33,-1.429769,13421750.0,2007.793333,19221090.0,...,7977636.0,1214.376667,9550916.0,1451.7975,8613844.0,1305.853333,10861040.0,1650.345417,13353620.0,2025.028333


# Let's try a simple linear regression model

In [18]:
# Baseline: fit an ordinary least-squares model on the training split and
# predict on the held-out test split. `fit` returns the estimator itself, so
# the construction and fit can be chained.
lr_model = LinearRegression().fit(X_train, Y_train)
lr_predict = lr_model.predict(X_test)

# Report mean absolute error first, then mean squared error, on the test split.
for metric in (mean_absolute_error, mean_squared_error):
    p(metric(y_true=Y_test, y_pred=lr_predict))

0.35858397810378945
0.33283840653349356


In [21]:
# Save model
# Persist the fitted linear regression under models_dir, keyed by the symbol.
# Create the target directory first: open() does not create missing parent
# directories and would otherwise raise FileNotFoundError.
os.makedirs(models_dir, exist_ok=True)
with open(os.path.join(models_dir, 'linear_model_{}.pkl'.format(SYM)), 'wb') as output_file:
    # pickle.dump writes to the file and returns None, so nothing is captured.
    pickle.dump(lr_model, output_file)

# Other Linear Regression Models

## Lasso

In [23]:
# Single-value grid for the Lasso penalty strength. The model below reads its
# alpha from this dict so the declared parameters and the fitted model cannot
# drift apart.
lasso_params = {
    'alpha': [0.01],
}

# max_iter is passed explicitly to set the solver's iteration cap.
lasso_model = Lasso(alpha=lasso_params['alpha'][0], max_iter=10000)
lasso_model = lasso_model.fit(X=X_train, y=Y_train)

lasso_predict = lasso_model.predict(X_test)

# scikit-learn metrics take (y_true, y_pred); pass by keyword so the roles are
# explicit and match the linear regression cell.
p(mean_absolute_error(y_true=Y_test, y_pred=lasso_predict))
p(mean_squared_error(y_true=Y_test, y_pred=lasso_predict))

0.26849368264462337
0.25568177806059206




## Ridge


In [25]:
# Ridge regression with penalty strength alpha=0.01, fitted on the training split.
ridge_model = Ridge(alpha=0.01)
ridge_model = ridge_model.fit(X=X_train, y=Y_train)

ridge_predict = ridge_model.predict(X_test)

# scikit-learn metrics take (y_true, y_pred); pass by keyword so the roles are
# explicit and match the linear regression cell.
p(mean_absolute_error(y_true=Y_test, y_pred=ridge_predict))
p(mean_squared_error(y_true=Y_test, y_pred=ridge_predict))

0.2920772362272177
0.2816230918105076


## Elastic Net

In [27]:
# Candidate penalty strengths for the elastic net. Only one value is listed, so
# the cross-validated estimator below has a single alpha to choose from.
enet_params = {
    'alpha': [1e-7],
}

enet_model = MultiTaskElasticNetCV(alphas=enet_params['alpha'])
enet_model = enet_model.fit(X=X_train, y=Y_train)

enet_predict = enet_model.predict(X_test)

# scikit-learn metrics take (y_true, y_pred); pass by keyword so the roles are
# explicit and match the linear regression cell.
p(mean_absolute_error(y_true=Y_test, y_pred=enet_predict))
p(mean_squared_error(y_true=Y_test, y_pred=enet_predict))



0.26252514468639
0.2532051706741615


