# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt

from sklearn.preprocessing import StandardScaler
from scipy.stats.mstats import winsorize

from statistics import quantiles

from sklearn.metrics import mean_absolute_error, mean_squared_error

import os
#import warnings
#warnings.simplefilter(action = "module", category=FutureWarning)


%matplotlib inline

# Stored Variables (From Model Files)

## LSTM Evaluation Variables

In [2]:
# Restore the LSTM error metrics that were persisted with IPython's %store
# (one variable per asset class -- crypto, sp500, comm -- and per metric -- MSE, RMSE, MAE).
# NOTE(review): these must have been stored beforehand, presumably by the LSTM model notebook.
%store -r lstm_crypto_mse_dict
%store -r lstm_crypto_rmse_dict
%store -r lstm_crypto_mae_dict

%store -r lstm_sp500_mse_dict
%store -r lstm_sp500_rmse_dict
%store -r lstm_sp500_mae_dict

%store -r lstm_comm_mse_dict
%store -r lstm_comm_rmse_dict
%store -r lstm_comm_mae_dict

## GRU Evaluation Variables

In [3]:
# Restore the GRU error metrics that were persisted with IPython's %store
# (one variable per asset class -- crypto, sp500, comm -- and per metric -- MSE, RMSE, MAE).
# NOTE(review): these must have been stored beforehand, presumably by the GRU model notebook.
%store -r gru_crypto_mse_dict
%store -r gru_crypto_rmse_dict
%store -r gru_crypto_mae_dict

%store -r gru_sp500_mse_dict
%store -r gru_sp500_rmse_dict
%store -r gru_sp500_mae_dict

%store -r gru_comm_mse_dict
%store -r gru_comm_rmse_dict
%store -r gru_comm_mae_dict

## ARIMA Evaluation Variables

In [4]:
# Restore the ARIMA error metrics that were persisted with IPython's %store
# (one variable per asset class -- crypto, sp500, comm -- and per metric -- MSE, RMSE, MAE).
# NOTE(review): these must have been stored beforehand, presumably by the ARIMA model notebook.
%store -r arima_crypto_mse_dict
%store -r arima_crypto_rmse_dict
%store -r arima_crypto_mae_dict

%store -r arima_sp500_mse_dict
%store -r arima_sp500_rmse_dict
%store -r arima_sp500_mae_dict

%store -r arima_comm_mse_dict
%store -r arima_comm_rmse_dict
%store -r arima_comm_mae_dict

In [5]:
# Rebuild every restored ARIMA metric container as a plain dict; the ** unpacking used
# later to merge the asset classes needs mappings.
# NOTE(review): presumably these were stored as lists of (name, value) pairs -- confirm.
(
    arima_crypto_mse_dict,
    arima_crypto_rmse_dict,
    arima_crypto_mae_dict,
) = map(dict, (arima_crypto_mse_dict, arima_crypto_rmse_dict, arima_crypto_mae_dict))

(
    arima_sp500_mse_dict,
    arima_sp500_rmse_dict,
    arima_sp500_mae_dict,
) = map(dict, (arima_sp500_mse_dict, arima_sp500_rmse_dict, arima_sp500_mae_dict))

(
    arima_comm_mse_dict,
    arima_comm_rmse_dict,
    arima_comm_mae_dict,
) = map(dict, (arima_comm_mse_dict, arima_comm_rmse_dict, arima_comm_mae_dict))

# Creating DataFrames

## Crypto DataFrame

In [6]:
# Build a Date x asset table of daily percentage changes from the per-coin CSV files.
folder_path = os.path.abspath("C:\\Users\\Corey Feld\\Documents\\Year2Semester1\\FIN611\\ResearchProject\\CryptoData")
files = os.listdir(folder_path)

crypto_frames = []

for file in files:
    # Join against folder_path so the directory is spelled out only once.
    df_temp = pd.read_csv(os.path.join(folder_path, file))
    df_temp["Date"] = pd.to_datetime(df_temp["Date"]).dt.date
    # Daily return in percent, derived from consecutive closing prices.
    df_temp["PercentChange"] = df_temp["Close"].pct_change() * 100
    # Asset name = file name minus its last four characters (presumably the ".csv" suffix).
    df_temp["StockName"] = str(file[:-4])
    df_temp = df_temp[["Date", "PercentChange", "StockName"]]
    crypto_frames.append(df_temp)

# Concatenate once instead of growing a DataFrame inside the loop, which re-copies
# all previously loaded rows on every iteration.
df_crypto = pd.concat(crypto_frames)
# Long -> wide: one row per date, one column per asset.
df_crypto = pd.pivot_table(data = df_crypto, values = "PercentChange", index = "Date", columns = "StockName")
df_crypto.head()

StockName,Basic Attention Token,Bitcoin Gold,Dash,Decentraland,Decred,Enjin Coin,Gnosis,Holo,KuCoin Token,Loopring,...,coin_ChainLink,coin_Dogecoin,coin_EOS,coin_Ethereum,coin_Iota,coin_Litecoin,coin_Monero,coin_Stellar,coin_Tron,coin_XRP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-04-30,,,,,,,,,,,...,,,,,,-1.993886,,,,
2013-05-01,,,,,,,,,,,...,,,,,,-11.532211,,,,
2013-05-02,,,,,,,,,,,...,,,,,,-11.287262,,,,
2013-05-03,,,,,,,,,,,...,,,,,,-9.699642,,,,
2013-05-04,,,,,,,,,,,...,,,,,,14.276614,,,,


## SP500 DataFrame

In [7]:
# Build a Date x asset table of daily percentage changes from the per-ticker CSV files.
folder_path = os.path.abspath("C:\\Users\\Corey Feld\\Documents\\Year2Semester1\\FIN611\\ResearchProject\\SP500Data")
files = os.listdir(folder_path)

sp500_frames = []

for file in files:
    # Join against folder_path so the directory is spelled out only once.
    df_temp = pd.read_csv(os.path.join(folder_path, file))
    df_temp["Date"] = pd.to_datetime(df_temp["Date"]).dt.date
    # Daily return in percent, derived from consecutive closing prices.
    df_temp["PercentChange"] = df_temp["Close"].pct_change() * 100
    # Asset name = file name minus its last four characters (presumably the ".csv" suffix).
    df_temp["StockName"] = str(file[:-4])
    df_temp = df_temp[["Date", "PercentChange", "StockName"]]
    sp500_frames.append(df_temp)

# Concatenate once instead of growing a DataFrame inside the loop, which re-copies
# all previously loaded rows on every iteration.
df_sp500 = pd.concat(sp500_frames)
# Long -> wide: one row per date, one column per asset.
df_sp500 = pd.pivot_table(data = df_sp500, values = "PercentChange", index = "Date", columns = "StockName")
df_sp500.head()

StockName,AAPL,ABBV,ACN,AMD,AMZN,AVGO,BAC,BRK-B,COST,CSCO,...,PFE,PG,TMO,TSLA,UNH,V,WMT,XOM,^IXIC,^RUT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-05,-2.505933,-0.416598,0.520474,-0.722022,-0.502363,-3.345512,0.0,0.382409,0.244453,-0.45437,...,0.719874,,-0.172977,0.00895,0.188907,0.752972,2.375527,0.852058,-0.237802,0.164163
2016-01-06,-1.95697,0.017434,-0.195386,-8.727273,-0.179873,-3.075924,-2.13025,0.060954,-0.925402,-1.065048,...,-1.771283,,-0.765286,-1.964817,-1.019885,-1.311132,1.001273,-0.832056,-1.138121,-1.447169
2016-01-07,-4.220457,-0.296274,-2.93657,-9.163347,-3.905796,-3.181027,-3.606965,-1.40867,-2.290937,-2.306805,...,-0.66435,,-2.284466,-1.547662,-2.94398,-1.96625,2.328875,-1.600617,-3.02599,-2.723032
2016-01-08,0.528776,-2.726791,-0.968139,-6.140351,-0.146389,-0.658664,-1.935484,-0.888163,-1.750424,-2.479335,...,-1.273887,,-0.111677,-2.156272,-1.721824,-1.233235,-2.291247,-2.020203,-0.976671,-1.725579
2016-01-11,1.619224,-3.180595,1.048886,9.345794,1.760969,-0.124808,0.723684,0.023376,1.715864,1.977397,...,0.225808,,0.536673,-1.492891,-0.526509,1.427005,1.070192,-1.338867,-0.121449,-0.411004


## Commodity DataFrame

In [8]:
# Build a Date x asset table of daily percentage changes from the per-commodity CSV files.
folder_path = os.path.abspath("C:\\Users\\Corey Feld\\Documents\\Year2Semester1\\FIN611\\ResearchProject\\CommodityData")
files = os.listdir(folder_path)

comm_frames = []

for file in files:
    # Join against folder_path instead of repeating the directory as a second literal;
    # the repeated literal used single backslashes (e.g. "\C", "\D"), which are invalid
    # escape sequences that Python only tolerates with a warning.
    df_temp = pd.read_csv(os.path.join(folder_path, file))
    df_temp["Date"] = pd.to_datetime(df_temp["Date"]).dt.date
    # "Change %" is text; drop its last character (presumably the "%" sign) and parse as float.
    df_temp["PercentChange"] = df_temp["Change %"].str[:-1].astype("float64")
    # Asset name = file name minus its last 28 characters
    # (presumably the " Futures Historical Data.csv" suffix -- TODO confirm).
    df_temp["StockName"] = str(file[:-28])
    # Selecting the three needed columns also discards "Change %".
    df_temp = df_temp[["Date", "PercentChange", "StockName"]]
    comm_frames.append(df_temp)

# Concatenate once instead of growing a DataFrame inside the loop, which re-copies
# all previously loaded rows on every iteration.
df_comm = pd.concat(comm_frames)
# Long -> wide: one row per date, one column per asset.
df_comm = pd.pivot_table(data = df_comm, values = "PercentChange", index = "Date", columns = "StockName")
df_comm.head()

StockName,Brent Oil,Canola,Class III Milk,Cobalt,Copper,Crude Oil WTI,Feeder Cattle,Gasoline RBOB,Gold,Heating Oil,...,Silver,Tin,US Cocoa,US Coffee C,US Corn,US Cotton #2,US Soybeans,US Sugar #11,US Wheat,Zinc
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-04,-0.16,-0.57,0.0,-1.28,-2.6,-0.31,0.43,-0.04,1.41,2.33,...,0.28,-0.55,-2.59,-2.17,-2.02,-1.15,-0.77,-1.77,-2.5,-2.55
2016-01-05,-2.15,0.51,-0.44,0.0,0.77,-0.92,0.1,-2.63,0.3,-0.1,...,0.94,-1.87,-2.02,-0.71,0.43,0.17,0.38,-2.67,0.65,0.42
2016-01-06,-6.01,0.38,0.15,0.0,-0.36,-2.13,0.32,-7.55,1.25,-3.96,...,0.04,-2.61,-2.52,-2.4,0.07,-0.89,0.92,-1.03,0.33,-2.07
2016-01-07,-1.4,-1.69,0.37,0.0,-3.16,-1.73,-2.67,-1.36,1.46,-1.4,...,2.63,-0.69,-0.6,-1.23,-0.07,-1.25,0.14,2.29,1.24,-3.34
2016-01-08,-0.59,0.7,0.81,0.0,0.0,-0.15,-2.68,-1.6,-0.9,-1.27,...,-2.97,0.15,1.72,0.5,1.13,0.2,0.29,-1.97,2.13,0.94


## Total Assets DataFrame

In [9]:
# Place the crypto, S&P 500 and commodity tables side by side, aligned on their Date index.
df_assets = pd.concat(objs=[df_crypto, df_sp500, df_comm], axis="columns")
print(df_assets.shape)
df_assets.head()

(3440, 100)


StockName,Basic Attention Token,Bitcoin Gold,Dash,Decentraland,Decred,Enjin Coin,Gnosis,Holo,KuCoin Token,Loopring,...,Silver,Tin,US Cocoa,US Coffee C,US Corn,US Cotton #2,US Soybeans,US Sugar #11,US Wheat,Zinc
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-04-30,,,,,,,,,,,...,,,,,,,,,,
2013-05-01,,,,,,,,,,,...,,,,,,,,,,
2013-05-02,,,,,,,,,,,...,,,,,,,,,,
2013-05-03,,,,,,,,,,,...,,,,,,,,,,
2013-05-04,,,,,,,,,,,...,,,,,,,,,,


# Statistics

## Moment 1 (Mean)

In [10]:
# Absolute value of the mean daily percentage change of every asset.
assets_mean_dict = {}

for asset in df_assets:
    # Work on a NaN-free copy. Calling dropna(inplace=True) on a column selection is a
    # chained in-place operation: pandas warns about it and it has no effect under
    # copy-on-write, so the missing values are removed explicitly here instead.
    asset_returns = df_assets[asset].dropna()
    assets_mean_dict[asset] = abs(np.mean(asset_returns))
# From here on the name holds a list of (asset, value) pairs ordered by asset name.
assets_mean_dict = sorted(assets_mean_dict.items(), key = lambda x:x[0])

In [11]:
# Cut-offs at the 20th/40th/60th/80th percentiles of the per-asset absolute means.
means_list = tuple(mean_value for _, mean_value in assets_mean_dict)

q1_mean, q2_mean, q3_mean, q4_mean = np.quantile(means_list, [0.2, 0.4, 0.6, 0.8])

In [12]:
# Give every asset a group number from 1 (below the 20% cut-off) to 5 (at or above the
# 80% cut-off) according to where its absolute mean falls between the cut-offs.
mean_groups_dict = {}

for asset_name, asset_mean in assets_mean_dict:
    if asset_mean < q1_mean:
        quintile = 1
    elif q1_mean <= asset_mean < q2_mean:
        quintile = 2
    elif q2_mean <= asset_mean < q3_mean:
        quintile = 3
    elif q3_mean <= asset_mean < q4_mean:
        quintile = 4
    else:
        quintile = 5
    mean_groups_dict[asset_name] = quintile

# Stored as a list of (asset, group) pairs ordered by asset name.
mean_groups_dict = sorted(mean_groups_dict.items(), key = lambda pair: pair[0])
mean_groups_dict

[('AAPL', 4),
 ('ABBV', 3),
 ('ACN', 3),
 ('AMD', 4),
 ('AMZN', 3),
 ('AVGO', 3),
 ('BAC', 2),
 ('BRK-B', 2),
 ('Basic Attention Token', 5),
 ('Bitcoin Gold', 4),
 ('Brent Oil', 3),
 ('COST', 3),
 ('CSCO', 2),
 ('CVX', 2),
 ('Canola', 2),
 ('Class III Milk', 2),
 ('Cobalt', 3),
 ('Copper', 1),
 ('Crude Oil WTI', 4),
 ('DIS', 1),
 ('Dash', 3),
 ('Decentraland', 5),
 ('Decred', 4),
 ('Enjin Coin', 5),
 ('Feeder Cattle', 1),
 ('GOOG', 3),
 ('GOOGL', 3),
 ('Gasoline RBOB', 5),
 ('Gnosis', 4),
 ('Gold', 1),
 ('HD', 2),
 ('Heating Oil', 3),
 ('Holo', 4),
 ('JNJ', 1),
 ('JPM', 2),
 ('KO', 1),
 ('KuCoin Token', 5),
 ('LLY', 3),
 ('Lead', 1),
 ('Lean Hogs', 3),
 ('Live Cattle', 1),
 ('London Gas Oil', 4),
 ('Loopring', 5),
 ('Lumber', 3),
 ('MA', 3),
 ('MCD', 2),
 ('META', 2),
 ('MRK', 2),
 ('MSFT', 4),
 ('NEM', 4),
 ('NVDA', 4),
 ('Natural Gas', 4),
 ('Neo', 4),
 ('Nickel', 3),
 ('Oats', 3),
 ('Orange Juice', 2),
 ('PEP', 1),
 ('PFE', 1),
 ('PG', 1),
 ('Palladium', 3),
 ('Platinum', 1),
 ('Qtu

## Moment 2 (Variance)

In [13]:
# Variance (via np.var) of the daily percentage change of every asset.
assets_variance_dict = {}

for asset in df_assets:
    # Work on a NaN-free copy. Calling dropna(inplace=True) on a column selection is a
    # chained in-place operation: pandas warns about it and it has no effect under
    # copy-on-write, so the missing values are removed explicitly here instead.
    asset_returns = df_assets[asset].dropna()
    assets_variance_dict[asset] = np.var(asset_returns)
# From here on the name holds a list of (asset, value) pairs ordered by value.
assets_variance_dict = sorted(assets_variance_dict.items(), key = lambda x:x[1])

In [14]:
# Cut-offs at the 20th/40th/60th/80th percentiles of the per-asset variances.
variance_list = tuple(variance_value for _, variance_value in assets_variance_dict)

q1_variance, q2_variance, q3_variance, q4_variance = np.quantile(variance_list, [0.2, 0.4, 0.6, 0.8])
print(q1_variance, q2_variance, q3_variance, q4_variance)

2.3668077512423618 3.148391763760603 7.448963752113573 51.15290893899943


In [15]:
# Give every asset a group number from 1 (below the 20% cut-off) to 5 (at or above the
# 80% cut-off) according to where its variance falls between the cut-offs.
variance_groups_dict = {}

for asset_name, asset_variance in assets_variance_dict:
    if asset_variance < q1_variance:
        quintile = 1
    elif q1_variance <= asset_variance < q2_variance:
        quintile = 2
    elif q2_variance <= asset_variance < q3_variance:
        quintile = 3
    elif q3_variance <= asset_variance < q4_variance:
        quintile = 4
    else:
        quintile = 5
    variance_groups_dict[asset_name] = quintile

# Stored as a list of (asset, group) pairs ordered by asset name.
variance_groups_dict = sorted(variance_groups_dict.items(), key = lambda pair: pair[0])

## Moment 3 (Skewness)

In [16]:
# Absolute skewness of the daily percentage change of every asset.
assets_skewness_dict = {}

for asset in df_assets:
    # Work on a NaN-free copy. Calling dropna(inplace=True) on a column selection is a
    # chained in-place operation: pandas warns about it and it has no effect under
    # copy-on-write, so the missing values are removed explicitly here instead.
    asset_returns = df_assets[asset].dropna()
    assets_skewness_dict[asset] = abs(asset_returns.skew())
# From here on the name holds a list of (asset, value) pairs ordered by value.
assets_skewness_dict = sorted(assets_skewness_dict.items(), key = lambda x:x[1])

In [17]:
# Cut-offs at the 20th/40th/60th/80th percentiles of the per-asset absolute skewness.
skewness_list = tuple(skewness_value for _, skewness_value in assets_skewness_dict)

q1_skewness, q2_skewness, q3_skewness, q4_skewness = np.quantile(skewness_list, [0.2, 0.4, 0.6, 0.8])
print(q1_skewness, q2_skewness, q3_skewness, q4_skewness)

0.20254751218643294 0.36936485990086565 0.8318460003866596 1.5296539433142446


In [18]:
# Give every asset a group number from 1 (below the 20% cut-off) to 5 (at or above the
# 80% cut-off) according to where its absolute skewness falls between the cut-offs.
skewness_groups_dict = {}

for asset_name, asset_skewness in assets_skewness_dict:
    if asset_skewness < q1_skewness:
        quintile = 1
    elif q1_skewness <= asset_skewness < q2_skewness:
        quintile = 2
    elif q2_skewness <= asset_skewness < q3_skewness:
        quintile = 3
    elif q3_skewness <= asset_skewness < q4_skewness:
        quintile = 4
    else:
        quintile = 5
    skewness_groups_dict[asset_name] = quintile

# Stored as a list of (asset, group) pairs ordered by asset name.
skewness_groups_dict = sorted(skewness_groups_dict.items(), key = lambda pair: pair[0])

## Moment 4 (Kurtosis)

In [19]:
# Kurtosis of the daily percentage change of every asset.
assets_kurtosis_dict = {}

for asset in df_assets:
    # Work on a NaN-free copy. Calling dropna(inplace=True) on a column selection is a
    # chained in-place operation: pandas warns about it and it has no effect under
    # copy-on-write, so the missing values are removed explicitly here instead.
    asset_returns = df_assets[asset].dropna()
    assets_kurtosis_dict[asset] = asset_returns.kurtosis()
# From here on the name holds a list of (asset, value) pairs ordered by value.
assets_kurtosis_dict = sorted(assets_kurtosis_dict.items(), key = lambda x:x[1])

In [20]:
# Cut-offs at the 20th/40th/60th/80th percentiles of the per-asset kurtosis values.
kurtosis_list = tuple(kurtosis_value for _, kurtosis_value in assets_kurtosis_dict)

q1_kurtosis, q2_kurtosis, q3_kurtosis, q4_kurtosis = np.quantile(kurtosis_list, [0.2, 0.4, 0.6, 0.8])
print(q1_kurtosis, q2_kurtosis, q3_kurtosis, q4_kurtosis)

5.669806374988849 9.424470970227881 12.85746357168278 23.786879326743744


In [21]:
# Give every asset a group number from 1 (below the 20% cut-off) to 5 (at or above the
# 80% cut-off) according to where its kurtosis falls between the cut-offs.
kurtosis_groups_dict = {}

for asset_name, asset_kurtosis in assets_kurtosis_dict:
    if asset_kurtosis < q1_kurtosis:
        quintile = 1
    elif q1_kurtosis <= asset_kurtosis < q2_kurtosis:
        quintile = 2
    elif q2_kurtosis <= asset_kurtosis < q3_kurtosis:
        quintile = 3
    elif q3_kurtosis <= asset_kurtosis < q4_kurtosis:
        quintile = 4
    else:
        quintile = 5
    kurtosis_groups_dict[asset_name] = quintile

# Stored as a list of (asset, group) pairs ordered by asset name.
kurtosis_groups_dict = sorted(kurtosis_groups_dict.items(), key = lambda pair: pair[0])

# Universally Used Metrics (Asset Names & Moment Groups)

In [22]:
# Asset names in the order of the name-sorted mean group list.
asset_names = [asset_name for asset_name, _ in mean_groups_dict]

In [23]:
# Group numbers for each moment, read position by position from the four name-sorted
# (asset, group) lists; the number of positions is taken from the mean list.
asset_count = len(mean_groups_dict)

asset_groups_mom1 = [mean_groups_dict[pos][1] for pos in range(asset_count)]
asset_groups_mom2 = [variance_groups_dict[pos][1] for pos in range(asset_count)]
asset_groups_mom3 = [skewness_groups_dict[pos][1] for pos in range(asset_count)]
asset_groups_mom4 = [kurtosis_groups_dict[pos][1] for pos in range(asset_count)]

# LSTM DataFrames

## LSTM Evaluation Metrics

In [24]:
# Merge the three asset classes per metric, then keep the result as a list of
# (asset, value) pairs ordered by asset name.
lstm_combined_mse_dict = sorted(
    {**lstm_crypto_mse_dict, **lstm_sp500_mse_dict, **lstm_comm_mse_dict}.items(),
    key = lambda item: item[0],
)

lstm_combined_rmse_dict = sorted(
    {**lstm_crypto_rmse_dict, **lstm_sp500_rmse_dict, **lstm_comm_rmse_dict}.items(),
    key = lambda item: item[0],
)

lstm_combined_mae_dict = sorted(
    {**lstm_crypto_mae_dict, **lstm_sp500_mae_dict, **lstm_comm_mae_dict}.items(),
    key = lambda item: item[0],
)

In [25]:
# Per-asset LSTM errors, listed in the asset order of mean_groups_dict.
# Values are looked up by asset name rather than by list position: the combined metric
# lists and the moment-group lists are sorted independently, so positional pairing would
# silently attach metrics to the wrong asset if the stored dicts held a different set
# of names. An asset without a stored metric now raises a KeyError naming it.
lstm_mse_by_asset = dict(lstm_combined_mse_dict)
lstm_rmse_by_asset = dict(lstm_combined_rmse_dict)
lstm_mae_by_asset = dict(lstm_combined_mae_dict)

lstm_asset_mse_vals = [lstm_mse_by_asset[asset_name] for asset_name, _ in mean_groups_dict]
lstm_asset_rmse_vals = [lstm_rmse_by_asset[asset_name] for asset_name, _ in mean_groups_dict]
lstm_asset_mae_vals = [lstm_mae_by_asset[asset_name] for asset_name, _ in mean_groups_dict]

## LSTM Moment 1 DataFrame

In [26]:
# One row per asset: name, Moment 1 (mean) group and the three LSTM error metrics.
lstm_mom1_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom1,
        lstm_asset_mse_vals,
        lstm_asset_rmse_vals,
        lstm_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
lstm_mom1_df

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,4,2.294940,1.514906,1.323736
1,ABBV,3,1.153376,1.073953,0.905876
2,ACN,3,1.345338,1.159887,1.029212
3,AMD,4,8.088597,2.844046,2.518444
4,AMZN,3,3.003061,1.732934,1.553076
...,...,...,...,...,...
95,coin_Litecoin,5,15.314218,3.913338,3.309051
96,coin_Monero,5,18.651182,4.318701,3.658176
97,coin_Stellar,5,20.302743,4.505856,3.862240
98,coin_Tron,5,25.839992,5.083305,4.345023


In [27]:
# Average LSTM error per Moment 1 (mean) group.
lstm_mom1_mse_avg = lstm_mom1_df.groupby("Group")["MSE"].mean()
lstm_mom1_rmse_avg = lstm_mom1_df.groupby("Group")["RMSE"].mean()
lstm_mom1_mae_avg = lstm_mom1_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave lstm_mom1_eval_groups bound to None.
lstm_mom1_eval_groups = pd.concat([lstm_mom1_mse_avg, lstm_mom1_rmse_avg, lstm_mom1_mae_avg],
                                  axis = 1)
lstm_mom1_eval_groups.to_csv("lstm_mom1_eval_groups.csv")

## LSTM Moment 2 DataFrame

In [28]:
# One row per asset: name, Moment 2 (variance) group and the three LSTM error metrics.
lstm_mom2_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom2,
        lstm_asset_mse_vals,
        lstm_asset_rmse_vals,
        lstm_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
lstm_mom2_df

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,3,2.294940,1.514906,1.323736
1,ABBV,2,1.153376,1.073953,0.905876
2,ACN,2,1.345338,1.159887,1.029212
3,AMD,4,8.088597,2.844046,2.518444
4,AMZN,3,3.003061,1.732934,1.553076
...,...,...,...,...,...
95,coin_Litecoin,4,15.314218,3.913338,3.309051
96,coin_Monero,4,18.651182,4.318701,3.658176
97,coin_Stellar,5,20.302743,4.505856,3.862240
98,coin_Tron,5,25.839992,5.083305,4.345023


In [29]:
# Average LSTM error per Moment 2 (variance) group.
lstm_mom2_mse_avg = lstm_mom2_df.groupby("Group")["MSE"].mean()
lstm_mom2_rmse_avg = lstm_mom2_df.groupby("Group")["RMSE"].mean()
lstm_mom2_mae_avg = lstm_mom2_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave lstm_mom2_eval_groups bound to None.
lstm_mom2_eval_groups = pd.concat([lstm_mom2_mse_avg, lstm_mom2_rmse_avg, lstm_mom2_mae_avg],
                                  axis = 1)
lstm_mom2_eval_groups.to_csv("lstm_mom2_eval_groups.csv")

## LSTM Moment 3 DataFrame

In [30]:
# One row per asset: name, Moment 3 (skewness) group and the three LSTM error metrics.
lstm_mom3_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom3,
        lstm_asset_mse_vals,
        lstm_asset_rmse_vals,
        lstm_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
lstm_mom3_df.head()

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,1,2.29494,1.514906,1.323736
1,ABBV,3,1.153376,1.073953,0.905876
2,ACN,1,1.345338,1.159887,1.029212
3,AMD,4,8.088597,2.844046,2.518444
4,AMZN,1,3.003061,1.732934,1.553076


In [31]:
# Average LSTM error per Moment 3 (skewness) group.
lstm_mom3_mse_avg = lstm_mom3_df.groupby("Group")["MSE"].mean()
lstm_mom3_rmse_avg = lstm_mom3_df.groupby("Group")["RMSE"].mean()
lstm_mom3_mae_avg = lstm_mom3_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave lstm_mom3_eval_groups bound to None.
lstm_mom3_eval_groups = pd.concat([lstm_mom3_mse_avg, lstm_mom3_rmse_avg, lstm_mom3_mae_avg],
                                  axis = 1)
lstm_mom3_eval_groups.to_csv("lstm_mom3_eval_groups.csv")

## LSTM Moment 4 DataFrame

In [32]:
# One row per asset: name, Moment 4 (kurtosis) group and the three LSTM error metrics.
lstm_mom4_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom4,
        lstm_asset_mse_vals,
        lstm_asset_rmse_vals,
        lstm_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
lstm_mom4_df.head()

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,1,2.29494,1.514906,1.323736
1,ABBV,3,1.153376,1.073953,0.905876
2,ACN,2,1.345338,1.159887,1.029212
3,AMD,4,8.088597,2.844046,2.518444
4,AMZN,2,3.003061,1.732934,1.553076


In [33]:
# Average LSTM error per Moment 4 (kurtosis) group.
lstm_mom4_mse_avg = lstm_mom4_df.groupby("Group")["MSE"].mean()
lstm_mom4_rmse_avg = lstm_mom4_df.groupby("Group")["RMSE"].mean()
lstm_mom4_mae_avg = lstm_mom4_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave lstm_mom4_eval_groups bound to None.
lstm_mom4_eval_groups = pd.concat([lstm_mom4_mse_avg, lstm_mom4_rmse_avg, lstm_mom4_mae_avg],
                                  axis = 1)
lstm_mom4_eval_groups.to_csv("lstm_mom4_eval_groups.csv")

# GRU DataFrames

## GRU Evaluation Metrics

In [34]:
# Merge the three asset classes per metric, then keep the result as a list of
# (asset, value) pairs ordered by asset name.
gru_combined_mse_dict = sorted(
    {**gru_crypto_mse_dict, **gru_sp500_mse_dict, **gru_comm_mse_dict}.items(),
    key = lambda item: item[0],
)

gru_combined_rmse_dict = sorted(
    {**gru_crypto_rmse_dict, **gru_sp500_rmse_dict, **gru_comm_rmse_dict}.items(),
    key = lambda item: item[0],
)

gru_combined_mae_dict = sorted(
    {**gru_crypto_mae_dict, **gru_sp500_mae_dict, **gru_comm_mae_dict}.items(),
    key = lambda item: item[0],
)

In [35]:
# Per-asset GRU errors, listed in the asset order of mean_groups_dict.
# Values are looked up by asset name rather than by list position: the combined metric
# lists and the moment-group lists are sorted independently, so positional pairing would
# silently attach metrics to the wrong asset if the stored dicts held a different set
# of names. An asset without a stored metric now raises a KeyError naming it.
gru_mse_by_asset = dict(gru_combined_mse_dict)
gru_rmse_by_asset = dict(gru_combined_rmse_dict)
gru_mae_by_asset = dict(gru_combined_mae_dict)

gru_asset_mse_vals = [gru_mse_by_asset[asset_name] for asset_name, _ in mean_groups_dict]
gru_asset_rmse_vals = [gru_rmse_by_asset[asset_name] for asset_name, _ in mean_groups_dict]
gru_asset_mae_vals = [gru_mae_by_asset[asset_name] for asset_name, _ in mean_groups_dict]

## GRU Moment 1 DataFrame

In [36]:
# One row per asset: name, Moment 1 (mean) group and the three GRU error metrics.
gru_mom1_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom1,
        gru_asset_mse_vals,
        gru_asset_rmse_vals,
        gru_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
gru_mom1_df

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,4,1.580714,1.257264,1.097734
1,ABBV,3,1.012965,1.006462,0.847742
2,ACN,3,1.671898,1.293019,1.150371
3,AMD,4,1.509501,1.228617,1.083721
4,AMZN,3,1.888823,1.374345,1.231389
...,...,...,...,...,...
95,coin_Litecoin,5,1.400514,1.183433,1.004632
96,coin_Monero,5,0.997551,0.998775,0.840366
97,coin_Stellar,5,1.265603,1.124990,0.963348
98,coin_Tron,5,1.255626,1.120547,0.958490


In [37]:
# Average GRU error per Moment 1 (mean) group.
gru_mom1_mse_avg = gru_mom1_df.groupby("Group")["MSE"].mean()
gru_mom1_rmse_avg = gru_mom1_df.groupby("Group")["RMSE"].mean()
gru_mom1_mae_avg = gru_mom1_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave gru_mom1_eval_groups bound to None.
gru_mom1_eval_groups = pd.concat([gru_mom1_mse_avg, gru_mom1_rmse_avg, gru_mom1_mae_avg],
                                 axis = 1)
gru_mom1_eval_groups.to_csv("gru_mom1_eval_groups.csv")

## GRU Moment 2 DataFrame

In [38]:
# One row per asset: name, Moment 2 (variance) group and the three GRU error metrics.
gru_mom2_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom2,
        gru_asset_mse_vals,
        gru_asset_rmse_vals,
        gru_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
gru_mom2_df

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,3,1.580714,1.257264,1.097734
1,ABBV,2,1.012965,1.006462,0.847742
2,ACN,2,1.671898,1.293019,1.150371
3,AMD,4,1.509501,1.228617,1.083721
4,AMZN,3,1.888823,1.374345,1.231389
...,...,...,...,...,...
95,coin_Litecoin,4,1.400514,1.183433,1.004632
96,coin_Monero,4,0.997551,0.998775,0.840366
97,coin_Stellar,5,1.265603,1.124990,0.963348
98,coin_Tron,5,1.255626,1.120547,0.958490


In [39]:
# Average GRU error per Moment 2 (variance) group.
gru_mom2_mse_avg = gru_mom2_df.groupby("Group")["MSE"].mean()
gru_mom2_rmse_avg = gru_mom2_df.groupby("Group")["RMSE"].mean()
gru_mom2_mae_avg = gru_mom2_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave gru_mom2_eval_groups bound to None.
gru_mom2_eval_groups = pd.concat([gru_mom2_mse_avg, gru_mom2_rmse_avg, gru_mom2_mae_avg],
                                 axis = 1)
gru_mom2_eval_groups.to_csv("gru_mom2_eval_groups.csv")

## GRU Moment 3 DataFrame

In [40]:
# One row per asset: name, Moment 3 (skewness) group and the three GRU error metrics.
gru_mom3_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom3,
        gru_asset_mse_vals,
        gru_asset_rmse_vals,
        gru_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
gru_mom3_df

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,1,1.580714,1.257264,1.097734
1,ABBV,3,1.012965,1.006462,0.847742
2,ACN,1,1.671898,1.293019,1.150371
3,AMD,4,1.509501,1.228617,1.083721
4,AMZN,1,1.888823,1.374345,1.231389
...,...,...,...,...,...
95,coin_Litecoin,5,1.400514,1.183433,1.004632
96,coin_Monero,4,0.997551,0.998775,0.840366
97,coin_Stellar,5,1.265603,1.124990,0.963348
98,coin_Tron,5,1.255626,1.120547,0.958490


In [41]:
# Average GRU error per Moment 3 (skewness) group.
gru_mom3_mse_avg = gru_mom3_df.groupby("Group")["MSE"].mean()
gru_mom3_rmse_avg = gru_mom3_df.groupby("Group")["RMSE"].mean()
gru_mom3_mae_avg = gru_mom3_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave gru_mom3_eval_groups bound to None.
gru_mom3_eval_groups = pd.concat([gru_mom3_mse_avg, gru_mom3_rmse_avg, gru_mom3_mae_avg],
                                 axis = 1)
gru_mom3_eval_groups.to_csv("gru_mom3_eval_groups.csv")

## GRU Moment 4 DataFrame

In [42]:
# One row per asset: name, Moment 4 (kurtosis) group and the three GRU error metrics.
gru_mom4_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom4,
        gru_asset_mse_vals,
        gru_asset_rmse_vals,
        gru_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
gru_mom4_df

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,1,1.580714,1.257264,1.097734
1,ABBV,3,1.012965,1.006462,0.847742
2,ACN,2,1.671898,1.293019,1.150371
3,AMD,4,1.509501,1.228617,1.083721
4,AMZN,2,1.888823,1.374345,1.231389
...,...,...,...,...,...
95,coin_Litecoin,5,1.400514,1.183433,1.004632
96,coin_Monero,4,0.997551,0.998775,0.840366
97,coin_Stellar,5,1.265603,1.124990,0.963348
98,coin_Tron,5,1.255626,1.120547,0.958490


In [43]:
# Average GRU error per Moment 4 (kurtosis) group.
gru_mom4_mse_avg = gru_mom4_df.groupby("Group")["MSE"].mean()
gru_mom4_rmse_avg = gru_mom4_df.groupby("Group")["RMSE"].mean()
gru_mom4_mae_avg = gru_mom4_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave gru_mom4_eval_groups bound to None.
gru_mom4_eval_groups = pd.concat([gru_mom4_mse_avg, gru_mom4_rmse_avg, gru_mom4_mae_avg],
                                 axis = 1)
gru_mom4_eval_groups.to_csv("gru_mom4_eval_groups.csv")

# ARIMA DataFrames

## ARIMA Evaluation Metrics

In [44]:
# Merge the three asset classes per metric, then keep the result as a list of
# (asset, value) pairs ordered by asset name.
arima_combined_mse_dict = sorted(
    {**arima_crypto_mse_dict, **arima_sp500_mse_dict, **arima_comm_mse_dict}.items(),
    key = lambda item: item[0],
)

arima_combined_rmse_dict = sorted(
    {**arima_crypto_rmse_dict, **arima_sp500_rmse_dict, **arima_comm_rmse_dict}.items(),
    key = lambda item: item[0],
)

arima_combined_mae_dict = sorted(
    {**arima_crypto_mae_dict, **arima_sp500_mae_dict, **arima_comm_mae_dict}.items(),
    key = lambda item: item[0],
)

In [45]:
# Per-asset ARIMA errors, listed in the asset order of mean_groups_dict.
# Values are looked up by asset name rather than by list position: the combined metric
# lists and the moment-group lists are sorted independently, so positional pairing would
# silently attach metrics to the wrong asset if the stored dicts held a different set
# of names. An asset without a stored metric now raises a KeyError naming it.
arima_mse_by_asset = dict(arima_combined_mse_dict)
arima_rmse_by_asset = dict(arima_combined_rmse_dict)
arima_mae_by_asset = dict(arima_combined_mae_dict)

arima_asset_mse_vals = [arima_mse_by_asset[asset_name] for asset_name, _ in mean_groups_dict]
arima_asset_rmse_vals = [arima_rmse_by_asset[asset_name] for asset_name, _ in mean_groups_dict]
arima_asset_mae_vals = [arima_mae_by_asset[asset_name] for asset_name, _ in mean_groups_dict]

## ARIMA Moment 1 DataFrame

In [46]:
# One row per asset: name, Moment 1 (mean) group and the three ARIMA error metrics.
arima_mom1_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom1,
        arima_asset_mse_vals,
        arima_asset_rmse_vals,
        arima_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
arima_mom1_df

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,4,2.293078,1.514291,1.321760
1,ABBV,3,1.145801,1.070421,0.905796
2,ACN,3,1.356554,1.164712,1.036188
3,AMD,4,8.080155,2.842561,2.507193
4,AMZN,3,2.989990,1.729159,1.553753
...,...,...,...,...,...
95,coin_Litecoin,5,15.281474,3.909153,3.310639
96,coin_Monero,5,18.116454,4.256343,3.615504
97,coin_Stellar,5,20.317312,4.507473,3.865373
98,coin_Tron,5,25.436706,5.043481,4.315018


In [47]:
# Average ARIMA error per Moment 1 (mean) group.
arima_mom1_mse_avg = arima_mom1_df.groupby("Group")["MSE"].mean()
arima_mom1_rmse_avg = arima_mom1_df.groupby("Group")["RMSE"].mean()
arima_mom1_mae_avg = arima_mom1_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave arima_mom1_eval_groups bound to None.
arima_mom1_eval_groups = pd.concat([arima_mom1_mse_avg, arima_mom1_rmse_avg, arima_mom1_mae_avg],
                                   axis = 1)
arima_mom1_eval_groups.to_csv("arima_mom1_eval_groups.csv")

## ARIMA Moment 2 DataFrame

In [48]:
# One row per asset: name, Moment 2 (variance) group and the three ARIMA error metrics.
arima_mom2_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom2,
        arima_asset_mse_vals,
        arima_asset_rmse_vals,
        arima_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
arima_mom2_df

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,3,2.293078,1.514291,1.321760
1,ABBV,2,1.145801,1.070421,0.905796
2,ACN,2,1.356554,1.164712,1.036188
3,AMD,4,8.080155,2.842561,2.507193
4,AMZN,3,2.989990,1.729159,1.553753
...,...,...,...,...,...
95,coin_Litecoin,4,15.281474,3.909153,3.310639
96,coin_Monero,4,18.116454,4.256343,3.615504
97,coin_Stellar,5,20.317312,4.507473,3.865373
98,coin_Tron,5,25.436706,5.043481,4.315018


In [49]:
# Average ARIMA error per Moment 2 (variance) group.
arima_mom2_mse_avg = arima_mom2_df.groupby("Group")["MSE"].mean()
arima_mom2_rmse_avg = arima_mom2_df.groupby("Group")["RMSE"].mean()
arima_mom2_mae_avg = arima_mom2_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave arima_mom2_eval_groups bound to None.
arima_mom2_eval_groups = pd.concat([arima_mom2_mse_avg, arima_mom2_rmse_avg, arima_mom2_mae_avg],
                                   axis = 1)
arima_mom2_eval_groups.to_csv("arima_mom2_eval_groups.csv")

## ARIMA Moment 3 DataFrame

In [50]:
# One row per asset: name, Moment 3 (skewness) group and the three ARIMA error metrics.
arima_mom3_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom3,
        arima_asset_mse_vals,
        arima_asset_rmse_vals,
        arima_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
arima_mom3_df

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,1,2.293078,1.514291,1.321760
1,ABBV,3,1.145801,1.070421,0.905796
2,ACN,1,1.356554,1.164712,1.036188
3,AMD,4,8.080155,2.842561,2.507193
4,AMZN,1,2.989990,1.729159,1.553753
...,...,...,...,...,...
95,coin_Litecoin,5,15.281474,3.909153,3.310639
96,coin_Monero,4,18.116454,4.256343,3.615504
97,coin_Stellar,5,20.317312,4.507473,3.865373
98,coin_Tron,5,25.436706,5.043481,4.315018


In [51]:
# Average ARIMA error per Moment 3 (skewness) group.
arima_mom3_mse_avg = arima_mom3_df.groupby("Group")["MSE"].mean()
arima_mom3_rmse_avg = arima_mom3_df.groupby("Group")["RMSE"].mean()
arima_mom3_mae_avg = arima_mom3_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave arima_mom3_eval_groups bound to None.
arima_mom3_eval_groups = pd.concat([arima_mom3_mse_avg, arima_mom3_rmse_avg, arima_mom3_mae_avg],
                                   axis = 1)
arima_mom3_eval_groups.to_csv("arima_mom3_eval_groups.csv")

## ARIMA Moment 4 DataFrame

In [52]:
# One row per asset: name, Moment 4 (kurtosis) group and the three ARIMA error metrics.
arima_mom4_df = pd.DataFrame(
    data = [*zip(
        asset_names,
        asset_groups_mom4,
        arima_asset_mse_vals,
        arima_asset_rmse_vals,
        arima_asset_mae_vals,
    )],
    columns = ["Name", "Group", "MSE", "RMSE", "MAE"],
)
arima_mom4_df

Unnamed: 0,Name,Group,MSE,RMSE,MAE
0,AAPL,1,2.293078,1.514291,1.321760
1,ABBV,3,1.145801,1.070421,0.905796
2,ACN,2,1.356554,1.164712,1.036188
3,AMD,4,8.080155,2.842561,2.507193
4,AMZN,2,2.989990,1.729159,1.553753
...,...,...,...,...,...
95,coin_Litecoin,5,15.281474,3.909153,3.310639
96,coin_Monero,4,18.116454,4.256343,3.615504
97,coin_Stellar,5,20.317312,4.507473,3.865373
98,coin_Tron,5,25.436706,5.043481,4.315018


In [53]:
# Average ARIMA error per Moment 4 (kurtosis) group.
arima_mom4_mse_avg = arima_mom4_df.groupby("Group")["MSE"].mean()
arima_mom4_rmse_avg = arima_mom4_df.groupby("Group")["RMSE"].mean()
arima_mom4_mae_avg = arima_mom4_df.groupby("Group")["MAE"].mean()

# Keep the combined table in the variable and write the CSV as a separate step:
# to_csv returns None when given a path, so chaining it onto the assignment would
# leave arima_mom4_eval_groups bound to None.
arima_mom4_eval_groups = pd.concat([arima_mom4_mse_avg, arima_mom4_rmse_avg, arima_mom4_mae_avg],
                                   axis = 1)
arima_mom4_eval_groups.to_csv("arima_mom4_eval_groups.csv")