In [6]:
import numpy as np
import os
import math
import statistics
import json
import pandas as pd
from sklearn.metrics import mean_squared_error

### Random Forest - Lagged Data (RMSE list for every company)

In [7]:
# Collect error metrics for every company from the 'lagged' Random Forest
# prediction dumps found under ./dump.
rmse_list_rf_lagged = {}
directory = os.path.join(".", "dump")
# Structured dtype for the three CSV columns, unpacked below as date, actual, pred.
data_type = [('f0', 'datetime64[s]'), ('f1', '<f8'), ('f2', '<f8')]
for root, dirs, files in os.walk(directory):
    for file in files:
        name_parts = file.split('-')
        # Filter before indexing into the name so unrelated files (for example
        # '.DS_Store') are skipped instead of raising IndexError.
        if name_parts[0] != 'lagged' or not file.endswith('.csv'):
            continue
        # The company identifier is the third dash-separated part of the file name.
        company = name_parts[2]
        # os.walk also yields files from subdirectories; join with `root` (the
        # directory currently being visited) so the path points at the real file.
        filename = os.path.join(root, file)
        # skiprows=1 skips the first line of the file (presumably a header row).
        _date, actual, pred = np.loadtxt(filename, delimiter=',', skiprows=1, dtype=data_type, unpack=True)
        avg_actual = statistics.mean(actual)
        rmse = math.sqrt(mean_squared_error(actual, pred))
        # 'rmse_paa' is the RMSE divided by the mean of the actual values.
        rmse_avg_actual_price = rmse / avg_actual
        rmse_list_rf_lagged[company] = {
            'rmse': rmse,
            'rmse_paa': rmse_avg_actual_price
        }

In [8]:
# Average the per-company metrics and store the result under the 'overall' key.
# A previously inserted 'overall' entry is excluded so that re-running this cell
# never averages the summary row in as if it were a company.
company_metrics = [m for name, m in rmse_list_rf_lagged.items() if name != 'overall']
rmse_only = [m['rmse'] for m in company_metrics]
# statistics.mean raises StatisticsError if no company was loaded.
overall = statistics.mean(rmse_only)
rmse_paa_only = [m['rmse_paa'] for m in company_metrics]
overall_rmse_paa = statistics.mean(rmse_paa_only)
rmse_list_rf_lagged['overall'] = {'rmse': overall, 'rmse_paa': overall_rmse_paa}
overall_rmse_paa

0.11019319116433902

In [9]:
# Show the per-company metrics together with the 'overall' entry added in the previous cell.
rmse_list_rf_lagged

{'AAPL': {'rmse': 8.609790515150221, 'rmse_paa': 0.038940107411031974},
 'ADBE': {'rmse': 11.718352543673682, 'rmse_paa': 0.045471442816392704},
 'ADSK': {'rmse': 14.361005014404492, 'rmse_paa': 0.09986932807910005},
 'AMD': {'rmse': 13.608475514239327, 'rmse_paa': 0.5011952249661563},
 'AMZN': {'rmse': 142.36163584265844, 'rmse_paa': 0.0766941886191561},
 'AVGO': {'rmse': 19.36857132943231, 'rmse_paa': 0.08264997749775622},
 'BKNG': {'rmse': 156.78568875101848, 'rmse_paa': 0.08329753575815955},
 'CRM': {'rmse': 9.460240822726801, 'rmse_paa': 0.06361351731990161},
 'CSCO': {'rmse': 2.4573716448980534, 'rmse_paa': 0.05256665491556705},
 'EA': {'rmse': 18.363818865417894, 'rmse_paa': 0.16921988546592046},
 'EBAY': {'rmse': 6.699880426720597, 'rmse_paa': 0.20963330386300513},
 'EXPE': {'rmse': 6.571210809807505, 'rmse_paa': 0.05239991040820894},
 'FB': {'rmse': 27.720636016539288, 'rmse_paa': 0.17495406146494985},
 'FTNT': {'rmse': 9.230537971192284, 'rmse_paa': 0.10892921582609744},
 'GO

In [10]:
# Tabulate the metrics: one row per key of rmse_list_rf_lagged, with the
# 'rmse' and 'rmse_paa' values as columns.
df = pd.DataFrame(rmse_list_rf_lagged).T
# to_csv does not create missing parent directories, so make sure ./eval exists
# before writing (a fresh checkout may not have it).
eval_dir = os.path.join('.', 'eval')
os.makedirs(eval_dir, exist_ok=True)
df.to_csv(os.path.join(eval_dir, 'rmse_list_rf_lagged.csv'))

In [11]:
# Render the metrics table built from rmse_list_rf_lagged in the previous cell.
df

Unnamed: 0,rmse,rmse_paa
AAPL,8.609791,0.03894
ADBE,11.718353,0.045471
ADSK,14.361005,0.099869
AMD,13.608476,0.501195
AMZN,142.361636,0.076694
AVGO,19.368571,0.08265
BKNG,156.785689,0.083298
CRM,9.460241,0.063614
CSCO,2.457372,0.052567
EA,18.363819,0.16922


### Random Forest - Quarterly Data

In [12]:
# Collect error metrics for every company from the 'quarterly' Random Forest
# prediction dumps found under ./dump.
rmse_list_rf_quarterly = {}
directory = os.path.join(".", "dump")
for root, dirs, files in os.walk(directory):
    for file in files:
        name_parts = file.split('-')
        # Filter before indexing into the name so unrelated files (for example
        # '.DS_Store') are skipped instead of raising IndexError.
        if name_parts[0] != 'quarterly' or not file.endswith('.csv'):
            continue
        # The company identifier is the third dash-separated part of the file name.
        company = name_parts[2]
        # os.walk also yields files from subdirectories; join with `root` (the
        # directory currently being visited) so the path points at the real file.
        filename = os.path.join(root, file)
        # `data_type` is the structured dtype defined in the lagged Random Forest
        # cell; skiprows=1 skips the first line (presumably a header row).
        _date, actual, pred = np.loadtxt(filename, delimiter=',', skiprows=1, dtype=data_type, unpack=True)
        avg_actual = statistics.mean(actual)
        rmse = math.sqrt(mean_squared_error(actual, pred))
        # 'rmse_paa' is the RMSE divided by the mean of the actual values.
        rmse_avg_actual_price = rmse / avg_actual
        rmse_list_rf_quarterly[company] = {
            'rmse': rmse,
            'rmse_paa': rmse_avg_actual_price
        }
rmse_list_rf_quarterly

{'AAPL': {'rmse': 28.213137323752257, 'rmse_paa': 0.1354447295864076},
 'ADBE': {'rmse': 21.091620732278482, 'rmse_paa': 0.08180190263460808},
 'ADSK': {'rmse': 15.63096712449873, 'rmse_paa': 0.11069131830986692},
 'AMD': {'rmse': 11.690818665525388, 'rmse_paa': 0.5180569575303992},
 'AMZN': {'rmse': 199.47422544948802, 'rmse_paa': 0.10606418652715116},
 'AVGO': {'rmse': 28.42114966568135, 'rmse_paa': 0.1267481935470093},
 'BKNG': {'rmse': 99.87828404722842, 'rmse_paa': 0.05074085562410786},
 'CRM': {'rmse': 18.844226459289494, 'rmse_paa': 0.12699012922569095},
 'CSCO': {'rmse': 2.9357843506138606, 'rmse_paa': 0.06527073350240079},
 'EA': {'rmse': 12.803150589136514, 'rmse_paa': 0.09931962195059145},
 'EBAY': {'rmse': 3.151581998865209, 'rmse_paa': 0.09094433163170935},
 'EXPE': {'rmse': 3.3889951371935707, 'rmse_paa': 0.02610230732584475},
 'FB': {'rmse': 17.015006979556272, 'rmse_paa': 0.09399072728992457},
 'FTNT': {'rmse': 24.04035819397792, 'rmse_paa': 0.3144677166714317},
 'GOOG'

In [13]:
# Average the per-company metrics and store the result under the 'overall' key.
# A previously inserted 'overall' entry is excluded so that re-running this cell
# never averages the summary row in as if it were a company.
company_metrics = [m for name, m in rmse_list_rf_quarterly.items() if name != 'overall']
rmse_only = [m['rmse'] for m in company_metrics]
# statistics.mean raises StatisticsError if no company was loaded.
overall = statistics.mean(rmse_only)
rmse_paa_only = [m['rmse_paa'] for m in company_metrics]
overall_rmse_paa = statistics.mean(rmse_paa_only)
rmse_list_rf_quarterly['overall'] = {'rmse': overall, 'rmse_paa': overall_rmse_paa}
overall_rmse_paa

0.11733476180979059

In [14]:
# Tabulate the metrics: one row per key of rmse_list_rf_quarterly, with the
# 'rmse' and 'rmse_paa' values as columns.
df_rf_quarterly = pd.DataFrame(rmse_list_rf_quarterly).T
# to_csv does not create missing parent directories, so make sure ./eval exists
# before writing (a fresh checkout may not have it).
eval_dir = os.path.join('.', 'eval')
os.makedirs(eval_dir, exist_ok=True)
df_rf_quarterly.to_csv(os.path.join(eval_dir, 'rmse_list_rf_quarterly.csv'))
df_rf_quarterly

Unnamed: 0,rmse,rmse_paa
AAPL,28.213137,0.135445
ADBE,21.091621,0.081802
ADSK,15.630967,0.110691
AMD,11.690819,0.518057
AMZN,199.474225,0.106064
AVGO,28.42115,0.126748
BKNG,99.878284,0.050741
CRM,18.844226,0.12699
CSCO,2.935784,0.065271
EA,12.803151,0.09932


### LSTM - Quarterly data

In [37]:
# Collect error metrics for every company from the LSTM prediction files
# found under ./quarterly-LSTM.
rmse_lstm_quarterly = {}
directory = os.path.join(".", "quarterly-LSTM")
for root, dirs, files in os.walk(directory):
    for file in files:
        name_parts = file.split('-')
        if len(name_parts) < 2:
            # No dash-separated company part (for example '.DS_Store'): skip the
            # file instead of raising IndexError on the lookup below.
            continue
        # The company identifier is the second dash-separated part of the file name.
        company = name_parts[1]
        # os.walk also yields files from subdirectories; join with `root` (the
        # directory currently being visited) so the path points at the real file.
        filename = os.path.join(root, file)
        # Score every row except the last one. A dedicated name is used so the
        # `df` table built earlier in the notebook is not overwritten.
        # NOTE(review): why the final row is excluded is not visible here — confirm.
        predictions = pd.read_csv(filename).iloc[:-1]
        actual = predictions['test_value']
        pred = predictions['predicted_value']
        avg_actual = statistics.mean(actual)
        rmse = math.sqrt(mean_squared_error(actual, pred))
        # 'rmse_paa' is the RMSE divided by the mean of the actual values.
        rmse_avg_actual_price = rmse / avg_actual
        rmse_lstm_quarterly[company] = {
            'rmse': rmse,
            'rmse_paa': rmse_avg_actual_price
        }

In [38]:
# Inspect the per-company LSTM metrics (the 'overall' entry is only added in a later cell).
rmse_lstm_quarterly

{'AAPL': {'rmse': 8.597159567944107, 'rmse_paa': 0.03711367533348716},
 'ADBE': {'rmse': 1.1516092011245125, 'rmse_paa': 0.004192903435219862},
 'ADSK': {'rmse': 2.248354836917175, 'rmse_paa': 0.01430344813906189},
 'AMD': {'rmse': 5.110657923424531, 'rmse_paa': 0.2213352132331067},
 'AMZN': {'rmse': 34.78554940893495, 'rmse_paa': 0.01684213561166444},
 'AVGO': {'rmse': 4.702255492154423, 'rmse_paa': 0.019633470590745442},
 'BKNG': {'rmse': 9.9237134644889, 'rmse_paa': 0.004820558723893337},
 'CRM': {'rmse': 1.5104472322127787, 'rmse_paa': 0.009328859736277974},
 'CSCO': {'rmse': 0.42521137901274253, 'rmse_paa': 0.00931792484377405},
 'EA': {'rmse': 3.3605215388613163, 'rmse_paa': 0.023273032316798076},
 'EBAY': {'rmse': 0.9083569685945722, 'rmse_paa': 0.024841782672284655},
 'EXPE': {'rmse': 1.5353119718687407, 'rmse_paa': 0.011408857235128122},
 'FB': {'rmse': 5.674965434726192, 'rmse_paa': 0.02692788134051058},
 'FTNT': {'rmse': 11.565174838188943, 'rmse_paa': 0.11760212198107976},


In [39]:
# Average the per-company metrics and store the result under the 'overall' key.
# A previously inserted 'overall' entry is excluded so that re-running this cell
# never averages the summary row in as if it were a company.
company_metrics = [m for name, m in rmse_lstm_quarterly.items() if name != 'overall']
lstm_rmse_only = [m['rmse'] for m in company_metrics]
# statistics.mean raises StatisticsError if no company was loaded.
overall = statistics.mean(lstm_rmse_only)
lstm_rmse_paa_only = [m['rmse_paa'] for m in company_metrics]
overall_paa = statistics.mean(lstm_rmse_paa_only)
rmse_lstm_quarterly['overall'] = {'rmse': overall, 'rmse_paa': overall_paa}

In [40]:
# One row per key of rmse_lstm_quarterly, with 'rmse' and 'rmse_paa' as columns.
df2 = pd.DataFrame(rmse_lstm_quarterly).transpose()
df2

Unnamed: 0,rmse,rmse_paa
AAPL,8.59716,0.037114
ADBE,1.151609,0.004193
ADSK,2.248355,0.014303
AMD,5.110658,0.221335
AMZN,34.785549,0.016842
AVGO,4.702255,0.019633
BKNG,9.923713,0.004821
CRM,1.510447,0.009329
CSCO,0.425211,0.009318
EA,3.360522,0.023273


In [41]:
# Persist the LSTM quarterly metrics table. to_csv does not create missing
# parent directories, so make sure ./eval exists before writing.
eval_dir = os.path.join('.', 'eval')
os.makedirs(eval_dir, exist_ok=True)
df2.to_csv(os.path.join(eval_dir, 'rmse_lstm_quarterly.csv'))

### LSTM - Rolling Average

In [46]:
# Collect error metrics for every company from the LSTM prediction files
# found under ./laggedFeatures_LSTM.
rmse_lstm_rolling = {}
directory = os.path.join(".", "laggedFeatures_LSTM")
for root, dirs, files in os.walk(directory):
    for file in files:
        name_parts = file.split('-')
        if len(name_parts) < 2:
            # No dash-separated company part (for example '.DS_Store'): skip the
            # file instead of raising IndexError on the lookup below.
            continue
        # The company identifier is the second dash-separated part of the file name.
        company = name_parts[1]
        # os.walk also yields files from subdirectories; join with `root` (the
        # directory currently being visited) so the path points at the real file.
        filename = os.path.join(root, file)
        # Score every row except the last one. A dedicated name is used so the
        # `df` table built earlier in the notebook is not overwritten.
        # NOTE(review): why the final row is excluded is not visible here — confirm.
        predictions = pd.read_csv(filename).iloc[:-1]
        actual = predictions['test_value']
        pred = predictions['predicted_value']
        avg_actual = statistics.mean(actual)
        rmse = math.sqrt(mean_squared_error(actual, pred))
        # 'rmse_paa' is the RMSE divided by the mean of the actual values.
        rmse_avg_actual_price = rmse / avg_actual
        rmse_lstm_rolling[company] = {
            'rmse': rmse,
            'rmse_paa': rmse_avg_actual_price
        }

In [47]:
# Average the per-company metrics and store the result under the 'overall' key.
# A previously inserted 'overall' entry is excluded so that re-running this cell
# never averages the summary row in as if it were a company.
company_metrics = [m for name, m in rmse_lstm_rolling.items() if name != 'overall']
lstm_rmse_only = [m['rmse'] for m in company_metrics]
# statistics.mean raises StatisticsError if no company was loaded.
overall = statistics.mean(lstm_rmse_only)
lstm_rmse_paa_only = [m['rmse_paa'] for m in company_metrics]
overall_paa = statistics.mean(lstm_rmse_paa_only)
rmse_lstm_rolling['overall'] = {'rmse': overall, 'rmse_paa': overall_paa}

In [48]:
# One row per key of rmse_lstm_rolling, with 'rmse' and 'rmse_paa' as columns.
df_lstm_rolling = pd.DataFrame(rmse_lstm_rolling).transpose()
df_lstm_rolling

Unnamed: 0,rmse,rmse_paa
AAPL,2.997015,0.012954
ADBE,2.579961,0.009416
ADSK,2.790428,0.018194
AMD,3.982419,0.155069
AMZN,51.446807,0.025725
AVGO,1.585028,0.006538
BKNG,14.187181,0.007259
CRM,2.587813,0.01625
CSCO,0.620631,0.013224
EA,5.781277,0.051407


In [None]:
# Persist the LSTM rolling metrics table. to_csv does not create missing
# parent directories, so make sure ./eval exists before writing.
eval_dir = os.path.join('.', 'eval')
os.makedirs(eval_dir, exist_ok=True)
df_lstm_rolling.to_csv(os.path.join(eval_dir, 'rmse_lstm_rolling.csv'))