In [74]:
import numpy as np
import os
import math
import statistics
import json
import pandas as pd
from sklearn.metrics import mean_squared_error

### Random Forest - RMSE list for every company

In [66]:
# Maps each company key to its Random Forest error metrics; starts out empty.
rmse_list = dict()

In [67]:
# Compute, for every Random Forest prediction dump under ./dump, the RMSE and
# the RMSE divided by the mean actual value, keyed by company in `rmse_list`.
directory = os.path.join(".", "dump")
for root, dirs, files in os.walk(directory):
    for file in files:
        # Join with `root` (the folder os.walk is currently visiting) rather
        # than the top-level `directory`, so files found in sub-folders
        # resolve to paths that actually exist.
        filename = os.path.join(root, file)
        # The third dash-separated token of the file name is used as the key.
        company = file.split('-')[2]
        # Read with pandas instead of np.loadtxt(dtype=data_type): `data_type`
        # is not defined anywhere in this notebook, so the original call
        # raised NameError on a fresh kernel. The first line is consumed as
        # the header (the original skipped it with skiprows=1).
        # NOTE(review): columns are taken by position (0 = date, 1 = actual,
        # 2 = predicted), mirroring the original unpack order - confirm
        # against the dump file layout.
        predictions = pd.read_csv(filename)
        actual = predictions.iloc[:, 1]
        pred = predictions.iloc[:, 2]
        avg_actual = statistics.mean(actual)
        rmse = math.sqrt(mean_squared_error(actual, pred))
        # Scale-free error: RMSE as a fraction of the mean actual value.
        rmse_avg_actual_price = rmse / avg_actual
        rmse_list[company] = {
            'rmse': rmse,
            'rmse_paa': rmse_avg_actual_price
        }

In [68]:
# Display the Random Forest metrics dict built by the loop above.
rmse_list

{'AAPL': {'rmse': 28.213137323752257, 'rmse_paa': 0.1354447295864076},
 'ADBE': {'rmse': 21.091620732278482, 'rmse_paa': 0.08180190263460808},
 'ADSK': {'rmse': 15.63096712449873, 'rmse_paa': 0.11069131830986692},
 'AMD': {'rmse': 11.690818665525388, 'rmse_paa': 0.5180569575303992},
 'AMZN': {'rmse': 199.47422544948802, 'rmse_paa': 0.10606418652715116},
 'AVGO': {'rmse': 28.42114966568135, 'rmse_paa': 0.1267481935470093},
 'BKNG': {'rmse': 99.87828404722842, 'rmse_paa': 0.05074085562410786},
 'CRM': {'rmse': 18.844226459289494, 'rmse_paa': 0.12699012922569095},
 'CSCO': {'rmse': 2.9357843506138606, 'rmse_paa': 0.06527073350240079},
 'EA': {'rmse': 12.803150589136514, 'rmse_paa': 0.09931962195059145},
 'EBAY': {'rmse': 3.151581998865209, 'rmse_paa': 0.09094433163170935},
 'EXPE': {'rmse': 3.3889951371935707, 'rmse_paa': 0.02610230732584475},
 'FB': {'rmse': 17.015006979556272, 'rmse_paa': 0.09399072728992457},
 'FTNT': {'rmse': 24.04035819397792, 'rmse_paa': 0.3144677166714317},
 'GOOG'

In [70]:
# Average the per-company RMSE values and store the result under 'overall'.
# Any 'overall' entry left by an earlier run of this cell is skipped: it is a
# plain float, so indexing it with ['rmse'] would raise TypeError when the
# cell is re-executed, and it must not feed into its own average.
rmse_only = [
    metrics['rmse']
    for name, metrics in rmse_list.items()
    if name != 'overall'
]
overall = statistics.mean(rmse_only)
# NOTE(review): 'overall' is a bare float while every other value is a dict.
# pd.DataFrame(rmse_list) appears to broadcast the float into both the 'rmse'
# and 'rmse_paa' columns, so the 'rmse_paa' cell of the 'overall' row would
# not be a real rmse_paa average - verify before relying on that row.
rmse_list['overall'] = overall

In [72]:
# Display the metrics dict again; it now also holds the 'overall' entry added above.
rmse_list

{'AAPL': {'rmse': 28.213137323752257, 'rmse_paa': 0.1354447295864076},
 'ADBE': {'rmse': 21.091620732278482, 'rmse_paa': 0.08180190263460808},
 'ADSK': {'rmse': 15.63096712449873, 'rmse_paa': 0.11069131830986692},
 'AMD': {'rmse': 11.690818665525388, 'rmse_paa': 0.5180569575303992},
 'AMZN': {'rmse': 199.47422544948802, 'rmse_paa': 0.10606418652715116},
 'AVGO': {'rmse': 28.42114966568135, 'rmse_paa': 0.1267481935470093},
 'BKNG': {'rmse': 99.87828404722842, 'rmse_paa': 0.05074085562410786},
 'CRM': {'rmse': 18.844226459289494, 'rmse_paa': 0.12699012922569095},
 'CSCO': {'rmse': 2.9357843506138606, 'rmse_paa': 0.06527073350240079},
 'EA': {'rmse': 12.803150589136514, 'rmse_paa': 0.09931962195059145},
 'EBAY': {'rmse': 3.151581998865209, 'rmse_paa': 0.09094433163170935},
 'EXPE': {'rmse': 3.3889951371935707, 'rmse_paa': 0.02610230732584475},
 'FB': {'rmse': 17.015006979556272, 'rmse_paa': 0.09399072728992457},
 'FTNT': {'rmse': 24.04035819397792, 'rmse_paa': 0.3144677166714317},
 'GOOG'

In [73]:
# Serialize the Random Forest metrics to pretty-printed JSON text.
json_object = json.dumps(rmse_list, indent = 4)

# Write the JSON to ./eval/rmse_list_rf.json. The ./eval folder is created
# first because open(..., "w") raises FileNotFoundError when the parent
# directory does not exist (e.g. on a fresh checkout).
os.makedirs(os.path.join('.', 'eval'), exist_ok=True)
with open(os.path.join('.', 'eval', 'rmse_list_rf.json'), "w") as outfile:
    outfile.write(json_object)

In [78]:
# Build a table from the metrics dict and flip it so that each key of
# `rmse_list` becomes a row.
df = pd.DataFrame(rmse_list).transpose()

In [82]:
# Display the transposed summary table (one row per key of `rmse_list`).
df

Unnamed: 0,rmse,rmse_paa
AAPL,28.213137,0.135445
ADBE,21.091621,0.081802
ADSK,15.630967,0.110691
AMD,11.690819,0.518057
AMZN,199.474225,0.106064
AVGO,28.42115,0.126748
BKNG,99.878284,0.050741
CRM,18.844226,0.12699
CSCO,2.935784,0.065271
EA,12.803151,0.09932


In [80]:
# Save the Random Forest summary table as CSV under ./eval.
df.to_csv(
    path_or_buf=os.path.join('.', 'eval', 'rmse_list_rf.csv'),
)

### LSTM - RMSE for every company

In [88]:
# Compute, for every LSTM prediction dump under ./dump_lstm, the RMSE and the
# RMSE divided by the mean actual value, keyed by company in `lstm_rmse_list`.
lstm_rmse_list = {}
directory = os.path.join(".", "dump_lstm")
for root, dirs, files in os.walk(directory):
    for file in files:
        # Join with `root` (the folder os.walk is currently visiting) rather
        # than the top-level `directory`, so files found in sub-folders
        # resolve to paths that actually exist.
        filename = os.path.join(root, file)
        # The second dash-separated token of the file name is used as the key.
        company = file.split('-')[1]
        # A dedicated name is used instead of `df` so this loop no longer
        # overwrites the Random Forest summary frame that other cells export.
        # .iloc[:-1] drops the final row without mutating in place.
        # NOTE(review): presumably the last row has no usable actual value -
        # confirm why it is excluded.
        lstm_predictions = pd.read_csv(filename).iloc[:-1]
        actual = lstm_predictions['test_value']
        pred = lstm_predictions['predicted_value']
        avg_actual = statistics.mean(actual)
        rmse = math.sqrt(mean_squared_error(actual, pred))
        # Scale-free error: RMSE as a fraction of the mean actual value.
        rmse_avg_actual_price = rmse / avg_actual
        lstm_rmse_list[company] = {
            'rmse': rmse,
            'rmse_paa': rmse_avg_actual_price
        }

In [95]:
# Display the LSTM metrics dict built by the loop above.
lstm_rmse_list

{'AAPL': {'rmse': 8.597159567944107, 'rmse_paa': 0.03711367533348716},
 'ADBE': {'rmse': 1.1516092011245125, 'rmse_paa': 0.004192903435219862},
 'ADSK': {'rmse': 2.248354836917175, 'rmse_paa': 0.01430344813906189},
 'AMD': {'rmse': 5.110657923424531, 'rmse_paa': 0.2213352132331067},
 'AMZN': {'rmse': 34.78554940893495, 'rmse_paa': 0.01684213561166444},
 'AVGO': {'rmse': 4.702255492154423, 'rmse_paa': 0.019633470590745442},
 'BKNG': {'rmse': 9.9237134644889, 'rmse_paa': 0.004820558723893337},
 'CRM': {'rmse': 1.5104472322127787, 'rmse_paa': 0.009328859736277974},
 'CSCO': {'rmse': 0.42521137901274253, 'rmse_paa': 0.00931792484377405},
 'EA': {'rmse': 3.3605215388613163, 'rmse_paa': 0.023273032316798076},
 'EBAY': {'rmse': 0.9083569685945722, 'rmse_paa': 0.024841782672284655},
 'EXPE': {'rmse': 1.5353119718687407, 'rmse_paa': 0.011408857235128122},
 'FB': {'rmse': 5.674965434726192, 'rmse_paa': 0.02692788134051058},
 'FTNT': {'rmse': 11.565174838188943, 'rmse_paa': 0.11760212198107976},


In [90]:
# Average the per-company LSTM RMSE values and store the result under
# 'overall'. Any 'overall' entry left by an earlier run of this cell is
# skipped: it is a plain float, so indexing it with ['rmse'] would raise
# TypeError when the cell is re-executed, and it must not feed into its own
# average.
lstm_rmse_only = [
    metrics['rmse']
    for name, metrics in lstm_rmse_list.items()
    if name != 'overall'
]
overall = statistics.mean(lstm_rmse_only)
# NOTE(review): 'overall' is a bare float while every other value is a dict.
# pd.DataFrame(lstm_rmse_list) appears to broadcast the float into both the
# 'rmse' and 'rmse_paa' columns - verify before relying on that row.
lstm_rmse_list['overall'] = overall

In [93]:
# Build a table from the LSTM metrics dict, flip it so that each key of
# `lstm_rmse_list` becomes a row, then show it.
df2 = pd.DataFrame(lstm_rmse_list).transpose()
df2

Unnamed: 0,rmse,rmse_paa
AAPL,8.59716,0.037114
ADBE,1.151609,0.004193
ADSK,2.248355,0.014303
AMD,5.110658,0.221335
AMZN,34.785549,0.016842
AVGO,4.702255,0.019633
BKNG,9.923713,0.004821
CRM,1.510447,0.009329
CSCO,0.425211,0.009318
EA,3.360522,0.023273


In [94]:
# Save the LSTM summary table as CSV under ./eval.
df2.to_csv(
    path_or_buf=os.path.join('.', 'eval', 'rmse_list_lstm.csv'),
)