In [15]:
import pandas as pd
import numpy as np
import pickle
from datetime import timedelta, datetime

from scipy.interpolate import CubicSpline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error 
from keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.models import load_model
import plotly.express as px
import re
from datetime import datetime
import sys, os

import cufflinks as cf
import chart_studio.plotly as plotly
import plotly.offline
cf.go_offline()
cf.set_config_file(offline=True, world_readable=False)

In [16]:
# iplot layout
space =  {
            'legend' : {'bgcolor':'#1A1A1C','font':{'color':'#D9D9D9',"size":12}},
            'paper_bgcolor' : '#1A1A1C',
            'plot_bgcolor' : '#1A1A1C',
            "title" : {"font":{"color":"#D9D9D9"},"x":0.5},
            'yaxis' : {
                'tickfont' : {'color':'#C2C2C2', "size":12},
                'gridcolor' : '#434343',
                'titlefont' : {'color':'#D9D9D9'},
                'zerolinecolor' : '#666570',
                'showgrid' : True
            },
            'xaxis' : {
                'tickfont' : {'color':'#C2C2C2', "size":12},
                'gridcolor' : '#434343',
                'titlefont' : {'color':'#D9D9D9'},
                'zerolinecolor' : '#666570',
                'showgrid' : True
            },
            'titlefont' : {'color':'#D9D9D9'}
        }

In [17]:
# helper function to model later IMFs as splines
class SplineModel():
    def __init__(self,time_series_generator):
        self.name = "SplineModel"
        self.gen = time_series_generator
    
    def predict(self, x_window, verbose=0):
        result = []
        x_window = np.squeeze(x_window, axis=0)
        last_element_index = x_window.shape[1]-1
        series = x_window[:,last_element_index].reshape(-1)
        cs = CubicSpline(np.arange(len(series)), series)
        next_value = cs(len(series)+1)
        result += [next_value]

        return np.array(result).reshape(1,-1) # 1,-1
    
class ManyToOneTimeSeriesGenerator(TimeseriesGenerator):
  def __getitem__(self, idx):
    x, y = super().__getitem__(idx)
    last_element_index = y.shape[1]-1
    return x, y[:,last_element_index].reshape(1,-1)

In [18]:
absolutepath = os.path.abspath('')
fileDirectory = os.path.dirname(absolutepath)


In [19]:
# read in models

stock_dir = os.listdir(fileDirectory + '/data/LSTM_orig_3rd_export_2020_models')
if '.DS_Store' in stock_dir:
            stock_dir.remove('.DS_Store')
models ={}
for ticker in stock_dir:
        models[ticker] = {}
        tmp_dir = os.listdir(fileDirectory+ f'/data/LSTM_orig_3rd_export_2020_models/'+ f'{ticker}/')
        if '.DS_Store' in tmp_dir:
            tmp_dir.remove('.DS_Store')
        for IMF in tmp_dir:
            # get the IMF
            tmp = re.findall("(IMF\d)", IMF)[0]
            
            if tmp in ['IMF1','IMF2']:
                models[ticker][tmp] = load_model(fileDirectory+ f'/data/LSTM_orig_3rd_export_2020_models/'+ f'{ticker}/'+ f'{IMF}')
            else:
                models[ticker][tmp] = pickle.load(open(fileDirectory+ f'/data/LSTM_orig_3rd_export_2020_models/'+ f'{ticker}/'+ f'{IMF}','rb'))

In [20]:
# example of nested structure
models['HD']['IMF2']

<tensorflow.python.keras.engine.sequential.Sequential at 0x16bf67bb0>

In [21]:
# import generators
datasets = ['train_dataset','validation_dataset','test_dataset','train_generators','validation_generators','test_generators']
data_dict ={}
for d in datasets:
    # not filename typo
    with open(fileDirectory+f'/data/LSTM_orig_3rd_export_2020_data/LSTM_full_3rd_export_2020_models_orig_space_{d}.pkl', 'rb') as fi:
        data_dict[d] = pickle.load(fi)


In [22]:
# example of nested structure
data_dict['train_generators']['HD'][2020]

{'IMF1': <__main__.ManyToOneTimeSeriesGenerator at 0x16bf678e0>,
 'IMF2': <__main__.ManyToOneTimeSeriesGenerator at 0x16bf67100>,
 'IMF3': <__main__.ManyToOneTimeSeriesGenerator at 0x16bc8cf70>,
 'IMF4': <__main__.ManyToOneTimeSeriesGenerator at 0x16bc8ce80>,
 'IMF5': <__main__.ManyToOneTimeSeriesGenerator at 0x16bc8cbe0>,
 'IMF6': <__main__.ManyToOneTimeSeriesGenerator at 0x16bc8cc10>}

In [23]:
features_in_order = ['Open', 'High', 'Low', 'Volume', 'Close'] # target feature must be the last one here
target_feature = 'Close'

# Predicting
### Note year = 2020 and data is std scale transformed in original space

In [24]:
# predicting

results = {}

for ticker in models:
    results[ticker] = {}

    # initializing results dicitionary
    for feature in features_in_order:
        if feature != target_feature:
            continue
        results[ticker][feature] = {}
        for imf_level in models[ticker]:
            results[ticker][feature][imf_level] = {
                'real_train': [],
                'predicted_train': [],
                'real_validation': [],
                'predicted_validation': [],
                'real_test': [],
                'predicted_test': [],
                'x_axis_train': [],
                'x_axis_validation': [],
                'x_axis_test': []
            }

    for imf_level in models[ticker]:
        model = models[ticker][imf_level]
        
        print(f'Predicting: [{ticker}][{imf_level}]')

        cur_train_gen = data_dict['train_generators'][ticker][2020][imf_level]
        cur_validation_gen = data_dict['validation_generators'][ticker][2020][imf_level]
        cur_test_gen = data_dict['test_generators'][ticker][2020][imf_level]

        # predicting train
        day_counter = 0
        for i in range(len(cur_train_gen)):
            x, y = cur_train_gen[i]
            yhat = model.predict(x, verbose=0)

            for j in range(yhat.shape[1]):
                results[ticker][target_feature][imf_level]['real_train'] += [y[:,j][0]]
                results[ticker][target_feature][imf_level]['predicted_train'] += [yhat[:,j][0]]
                results[ticker][target_feature][imf_level]['x_axis_train'] += [day_counter]
            day_counter += 1

        # predicting validation
        for i in range(len(cur_validation_gen)):
            x, y = cur_validation_gen[i]
            yhat = model.predict(x, verbose=0)

            for j in range(yhat.shape[1]):
                results[ticker][target_feature][imf_level]['real_validation'] += [y[:,j][0]]
                results[ticker][target_feature][imf_level]['predicted_validation'] += [yhat[:,j][0]]
                results[ticker][target_feature][imf_level]['x_axis_validation'] += [day_counter]
            day_counter += 1

        # predicting test
        for i in range(len(cur_test_gen)):
            x, y = cur_test_gen[i]
            yhat = model.predict(x, verbose=0)

            for j in range(yhat.shape[1]):
                results[ticker][target_feature][imf_level]['real_test'] += [y[:,j][0]]
                results[ticker][target_feature][imf_level]['predicted_test'] += [yhat[:,j][0]]
                results[ticker][target_feature][imf_level]['x_axis_test'] += [day_counter]
            day_counter += 1


Predicting: [HD][IMF5]
Predicting: [HD][IMF1]
Predicting: [HD][IMF4]
Predicting: [HD][IMF2]
Predicting: [HD][IMF6]
Predicting: [HD][IMF3]
Predicting: [AMAT][IMF4]
Predicting: [AMAT][IMF3]
Predicting: [AMAT][IMF2]
Predicting: [AMAT][IMF6]
Predicting: [AMAT][IMF1]
Predicting: [AMAT][IMF5]
Predicting: [JNJ][IMF6]
Predicting: [JNJ][IMF1]
Predicting: [JNJ][IMF3]
Predicting: [JNJ][IMF4]
Predicting: [JNJ][IMF2]
Predicting: [JNJ][IMF5]
Predicting: [MSFT][IMF4]
Predicting: [MSFT][IMF6]
Predicting: [MSFT][IMF3]
Predicting: [MSFT][IMF2]
Predicting: [MSFT][IMF5]
Predicting: [MSFT][IMF1]
Predicting: [XOM][IMF5]
Predicting: [XOM][IMF2]
Predicting: [XOM][IMF4]
Predicting: [XOM][IMF1]
Predicting: [XOM][IMF3]
Predicting: [XOM][IMF6]
Predicting: [CARR][IMF3]
Predicting: [CARR][IMF1]
Predicting: [CARR][IMF4]
Predicting: [CARR][IMF5]
Predicting: [CARR][IMF2]
Predicting: [UNH][IMF4]
Predicting: [UNH][IMF3]
Predicting: [UNH][IMF1]
Predicting: [UNH][IMF6]
Predicting: [UNH][IMF5]
Predicting: [UNH][IMF2]
Predi

In [25]:
# organizing imf prediction results, concatenating train, validation and test
concatenated_results = {}

for ticker in results:
    concatenated_results[ticker] = {}
    for feature in results[ticker]:
        concatenated_results[ticker][feature] = {}
        for imf_level in results[ticker][feature]:
            
            df_result = pd.DataFrame.from_dict(results[ticker][feature][imf_level], orient='index').T
            df_train = df_result[['real_train','predicted_train','x_axis_train']].set_index('x_axis_train').dropna(axis=0)
            df_train.index.name = 'x'
            df_validation = df_result[['real_validation','predicted_validation','x_axis_validation']].set_index('x_axis_validation').dropna(axis=0)
            df_validation.index.name = 'x'
            df_test = df_result[['real_test','predicted_test','x_axis_test']].set_index('x_axis_test').dropna(axis=0)
            df_test.index.name = 'x'

            df_concatenated = pd.concat([df_train,df_validation,df_test], axis=1)

            concatenated_results[ticker][feature][imf_level] = df_concatenated

In [38]:
concatenated_results['MSFT']['Close']['IMF1']['predicted_test']

x
0.0           NaN
1.0           NaN
2.0           NaN
3.0           NaN
4.0           NaN
           ...   
236.0    0.000124
237.0    0.000128
238.0    0.000125
239.0    0.000123
240.0    0.000125
Name: predicted_test, Length: 241, dtype: float64

In [26]:
# plotting partial result
plot_ticker = 'MSFT'
plot_feature = target_feature
plot_imf = 'IMF1'

concatenated_results[plot_ticker][plot_feature][plot_imf].iplot(title=f'{plot_ticker} {plot_feature} {plot_imf}', asFigure=True, layout=space)

# Recomposing prediction by arithmetically adding the IMF curves

In [27]:
# re import scalers Orig transformed
with open(fileDirectory + f'/data/scalers_orig_2021-11-20_16.04.49.pkl', 'rb') as f:
    scalers = pickle.load(f)

In [28]:
scalers['HD'][2020].keys()

dict_keys(['Open', 'High', 'Low', 'Volume', 'Close'])

In [29]:
# recomposing prediction by arithmetically adding the IMF curves

final_prediction_results = {}
max_window_size = 10

for ticker in concatenated_results:
    final_prediction_results[ticker] = {}
    for feature in concatenated_results[ticker]:
        addition_train = None
        addition_validation = None
        addition_test = None

        addition_real_train = None
        addition_real_validation = None
        addition_real_test = None

        # recomposing predictions
        for imf_level in concatenated_results[ticker][feature]:
            # adding test
            can_sum = True
            if addition_test is None:
                addition_test = concatenated_results[ticker][feature][imf_level]['predicted_test'].values
            else:
                np_array_to_be_added = concatenated_results[ticker][feature][imf_level]['predicted_test'].values
                cur_length = addition_test.shape[0]
                next_np_array_length = np_array_to_be_added.shape[0]
                if cur_length < next_np_array_length:
                    if next_np_array_length-cur_length < max_window_size:
                        np_array_to_be_added = np_array_to_be_added[next_np_array_length-cur_length:]
                    else:
                        can_sum = False
                else: 
                    if cur_length-next_np_array_length < max_window_size:
                        addition_test = addition_test[cur_length-next_np_array_length:]
                    else:
                        can_sum = False
                
                if can_sum:
                    addition_test = np.add(addition_test,np_array_to_be_added)

        for imf_level in concatenated_results[ticker][feature]:
            # adding train
            can_sum = True
            if addition_train is None:
                addition_train = concatenated_results[ticker][feature][imf_level]['predicted_train'].values
            else:
                np_array_to_be_added = concatenated_results[ticker][feature][imf_level]['predicted_train'].values
                cur_length = addition_train.shape[0]
                next_np_array_length = np_array_to_be_added.shape[0]
                if cur_length < next_np_array_length:
                    if next_np_array_length-cur_length < max_window_size:
                        np_array_to_be_added = np_array_to_be_added[next_np_array_length-cur_length:]
                    else:
                        can_sum = False
                else: 
                    if cur_length-next_np_array_length < max_window_size:
                        addition_train = addition_train[cur_length-next_np_array_length:]
                    else:
                        can_sum = False
                
                if can_sum:
                    addition_train = np.add(addition_train,np_array_to_be_added)

        for imf_level in concatenated_results[ticker][feature]:
            # adding validation
            can_sum = True
            if addition_validation is None:
                addition_validation = concatenated_results[ticker][feature][imf_level]['predicted_validation'].values
            else:
                np_array_to_be_added = concatenated_results[ticker][feature][imf_level]['predicted_validation'].values
                cur_length = addition_validation.shape[0]
                next_np_array_length = np_array_to_be_added.shape[0]
                if cur_length < next_np_array_length:
                    if next_np_array_length-cur_length < max_window_size:
                        np_array_to_be_added = np_array_to_be_added[next_np_array_length-cur_length:]
                    else:
                        can_sum = False
                else: 
                    if cur_length-next_np_array_length < max_window_size:
                        addition_validation = addition_validation[cur_length-next_np_array_length:]
                    else:
                        can_sum = False
                
                if can_sum:
                    addition_validation = np.add(addition_validation,np_array_to_be_added)

        # recomposing real
        for imf_level in concatenated_results[ticker][feature]:
            # adding test
            can_sum = True
            if addition_real_test is None:
                addition_real_test = concatenated_results[ticker][feature][imf_level]['real_test'].values
            else:
                np_array_to_be_added = concatenated_results[ticker][feature][imf_level]['real_test'].values
                cur_length = addition_real_test.shape[0]
                next_np_array_length = np_array_to_be_added.shape[0]
                if cur_length < next_np_array_length:
                    if next_np_array_length-cur_length < max_window_size:
                        np_array_to_be_added = np_array_to_be_added[next_np_array_length-cur_length:]
                    else:
                        can_sum = False
                else: 
                    if cur_length-next_np_array_length < max_window_size:
                        addition_real_test = addition_real_test[cur_length-next_np_array_length:]
                    else:
                        can_sum = False
                
                if can_sum:
                    addition_real_test = np.add(addition_real_test,np_array_to_be_added)

        for imf_level in concatenated_results[ticker][feature]:
            # adding train
            can_sum = True
            if addition_real_train is None:
                addition_real_train = concatenated_results[ticker][feature][imf_level]['real_train'].values
            else:
                np_array_to_be_added = concatenated_results[ticker][feature][imf_level]['real_train'].values
                cur_length = addition_real_train.shape[0]
                next_np_array_length = np_array_to_be_added.shape[0]
                if cur_length < next_np_array_length:
                    if next_np_array_length-cur_length < max_window_size:
                        np_array_to_be_added = np_array_to_be_added[next_np_array_length-cur_length:]
                    else:
                        can_sum = False
                else: 
                    if cur_length-next_np_array_length < max_window_size:
                        addition_real_train = addition_real_train[cur_length-next_np_array_length:]
                    else:
                        can_sum = False
                
                if can_sum:
                    addition_real_train = np.add(addition_real_train,np_array_to_be_added)

        for imf_level in concatenated_results[ticker][feature]:
            # adding validation
            can_sum = True
            if addition_real_validation is None:
                addition_real_validation = concatenated_results[ticker][feature][imf_level]['real_validation'].values
            else:
                np_array_to_be_added = concatenated_results[ticker][feature][imf_level]['real_validation'].values
                cur_length = addition_real_validation.shape[0]
                next_np_array_length = np_array_to_be_added.shape[0]
                if cur_length < next_np_array_length:
                    if next_np_array_length-cur_length < max_window_size:
                        np_array_to_be_added = np_array_to_be_added[next_np_array_length-cur_length:]
                    else:
                        can_sum = False
                else: 
                    if cur_length-next_np_array_length < max_window_size:
                        addition_real_validation = addition_real_validation[cur_length-next_np_array_length:]
                    else:
                        can_sum = False
                
                if can_sum:
                    addition_real_validation = np.add(addition_real_validation,np_array_to_be_added)
        
        scaler = scalers[ticker][2020][feature]

        final_prediction_results[ticker][feature] = {
            'train_predicted': scaler.inverse_transform(addition_train.reshape(-1,1)).reshape(-1),
            'validation_predicted': scaler.inverse_transform(addition_validation.reshape(-1,1)).reshape(-1),
            'test_predicted': scaler.inverse_transform(addition_test.reshape(-1,1)).reshape(-1),
            'train_real': scaler.inverse_transform(addition_real_train.reshape(-1,1)).reshape(-1),
            'validation_real': scaler.inverse_transform(addition_real_validation.reshape(-1,1)).reshape(-1),
            'test_real': scaler.inverse_transform(addition_real_test.reshape(-1,1)).reshape(-1)
        }

In [49]:
#scalers['MSFT'][2020]['Close']
np.exp(final_prediction_results['MSFT']['Close']['train_real'])

array([160.08999634, 162.08999634, 161.33999634, 163.27999878,
       162.13000488, 163.17999268, 166.16999817, 167.1000061 ,
       166.5       , 165.69999695, 166.72000122, 165.03999329,
       162.27999878, 165.46000671, 168.03999329, 172.77999878,
       170.22999573, 174.38000488, 180.11999512, 179.8999939 ,
       183.63000488, 183.88999939, 188.69999695, 184.44000244,
       184.71000671, 183.71000671, 185.3500061 , 187.22999573,
       187.27999878, 184.41999817, 178.58999634, 170.88999939,
       168.07000732, 170.16999817, 158.17999268, 162.00999451,
       172.78999329, 164.50999451, 170.55000305, 166.27000427,
       161.57000732, 150.61999512, 160.91999817, 153.63000488,
       139.05999756, 158.83000183, 135.41999817, 146.57000732,
       140.3999939 , 142.71000671, 137.3500061 , 135.97999573,
       148.33999634, 146.91999817, 156.11000061, 149.69999695,
       160.22999573, 157.71000671, 152.11000061, 155.25999451,
       153.83000183, 165.27000427, 163.49000549, 165.13

In [30]:
# plotting final result
plot_ticker = 'MSFT'
plot_feature = 'Close'

pd.DataFrame.from_dict(final_prediction_results[plot_ticker][plot_feature]).iplot(title=f'{plot_ticker} {plot_feature} {plot_imf}', layout=space)

In [39]:
final_prediction_results[plot_ticker][plot_feature]

{'train_predicted': array([5.09710137, 5.10228939, 5.10699611, 5.11188501, 5.11405852,
        5.11284427, 5.11386887, 5.11627646, 5.11703297, 5.11751117,
        5.11940131, 5.12327174, 5.12902396, 5.13847928, 5.15279262,
        5.16922945, 5.18225822, 5.19750474, 5.21201392, 5.22375244,
        5.23196874, 5.23562102, 5.23614764, 5.23880233, 5.23682435,
        5.23001357, 5.22033211, 5.21034696, 5.19853693, 5.18524436,
        5.17144513, 5.15924138, 5.14851358, 5.13978983, 5.13367641,
        5.12904667, 5.12040965, 5.10875538, 5.09467083, 5.07820448,
        5.06365239, 5.04777476, 5.03120777, 5.01467959, 4.99877128,
        4.98549328, 4.97217331, 4.95580852, 4.94282347, 4.95292529,
        4.97099405, 4.99413785, 5.03157193, 5.06489564, 5.05855206,
        5.05105519, 5.0395331 , 5.02666198, 5.05594571, 5.06991772,
        5.09147519, 5.12073021, 5.12403675, 5.13727957, 5.15239186,
        5.15849716, 5.16308957, 5.16784082, 5.16832172, 5.1634259 ,
        5.1571429 , 5.1575795

# Calculating accuracy metrics

In [92]:
# This is carried over from previous notebook:
# financial_forecasting_analysis/notebooks/01_DNN_LTSM_modeling.ipynb
imfs_to_predict_with_neural = ['IMF1', 'IMF2']

experiment_time = '17_16_29_11_20_2021'

In [98]:
adj_close_accuracies = {}
accuracies_detailed = {}

for ticker in final_prediction_results:
    adj_close_accuracies[ticker] = {}
    accuracies_detailed[ticker] = {}
    for feature in final_prediction_results[ticker]:

        y_train = final_prediction_results[ticker][feature]['train_predicted'][~np.isnan(final_prediction_results[ticker][feature]['train_predicted'])]
        yhat_train = final_prediction_results[ticker][feature]['train_real'][~np.isnan(final_prediction_results[ticker][feature]['train_real'])]

        y_validation = final_prediction_results[ticker][feature]['validation_predicted'][~np.isnan(final_prediction_results[ticker][feature]['validation_predicted'])]
        yhat_validation = final_prediction_results[ticker][feature]['validation_real'][~np.isnan(final_prediction_results[ticker][feature]['validation_real'])]

        y_test = final_prediction_results[ticker][feature]['test_predicted'][~np.isnan(final_prediction_results[ticker][feature]['test_predicted'])]
        yhat_test = final_prediction_results[ticker][feature]['test_real'][~np.isnan(final_prediction_results[ticker][feature]['test_real'])]
        accuracies_detailed[ticker][feature] = {
            'mse':{
                'train':mean_squared_error(y_train,yhat_train),
                'validation':mean_squared_error(y_validation,yhat_validation),
                'test':mean_squared_error(y_test,yhat_test),
            },
            'mape':{
                'train':np.mean(np.abs((y_train - yhat_train) / y_train)) * 100,
                'validation':np.mean(np.abs((y_validation - yhat_validation) / y_validation)) * 100,
                'test':np.mean(np.abs((y_test - yhat_test) / y_test)) * 100,
            }
        }

        if feature == 'Close':
            adj_close_accuracies[ticker] = {
                'mse': mean_squared_error(y_test,yhat_test),
                'mape': np.mean(np.abs((y_validation - yhat_validation) / y_validation)) * 100
            }

# pd.DataFrame.from_dict(accuracies_detailed[plot_ticker][plot_feature])
df_close_accuracies = pd.DataFrame.from_dict(adj_close_accuracies).T
df_close_accuracies.to_csv(fileDirectory + f"/data/DNN_metrics/{'_'.join(imfs_to_predict_with_neural)}_full_spline_{experiment_time}.csv", sep=',', encoding='utf-8')
df_close_accuracies

Unnamed: 0,mse,mape
HD,3.909088e-08,0.003525
AMAT,4.399288e-07,0.00805
JNJ,5.04385e-09,0.000778
MSFT,1.192086e-07,0.002703
XOM,6.479802e-08,0.007094
CARR,2.995169e-07,0.007409
UNH,9.473827e-08,0.003909
WAT,8.090326e-07,0.006639
ADSK,9.615571e-08,0.004869
MTD,1.182767e-06,0.010045
