In [None]:
# Dependencias
!pip install plotly
!pip install cufflinks
!pip install chart_studio
!pip install ipywidgets
!pip install yfinance
!pip install EMD-signal==1.0.0
!pip install sklearn
!pip install keras
!pip install tensorflow

In [None]:
# imports and definitions

from PyEMD import CEEMDAN

import os
import numpy as np
import pandas as pd
from pandas_datareader import data as pdr
from datetime import timedelta, datetime

%matplotlib inline
from scipy.interpolate import CubicSpline

import cufflinks as cf
import chart_studio.plotly as plotly
import plotly.offline
cf.go_offline()
cf.set_config_file(offline=True, world_readable=False)

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error 

from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense, LSTM, LeakyReLU, CuDNNLSTM, Activation
from keras.activations import tanh
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, CSVLogger

import yfinance as yf

from google.colab import drive
from sqlalchemy import create_engine

import IPython


class SplineModel():
    """Cubic-spline extrapolator exposing a ``name`` attribute and a ``predict`` method.

    The model has no trainable state: every call to ``predict`` fits a fresh
    ``CubicSpline`` to the window it receives and extrapolates beyond it.
    """

    def __init__(self):
        self.name = "SplineModel"

    def predict(self, x_window, days_ahead):
        """Extrapolate the last feature column of ``x_window``.

        Parameters
        ----------
        x_window : array of shape (1, window_size, n_features)
            Input window; the leading axis is squeezed away and only the
            last column is used.
        days_ahead : int
            Requested forecast horizon.

        Returns
        -------
        numpy.ndarray of shape (1, k)
            ``k == days_ahead`` when the window is at least as long as the
            horizon; otherwise ``k == min(5, days_ahead)`` (only the next
            five points are extrapolated, so as not to overpopulate the
            results).
        """
        window_size = x_window.shape[1]
        if window_size >= days_ahead:
            n_points = days_ahead
        else:
            # Short window: cap the extrapolation at five points.
            n_points = min(5, days_ahead)

        series = np.squeeze(x_window, axis=0)[:, -1].reshape(-1)
        spline = CubicSpline(np.arange(len(series)), series)

        # NOTE(review): the first evaluated abscissa is len(series) + 1, i.e.
        # index len(series) itself is skipped -- confirm this is intended.
        forecast = [spline(len(series) + step) for step in range(1, n_points + 1)]

        return np.array(forecast).reshape(1, -1)


# convert history into inputs and outputs
def to_multi_step(dataset, n_out):
    """Turn a 2-D history array into (input row, multi-step target) pairs.

    For every row index ``i`` with ``i + n_out <= len(dataset)`` the input is
    row ``i`` (all columns) and the target is the last column of rows
    ``i .. i + n_out - 1``. Note that the target window starts at the input
    row itself.

    Returns a tuple ``(X, y)`` of numpy arrays.
    """
    total_rows = len(dataset)
    target_col = dataset.shape[1] - 1

    inputs, targets = [], []
    for start in range(total_rows):
        stop = start + n_out
        if stop > total_rows:
            # Not enough rows left for a full target window.
            continue
        inputs.append(dataset[start, :])
        targets.append(dataset[start:stop, target_col])

    return np.array(inputs), np.array(targets)

# Plotting definitions
# Dark-theme layout options (colours and fonts for legend, title and both axes).
space = dict(
    legend=dict(bgcolor='#1A1A1C', font=dict(color='#D9D9D9', size=12)),
    paper_bgcolor='#1A1A1C',
    plot_bgcolor='#1A1A1C',
    title=dict(font=dict(color='#D9D9D9'), x=0.5),
    yaxis=dict(
        tickfont=dict(color='#C2C2C2', size=12),
        gridcolor='#434343',
        titlefont=dict(color='#D9D9D9'),
        zerolinecolor='#666570',
        showgrid=True,
    ),
    xaxis=dict(
        tickfont=dict(color='#C2C2C2', size=12),
        gridcolor='#434343',
        titlefont=dict(color='#D9D9D9'),
        zerolinecolor='#666570',
        showgrid=True,
    ),
    titlefont=dict(color='#D9D9D9'),
)


# Needed to display results in Colab at least
# From https://stackoverflow.com/questions/52859983/interactive-matplotlib-figures-in-google-colab

def configure_plotly_browser_state():
  """Display an HTML snippet that loads RequireJS and maps `plotly` to the plot.ly CDN build."""
  requirejs_snippet = '''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-1.5.1.min.js?noext',
            },
          });
        </script>
        '''
  html = IPython.core.display.HTML(requirejs_snippet)
  display(html)

def get_ceemdan(ticker, imf, connection):
  """Load the table `<ticker>_<imf>` through `connection` as a DataFrame indexed by 'Date'.

  `connection` must expose `execute(query)` returning an object with
  `fetchall()` and `keys()`.

  NOTE(review): the table name is spliced into the SQL text, so `ticker` and
  `imf` must come from trusted values.
  """
  table_name = '_'.join([ticker, imf])
  result = connection.execute('SELECT * from ' + table_name)

  rows = result.fetchall()
  df = pd.DataFrame(rows)
  df.columns = result.keys()

  return df.set_index('Date')

def create_directories(stock, imf_level, verbose=False, base_path='/mymodels'):
  """Create `<base_path>/<stock>/<imf_level>/Checkpoints` one level at a time.

  Parameters:
    stock: name of the per-stock directory.
    imf_level: name of the IMF directory inside the stock directory.
    verbose: when True, print whether each directory was created or already existed.
    base_path: root under which the directories are created. Defaults to
      '/mymodels'; it must already exist.

  Raises:
    FileNotFoundError: from `os.mkdir` when `base_path` does not exist.
  """
  stock_dir = f'{base_path}/{stock}/'
  imf_dir = f'{base_path}/{stock}/{imf_level}/'
  checkpoint_dir = f'{base_path}/{stock}/{imf_level}/Checkpoints'

  # Parents come first: os.mkdir only creates a single level.
  for path in (stock_dir, imf_dir, checkpoint_dir):
    if not os.path.exists(path):
      os.mkdir(path)

      if verbose:
        print("Directory " , path ,  " created ")

    elif verbose:
      print("Directory " , path ,  " already exists")


def EWA(input_list):
  """Return the final exponentially weighted moving average of `input_list`.

  The average is computed with `span` and `min_periods` both equal to
  `len(input_list)` and `adjust=False`; only the last value is returned.
  Expects a list.
  """
  n_samples = len(input_list)

  values = pd.DataFrame(input_list)
  smoother = values.ewm(span=n_samples, min_periods=n_samples, adjust=False)
  values['ewma'] = smoother.mean()

  return values['ewma'].tolist()[-1]

def pandas_to_multi_step(dataset, days_ahead, window_size):
    """Slice a DataFrame into sliding (input window, target window) pairs.

    Each sample takes `window_size` consecutive rows (all columns) as input
    and the 'Close' column of the following `days_ahead` rows as target.
    Windows advance one row at a time and are selected by index label.

    Returns:
        (X, y): two lists, of DataFrames and of Series respectively, or
        (None, None) -- after printing a message -- when even the first
        window does not fit in `dataset`.
    """
    labels = list(dataset.index)
    n_rows = dataset.shape[0]
    span = window_size + days_ahead
    target_column = 'Close'

    inputs, targets = [], []
    for pos in range(len(labels)):
        if pos + span > n_rows:
            if pos == 0:
                print(f'Not enough data for days_ahead={days_ahead}')
                return None, None
            # Later positions cannot fit either, so stop here.
            break

        first_in = labels[pos]
        last_in = labels[pos + window_size - 1]
        first_out = labels[pos + window_size]
        last_out = labels[pos + span - 1]

        inputs.append(dataset.loc[first_in:last_in, :])
        targets.append(dataset.loc[first_out:last_out, target_column])

    return inputs, targets

In [None]:
# Google Drive and CEEMDAN decompositions connections

date_range = '13_14_15'

drive.mount('/content/gdrive')
!ln -s /content/gdrive/Shareddrives/TCC/Implementação/Capitulo3_Treinamento/db/Close /mydbs
!ln -s /content/gdrive/Shareddrives/TCC/Implementação/Capitulo3_Treinamento/models/13_14_15 /mymodels
my_conn = create_engine(f"sqlite:////mydbs/{date_range}.db")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
ln: failed to create symbolic link '/mydbs/Close': File exists


In [None]:
# Getting the data
# Full ticker universe.
# NOTE(review): this assignment is overwritten two lines below, so only the second
# subset of tickers is used by the code that follows -- confirm that is intended.
stocks_list = ['ABEV3.SA', 'BBAS3.SA', 'BBDC3.SA', 'BBSE3.SA', 'BRAP4.SA', 'BRFS3.SA', 'BRKM5.SA', 'BRML3.SA', 'BRPR3.SA', 'BRSR6.SA', 'AMER3.SA', 'CCRO3.SA', 'CESP6.SA', 'CIEL3.SA', 'CMIG4.SA', 'CPFE3.SA', 'CPLE6.SA', 'CSAN3.SA', 'CSNA3.SA', 'CYRE3.SA', 'DXCO3.SA', 'ECOR3.SA', 'ELET3.SA', 'EMBR3.SA', 'ENBR3.SA', 'EQTL3.SA', 'YDUQ3.SA', 'EVEN3.SA', 'EZTC3.SA', 'FIBR3.SA', 'GFSA3.SA', 'GGBR4.SA', 'GOAU4.SA', 'GOLL4.SA', 'HGTX3.SA', 'HYPE3.SA', 'IGTA3.SA', 'ITSA4.SA', 'ITUB4.SA', 'JBSS3.SA', 'KLBN11.SA', 'COGN3.SA', 'LAME4.SA', 'LIGT3.SA', 'LREN3.SA', 'MDIA3.SA', 'MGLU3.SA', 'MILS3.SA', 'MMXM3.SA', 'MRFG3.SA', 'MRVE3.SA', 'MULT3.SA', 'ODPV3.SA', 'OIBR3.SA', 'PCAR3.SA', 'PDGR3.SA', 'PETR3.SA', 'POMO4.SA', 'PSSA3.SA', 'ENAT3.SA', 'QUAL3.SA', 'RADL3.SA', 'RAPT4.SA', 'RENT3.SA', 'RSID3.SA', 'SANB3.SA', 'SBSP3.SA', 'SULA11.SA', 'TAEE11.SA', 'TIMS3.SA', 'TOTS3.SA', 'UGPA3.SA', 'USIM5.SA', 'VALE3.SA', 'VIVT3.SA', 'VLID3.SA', 'VVAR11.SA', 'WEGE3.SA' ]
#stocks_list = ['ABEV3.SA', 'BBAS3.SA', 'BBDC3.SA', 'BBSE3.SA', 'BRAP4.SA', 'BRFS3.SA', 'BRKM5.SA', 'BRML3.SA', 'BRPR3.SA', 'BRSR6.SA', 'AMER3.SA', 'CCRO3.SA', 'CESP6.SA', 'CIEL3.SA', 'CMIG4.SA', 'CPFE3.SA', 'CPLE6.SA', 'CSAN3.SA', 'CSNA3.SA', 'CYRE3.SA', 'DXCO3.SA', 'ECOR3.SA', 'ELET3.SA', 'EMBR3.SA', 'ENBR3.SA', 'EQTL3.SA', 'YDUQ3.SA', 'EVEN3.SA', 'EZTC3.SA', 'FIBR3.SA', 'GFSA3.SA', 'GGBR4.SA', 'GOAU4.SA', 'GOLL4.SA', 'HYPE3.SA', 'IGTA3.SA', 'ITSA4.SA', 'ITUB4.SA', 'JBSS3.SA', 'KLBN11.SA', 'COGN3.SA', 'LAME4.SA', 'LIGT3.SA']
# Subset that is actually in effect (the last assignment wins).
stocks_list = ['LREN3.SA', 'MDIA3.SA', 'MGLU3.SA', 'MILS3.SA', 'MMXM3.SA', 'MRFG3.SA', 'MRVE3.SA', 'MULT3.SA', 'ODPV3.SA', 'OIBR3.SA', 'PCAR3.SA', 'PDGR3.SA', 'PETR3.SA', 'POMO4.SA', 'PSSA3.SA', 'ENAT3.SA', 'QUAL3.SA', 'RADL3.SA', 'RAPT4.SA', 'RENT3.SA', 'RSID3.SA', 'SANB3.SA', 'SBSP3.SA', 'SULA11.SA', 'TAEE11.SA', 'TIMS3.SA', 'TOTS3.SA', 'UGPA3.SA', 'USIM5.SA', 'VALE3.SA', 'VIVT3.SA', 'VLID3.SA', 'VVAR11.SA', 'WEGE3.SA']
# IMF table suffixes that are tried, in this order, for every stock.
imf_list = ['IMF1', 'IMF2', 'IMF3', 'IMF4', 'IMF5', 'IMF6', 'IMF7', 'IMF8', 'IMF9', 'IMF10']

# For each stock: the first IMF level whose table could not be loaded.
target_feature_max_imf_level = {}
# db_dataset[stock][imf_level] -> DataFrame returned by get_ceemdan.
db_dataset = {}
for stock in stocks_list:
  stock = stock.replace('.SA', '')
  db_dataset[stock] = {}
  for imf_level in imf_list:
    try:
      db_dataset[stock][imf_level] = get_ceemdan(stock, imf_level, my_conn)

    except Exception:
      # A failed load is taken to mean the stock has no further IMF tables, so stop
      # probing higher levels. `Exception` (instead of a bare `except`) keeps
      # KeyboardInterrupt / SystemExit from being swallowed here.
      print(f'No {imf_level} for {stock}')

      target_feature_max_imf_level[stock] = imf_level
      break

# Converting the data to numpy array
numpy_dataset = {}
for key in db_dataset.keys():
  numpy_dataset[key] = {}
  for imf in db_dataset[key].keys():
    numpy_dataset[key][imf] = db_dataset[key][imf].to_numpy()

No IMF8 for LREN3
No IMF8 for MDIA3
No IMF7 for MGLU3
No IMF7 for MILS3
No IMF7 for MMXM3
No IMF8 for MRFG3
No IMF7 for MRVE3
No IMF7 for MULT3
No IMF8 for ODPV3
No IMF7 for OIBR3
No IMF8 for PCAR3
No IMF7 for PDGR3
No IMF8 for PETR3
No IMF7 for POMO4
No IMF7 for PSSA3
No IMF7 for ENAT3
No IMF7 for QUAL3
No IMF8 for RADL3
No IMF7 for RAPT4
No IMF8 for RENT3
No IMF7 for RSID3
No IMF8 for SANB3
No IMF8 for SBSP3
No IMF8 for SULA11
No IMF8 for TAEE11
No IMF7 for TIMS3
No IMF8 for TOTS3
No IMF8 for UGPA3
No IMF7 for USIM5
No IMF7 for VALE3
No IMF8 for VIVT3
No IMF7 for VLID3
No IMF7 for VVAR11
No IMF7 for WEGE3


In [None]:
# Date labels belonging to the train / validation part of every (stock, IMF) series.
train_indexes = {}
val_indexes = {}

# Slices of db_dataset matching the index lists above.
train_dataset = {}
val_dataset = {}

# Sliding-window samples (still date-indexed pandas objects) built from the train part.
date_train_x = {}
date_train_y = {}

days_ahead = 223
max_window_size = 10
windows_sizes_for_imf_level = {   # These are the number of previous days to predict days_ahead days
    'IMF1': 4,
    'IMF2': 6,
    'IMF3': 8,
    'IMF4': 8,
    'IMF5': 8,
    'IMF6': 10,
    'IMF7': 10,
    'IMF8': 10,
    'Rsd': 8,
    'DEFAULT': 8
}

for stock in stocks_list:
  stock = stock.replace('.SA', '')
  train_indexes[stock] = {}
  val_indexes[stock] = {}

  train_dataset[stock] = {}
  val_dataset[stock] = {}

  date_train_x[stock] = {}
  date_train_y[stock] = {}

  for imf_level in db_dataset[stock].keys():
    train_indexes[stock][imf_level] = []
    val_indexes[stock][imf_level] = []

    # Levels without an explicit entry (e.g. IMF9, IMF10) use the DEFAULT window,
    # as the pre-processing step does, instead of raising KeyError.
    window_size = windows_sizes_for_imf_level.get(imf_level, windows_sizes_for_imf_level['DEFAULT'])

    # Every date whose label does not contain '2015' belongs to the train part.
    index_list = list(db_dataset[stock][imf_level].index)
    for count, index in enumerate(index_list):
      if '2015' not in index:
        train_indexes[stock][imf_level].append(index)

    train_dataset[stock][imf_level] = db_dataset[stock][imf_level].loc[train_indexes[stock][imf_level][0]:train_indexes[stock][imf_level][-1], :]
    date_train_x[stock][imf_level], date_train_y[stock][imf_level]           = pandas_to_multi_step(train_dataset[stock][imf_level], days_ahead, window_size)

    # Each train_x element spans `window_size` days; this is the first day of the last element.
    last_train_x_first_day = date_train_x[stock][imf_level][-1].index[0]

    # Validation starts on the day after that one and runs to the end of the series.
    val_indexes[stock][imf_level] = list(index_list[(index_list.index(last_train_x_first_day) + 1) :])

    val_dataset[stock][imf_level] = db_dataset[stock][imf_level].loc[val_indexes[stock][imf_level][0]:val_indexes[stock][imf_level][-1], :]

In [None]:
# Disabled scratch check: the code is wrapped in a string literal, so it is not executed;
# the cell only echoes the string.
"""

a = len(train_indexes['ABEV3']['IMF1'])
b = len(val_indexes['ABEV3']['IMF1'])
c = len(db_dataset['ABEV3']['IMF1'])
c - a - b

"""

# Observation recorded when the check above was run: the difference is exactly days_ahead.
# Open question: could the maximum days_ahead be the number of days in the middle year?

"\n\na = len(train_indexes['ABEV3']['IMF1'])\nb = len(val_indexes['ABEV3']['IMF1'])\nc = len(db_dataset['ABEV3']['IMF1'])\nc - a - b\n\n"

In [None]:
# Pre processing

# NOTE(review): the original note called this the maximum for the "15-16-17" case --
# confirm it also holds for the date range in use.
days_ahead = 223
batch_size = 1

# float32 samples that are fed to the models.
train_x = {}
train_y = {}
validation_x = {}
validation_y = {}

# The same samples as date-indexed pandas objects.
date_train_x = {}
date_train_y = {}
date_validation_x = {}
date_validation_y = {}

max_window_size = 10
windows_sizes_for_imf_level = {   # These are the number of previous days to predict days_ahead days
    'IMF1': 4,
    'IMF2': 6,
    'IMF3': 8,
    'IMF4': 8,
    'IMF5': 8,
    'IMF6': 10,
    'IMF7': 10,
    'IMF8': 10,
    'Rsd': 8,
    'DEFAULT': 8
}
for stock in train_dataset.keys():
  train_x[stock] = {}
  train_y[stock] = {}
  validation_x[stock] = {}
  validation_y[stock] = {}

  date_train_x[stock] = {}
  date_train_y[stock] = {}
  date_validation_x[stock] = {}
  date_validation_y[stock] = {}


  for imf_level in train_dataset[stock].keys():
    if imf_level in windows_sizes_for_imf_level:
      window_size = windows_sizes_for_imf_level[imf_level]
    else:
      window_size = windows_sizes_for_imf_level['DEFAULT']

    date_train_x[stock][imf_level], date_train_y[stock][imf_level]           = pandas_to_multi_step(train_dataset[stock][imf_level], days_ahead, window_size)
    date_validation_x[stock][imf_level], date_validation_y[stock][imf_level] = pandas_to_multi_step(val_dataset[stock][imf_level], days_ahead, window_size)


    # Drop trailing validation samples whose input window ends on a date containing '2015'.
    # The emptiness check stops the loop cleanly once every sample has been dropped,
    # instead of raising IndexError on `[-1]`.
    while date_validation_x[stock][imf_level] and '2015' in date_validation_x[stock][imf_level][-1].index[-1]:
      date_validation_x[stock][imf_level].pop(-1)
      date_validation_y[stock][imf_level].pop(-1)


    train_x[stock][imf_level] = []
    train_y[stock][imf_level] = []

    validation_x[stock][imf_level] = []
    validation_y[stock][imf_level] = []

    for j in range(len(date_train_x[stock][imf_level])):
      # NOTE(review): unlike the validation samples below, these are kept as pandas
      # objects (no .to_numpy()); only the dtype is changed.
      numpy_x = date_train_x[stock][imf_level][j]
      numpy_y = date_train_y[stock][imf_level][j]

      train_x[stock][imf_level].append(numpy_x.astype('float32'))   # it was float64, maybe this would help with training time
      train_y[stock][imf_level].append(numpy_y.astype('float32'))

    for j in range(len(date_validation_x[stock][imf_level])):
      numpy_x = date_validation_x[stock][imf_level][j].to_numpy()
      numpy_y = date_validation_y[stock][imf_level][j].to_numpy()

      validation_x[stock][imf_level].append(numpy_x.astype('float32'))
      validation_y[stock][imf_level].append(numpy_y.astype('float32'))

In [None]:
# Disabled scratch cell (the code is wrapped in a string literal, so nothing is executed):
# prints the first/last train and validation samples of one stock / IMF for inspection.
"""

stock = 'ABEV3'
imf_level = 'IMF1'
element_index = 0

print(f'val_dataset = {val_dataset[stock][imf_level]["Close"]}')
print(f'val_x first = {date_validation_x[stock][imf_level][0]}')
print(f'val_x last = {date_validation_x[stock][imf_level][-1]}')
print(f'val_y first = {date_validation_y[stock][imf_level][0]}')
print(f'val_y last = {date_validation_y[stock][imf_level][-1]}')
print('')
print(f'train_dataset = {train_dataset[stock][imf_level]["Close"]}')
print(f'train_x first = {date_train_x[stock][imf_level][0]}')
print(f'train_x last = {date_train_x[stock][imf_level][-1]}')
print(f'train_y first = {date_train_y[stock][imf_level][0]}')
print(f'train_y last = {date_train_y[stock][imf_level][-1]}')

"""

'\n\nstock = \'ABEV3\'\nimf_level = \'IMF1\'\nelement_index = 0\n\nprint(f\'val_dataset = {val_dataset[stock][imf_level]["Close"]}\')\nprint(f\'val_x first = {date_validation_x[stock][imf_level][0]}\')\nprint(f\'val_x last = {date_validation_x[stock][imf_level][-1]}\')\nprint(f\'val_y first = {date_validation_y[stock][imf_level][0]}\')\nprint(f\'val_y last = {date_validation_y[stock][imf_level][-1]}\')\nprint(\'\')\nprint(f\'train_dataset = {train_dataset[stock][imf_level]["Close"]}\')\nprint(f\'train_x first = {date_train_x[stock][imf_level][0]}\')\nprint(f\'train_x last = {date_train_x[stock][imf_level][-1]}\')\nprint(f\'train_y first = {date_train_y[stock][imf_level][0]}\')\nprint(f\'train_y last = {date_train_y[stock][imf_level][-1]}\')\n\n'

In [None]:
# Simple load model from saved file

import os
from keras.models import load_model

# models[stock][imf_level] -> object exposing `.name` and `.predict`
# (a loaded Keras model or a SplineModel).
models = {}
for stock in stocks_list:
  stock = stock.replace('.SA', '')
  models[stock] = {}

# At this point every stock maps to an empty dict.
print(models)


# NOTE(review): the first assignment is dead (overwritten on the next line), and the
# name `dir` shadows the Python builtin for the rest of the notebook.
# NOTE(review): the path in effect points at models/14_15_16, whereas /mymodels is
# linked to models/13_14_15 -- confirm which set of models is meant to be loaded.
dir = '/mymodels/'
dir = '/content/gdrive/Shareddrives/TCC/Implementação/Capitulo3_Treinamento/models/14_15_16/'
imfs = ['IMF1', 'IMF2', 'IMF3']

for stock in stocks_list:
  stock = stock.replace('.SA', '')
  for imf in imfs:
    # Raises if the <stock>/<imf> directory does not exist.
    for file_name in os.listdir(dir + stock + '/' + imf):
      if '.h5' in file_name:
        print(file_name)
        # Only files that also contain 'model' in their name are loaded; if several
        # match, the last one listed wins.
        if 'model' in file_name:
          models[stock][imf] = load_model(dir + stock + '/' + imf + '/' + file_name)

#print(models)

# Populating the models dictionary
# Every IMF level that is not handled by a loaded network gets a SplineModel instead.
imfs_to_predict_with_neural = ['IMF1', 'IMF2', 'IMF3']
for stock in train_x.keys():
  for imf_level in train_x[stock].keys():
    if imf_level not in imfs_to_predict_with_neural:
      model = SplineModel()
      models[stock][imf_level] = model   

In [None]:
"""
 Ok, now comes the fun part
 What do you have at this point?

  db_database            - total IMFs decomposition database

  train/val datasets     - properly created according to dates
  train/val indexes      - list of dates in the datasets (aligned with the datasets)

  train/val_x/y          - actual numpy arrays (what goes into the models)
  date_train/val_x/y     - actual inputs/outputs dately aligned

  models                 - trained models

 Now.. what do you want?

 If at all possible, you'd like to assign a:

  real_train/val         - actual IMF value
  predicted_train/val    - predicted IMF value (after EWA and shit)
  x_axis_train/val       - could it be the date? like datetime, or string
                           (originally it's an integer)

  For each date in the whooole db_database

 AND, after that, you want the exact same thing, but after recomposing the IMFs

 BUT, before that, you need to deal with all the results you have (and create them)

  For each train/val_x/y element (let's call it model_element)

  element_index             - integer aligned with the train/val_x/y that generated this data
  real_train/val_x/y        - actual train/val_x/y numpy array
  predicted_train/val_x/y   - predicted IMF numpy array (same size as real_train/val)
  date_train/val_x/y        - date equivalent of the numpy arrays
 
 Then, I need to convert these to be indexed by date, not by model_element

"""

# full_results[stock][imf_level] is a list with one placeholder dict per model sample:
# first the train samples, then the validation samples.
full_results = {}

for stock in models:
    full_results[stock] = {}

    # creating full results dictionary

    for imf_level in models[stock]:
        full_results[stock][imf_level] = []
        total_train_elements = len(train_x[stock][imf_level])
        total_val_elements = len(validation_x[stock][imf_level])

        # One slot per train sample plus one per validation sample. (An earlier version
        # subtracted 249, the number of days in 2016, which appear both at the end of the
        # train part and at the beginning of the validation part.)
        for element_index in range(total_train_elements + total_val_elements):
          full_results[stock][imf_level].append({
              # train x
              'real_train_x': [],
              # there is no 'predicted_train_x': inputs are not predicted
              'date_train_x': [],

              # train y
              'real_train_y': [],
              'predicted_train_y': [],
              'date_train_y': [],

              # validation x
              'real_val_x': [],
              # there is no 'predicted_val_x': inputs are not predicted
              'date_val_x': [],

              # validation y
              'real_val_y': [],
              'predicted_val_y': [],
              'date_val_y': []
          })

In [None]:
# Sanity check: uses the values left over from the last (stock, imf_level) iteration
# of the loop that builds full_results.
total_train_elements + total_val_elements

487

In [None]:
# Disabled scratch cell (the code is wrapped in a string literal, so nothing is executed):
# prints one validation input and one train input/target for inspection.
"""

stock = 'ABEV3'
imf_level = 'IMF1'
val = 0
train = -1

print(f'validation_x = {date_validation_x[stock][imf_level][val]}')
print('')
print(f'train_x = {date_train_x[stock][imf_level][val]}')
print('')
print(f'train_y = {date_train_y[stock][imf_level][val]}')

"""

"\n\nstock = 'ABEV3'\nimf_level = 'IMF1'\nval = 0\ntrain = -1\n\nprint(f'validation_x = {date_validation_x[stock][imf_level][val]}')\nprint('')\nprint(f'train_x = {date_train_x[stock][imf_level][val]}')\nprint('')\nprint(f'train_y = {date_train_y[stock][imf_level][val]}')\n\n"

In [None]:
# Disabled scratch cell (the code is wrapped in a string literal, so nothing is executed):
# resets the state of one IMF2 model and prints its name.
"""

model = models[stock]['IMF2']
model.reset_states()
print(model.name)

"""

"\n\nmodel = models[stock]['IMF2']\nmodel.reset_states()\nprint(model.name)\n\n"

In [None]:
# Populating the full_results
# For every (stock, IMF) model, predict each train and validation sample and store
# inputs, targets, predictions and their date-indexed counterparts in full_results.
for stock in models:
  for imf_level in models[stock]:
    print(f'Predicting: [{stock}][{imf_level}]')
    model = models[stock][imf_level]

    # SplineModel has no reset_states; only models named '...sequential...' are reset.
    if 'sequential' in model.name:
      model.reset_states()

    total_train_elements = len(train_x[stock][imf_level])
    total_val_elements = len(validation_x[stock][imf_level])

    print('Predicting train')

    # Built once per (stock, IMF) rather than on every iteration: converting the whole
    # sample list to an array for each element made the loop quadratic.
    cur_train_x       = np.array(train_x[stock][imf_level])
    cur_train_y       = np.array(train_y[stock][imf_level])
    cur_date_train_x  = date_train_x[stock][imf_level]
    cur_date_train_y  = date_train_y[stock][imf_level]

    for train_element_index in range(total_train_elements):
      # The model input needs a leading batch axis of size 1.
      x, y = np.expand_dims(cur_train_x[train_element_index], axis=0), cur_train_y[train_element_index]

      first_date_x = cur_date_train_x[train_element_index].index[0]

      if model.name == 'SplineModel':
        # The spline model takes the horizon explicitly.
        days_ahead = len(y)
        yhat = model.predict(x, days_ahead)

      else:
        yhat = model.predict(x, verbose=0)

      element_index = train_element_index   # Train samples fill the first slots

      full_results[stock][imf_level][element_index]['real_train_x'] = x
      full_results[stock][imf_level][element_index]['date_train_x'] = cur_date_train_x[train_element_index]

      full_results[stock][imf_level][element_index]['real_train_y'] = y
      full_results[stock][imf_level][element_index]['predicted_train_y'] = yhat
      full_results[stock][imf_level][element_index]['date_train_y'] = cur_date_train_y[train_element_index]


    print('Predicting validation')

    # Same hoisting as for the train arrays above.
    cur_val_x         = np.array(validation_x[stock][imf_level])
    cur_val_y         = np.array(validation_y[stock][imf_level])
    cur_date_val_x    = date_validation_x[stock][imf_level]
    cur_date_val_y    = date_validation_y[stock][imf_level]

    for val_element_index in range(total_val_elements):
      x, y = np.expand_dims(cur_val_x[val_element_index], axis=0), cur_val_y[val_element_index]

      first_date_x = cur_date_val_x[val_element_index].index[0]

      if model.name == 'SplineModel':
        days_ahead = len(y)
        yhat = model.predict(x, days_ahead)

      else:
        yhat = model.predict(x, verbose=0)

      # Validation samples are stored right after the train samples. The offset is
      # taken from total_train_elements, not from the leaked loop variable
      # train_element_index, which is undefined (or stale from a previous model)
      # when there are no train samples.
      element_index = total_train_elements + val_element_index

      full_results[stock][imf_level][element_index]['real_val_x'] = x
      full_results[stock][imf_level][element_index]['date_val_x'] = cur_date_val_x[val_element_index]

      full_results[stock][imf_level][element_index]['real_val_y'] = y
      full_results[stock][imf_level][element_index]['predicted_val_y'] = yhat
      full_results[stock][imf_level][element_index]['date_val_y'] = cur_date_val_y[val_element_index]

In [None]:
#len(cur_val_y[val_element_index])

In [None]:
# Disabled scratch cell (the code is wrapped in a string literal, so nothing is executed):
# shows one element of cur_date_val_x.
"""

stock = 'ABEV3'
imf_level = 'IMF1'
element_index = 0
cur_date_val_x[element_index]

"""

"\n\nstock = 'ABEV3'\nimf_level = 'IMF1'\nelement_index = 0\ncur_date_val_x[element_index]\n\n"

In [None]:
# Disabled scratch cell (the code is wrapped in a string literal, so nothing is executed):
# prints the validation target dates stored in the last full_results element.
"""

stock = 'ABEV3'
imf_level = 'IMF1'
element_index = -1

#for index in full_results[stock][imf_level][element_index]['date_train_x'].index:
#  print(index)

full_results[stock][imf_level][element_index].keys()
print(full_results[stock][imf_level][element_index]['date_val_y'])
#print(full_results[stock][imf_level][element_index]['date_train_y'])


"""

"\n\nstock = 'ABEV3'\nimf_level = 'IMF1'\nelement_index = -1\n\n#for index in full_results[stock][imf_level][element_index]['date_train_x'].index:\n#  print(index)\n\nfull_results[stock][imf_level][element_index].keys()\nprint(full_results[stock][imf_level][element_index]['date_val_y'])\n#print(full_results[stock][imf_level][element_index]['date_train_y'])\n\n\n"

In [None]:
# Creating date_full_results
#
# For every stock / IMF level this builds one empty record per distinct date
# found in the train and validation indexes (train dates first, then the
# validation dates that were not already seen), and tags each record with the
# date on the train and/or validation x axis.

date_full_results = {}

date_indexes = {}

for stock in models:
  date_full_results[stock] = {}
  date_indexes[stock] = {}

  for imf_level in models[stock]:
    date_full_results[stock][imf_level] = []

    # Work on a copy so that extending it does NOT alter the original train_indexes
    overlap_indexes = list(train_indexes[stock][imf_level])
    overlap_indexes.extend(val_indexes[stock][imf_level])   # Has duplicates

    # dict.fromkeys removes the duplicates while keeping first-seen order
    date_indexes[stock][imf_level] = list(dict.fromkeys(overlap_indexes))

    total_dates = len(date_indexes[stock][imf_level])

    # Sets make the per-date membership tests below O(1) instead of a list scan
    train_date_set = set(train_indexes[stock][imf_level])
    val_date_set = set(val_indexes[stock][imf_level])

    for count, date_index in enumerate(date_indexes[stock][imf_level]):
      date_full_results[stock][imf_level].append({
          'real_train': [],
          'predicted_train': [],

          'real_validation': [],
          'predicted_validation': [],
          
          'x_axis_train': [],
          'x_axis_validation': [],
      })

      # There will be days with both train and validation values
      # But the train values will be "y" values and validation values "x" values

      if date_index in train_date_set:
        date_full_results[stock][imf_level][count]['x_axis_train'] = date_index

      if date_index in val_date_set:
        date_full_results[stock][imf_level][count]['x_axis_validation'] = date_index

In [None]:
"""

stock = 'ABEV3'
imf_level = 'IMF1'
date_count = 400
date_full_results[stock][imf_level][date_count]

"""

"\n\nstock = 'ABEV3'\nimf_level = 'IMF1'\ndate_count = 400\ndate_full_results[stock][imf_level][date_count]\n\n"

In [None]:
"""

stock = 'ABEV3'
imf_level = 'IMF1'
cur_date_count = 150
element_index = -1

cur_element = full_results[stock][imf_level][element_index]
cur_element['predicted_val_y']
val_indexes[stock][imf_level]

"""

"\n\nstock = 'ABEV3'\nimf_level = 'IMF1'\ncur_date_count = 150\nelement_index = -1\n\ncur_element = full_results[stock][imf_level][element_index]\ncur_element['predicted_val_y']\nval_indexes[stock][imf_level]\n\n"

In [None]:
# Populating date_full_results with the predicted values
#
# Every element of full_results holds one prediction window. Each predicted
# value is appended to the record of the date it refers to, so a date ends up
# with one prediction per window that covered it.

for stock in models:
  for imf_level in models[stock]:

    total_train_elements = len(train_x[stock][imf_level])
    total_val_elements = len(validation_x[stock][imf_level])

    cur_train_indexes = train_indexes[stock][imf_level]
    cur_val_indexes = val_indexes[stock][imf_level]

    # Dates present in both splits; date_full_results stores them only once,
    # so validation positions are shifted back by this amount.
    cur_overlap_indexes = [index for index in cur_train_indexes if index in cur_val_indexes]
    overlap_num = len(cur_overlap_indexes)

    # Train loop

    for train_element_index in range(total_train_elements):
      element_index = train_element_index
      cur_element = full_results[stock][imf_level][element_index]

      for cur_predicted_count, date_index in enumerate(cur_element['date_train_y'].index):
        cur_date_count = cur_train_indexes.index(date_index)

        try:
          # The [0] is because the first shape must be 1 to be used in the training
          date_full_results[stock][imf_level][cur_date_count]['predicted_train'] += [cur_element['predicted_train_y'][0][cur_predicted_count]]

        except IndexError:
          # The Splines predict only a few days ahead, so they don't have a value
          # for every date in date_train_y: skip the missing ones.
          # For the neural IMFs a missing value is a real error, so re-raise it.
          if imf_level in imfs_to_predict_with_neural:
            raise
          continue

    # Validation loop

    for val_element_index in range(total_val_elements):
      # Validation elements are stored right after the train elements in full_results
      element_index = total_train_elements + val_element_index
      cur_element = full_results[stock][imf_level][element_index]

      for cur_predicted_count, date_index in enumerate(cur_element['date_val_y'].index):
        cur_date_count = cur_val_indexes.index(date_index) + len(cur_train_indexes) - overlap_num

        try:
          date_full_results[stock][imf_level][cur_date_count]['predicted_validation'] += [cur_element['predicted_val_y'][0][cur_predicted_count]]

        except IndexError:
          # Same rule as in the train loop: only the Splines may lack values
          if imf_level in imfs_to_predict_with_neural:
            raise
          continue


    # NOTE(review): only the record at the last `cur_date_count` touched by the
    # validation loop is reversed here, not every date's lists. If the intent
    # was to reverse all prediction lists before averaging, this needs a loop
    # over date_full_results[stock][imf_level] - confirm before changing, since
    # it would alter the downstream averages.
    date_full_results[stock][imf_level][cur_date_count]['predicted_train'].reverse()      
    date_full_results[stock][imf_level][cur_date_count]['predicted_validation'].reverse()

# Populating date_full_results with the real values
#
# Train values go to positions 0..len(train)-1. Validation values follow, but
# shifted back by the number of dates shared by both splits, because
# date_full_results keeps a single record per distinct date.

for stock in models:
  for imf_level in models[stock]:
    cur_train_dataset = train_dataset[stock][imf_level]
    cur_val_dataset = val_dataset[stock][imf_level]

    # Recompute the overlap for THIS stock / IMF level instead of reusing
    # whatever value a previous loop happened to leave behind.
    cur_val_date_list = val_indexes[stock][imf_level]
    overlap_num = sum(1 for index in train_indexes[stock][imf_level] if index in cur_val_date_list)

    total_train_rows = len(cur_train_dataset)

    for current_train_index in range(total_train_rows):
      current_index = current_train_index
      date_full_results[stock][imf_level][current_index]['real_train'] = [cur_train_dataset.iloc[current_train_index]['Close']]
    
    for current_val_index in range(len(cur_val_dataset)):
      # Same position as before (last train index + 1 + offset), but without
      # depending on the loop variable of the train loop.
      current_index = total_train_rows + current_val_index - overlap_num
      date_full_results[stock][imf_level][current_index]['real_validation'] = [cur_val_dataset.iloc[current_val_index]['Close']]

In [None]:
"""

# Values seem too high
# I believe the Splines just doesn't care and maybe isn't too good to predicted 249 days (since it's a cubic polynomial)

stock = 'ABEV3'
imf_level = 'IMF1'
cur_date_count = 400

print(len(date_full_results[stock][imf_level][cur_date_count]['predicted_validation']))
print(len(date_full_results[stock][imf_level][cur_date_count]['predicted_train']))
date_full_results[stock][imf_level][cur_date_count]['predicted_validation']

"""

"\n\n# Values seem too high\n# I believe the Splines just doesn't care and maybe isn't too good to predicted 249 days (since it's a cubic polynomial)\n\nstock = 'ABEV3'\nimf_level = 'IMF1'\ncur_date_count = 400\n\nprint(len(date_full_results[stock][imf_level][cur_date_count]['predicted_validation']))\nprint(len(date_full_results[stock][imf_level][cur_date_count]['predicted_train']))\ndate_full_results[stock][imf_level][cur_date_count]['predicted_validation']\n\n"

In [None]:
# Creating date_results
#
# One empty record per train date, followed by one per validation date, for
# every stock / IMF level. Each record is tagged with its date on the matching
# x axis; the value lists are filled in by a later cell.

def _blank_date_record():
  """Return a fresh record with empty real / predicted / x-axis slots."""
  return {
      'real_train': [],
      'predicted_train': [],

      'real_validation': [],
      'predicted_validation': [],

      'x_axis_train': [],
      'x_axis_validation': [],
  }


date_results = {}

for stock in models:
  date_results[stock] = {}

  for imf_level in models[stock]:
    level_records = []
    date_results[stock][imf_level] = level_records

    total_train_dates = len(train_indexes[stock][imf_level])
    total_val_dates = len(val_indexes[stock][imf_level])

    # Train dates occupy the first positions
    for count_train, date_train_index in enumerate(train_indexes[stock][imf_level]):
      level_records.append(_blank_date_record())
      count = count_train
      level_records[count]['x_axis_train'] = date_train_index

    # Validation dates come right after the last train position
    for count_val, date_val_index in enumerate(val_indexes[stock][imf_level]):
      level_records.append(_blank_date_record())
      count = count_train + count_val + 1
      level_records[count]['x_axis_validation'] = date_val_index

In [None]:
# Populating date_results
#
# Non-predicted entries are copied over wrapped in a one-element list.
# Predicted entries that hold at least one value are collapsed to a single
# number with EWA; empty ones are left as they were created.

for stock in date_full_results:
  for imf_level in date_full_results[stock]:
    source_records = date_full_results[stock][imf_level]
    target_records = date_results[stock][imf_level]

    for day_count, source_record in enumerate(source_records):
      for key, stored in source_record.items():
        if 'predicted' in key:
          if len(stored) >= 1:
            target_records[day_count][key] = [EWA(stored)]
        else:
          # Copying real values (and the x-axis dates)
          target_records[day_count][key] = [stored]

In [None]:
"""

stock = 'ABEV3'
imf_level = 'IMF1'
cur_date_count = -1

date_full_results[stock][imf_level][cur_date_count]

"""

"\n\nstock = 'ABEV3'\nimf_level = 'IMF1'\ncur_date_count = -1\n\ndate_full_results[stock][imf_level][cur_date_count]\n\n"

In [None]:
# Creating original results format
#
# results[stock][feature][imf_level] maps each series name below to an empty
# list. Only the target feature gets an entry.

features_in_order = ['Open', 'High', 'Low', 'Volume', 'Adj Close']
target_feature = 'Adj Close'

result_series_names = (
    'real_train',
    'predicted_train',
    'x_axis_train',
    'real_validation',
    'predicted_validation',
    'x_axis_validation',
    'real_test',
    'predicted_test',
    'x_axis_test',
)

results = {}

for stock in models:
    results[stock] = {}

    # initializing results dictionary
    for feature in features_in_order:
        if feature == target_feature:
            results[stock][feature] = {
                imf_level: {series_name: [] for series_name in result_series_names}
                for imf_level in models[stock]
            }

In [None]:
# Copying from date_results and assigning numbers instead of date strings
#
# NOTE(review): a date whose 'predicted_*' entry is still an empty list adds
# nothing to the predicted list below, while real_* and x_axis_* always grow by
# one. If such dates exist, the predicted list is shorter and no longer lines
# up position-by-position with the other two - confirm against the data.

for stock in results:
  for imf_level in results[stock][target_feature]:

    print(f'Train Loop')
    print(f'stock={stock}')
    print(f'imf={imf_level}')

    cur_results = results[stock][target_feature][imf_level]
    cur_date_results = date_results[stock][imf_level]

    # Train loop

    total_train_days = len(train_dataset[stock][imf_level])

    for train_day in range(total_train_days):
      # real_* entries are stored as [[value]], so [0] yields the list to append
      cur_results['real_train'] += cur_date_results[train_day]['real_train'][0]
      cur_results['predicted_train'] += cur_date_results[train_day]['predicted_train']
      cur_results['x_axis_train'] += [train_day]

    # Overlap between train and validation dates for THIS stock / IMF level.
    # It must be looked up here: reusing the index lists left over from an
    # earlier cell would apply the last processed stock's overlap to every stock.
    cur_train_indexes = train_indexes[stock][imf_level]
    cur_val_indexes = val_indexes[stock][imf_level]

    cur_overlap_indexes = [index for index in cur_train_indexes if index in cur_val_indexes]
    overlap_num = len(cur_overlap_indexes)

    # Validation loop

    for val_day in range(len(val_dataset[stock][imf_level])):
      # Validation records start right after the train ones, minus the shared dates
      day = total_train_days + val_day - overlap_num
      cur_results['real_validation']      += cur_date_results[day]['real_validation'][0]
      cur_results['predicted_validation'] += cur_date_results[day]['predicted_validation']
      cur_results['x_axis_validation'] += [day]

Train Loop
stock=LREN3
imf=IMF1
Train Loop
stock=LREN3
imf=IMF2
Train Loop
stock=LREN3
imf=IMF3
Train Loop
stock=LREN3
imf=IMF4
Train Loop
stock=LREN3
imf=IMF5
Train Loop
stock=LREN3
imf=IMF6
Train Loop
stock=LREN3
imf=IMF7
Train Loop
stock=MDIA3
imf=IMF1
Train Loop
stock=MDIA3
imf=IMF2
Train Loop
stock=MDIA3
imf=IMF3
Train Loop
stock=MDIA3
imf=IMF4
Train Loop
stock=MDIA3
imf=IMF5
Train Loop
stock=MDIA3
imf=IMF6
Train Loop
stock=MDIA3
imf=IMF7
Train Loop
stock=MGLU3
imf=IMF1
Train Loop
stock=MGLU3
imf=IMF2
Train Loop
stock=MGLU3
imf=IMF3
Train Loop
stock=MGLU3
imf=IMF4
Train Loop
stock=MGLU3
imf=IMF5
Train Loop
stock=MGLU3
imf=IMF6
Train Loop
stock=MILS3
imf=IMF1
Train Loop
stock=MILS3
imf=IMF2
Train Loop
stock=MILS3
imf=IMF3
Train Loop
stock=MILS3
imf=IMF4
Train Loop
stock=MILS3
imf=IMF5
Train Loop
stock=MILS3
imf=IMF6
Train Loop
stock=MMXM3
imf=IMF1
Train Loop
stock=MMXM3
imf=IMF2
Train Loop
stock=MMXM3
imf=IMF3
Train Loop
stock=MMXM3
imf=IMF4
Train Loop
stock=MMXM3
imf=IMF5
Train Lo

In [None]:
"""

stock = 'ABEV3'
target_feature = 'Close'
imf_level = 'IMF1'
day = 1

results[stock][target_feature][imf_level]['predicted_validation']
date_results[stock][imf_level][day]['predicted_validation']

"""

"\n\nstock = 'ABEV3'\ntarget_feature = 'Close'\nimf_level = 'IMF1'\nday = 1\n\nresults[stock][target_feature][imf_level]['predicted_validation']\ndate_results[stock][imf_level][day]['predicted_validation']\n\n"

In [None]:
# organizing imf prediction results, concatenating train, validation

def _indexed_by_x(df_result, value_columns, axis_column):
    """Select the given columns, index them by `axis_column` and drop incomplete rows.

    The resulting index is renamed to 'x' so the frames of different splits can
    be placed side by side.
    """
    frame = df_result[value_columns + [axis_column]].set_index(axis_column).dropna(axis=0)
    frame.index.name = 'x'
    return frame


concatenated_results = {}

for stock in results:
    concatenated_results[stock] = {}

    for feature in results[stock]:
        concatenated_results[stock][feature] = {}

        for imf_level in results[stock][feature]:
            # The lists may have different lengths: building by rows and
            # transposing pads the shorter ones with missing values.
            df_result = pd.DataFrame.from_dict(results[stock][feature][imf_level], orient='index').T

            df_train = _indexed_by_x(df_result, ['real_train', 'predicted_train'], 'x_axis_train')
            df_validation = _indexed_by_x(df_result, ['real_validation', 'predicted_validation'], 'x_axis_validation')

            # The test split ('real_test', 'predicted_test', 'x_axis_test') is
            # not part of the concatenation.
            df_concatenated = pd.concat([df_train, df_validation], axis=1)

            concatenated_results[stock][feature][imf_level] = df_concatenated

In [None]:
# Rebuilding the scalers dictionary (the fitted scalers could be saved in a db too)
# https://stackoverflow.com/questions/41993565/save-minmaxscaler-model-in-sklearn

# Web scraping: columns kept from the downloaded price history
features_in_order = ['Open', 'High', 'Low', 'Volume', 'Adj Close'] # target feature must be the last one here
target_feature = 'Adj Close'

#stocks_list = ['ABEV3.SA', 'BBAS3.SA', 'BBDC3.SA', 'BBSE3.SA', 'BRAP4.SA', 'BRFS3.SA', 'BRKM5.SA', 'BRML3.SA', 'BRPR3.SA', 'BRSR6.SA', 'AMER3.SA', 'CCRO3.SA', 'CESP6.SA', 'CIEL3.SA', 'CMIG4.SA', 'CPFE3.SA', 'CPLE6.SA', 'CSAN3.SA', 'CSNA3.SA', 'CYRE3.SA', 'DXCO3.SA', 'ECOR3.SA', 'ELET3.SA', 'EMBR3.SA', 'ENBR3.SA', 'EQTL3.SA', 'YDUQ3.SA', 'EVEN3.SA', 'EZTC3.SA', 'FIBR3.SA', 'GFSA3.SA', 'GGBR4.SA', 'GOAU4.SA', 'GOLL4.SA', 'HGTX3.SA', 'HYPE3.SA', 'IGTA3.SA', 'ITSA4.SA', 'ITUB4.SA', 'JBSS3.SA', 'KLBN11.SA', 'COGN3.SA', 'LAME4.SA', 'LIGT3.SA', 'LREN3.SA', 'MDIA3.SA', 'MGLU3.SA', 'MILS3.SA', 'MMXM3.SA', 'MRFG3.SA', 'MRVE3.SA', 'MULT3.SA', 'ODPV3.SA', 'OIBR3.SA', 'PCAR3.SA', 'PDGR3.SA', 'PETR3.SA', 'POMO4.SA', 'PSSA3.SA', 'ENAT3.SA', 'QUAL3.SA', 'RADL3.SA', 'RAPT4.SA', 'RENT3.SA', 'RSID3.SA', 'SANB3.SA', 'SBSP3.SA', 'SULA11.SA', 'TAEE11.SA', 'TIMS3.SA', 'TOTS3.SA', 'UGPA3.SA', 'USIM5.SA', 'VALE3.SA', 'VIVT3.SA', 'VLID3.SA', 'VVAR11.SA', 'WEGE3.SA' ]

def get_stock_infos(stock, start_date, end_date):
  """Download the price history of one ticker between two dates.

  '.SA' is appended to `stock` when the ticker does not already contain it.
  Rows with any missing value are dropped from the downloaded frame, and only
  the columns listed in `features_in_order` are returned, in that order.
  """
  ticker = stock if '.SA' in stock else stock + '.SA'
  # Called before every download, as in the original flow
  # (presumably routes pandas_datareader through yfinance - see yfinance docs)
  yf.pdr_override()
  history = pdr.get_data_yahoo(ticker, start_date, end_date)
  return history.dropna()[features_in_order]

start_datetime = datetime(year=2015, month=1, day=1)
end_datetime = datetime(year=2017, month=12, day=31)


# Download every ticker and strip the '.SA' suffix, both from the key used in
# `stocks` and, in place, from the entries of stocks_list.
stocks = {}
for count, stock in enumerate(stocks_list):
  plain_ticker = stock.replace('.SA', '')
  stocks[plain_ticker] = get_stock_infos(stock, start_datetime, end_datetime)
  stocks_list[count] = plain_ticker


# Fit one MinMaxScaler per stock and column, and decompose every scaled column
# with CEEMDAN.
ceemdan = CEEMDAN()   # They add noise in the paper.. should you add it too?
decomposed_data = {}
decomposed_stock_features_series = {}
scalers = {}
for stock in stocks_list:
  if stock == 'HGTX3':
    continue

  # loop over the stocks
  print(f'{stock} Decompondo...')
  stock_dataframe = stocks[stock]
  decomposed_stock_features_series[stock] = {}
  scalers[stock] = {}
  for column in stock_dataframe.columns:
    # loop over the features
    scaler = MinMaxScaler()
    decomposed_stock_features_series[stock][column] = {}
    series = stock_dataframe[column].values.reshape(-1,1)                             # 2D column array, one row per sample
    scaler.fit(series)                                                                # Compute the minimum and maximum to be used for later scaling.
    scalers[stock][column] = scaler
    # Flatten the scaled column back to 1D. ravel() keeps the array's own dtype,
    # whereas np.frombuffer would reinterpret the raw bytes as float64.
    stock_feature_time_series = scaler.transform(series).ravel()
    stock_feature_time_series_imfs = ceemdan(stock_feature_time_series, max_imf=10)   # one row per extracted component
    for i, imf_series in enumerate(stock_feature_time_series_imfs):
      # loop over the IMFs
      # NOTE(review): an `else` branch used to store a component under 'Rsd',
      # but its guard (i < number of components) could never be false, so every
      # component - including the last one - has always been stored as
      # IMF{i+1}. That naming is kept; confirm which keys downstream code
      # expects before renaming the last component.
      decomposed_stock_features_series[stock][column][f'IMF{i+1}'] = imf_series

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [None]:
# recomposing prediction by arithmetically adding the IMF curves

def _sum_imf_series(imf_frames, column, max_window_size):
    """Add up one column across all IMF levels, aligning the series on their tails.

    Parameters:
        imf_frames: mapping of IMF level -> DataFrame holding `column`.
        column: name of the column to accumulate.
        max_window_size: largest length difference that is still aligned.

    The first level initializes the running total. For each following level,
    the longer of (running total, new series) is trimmed at the front so both
    have the same length and they are added element-wise. When the lengths
    differ by `max_window_size` or more, that level is skipped and the running
    total is left untouched.

    Returns the accumulated array, or None when `imf_frames` is empty.
    """
    running_total = None

    for imf_level in imf_frames:
        level_values = imf_frames[imf_level][column].values

        if running_total is None:
            running_total = level_values
            continue

        cur_length = running_total.shape[0]
        next_length = level_values.shape[0]

        if abs(next_length - cur_length) >= max_window_size:
            # Too far apart to be aligned: ignore this IMF level
            continue

        if cur_length < next_length:
            level_values = level_values[next_length - cur_length:]
        else:
            running_total = running_total[cur_length - next_length:]

        running_total = np.add(running_total, level_values)

    return running_total


final_prediction_results = {}
max_window_size = 15

for ticker in concatenated_results:
    if ticker == 'HGTX3':
      continue

    final_prediction_results[ticker] = {}
    for feature in concatenated_results[ticker]:
        imf_frames = concatenated_results[ticker][feature]

        # recomposing predictions
        addition_train = _sum_imf_series(imf_frames, 'predicted_train', max_window_size)
        addition_validation = _sum_imf_series(imf_frames, 'predicted_validation', max_window_size)

        # recomposing real
        addition_real_train = _sum_imf_series(imf_frames, 'real_train', max_window_size)
        addition_real_validation = _sum_imf_series(imf_frames, 'real_validation', max_window_size)

        # The scalers are keyed by 'Adj Close', never by 'Close'
        scaler_feature = 'Adj Close' if feature == 'Close' else feature

        scaler = scalers[ticker][scaler_feature]

        recomposed = {
            'train_predicted': addition_train,
            'validation_predicted': addition_validation,
            'train_real': addition_real_train,
            'validation_real': addition_real_validation,
        }

        # Undo the scaling: the scaler works on 2D columns, the results are stored flat
        final_prediction_results[ticker][feature] = {
            series_name: scaler.inverse_transform(values.reshape(-1,1)).reshape(-1)
            for series_name, values in recomposed.items()
        }

In [None]:
"""
 # plotting final result

plot_ticker = 'HYPE3'
plot_feature = 'Close'

configure_plotly_browser_state()
pd.DataFrame.from_dict(final_prediction_results[plot_ticker][plot_feature]).iplot(title=f'{plot_ticker} {plot_feature}', layout=space)
#print(final_prediction_results[plot_ticker][plot_feature])
"""


"\n # plotting final result\n\nplot_ticker = 'HYPE3'\nplot_feature = 'Close'\n\nconfigure_plotly_browser_state()\npd.DataFrame.from_dict(final_prediction_results[plot_ticker][plot_feature]).iplot(title=f'{plot_ticker} {plot_feature}', layout=space)\n#print(final_prediction_results[plot_ticker][plot_feature])\n"

In [None]:
# calculating accuracy metrics

def _drop_unpaired(real, predicted):
    """Return (real, predicted) restricted to the positions where neither is NaN.

    A single joint mask is used so the two arrays stay aligned
    position-by-position. Both arrays must have the same length.
    """
    keep = ~(np.isnan(real) | np.isnan(predicted))
    return real[keep], predicted[keep]


def _mape(real, predicted):
    """Mean absolute percentage error, in percent, relative to the real values."""
    return np.mean(np.abs((real - predicted) / real)) * 100


adj_close_accuracies = {}
accuracies_detailed = {}

for ticker in final_prediction_results:
    adj_close_accuracies[ticker] = {}
    accuracies_detailed[ticker] = {}
    for feature in final_prediction_results[ticker]:
        recomposed = final_prediction_results[ticker][feature]

        real_train, predicted_train = _drop_unpaired(recomposed['train_real'], recomposed['train_predicted'])
        real_validation, predicted_validation = _drop_unpaired(recomposed['validation_real'], recomposed['validation_predicted'])

        # Each metric is computed once and reused below
        mse_train = mean_squared_error(real_train, predicted_train)
        mse_validation = mean_squared_error(real_validation, predicted_validation)

        # The percentage error is measured against the real price (the previous
        # version divided by the predicted one)
        mape_train = _mape(real_train, predicted_train)
        mape_validation = _mape(real_validation, predicted_validation)

        accuracies_detailed[ticker][feature] = {
            'mse':{
                'train': mse_train,
                'validation': mse_validation,
            },
            'mape':{
                'train': mape_train,
                'validation': mape_validation,
            }
        }

        if feature == 'Adj Close':
            adj_close_accuracies[ticker] = {
                'mse': mse_validation,
                'mape': mape_validation
            }

# pd.DataFrame.from_dict(accuracies_detailed[plot_ticker][plot_feature])
df_close_accuracies = pd.DataFrame.from_dict(adj_close_accuracies).T
df_close_accuracies

Unnamed: 0,mse,mape
LREN3,0.814664,6.729241
MDIA3,2.214096,6.676713
MGLU3,0.004267,336.882054
MILS3,0.030374,5.599466
MMXM3,0.003226,1.309905
MRFG3,0.034747,4.018801
MRVE3,0.138622,7.311065
MULT3,0.789002,6.177706
ODPV3,0.277341,5.629112
OIBR3,0.003622,6.520355


In [None]:
# Write the validation accuracy table (one row per ticker) to a CSV file
df_close_accuracies.to_csv('14_15_16.csv')

In [None]:
# Write the final predictions dictionary to a pickle file
import pickle

# The context manager closes the file even if pickle.dump raises
with open("14_15_16.pkl", "wb") as a_file:
    pickle.dump(final_prediction_results, a_file)

In [None]:
# Read the pickle back to check what was written.
# NOTE: pickle.load can run arbitrary code - only load files produced by this notebook.
with open("14_15_16.pkl", "rb") as a_file:
    output = pickle.load(a_file)
print(output)

{'LREN3': {'Adj Close': {'train_predicted': array([10.74160901, 10.78148453, 10.66344595, 10.5365606 , 10.37435393,
       10.34275361, 10.38386071, 10.41997131, 10.4144975 , 10.44567873,
       10.4275036 , 10.52076562, 10.49344847, 10.39417128, 10.42892819,
       10.37660691, 10.33314124, 10.29055507, 10.22894535, 10.3384106 ,
       10.30453655, 10.34116528, 10.33221216, 10.28599417, 10.18181066,
       10.18407069, 10.21358124, 10.24397516, 10.29740756, 10.38364559,
       10.40420521, 10.39919956, 10.47319315, 10.46641146, 10.42790573,
       10.46676165, 10.48906775, 10.50001758, 10.48416963, 10.36460569,
       10.27510697, 10.22367768, 10.22279401, 10.24148072, 10.27505495,
       10.26431428, 10.33480665, 10.40112801, 10.39012132, 10.42000551,
       10.40121136, 10.40839593, 10.43010636, 10.39785893, 10.42822651,
       10.41688433, 10.44529127, 10.40767882, 10.34300111, 10.36340156,
       10.34936894, 10.39936763, 10.4334569 , 10.43394678, 10.42116052,
       10.43630832, 