In [None]:
# Dependencias
!pip install plotly
!pip install cufflinks
!pip install chart_studio
!pip install ipywidgets
!pip install yfinance
!pip install EMD-signal==1.0.0
!pip install sklearn
!pip install keras
!pip install tensorflow

In [2]:
# Imports and definitions

from PyEMD import CEEMDAN

import os
import numpy as np
import pandas as pd
from pandas_datareader import data as pdr
from datetime import timedelta, datetime

%matplotlib inline
from scipy.interpolate import CubicSpline

import cufflinks as cf
import chart_studio.plotly as plotly
import plotly.offline
cf.go_offline()
cf.set_config_file(offline=True, world_readable=False)

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error 

from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense, LSTM, LeakyReLU, CuDNNLSTM, Activation
from keras.activations import tanh
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, CSVLogger

import yfinance as yf

from google.colab import drive
from sqlalchemy import create_engine

import IPython


class SplineModel():
    """Forecaster that extrapolates a cubic spline fitted to the input window.

    The spline is fitted to the last feature column of the window, with the
    positions 0..window_size-1 as the x axis, and evaluated beyond the end of
    the window.
    """

    def __init__(self):
        self.name = "SplineModel"

    def predict(self, x_window, days_ahead):
        """Extrapolate the last feature of ``x_window``.

        Args:
            x_window: array of shape (1, window_size, n_features); the leading
                axis must have length 1 because it is squeezed away.
            days_ahead: requested number of forecast points.

        Returns:
            Array of shape (1, n). n equals ``days_ahead`` when the window is
            at least that long; otherwise at most 5 points are produced, so
            n is ``min(5, days_ahead)``.
        """
        # Horizon cap applied when the window is shorter than the request.
        short_window_cap = 5

        if x_window.shape[1] >= days_ahead:
            n_points = days_ahead
        else:
            n_points = min(short_window_cap, days_ahead)

        window = np.squeeze(x_window, axis=0)
        target = window[:, window.shape[1] - 1].reshape(-1)
        spline = CubicSpline(np.arange(len(target)), target)

        # NOTE(review): the fit uses x = 0..len-1, so the first evaluated
        # point (len + 1) lies two steps past the last observation -- confirm
        # that this offset is intended.
        first_x = len(target) + 1
        forecast = [spline(first_x + step) for step in range(n_points)]

        return np.array(forecast).reshape(1, -1)


# convert history into inputs and outputs
def to_multi_step(dataset, n_out):
    """Pair every row with the next ``n_out`` values of the last column.

    For each row index i such that ``i + n_out <= len(dataset)``, the input is
    row i (all columns) and the output is the last column over rows
    i .. i + n_out - 1, so the output starts at the input row itself.

    Args:
        dataset: 2-D array indexed as ``dataset[row, column]``.
        n_out: number of consecutive target values per sample.

    Returns:
        Tuple ``(X, y)`` of numpy arrays built from the collected samples.
    """
    n_rows = len(dataset)
    target_col = dataset.shape[1] - 1

    # Only start positions that leave room for a full output window qualify.
    starts = [row for row in range(n_rows) if row + n_out <= n_rows]

    inputs = [dataset[row, :] for row in starts]
    targets = [dataset[row:row + n_out, target_col] for row in starts]

    return np.array(inputs), np.array(targets)

# Plotting definitions
# Dark layout passed as `layout=` to the plots in this notebook.
space = {
    'legend': {'bgcolor': '#1A1A1C', 'font': {'color': '#D9D9D9', 'size': 12}},
    'paper_bgcolor': '#1A1A1C',
    'plot_bgcolor': '#1A1A1C',
    'title': {'font': {'color': '#D9D9D9'}, 'x': 0.5},
    # Both axes share the same styling; each one gets its own dict objects.
    **{
        axis_key: {
            'tickfont': {'color': '#C2C2C2', 'size': 12},
            'gridcolor': '#434343',
            'titlefont': {'color': '#D9D9D9'},
            'zerolinecolor': '#666570',
            'showgrid': True,
        }
        for axis_key in ('yaxis', 'xaxis')
    },
    'titlefont': {'color': '#D9D9D9'},
}


# Needed to display results in Colab at least
# From https://stackoverflow.com/questions/52859983/interactive-matplotlib-figures-in-google-colab

def configure_plotly_browser_state():
  """Display an HTML snippet that configures RequireJS for Plotly.

  The snippet loads RequireJS from the notebook's static files and maps the
  `plotly` module name to a Plotly build served from the Plotly CDN.
  """
  requirejs_setup = '''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-1.5.1.min.js?noext',
            },
          });
        </script>
        '''
  display(IPython.core.display.HTML(requirejs_setup))

def get_ceemdan(ticker, imf, connection):
  """Load the table ``<ticker>_<imf>`` into a DataFrame indexed by ``Date``.

  Args:
    ticker: first part of the table name.
    imf: second part of the table name (joined to ``ticker`` with ``_``).
    connection: object whose ``execute(query)`` returns a result exposing
      ``fetchall()`` and ``keys()``.

  Returns:
    DataFrame with one column per result key except ``Date``, which becomes
    the index. A table without rows yields an empty frame that still carries
    the column names.
  """
  # NOTE: the table name is concatenated into the SQL text (identifiers cannot
  # be bound as query parameters), so ticker/imf must come from trusted code.
  table_name = ticker + '_' + imf
  query = 'SELECT * from ' + table_name

  result = connection.execute(query)

  # Naming the columns in the constructor keeps the frame well-formed when the
  # query returns no rows; assigning `df.columns` afterwards raises a
  # length-mismatch error on an empty frame.
  df = pd.DataFrame(result.fetchall(), columns=list(result.keys()))

  return df.set_index('Date')

def create_directories(stock, imf_level, verbose=False, base_path='/mymodels'):
  """Create ``<base_path>/<stock>/<imf_level>/Checkpoints`` level by level.

  Args:
    stock: name of the first directory level under ``base_path``.
    imf_level: name of the second directory level.
    verbose: when true, print whether each directory was created or was
      already there.
    base_path: existing root directory under which the tree is created.
      Defaults to the previously hard-coded '/mymodels'.

  Raises:
    FileNotFoundError: if ``base_path`` does not exist; it is deliberately
      not created here, so a missing root is reported instead of hidden.
  """
  stock_dir = f'{base_path}/{stock}/'
  imf_dir = f'{base_path}/{stock}/{imf_level}/'
  checkpoint_dir = f'{base_path}/{stock}/{imf_level}/Checkpoints'

  # Parents come first so that every os.mkdir call finds its parent in place.
  for path in (stock_dir, imf_dir, checkpoint_dir):
    try:
      # Attempting the creation directly avoids the gap between an existence
      # check and the mkdir call.
      os.mkdir(path)
    except FileExistsError:
      if verbose:
        print("Directory " , path ,  " already exists")
    else:
      if verbose:
        print("Directory " , path ,  " created ")


def EWA(input_list):
  """Return the final value of the exponentially weighted mean of a list.

  The mean is computed with ``span`` and ``min_periods`` both equal to the
  length of ``input_list`` and ``adjust=False``, so only the last position
  has enough observations to produce a value.
  """
  n_points = len(input_list)
  values = pd.DataFrame(input_list)

  smoothed = values.ewm(span=n_points, min_periods=n_points, adjust=False).mean()
  values['ewma'] = smoothed

  return values['ewma'].tolist()[-1]

def pandas_to_multi_step(dataset, days_ahead, window_size):
    """Cut a DataFrame into sliding (input window, future 'Close') pairs.

    Each sample takes ``window_size`` consecutive rows (all columns) as input
    and the 'Close' values of the following ``days_ahead`` rows as output.
    Rows are selected with label-based ``.loc`` slices built from the index.

    Args:
        dataset: pandas DataFrame containing a 'Close' column.
        days_ahead: number of rows in every output slice.
        window_size: number of rows in every input slice.

    Returns:
        ``(X, y)`` as two lists (DataFrames and Series respectively), or
        ``(None, None)`` after printing a message when not even the first
        sample fits.
    """
    labels = list(dataset.index)
    last_position = dataset.shape[0] - 1
    target_column = 'Close'

    inputs, targets = [], []

    for start in range(len(labels)):
        first_target = start + window_size
        last_target = first_target + days_ahead - 1

        # The sample needs rows up to `last_target`; skip it when they run out.
        if last_target > last_position:
            if start == 0:
                print(f'Not enough data for days_ahead={days_ahead}')
                return None, None
            continue

        window_labels = (labels[start], labels[first_target - 1])
        target_labels = (labels[first_target], labels[last_target])

        inputs.append(dataset.loc[window_labels[0]:window_labels[1], :])
        targets.append(dataset.loc[target_labels[0]:target_labels[1], target_column])

    return inputs, targets

In [3]:
import pickle

# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# result files that were produced by a trusted run.
# The context managers close each handle as soon as its payload has been read.
with open("Last_LIGT3_14_15_16.pkl", "rb") as a_file:
    first_output = pickle.load(a_file)

with open("Last_14_15_16.pkl", "rb") as b_file:
    second_output = pickle.load(b_file)

In [4]:
# Print the entire unpickled `first_output` object (the resulting text is very long).
print(first_output)

{'ABEV3': {'Adj Close': {'train_predicted': array([12.49719402, 12.19201984, 12.18274944, 12.26031864, 12.16436095,
       12.18396522, 12.16088253, 12.23738228, 12.17154654, 12.269811  ,
       12.4319587 , 12.31098185, 12.24374454, 12.2234919 , 12.29210946,
       12.31736268, 12.4184431 , 12.45109116, 12.36794885, 12.57351808,
       12.57234675, 12.59856081, 12.53642476, 12.51854449, 12.61948903,
       12.24184478, 12.39862203, 12.499712  , 12.38397375, 12.17184539,
       12.32543511, 12.57589028, 12.63401151, 12.57372938, 12.45341529,
       12.42757589, 12.22164641, 12.36486288, 12.37893264, 12.32688063,
       12.33848203, 12.39458014, 12.55324538, 12.61593273, 12.48248897,
       12.34325519, 12.4616261 , 12.48211574, 12.46503914, 12.28602422,
       12.33076314, 12.38135718, 12.28293354, 12.42568573, 12.59234108,
       12.51450452, 12.21895253, 12.24124369, 12.3709052 , 12.5533703 ,
       12.45929802, 12.50333021, 12.41316613, 12.44237227, 12.61172712,
       12.51377181, 

In [5]:
# Tickers whose results are looked up in `first_output`.
a_stocks = [
    'ABEV3.SA', 'BBAS3.SA', 'BBDC3.SA', 'BBSE3.SA', 'BRAP4.SA', 'BRFS3.SA', 'BRKM5.SA',
    'BRML3.SA', 'BRPR3.SA', 'BRSR6.SA', 'AMER3.SA', 'CCRO3.SA', 'CESP6.SA', 'CIEL3.SA',
    'CMIG4.SA', 'CPFE3.SA', 'CPLE6.SA', 'CSAN3.SA', 'CSNA3.SA', 'CYRE3.SA', 'DXCO3.SA',
    'ECOR3.SA', 'ELET3.SA', 'EMBR3.SA', 'ENBR3.SA', 'EQTL3.SA', 'YDUQ3.SA', 'EVEN3.SA',
    'EZTC3.SA', 'FIBR3.SA', 'GFSA3.SA', 'GGBR4.SA', 'GOAU4.SA', 'GOLL4.SA', 'HYPE3.SA',
    'IGTA3.SA', 'ITSA4.SA', 'ITUB4.SA', 'JBSS3.SA', 'KLBN11.SA', 'COGN3.SA', 'LAME4.SA',
    'LIGT3.SA',
]

# Tickers whose results are looked up in `second_output`.
b_stocks = [
    'LREN3.SA', 'MDIA3.SA', 'MGLU3.SA', 'MILS3.SA', 'MMXM3.SA', 'MRFG3.SA', 'MRVE3.SA',
    'MULT3.SA', 'ODPV3.SA', 'OIBR3.SA', 'PCAR3.SA', 'PDGR3.SA', 'PETR3.SA', 'POMO4.SA',
    'PSSA3.SA', 'ENAT3.SA', 'QUAL3.SA', 'RADL3.SA', 'RAPT4.SA', 'RENT3.SA', 'RSID3.SA',
    'SANB3.SA', 'SBSP3.SA', 'SULA11.SA', 'TAEE11.SA', 'TIMS3.SA', 'TOTS3.SA', 'UGPA3.SA',
    'USIM5.SA', 'VALE3.SA', 'VIVT3.SA', 'VLID3.SA', 'VVAR11.SA', 'WEGE3.SA',
]

# Full universe: the first group followed by the second, as a separate list.
stocks_list = a_stocks + b_stocks

In [6]:
# For every ticker, print the real and predicted 'Adj Close' validation values
# at the last position and 252 positions before it, rounded to 2 decimals.
# Each ticker group is looked up in the result dictionary it belongs to.
for tickers, results in ((a_stocks, first_output), (b_stocks, second_output)):
  for ticker in tickers:
    stock = ticker.replace('.SA', "")
    real_values = results[stock]['Adj Close']['validation_real']
    predicted_values = results[stock]['Adj Close']['validation_predicted']

    stock_len = len(real_values)
    last_position = stock_len - 1
    first_position = last_position - 252

    start_real = round(real_values[first_position], 2)
    start_predicted = round(predicted_values[first_position], 2)
    end_real = round(real_values[last_position], 2)
    end_predicted = round(predicted_values[last_position], 2)

    print(f'{stock} (REAL): INICIO {start_real} - FIM {end_real}')
    print(f'{stock} (PREVISTO): INICIO {start_predicted} - FIM {end_predicted}')
    print('\n')


ABEV3 (REAL): INICIO 12.54 - FIM 12.41
ABEV3 (PREVISTO): INICIO 12.35 - FIM 12.48


BBAS3 (REAL): INICIO 10.74 - FIM 8.25
BBAS3 (PREVISTO): INICIO 9.53 - FIM 9.66


BBDC3 (REAL): INICIO 7.94 - FIM 7.5
BBDC3 (PREVISTO): INICIO 7.52 - FIM 7.35


BBSE3 (REAL): INICIO 14.26 - FIM 13.5
BBSE3 (PREVISTO): INICIO 13.44 - FIM 13.14


BRAP4 (REAL): INICIO 1.77 - FIM 2.21
BRAP4 (PREVISTO): INICIO 2.13 - FIM 2.0


BRFS3 (REAL): INICIO 40.75 - FIM 37.34
BRFS3 (PREVISTO): INICIO 35.66 - FIM 41.92


BRKM5 (REAL): INICIO 6.07 - FIM 6.9
BRKM5 (PREVISTO): INICIO 7.5 - FIM 7.48


BRML3 (REAL): INICIO 6.36 - FIM 6.01
BRML3 (PREVISTO): INICIO 6.26 - FIM 6.23


BRPR3 (REAL): INICIO 6.59 - FIM 6.69
BRPR3 (PREVISTO): INICIO 6.73 - FIM 6.69


BRSR6 (REAL): INICIO 2.85 - FIM 2.74
BRSR6 (PREVISTO): INICIO 2.94 - FIM 3.09


AMER3 (REAL): INICIO 8.48 - FIM 8.52
AMER3 (PREVISTO): INICIO 8.63 - FIM 8.77


CCRO3 (REAL): INICIO 8.56 - FIM 8.41
CCRO3 (PREVISTO): INICIO 8.44 - FIM 8.45


CESP6 (REAL): INICIO 8.45 - FIM 

In [7]:
# Direction check over the last 252 positions of each validation series.
# `work` counts tickers that rose and whose final prediction is above the real
# starting value (printed as work_up). `not_work`, despite its name, counts
# tickers that fell and whose final prediction is below the real starting
# value (printed as work_down); the name is kept for compatibility.
work = 0
not_work = 0
gone_up = 0
gone_down = 0

# Both ticker groups go through the same code path, so every value is rounded
# to the same 4 decimals before comparing. Rounding the second group to only
# 2 decimals could turn small moves into ties and drop them from the counts.
for tickers, results in ((a_stocks, first_output), (b_stocks, second_output)):
  for ticker in tickers:
    stock = ticker.replace('.SA', "")
    real_values = results[stock]['Adj Close']['validation_real']
    predicted_values = results[stock]['Adj Close']['validation_predicted']

    stock_len = len(real_values)
    # NOTE(review): 252 is presumably one year of trading days -- confirm.
    # A series shorter than 253 points makes this index negative, which
    # wraps around instead of failing.
    start_real = round(real_values[stock_len - 1 - 252], 4)
    end_real = round(real_values[stock_len - 1], 4)
    end_predicted = round(predicted_values[stock_len - 1], 4)

    if end_real > start_real:
      gone_up += 1
      if end_predicted > start_real:
        work += 1
    elif end_real < start_real:
      gone_down += 1
      if end_predicted < start_real:
        not_work += 1

print(f'gone_up: {gone_up}')
print(f'gone_down: {gone_down}')
print(f'work_up: {work}')
print(f'work_down: {not_work}')

gone_up: 27
gone_down: 50
work_up: 21
work_down: 40


In [8]:
import pandas as pd

# One record per ticker: real value 252 positions before the end, real and
# predicted value at the end, and the absolute percentage gap between the two
# end values.
records = []
for tickers, results in ((a_stocks, first_output), (b_stocks, second_output)):
  for ticker in tickers:
    stock = ticker.replace('.SA', "")
    real_values = results[stock]['Adj Close']['validation_real']
    predicted_values = results[stock]['Adj Close']['validation_predicted']

    stock_len = len(real_values)
    last_position = stock_len - 1
    first_position = last_position - 252

    start_real = round(real_values[first_position], 4)
    start_predicted = round(predicted_values[first_position], 4)
    end_real = round(real_values[last_position], 4)
    end_predicted = round(predicted_values[last_position], 4)
    end_diff = round(abs(((end_predicted - end_real) / end_real) * 100), 2)

    records.append((stock, start_real, end_real, end_predicted, end_diff))

# Column-wise views of the records, kept under their original names.
stock_list = [record[0] for record in records]
start_real_list = [record[1] for record in records]
end_real_list = [record[2] for record in records]
end_predicted_list = [record[3] for record in records]
end_diff_list = [record[4] for record in records]

data = {'stock': stock_list,
        '01-01-2017': start_real_list,
        '30-12-2017': end_real_list,
        'predicted_end': end_predicted_list,
        '%difference': end_diff_list }
df = pd.DataFrame(data)

# Persist the table in both formats, then show it.
df.to_csv('final_results.csv')
df.to_excel('final_results.xlsx')
print(df)

     stock  01-01-2017  30-12-2017  predicted_end  %difference
0    ABEV3     12.5361     12.4079        12.4806         0.59
1    BBAS3     10.7439      8.2466         9.6555        17.08
2    BBDC3      7.9448      7.4956         7.3486         1.96
3    BBSE3     14.2624     13.5028        13.1404         2.68
4    BRAP4      1.7708      2.2066         2.0028         9.24
..     ...         ...         ...            ...          ...
72   VALE3      5.4616      6.1463         6.2203         1.20
73   VIVT3     18.5323     18.5386        19.4072         4.69
74   VLID3     11.9517     13.4664        11.9566        11.21
75  VVAR11      3.1974      2.8568         2.9571         3.51
76   WEGE3      4.6164      4.5807         4.4766         2.27

[77 rows x 5 columns]


In [9]:

plot_feature = 'Adj Close'

# Draw one interactive chart per ticker from the stored series of the selected
# feature; each ticker group reads from its own result dictionary.
for tickers, results in ((a_stocks, first_output), (b_stocks, second_output)):
  for ticker in tickers:
    stock = ticker.replace('.SA', "")
    print(f'GRÁFICO {stock}')

    # The browser-side setup is repeated before every chart.
    configure_plotly_browser_state()

    feature_frame = pd.DataFrame.from_dict(results[stock][plot_feature])
    feature_frame.iplot(title=f'{stock} {plot_feature}', layout=space)
    print('\n')



Output hidden; open in https://colab.research.google.com to view.