<a href="https://colab.research.google.com/github/claubermartins/Stock-market-prediction/blob/main/stock_market_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install inflection

Collecting inflection
  Downloading https://files.pythonhosted.org/packages/59/91/aa6bde563e0085a02a435aa99b49ef75b0a4b062635e606dab23ce18d720/inflection-0.5.1-py2.py3-none-any.whl
Installing collected packages: inflection
Successfully installed inflection-0.5.1


#**1-Importando bibliotecas**

In [2]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.callbacks import EarlyStopping 
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import tensorflow as tf
import math
import seaborn as sns
import datetime
import inflection
from IPython.core.display import HTML

#**2-Funções auxiliares para o Jupyter Notebook**

In [3]:
def jupyter_settings():
    %matplotlib inline
    %pylab inline
    
    plt.style.use( 'bmh' )
    plt.rcParams['figure.figsize'] = [25, 12]
    plt.rcParams['font.size'] = 24
    
    display( HTML( '<style>.container { width:100% !important; }</style>') )
    pd.options.display.max_columns = None
    pd.options.display.max_rows = None
    pd.set_option( 'display.expand_frame_repr', False )
    
    sns.set()

In [4]:
# Apply the notebook-wide plotting/display configuration
jupyter_settings()

Populating the interactive namespace from numpy and matplotlib


##**2.1-Método de importação de arquivos do Google Colab**

In [5]:
# Colab-only: open the file picker and list the names of the uploaded files
from google.colab import files

upload = files.upload()
print("downloaded files: ")
print("\n".join(upload))

Saving PETR4-teste.csv to PETR4-teste.csv
Saving PETR4-treinamento.csv to PETR4-treinamento.csv
downloaded files: 
PETR4-teste.csv
PETR4-treinamento.csv


#**3-Tratamento dos dados**

##**3.1-Importando os dados**

In [7]:
# Load the training prices and discard the 'Date' column
base = pd.read_csv('PETR4-treinamento.csv').drop(columns='Date')

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column
base.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,5030.0,5030.0,5030.0,5030.0,5030.0,5030.0
mean,17.891079,18.151007,17.607981,17.861287,14.436626,623868800.0
std,9.609917,9.761328,9.434506,9.571652,7.927764,4182380000.0
min,4.2,4.27,4.12,4.2,2.934877,0.0
25%,9.19,9.375312,8.963438,9.1925,7.050538,19862120.0
50%,17.464999,17.799999,17.15,17.450001,14.825717,30119600.0
75%,24.0775,24.39875,23.67,24.015,19.731367,47010400.0
max,67.5,67.5,51.950001,52.509998,39.535816,73564160000.0


In [10]:
# Data type of each remaining column
base.dtypes

Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume       float64
dtype: object

##**3.2-Tratando os Valores Faltantes**

In [11]:
# Number of missing (NaN) values per column
base.isna().sum()

Open         3
High         3
Low          3
Close        3
Adj Close    3
Volume       3
dtype: int64

In [13]:
# Locate the rows that hold a missing value in ANY column. A cell only displays
# its last expression, so the six per-column lookups used before showed just
# the 'Volume' one; a single any-column mask covers every column at once.
base.loc[base.isna().any(axis=1)]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
1260,,,,,,
4568,,,,,,
4827,,,,,,


In [14]:
# Fill the missing values (NaN) with each column's mean.
# `verbose` is no longer passed: it was deprecated in scikit-learn 1.1 and
# removed in 1.3 (passing it raises TypeError); 0 was its default anyway.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
base.iloc[:,0:6] = imputer.fit_transform(base.iloc[:,0:6])

In [15]:
# Check again that no row still holds a missing value in any column
# (one any-column mask instead of six lookups of which only the last was shown)
base.loc[base.isna().any(axis=1)]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume


##**3.3-Tratamento dos valores nulos (iguais a 0)**

In [16]:
# Number of zero-valued entries per column
(base == 0).sum()

Open           0
High           0
Low            0
Close          0
Adj Close      0
Volume       146
dtype: int64

In [17]:
# Locate the rows whose 'Volume' is zero
base.loc[base['Volume'] == 0]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
15,5.163,5.163,5.163,5.163,3.46349,0.0
44,6.038,6.038,6.038,6.038,4.050466,0.0
45,6.038,6.038,6.038,6.038,4.050466,0.0
49,6.213,6.213,6.213,6.213,4.167862,0.0
73,5.9,5.9,5.9,5.9,3.957891,0.0
78,5.1,5.1,5.1,5.1,3.421229,0.0
84,5.376,5.376,5.376,5.376,3.606377,0.0
110,5.79,5.79,5.79,5.79,3.8841,0.0
122,6.438,6.438,6.438,6.438,4.318798,0.0
177,7.1125,7.1125,7.1125,7.1125,4.771273,0.0


In [18]:
# Treat zeros as missing and replace them with each column's mean.
# `verbose` is no longer passed: it was deprecated in scikit-learn 1.1 and
# removed in 1.3 (passing it raises TypeError); 0 was its default anyway.
imputer = SimpleImputer(missing_values=0, strategy='mean')
base.iloc[:,0:6] = imputer.fit_transform(base.iloc[:,0:6])

In [19]:
# Check again for rows whose 'Volume' is zero
base.loc[base['Volume'] == 0]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume


In [17]:
#base = base.dropna()

In [18]:
#base.sample()

#**-Descrição dos dados** IGNORE ESTA CÉLULA

In [None]:
# Central tendency - mean, median (one single-row frame per statistic)
ct1 = pd.DataFrame(base.apply(np.mean)).T #.T transposes the result into one row
ct2 = pd.DataFrame(base.apply(np.median)).T
# Dispersion - std (standard deviation), min, max, range
# NOTE(review): whether apply(np.std) yields the population or the sample
# standard deviation depends on how pandas dispatches it -- confirm.
d1 = pd.DataFrame(base.apply(np.std)).T
d2 = pd.DataFrame(base.apply(min)).T
d3 = pd.DataFrame(base.apply(max)).T
d4 = pd.DataFrame(base.apply(lambda x: x.max() - x.min())).T
# Stack the statistics and transpose so that each attribute becomes one row
m = pd.concat([d2,d3,d4,ct1,ct2,d1]).T.reset_index()
m.columns = ['attributes','min','max','range', 'mean', 'median', 'std']
m

Unnamed: 0,attributes,min,max,range,mean,median,std
0,Open,4.2,37.43,33.23,19.33303,19.09,6.833255
1,High,4.27,37.5,33.23,19.5972,19.37,6.867448
2,Low,4.12,37.07,32.95,19.04492,18.78,6.786986
3,Close,4.2,37.5,33.3,19.30418,19.1,6.824516
4,Adj Close,3.896351,30.54497,26.64862,17.05664,16.70628,5.745576
5,Volume,0.0,698950600.0,698950600.0,41154020.0,35086900.0,30580530.0


In [None]:
# Number of rows in the training frame
base.shape[0]

2478

#**4-Treinamento da rede**

In [20]:
# Scale the six training columns to the [0, 1] interval with MinMaxScaler
base_treinamento = base.iloc[:, 0:6].to_numpy()
normalizador = MinMaxScaler(feature_range=(0, 1))
normalizador.fit(base_treinamento)
base_treinamento_normalizada = normalizador.transform(base_treinamento)

In [21]:
# Build the supervised training set: each sample is a window with the previous
# JANELA rows (all 6 normalised columns) and its target is the normalised value
# of COLUNA_ALVO in the row right after the window.
# The target is column 3 ('Close') so that it matches the column the test
# section compares against and inverse-transforms; it used to be column 0
# ('Open'), i.e. the network was trained on one price and evaluated on another.
JANELA = 90
COLUNA_ALVO = 3
# NOTE(review): 4785 is the original hard-coded upper bound. It is kept, only
# clamped to the data length so a shorter file cannot raise IndexError; rows
# past it are not used for training -- confirm that this is intended.
fim_treinamento = min(4785, len(base_treinamento_normalizada))
previsores = []
preco_real = []
for i in range(JANELA, fim_treinamento):
    previsores.append(base_treinamento_normalizada[i-JANELA:i, 0:6])
    preco_real.append(base_treinamento_normalizada[i, COLUNA_ALVO])
previsores, preco_real = np.array(previsores), np.array(preco_real)


In [22]:
# Network architecture: four stacked LSTM layers, each followed by dropout,
# ending in a single sigmoid output unit
regressor = Sequential([
    LSTM(units = 100, return_sequences = True, input_shape = (previsores.shape[1], 6)),
    Dropout(0.3),
    LSTM(units = 50, return_sequences = True),
    Dropout(0.3),
    LSTM(units = 50, return_sequences = True),
    Dropout(0.3),
    LSTM(units = 50),
    Dropout(0.3),
    Dense(units = 1, activation = 'sigmoid'),
])

regressor.compile(
    optimizer = 'RMSprop',
    loss = 'mean_squared_error',
    metrics = ['mean_absolute_error'],
)

# Stop once the training loss has gone 20 epochs without improving
es = EarlyStopping(monitor = 'loss', min_delta = 1e-15, patience = 20, verbose = 1)

regressor.fit(
    previsores,
    preco_real,
    epochs = 600,
    batch_size = 32,
    callbacks = [es],
)

Epoch 1/600
Epoch 2/600
Epoch 3/600
Epoch 4/600
Epoch 5/600
Epoch 6/600
Epoch 7/600
Epoch 8/600
Epoch 9/600
Epoch 10/600
Epoch 11/600
Epoch 12/600
Epoch 13/600
Epoch 14/600
Epoch 15/600
Epoch 16/600
Epoch 17/600
Epoch 18/600
Epoch 19/600
Epoch 20/600
Epoch 21/600
Epoch 22/600
Epoch 23/600
Epoch 24/600
Epoch 25/600
Epoch 26/600
Epoch 27/600
Epoch 28/600
Epoch 29/600
Epoch 30/600
Epoch 31/600
Epoch 32/600
Epoch 33/600
Epoch 34/600
Epoch 35/600
Epoch 36/600
Epoch 37/600
Epoch 38/600
Epoch 39/600
Epoch 40/600
Epoch 41/600
Epoch 42/600
Epoch 43/600
Epoch 44/600
Epoch 45/600
Epoch 46/600
Epoch 47/600
Epoch 48/600
Epoch 49/600
Epoch 50/600
Epoch 51/600
Epoch 52/600
Epoch 53/600
Epoch 54/600
Epoch 55/600
Epoch 56/600
Epoch 57/600
Epoch 58/600
Epoch 59/600
Epoch 60/600
Epoch 61/600
Epoch 62/600
Epoch 63/600
Epoch 64/600
Epoch 65/600
Epoch 66/600
Epoch 67/600
Epoch 68/600
Epoch 69/600
Epoch 70/600
Epoch 71/600
Epoch 72/600
Epoch 73/600
Epoch 74/600
Epoch 75/600
Epoch 76/600
Epoch 77/600
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f9562aebad0>

#**5-Teste da rede**

##**5.1-Importando os dados para teste**

In [23]:
# Load the test prices and discard the 'Date' column
base_teste = pd.read_csv('PETR4-teste.csv').drop(columns='Date')

In [24]:
# Summary statistics (count, mean, std, min, quartiles, max) for every test column
base_teste.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,22.0,22.0,22.0,22.0,22.0,22.0
mean,29.765454,29.995455,29.448636,29.727273,29.725069,42766950.0
std,0.7975,0.726693,0.810428,0.776624,0.776566,14891640.0
min,28.280001,28.639999,27.67,28.030001,28.027924,25397500.0
25%,29.2075,29.715001,28.9,29.2925,29.290328,32574120.0
50%,29.765,29.935001,29.545,29.75,29.747795,37336600.0
75%,30.42,30.575001,30.189999,30.3825,30.380248,47976480.0
max,30.879999,31.24,30.469999,30.809999,30.807716,81844000.0


##**5.2-Tratando os Valores Faltantes**

In [25]:
# Number of missing (NaN) values per test column
base_teste.isna().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [26]:
# Locate the test rows that hold a missing value in ANY column. A cell only
# displays its last expression, so the six per-column lookups used before
# showed just the 'Volume' one; a single any-column mask covers every column.
base_teste.loc[base_teste.isna().any(axis=1)]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume


In [27]:
# Fill the missing values (NaN) of the test frame with each test column's mean.
# `verbose` is no longer passed: it was deprecated in scikit-learn 1.1 and
# removed in 1.3 (passing it raises TypeError); 0 was its default anyway.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
base_teste.iloc[:,0:6] = imputer.fit_transform(base_teste.iloc[:,0:6])

In [29]:
# Check again that no test row still holds a missing value in any column
# (one any-column mask instead of six lookups of which only the last was shown)
base_teste.loc[base_teste.isna().any(axis=1)]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume


##**5.3-Tratamento dos valores nulos (iguais a 0)**

In [27]:
# Number of zero-valued entries per test column
(base_teste == 0).sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [31]:
# Locate the test rows whose 'Volume' is zero
base_teste.loc[base_teste['Volume'] == 0]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume


In [220]:
# Treat zeros in the test frame as missing and replace them with each test column's mean.
# `verbose` is no longer passed: it was deprecated in scikit-learn 1.1 and
# removed in 1.3 (passing it raises TypeError); 0 was its default anyway.
imputer = SimpleImputer(missing_values=0, strategy='mean')
base_teste.iloc[:,0:6] = imputer.fit_transform(base_teste.iloc[:,0:6])

In [221]:
# Check again for test rows whose 'Volume' is zero
base_teste.loc[base_teste['Volume'] == 0]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume


##**5.4-Preparando a estrutura para teste**

In [28]:
# Keep column 3 of the test frame as the real prices (2-D column array) and
# stack the test frame underneath the training frame
preco_real_teste = base_teste.iloc[:, 3:4].to_numpy()
base_completa = pd.concat([base, base_teste])

In [29]:
# Take the last `janela_teste` training rows plus every test row, scale them
# with the scaler fitted on the training data and build one window per test
# row, returned as a single np.array.
janela_teste = 90
entradas = base_completa[len(base_completa) - len(base_teste) - janela_teste:].values
entradas = normalizador.transform(entradas)
# The upper bound is derived from the data (it was a hard-coded 112, i.e.
# 90 + the 22 rows of the current test file), so a test file with a different
# number of rows is still covered completely and never yields short windows.
Percorrer_teste = np.array([entradas[i-janela_teste:i, 0:6]
                            for i in range(janela_teste, len(entradas))])

In [30]:
# Network predictions for the test windows, still in the normalised (MinMaxScaler) scale
previsoes = regressor.predict(Percorrer_teste)

In [31]:
# Fit a dedicated single-column scaler on column 3 of the training array; it is
# what later maps the predictions back to the original scale. The transformed
# array returned by fit_transform is only displayed, not stored.
normalizador_previsao = MinMaxScaler(feature_range=(0,1))
normalizador_previsao.fit_transform(base_treinamento[:,3:4])

array([[0.02794453],
       [0.02678535],
       [0.02639205],
       ...,
       [0.55288762],
       [0.54481476],
       [0.53777688]])

In [32]:
# Convert the predictions back to the original price scale.
# NOTE: `previsoes` is overwritten, so re-running this cell without re-running
# the prediction cell applies the inverse transform a second time.
previsoes = normalizador_previsao.inverse_transform(previsoes)

#**6-Análise dos resultados**

In [None]:
# Plot the real prices against the network's predictions
fig, ax = plt.subplots()
ax.plot(preco_real_teste, color = 'red', label = 'Preço real')
ax.plot(previsoes, color = 'blue', label = 'Previsões')
ax.set_title('Previsão do preço das ações')
ax.set_xlabel('Tempo em dias')
ax.set_ylabel('Preço das ações')
ax.legend()
plt.show()

In [None]:
# Mean of the predicted values
previsoes.mean()

In [None]:
# Mean of the real test prices
preco_real_teste.mean()