<a href="https://colab.research.google.com/github/italomarcelogit/keras-regressor/blob/main/keras_regressor_Previs%C3%A3o_de_Vendas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **TENSORFLOW - keras**
Exemplo de previsão de vendas utilizando features de tempo e o valor total de vendas agrupado mensalmente.

In [1]:
# bibliotecas
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.graph_objects as go
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Print the installed TensorFlow version.
print(tf.__version__)

2.6.0


# **Dataset**

In [2]:
# Load the semicolon-separated sales CSV from the project's GitHub repository.
df = pd.read_csv('https://raw.githubusercontent.com/italomarcelogit/keras-regressor/main/dados.csv', sep=';')
# Show the first rows of the loaded table.
df.head()

Unnamed: 0,ds,y,y1,y2,y3,y4,y5,y6
0,2000-01-01,750.0,100.0,100.0,150.0,150.0,130.0,120.0
1,2000-02-01,750.0,100.0,100.0,150.0,150.0,130.0,120.0
2,2000-03-01,1000.0,145.0,145.0,200.0,200.0,150.0,160.0
3,2000-04-01,1000.0,145.0,145.0,200.0,200.0,150.0,160.0
4,2000-05-01,1000.0,145.0,145.0,200.0,200.0,150.0,160.0


In [3]:
# função para gerar gráfico scatter, utilizando plotly
def scatter(data, x, y, txt_titulo='', txt_xaxis='', txt_yaxis=''):
  """Plot one line+marker trace per (x column, y column) pair using plotly.

  Args:
    data: Table indexable by column name (e.g. a pandas DataFrame).
    x: Column name, or iterable of column names, for the horizontal axis.
    y: Column name, or iterable of column names, for the vertical axis.
      Each trace is labelled with its y column name.
    txt_titulo: Figure title.
    txt_xaxis: Title of the horizontal axis.
    txt_yaxis: Title of the vertical axis.

  Returns:
    None. The figure is rendered with ``fig.show()``.
  """
  # A bare string would otherwise be iterated character by character.
  x_cols = [x] if isinstance(x, str) else list(x)
  y_cols = [y] if isinstance(y, str) else list(y)

  fig = go.Figure()
  for x_col in x_cols:
    for y_col in y_cols:
      fig.add_trace(go.Scatter(x=data[x_col], y=data[y_col],
                               mode='lines+markers',
                               name=y_col))
  fig.update_layout(title=txt_titulo,
                    xaxis_title=txt_xaxis,
                    yaxis_title=txt_yaxis,
                    template='plotly_white',
                    xaxis=dict(tickangle=80))
  fig.show()

# Plot the historical sales values (column 'y') against the date column 'ds'.
scatter(df, x=['ds'], y=['y'], txt_titulo='Vendas Realizadas - História',
        txt_xaxis='Período', txt_yaxis='Total de Vendas')

# **PRÉ-PROCESSAMENTO**

In [4]:
# Work on an independent copy holding only the date ('ds') and total sales ('y').
df2 = df[['ds', 'y']].copy()
df2.head()

Unnamed: 0,ds,y
0,2000-01-01,750.0
1,2000-02-01,750.0
2,2000-03-01,1000.0
3,2000-04-01,1000.0
4,2000-05-01,1000.0


In [5]:
# Inspect column dtypes and non-null counts.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 264 entries, 0 to 263
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ds      264 non-null    object 
 1   y       264 non-null    float64
dtypes: float64(1), object(1)
memory usage: 4.2+ KB


In [6]:
# Derive the time features (day of year, year) from the date column.
# The column is parsed once and read through the .dt accessor, instead of
# calling pd.to_datetime on a one-element list twice for every row.
datas = pd.to_datetime(df2['ds'])
df2['diaAno'] = datas.dt.dayofyear
df2['ano'] = datas.dt.year
df2.head()

Unnamed: 0,ds,y,diaAno,ano
0,2000-01-01,750.0,1,2000
1,2000-02-01,750.0,32,2000
2,2000-03-01,1000.0,61,2000
3,2000-04-01,1000.0,92,2000
4,2000-05-01,1000.0,122,2000


In [7]:
# Total number of missing cells across every column of df2.
qtdna = df2.isna().sum().sum()
if not qtdna:
  # Nothing to clean: just report the (zero) count.
  print(f'Existem {qtdna} valores nulos')
else:
  print(f'Existem {qtdna} valores nulos. Excluindo valores NA')
  df2 = df2.dropna()
  # Print the missing-cell count again after dropping the affected rows.
  print(df2.isna().sum().sum())

Existem 0 valores nulos


In [8]:
# Keep every year here: only the two time features and the target column are
# selected. (An earlier variant filtered to ano < 2021 at this point.)
dataset = df2[['diaAno', 'ano', 'y']].copy()

In [9]:
# Preview the modelling table (features plus target).
dataset.head()

Unnamed: 0,diaAno,ano,y
0,1,2000,750.0
1,32,2000,750.0
2,61,2000,1000.0
3,92,2000,1000.0
4,122,2000,1000.0


In [10]:
# FEATURES: chronological train/test split -- rows with ano < 2021 train the
# model, rows with ano > 2020 are held out for testing.
# .copy() makes each subset an independent frame: a later cell removes the
# label column from them in place with pop(), which should not operate on a
# filtered selection of `dataset`.
dftreino = dataset[dataset.ano<2021].copy()
dfteste = dataset[dataset.ano>2020].copy()
dataset.shape, dftreino.shape, dfteste.shape

((264, 3), (252, 3), (12, 3))

In [11]:
# FEATURES: descriptive statistics of the training split, one row per feature.
# The target column 'y' is left out of the summary.
dfstats = dftreino.describe().drop(columns='y').transpose()
dfstats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
diaAno,252.0,167.738095,105.290732,1.0,83.5,167.5,252.25,336.0
ano,252.0,2010.0,6.067351,2000.0,2005.0,2010.0,2015.0,2020.0


In [12]:
# LABELS: remove the target column 'y' from each split and keep it as the
# label series.
# NOTE: pop() mutates dftreino / dfteste in place, so re-running this cell
# without re-running the split raises KeyError.
labels_treino = dftreino.pop('y')
labels_teste = dfteste.pop('y')
labels_treino.shape, labels_teste.shape

((252,), (12,))

In [13]:
# NORMALIZE THE DATA, to reduce the difference in scale between the features.
# Preview of the raw (un-normalized) training features:
dftreino.head(2)

Unnamed: 0,diaAno,ano
0,1,2000
1,32,2000


In [14]:
def normaliza(dataframe, stats=None):
  """Standardize feature columns to z-scores.

  Args:
    dataframe: Features to scale. Its columns are matched by name against
      the index of ``stats``.
    stats: Optional table with 'mean' and 'std' columns, indexed by feature
      name. When omitted, the module-level ``dfstats`` is used.

  Returns:
    ``(dataframe - mean) / std``, computed column by column.
  """
  if stats is None:
    # Backward-compatible default: statistics prepared earlier in the notebook.
    stats = dfstats
  return (dataframe - stats['mean']) / stats['std']

In [15]:
# Standardize the training features and preview the result.
dftreino_normalizado = normaliza(dftreino)
dftreino_normalizado.head(2)

Unnamed: 0,diaAno,ano
0,-1.583597,-1.648166
1,-1.289174,-1.648166


In [16]:
# Apply the same scaling to the test features. normaliza reads its mean/std
# from dfstats, which was computed from the training split.
dfteste_normalizado = normaliza(dfteste)

# **Modelo keras**

**Criar e examinar o modelo**

In [17]:
# Reset Keras' global state and fix the TensorFlow and NumPy random seeds.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

def criarModelo(n_features=None, learning_rate=0.01):
  """Build and compile the regression network.

  The network has two hidden Dense layers of 64 ReLU units and a single
  linear output unit. It is compiled with MSE loss, the RMSprop optimizer,
  and MAE / MSE as reported metrics.

  Args:
    n_features: Number of input features. When omitted, it is taken from the
      number of columns of the module-level ``dftreino``.
    learning_rate: Learning rate passed to RMSprop.

  Returns:
    The compiled ``keras.Sequential`` model.
  """
  if n_features is None:
    # Backward-compatible default: width of the training feature table.
    n_features = len(dftreino.keys())

  model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=[n_features]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
  ])

  # Other optimizers noted by the author as options: Adam(0.2), Adamax(0.1).
  optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

  model.compile(loss='mse',
                optimizer=optimizer,
                metrics=['mae', 'mse'])
  return model

In [18]:
# Create the model.
modelo = criarModelo()

In [19]:
# Inspect the model: layers, output shapes and parameter counts.
modelo.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                192       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
Total params: 4,417
Trainable params: 4,417
Non-trainable params: 0
_________________________________________________________________


**Treinar o modelo**

In [20]:
# classe para callback
class pontoEpoch(keras.callbacks.Callback):
  """Progress indicator: prints one dot per finished epoch, 100 dots per line."""

  def on_epoch_end(self, epoch, logs=None):
    """Print a dot for the finished epoch.

    Args:
      epoch: Zero-based index of the epoch that just ended.
      logs: Metric results for the epoch; unused here. Defaults to None to
        match the Keras callback signature.
    """
    # Start a new output line at epoch 0 and at every 100th epoch after it.
    if epoch % 100 == 0:
      print()
    print('.', end='')

# Early stopping: monitors val_loss with a patience of 10 epochs.
parada = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

In [21]:
# Upper bound on training epochs; the early-stopping callback may end sooner.
EPOCHS = 1000
print('Treinamento iniciado')
# 20% of the training rows are held out for validation. Keras' own logging is
# silenced (verbose=0); progress is reported by the pontoEpoch callback.
# NOTE(review): per the Keras docs, validation_split takes the last rows of
# the data before shuffling -- presumably the most recent periods here; confirm.
treinamento = modelo.fit(dftreino_normalizado, labels_treino, epochs=EPOCHS,
                         validation_split = 0.2, verbose=0, 
                         callbacks=[parada, pontoEpoch()])
print('\nTreinamento finalizado')

Treinamento iniciado

....................................................................................................
....................................................................................................
....................................................................................................
.......................
Treinamento finalizado


In [22]:
# Training history (per-epoch metric values) as a DataFrame.
historico = pd.DataFrame(treinamento.history)

In [23]:
# Training vs. validation MAE per epoch, with epochs numbered from 1.
hist = pd.DataFrame({
    'epoch': [numero + 1 for numero in historico.index],
    'mae': historico['mae'],
    'val_mae': historico['val_mae'],
})
scatter(hist, x=['epoch'], y=['mae', 'val_mae'], txt_titulo='Análise de erros',
        txt_xaxis='Epochs', txt_yaxis='MAE')

In [24]:
# Evaluate on the held-out test split. The three values are the loss followed
# by the two metrics the model was compiled with ('mae', 'mse').
loss, mae, mse = modelo.evaluate(dfteste_normalizado, labels_teste, verbose=2)
print("Erro médio absoluto - dataset de teste: {:5.2f} y".format(mae))

1/1 - 0s - loss: 10812.9473 - mae: 84.6683 - mse: 10812.9473
Erro médio absoluto - dataset de teste: 84.67 y


**Prever valores**

In [25]:
# Predict the test periods and flatten the result to a 1-D array.
label_previsao = modelo.predict(dfteste_normalizado).flatten()

In [26]:
# Rebuild a dd-mm-YYYY date string for every test row from its year and
# day-of-year features.
# Iterating the two columns directly avoids two chained .loc[x][col] lookups
# per row. The int() cast keeps the '%Y-%j' parse valid even if the values
# arrive as floats (e.g. 2021.0).
vdc = [
  datetime.strptime(f'{int(ano)}-{int(dia)}', "%Y-%j").strftime("%d-%m-%Y")
  for ano, dia in zip(dfteste['ano'], dfteste['diaAno'])
]

In [27]:
# Comparison table: actual value, predicted value and period label per test row.
dfprev = pd.DataFrame({
    'Realizado': labels_teste.to_list(),
    'Previsão': label_previsao.tolist(),
    'Período': vdc,
})

In [28]:
# Plot actual vs. predicted sales for the test periods.
scatter(dfprev, ['Período'], ['Realizado', 'Previsão'], 
        txt_titulo='Análise de Comparação - Realizado x Previsão',
        txt_xaxis='Período', txt_yaxis='Total de Vendas')