<a href="https://colab.research.google.com/github/italomarcelogit/python.free/blob/master/Facebook_Prophet_x_Outros_MLs_Sales_Sazonalidade_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Libraries**

In [1]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
from fbprophet.plot import plot_components_plotly, plot_plotly
import plotly.graph_objects as go

# **Data**

**Gerando dados de venda onde:**


*   janeiro e fevereiro, o acumulado de venda é de 750 (k, m, etc)
*   junho, o acumulado de venda é de 2000 (k, m, etc)
*   dezembro, o acumulado de venda é de 1500 (k, m, etc)
*   o restante dos meses, o acumulado é de 1000 (k, m, etc)
*   De dois em dois anos ocorre evento esportivo, e isso faz a venda do primeiro semestre ter um crescimento de 10% (vvt)



In [2]:
vendas = pd.DataFrame()
for y in range(2000, 2020):
  for m in range(1,13):
    vvt = 0
    if m < 3:     # jan e fev 
      i = 750
    elif m == 6:  # jun 
      i = 2000
    elif m == 12: # dez 
      i = 1500
    else:         # restante
      i = 1000
    if y % 2 == 0 and m < 7:
      i += i*0.1
      vvt = 1
    s = {'ds': f'{y}-{m}-28', 'y': i, 'vvt': vvt}
    vendas = vendas.append(s, ignore_index=True)

In [3]:
vendas.y.sum()

266500.0

In [4]:
vendas.tail()

Unnamed: 0,ds,vvt,y
235,2019-8-28,0.0,1000.0
236,2019-9-28,0.0,1000.0
237,2019-10-28,0.0,1000.0
238,2019-11-28,0.0,1000.0
239,2019-12-28,0.0,1500.0


In [5]:
database = vendas.copy()
database['ds'] = vendas.ds.astype('datetime64[ns]')
# database['y'] = vendas.y

In [6]:
database.to_csv('database-fict.csv')

In [7]:
goTemplates = ["plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"]

fig = go.Figure()
fig.add_trace(go.Scatter(x=database.ds, y=database.y,
                    mode='lines+markers',
                    name='total vendas'))

fig.update_layout(template=goTemplates[1], title="Histórico de Vendas")

# **Predictions - fbProphet**

**Instantiating a new Prophet object**

In [8]:
m = Prophet()


In [9]:
m.fit(database)

INFO:numexpr.utils:NumExpr defaulting to 2 threads.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<fbprophet.forecaster.Prophet at 0x7f926cb4a9e8>

In [10]:
f = m.make_future_dataframe(periods=24, freq='M')

In [11]:
p = m.predict(f)

In [12]:
# baseTeste = pd.DataFrame(p[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])
# baseTeste['y'] = database.y
# baseTeste.tail(12)
pd.DataFrame(p[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]).tail(12)

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
252,2020-12-31,1623.159636,1570.672068,1677.574146
253,2021-01-31,715.520838,662.968281,772.452423
254,2021-02-28,759.298976,710.56012,807.347117
255,2021-03-31,1141.228622,1091.191971,1191.082836
256,2021-04-30,1071.786381,1023.410134,1123.444534
257,2021-05-31,1038.412171,988.327684,1088.507833
258,2021-06-30,2249.542345,2200.50849,2301.94482
259,2021-07-31,838.858769,789.794477,889.930687
260,2021-08-31,871.716258,824.96012,927.647307
261,2021-09-30,962.764239,912.648874,1014.624645


In [13]:
gp=plot_plotly(m, p)
gp.update_layout(template=goTemplates[1], title="Histórico de Vendas")

In [14]:
gp=plot_components_plotly(m, p)
gp.update_layout(template=goTemplates[1], title="Detalhes do Histórico de Vendas")

# **Prediction Scikit-Learn**

In [15]:
# ML Algorithms sklearn
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVR
from sklearn.naive_bayes import GaussianNB
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

In [16]:
regressors = [
        DecisionTreeRegressor(),
        RandomForestRegressor(),
        SVR(),
        LinearRegression(),
        GradientBoostingRegressor(),
        DummyRegressor(),
        LogisticRegression(),
        GaussianNB()
    ]

In [17]:
vendasK = vendas.copy()
vendasK['mes'] = pd.DatetimeIndex(vendasK.ds).month
vendasK['ano'] = pd.DatetimeIndex(vendasK.ds).year

In [18]:
X = vendasK[['mes', 'ano', 'vvt']]
y = vendasK.y
Xtreino, Xteste, ytreino, yteste = train_test_split(X, y, test_size=0.3, random_state=123)

In [19]:
reg = []
mae = []
sco = []
rmse = []
for regressor in regressors:
    modelo = RandomForestRegressor()
    modelo.fit(Xtreino, np.array(ytreino))
    sco.append(modelo.score(Xtreino, ytreino))
    previsao = modelo.predict(Xteste)
    mae.append(round(mean_absolute_error(yteste, previsao), 2))
    rmse.append(round(mean_squared_error(yteste, previsao, squared=False), 2))
    reg.append(regressor)

In [20]:
meuMae = pd.DataFrame(columns=['Regressor', 'mae', 'rmse', 'score'])
meuMae['Regressor'] = reg
meuMae['mae'] = mae
meuMae['rmse'] = rmse
meuMae['score'] = sco

In [21]:
meuMae = meuMae.sort_values(by='rmse', ascending=True)
meuMae

Unnamed: 0,Regressor,mae,rmse,score
0,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.75,2.57,0.999959
2,"SVR(C=1.0, cache_size=200, coef0=0.0, degree=3...",0.83,2.58,0.999959
5,"DummyRegressor(constant=None, quantile=None, s...",0.97,2.84,0.999959
7,"GaussianNB(priors=None, var_smoothing=1e-09)",1.39,4.83,0.999911
4,"GradientBoostingRegressor(alpha=0.9, ccp_alpha...",1.39,6.01,0.999856
6,"LogisticRegression(C=1.0, class_weight=None, d...",1.72,6.24,0.999767
3,"LinearRegression(copy_X=True, fit_intercept=Tr...",2.08,7.55,0.999802
1,"RandomForestRegressor(bootstrap=True, ccp_alph...",2.22,8.98,0.999609


In [22]:
f'Best Regressor: {meuMae["Regressor"].values[0]}'

"Best Regressor: DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,\n                      max_features=None, max_leaf_nodes=None,\n                      min_impurity_decrease=0.0, min_impurity_split=None,\n                      min_samples_leaf=1, min_samples_split=2,\n                      min_weight_fraction_leaf=0.0, presort='deprecated',\n                      random_state=None, splitter='best')"

In [31]:
model = RandomForestRegressor() # OR meuMae["Regressor"].values[1]

In [32]:
model.fit(X, y)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [33]:
pVendas = vendasK.copy()
rMes = range(1, 13)
rAno = range(2020, 2022)
for year in rAno:
  for m in rMes:
    vvt = 0
    if year % 2 == 0 and m < 7:
      i += i*0.1
      vvt = 1
    valFeatures = [m, year, vvt]
    predict = float(model.predict([valFeatures]))
    pVendas = pVendas.append({'mes': m, 'ano': year, 'y': predict, 'vvt': vvt}, ignore_index=True)


In [34]:
pVendas['ds'] = pVendas.apply(lambda x: f'{int(x.ano)}-{int(x.mes)}-28', axis=1)

In [35]:
pVendas['ds'] = pVendas.ds.astype('datetime64[ns]')

In [36]:
pVendas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 264 entries, 0 to 263
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ds      264 non-null    datetime64[ns]
 1   vvt     264 non-null    float64       
 2   y       264 non-null    float64       
 3   mes     264 non-null    float64       
 4   ano     264 non-null    float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 10.4 KB


In [37]:
x = pVendas.ds
real = pVendas.y[pVendas.ano < 2020]
pred = pVendas[pVendas.ano >= 2020]

fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=real,
                    mode='lines+markers',
                    name='Realizado'))

fig.add_trace(go.Scatter(x=pred.ds, y=pred.y,
                    mode='lines+markers',
                    name='Previsao'))
fig.update_layout(template=goTemplates[1],  title="Historico + Previsao de Vendas")

fig.show()

In [30]:
pVendas.tail(48)

Unnamed: 0,ds,vvt,y,mes,ano
216,2018-01-28,1.0,825.0,1.0,2018.0
217,2018-02-28,1.0,825.0,2.0,2018.0
218,2018-03-28,1.0,1100.0,3.0,2018.0
219,2018-04-28,1.0,1100.0,4.0,2018.0
220,2018-05-28,1.0,1100.0,5.0,2018.0
221,2018-06-28,1.0,2200.0,6.0,2018.0
222,2018-07-28,0.0,1000.0,7.0,2018.0
223,2018-08-28,0.0,1000.0,8.0,2018.0
224,2018-09-28,0.0,1000.0,9.0,2018.0
225,2018-10-28,0.0,1000.0,10.0,2018.0
