<a href="https://colab.research.google.com/github/italomarcelogit/python.free/blob/master/Facebook_Prophet_x_Outros_MLs_Sales_Sazonalidade_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Libraries**

In [1]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
from fbprophet.plot import plot_components_plotly, plot_plotly
import plotly.graph_objects as go

# **Data**

**Gerando dados de venda onde:**


*   de janeiro e fevereiro, o acumulado de venda é de 750 (k, m, etc)
*   junho, o acumulado de venda é de 2000 (k, m, etc)
*   dezembro, o acumulado de venda é de 900 (k, m, etc)
*   o restante dos meses, o acumulado é de 1000 (k, m, etc)



In [2]:
vendas = pd.DataFrame()
for y in range(2000, 2020):
  for m in range(1,13):
    if m < 3: # jan e fev == 750
      i = 750
    elif m == 6: # jun == 2000
      i = 2000
    elif m == 12: # dez == 900
      i = 1500
    else:
      i = 1000
    s = {'ds': f'{y}-{m}-28', 'y': i}
    vendas = vendas.append(s, ignore_index=True)

In [3]:
vendas.y.sum()

260000.0

In [4]:
vendas.head()

Unnamed: 0,ds,y
0,2000-1-28,750.0
1,2000-2-28,750.0
2,2000-3-28,1000.0
3,2000-4-28,1000.0
4,2000-5-28,1000.0


In [5]:
database = pd.DataFrame()
database['ds'] = vendas.ds.astype('datetime64[ns]')
database['y'] = vendas.y

In [6]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=database.ds, y=database.y,
                    mode='lines+markers',
                    name='total vendas'))

fig.update_layout(
    title="Histórico de Vendas")

# **Predictions - fbProphet**

**Instantiating a new Prophet object**

In [7]:
m = Prophet()


In [8]:
m.fit(database)

INFO:numexpr.utils:NumExpr defaulting to 2 threads.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<fbprophet.forecaster.Prophet at 0x7f36a33f7668>

In [9]:
f = m.make_future_dataframe(periods=13, freq='M')

In [10]:
p = m.predict(f)

In [11]:
# baseTeste = pd.DataFrame(p[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])
# baseTeste['y'] = database.y
# baseTeste.tail(12)
pd.DataFrame(p[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]).tail(12)

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
241,2020-01-31,757.777222,756.989064,758.500646
242,2020-02-29,751.655879,750.868096,752.417973
243,2020-03-31,1008.547703,1007.769166,1009.333843
244,2020-04-30,1007.781829,1007.014212,1008.587607
245,2020-05-31,1020.574079,1019.801631,1021.423308
246,2020-06-30,1982.248588,1981.388695,1983.104378
247,2020-07-31,1009.917261,1009.015041,1010.835627
248,2020-08-31,998.944565,997.991804,999.907133
249,2020-09-30,995.553454,994.564287,996.547843
250,2020-10-31,994.110369,993.011358,995.152457


In [12]:
gp=plot_plotly(m, p)
gp.update_layout(
    title="Histórico de Vendas")

In [13]:
gp=plot_components_plotly(m, p)
gp.update_layout(
    title="Detalhes do Histórico de Vendas")

# **Prediction Scikit-Learn**

In [14]:
# ML Algorithms sklearn
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVR
from sklearn.naive_bayes import GaussianNB
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

In [15]:
regressors = [
        DecisionTreeRegressor(),
        RandomForestRegressor(),
        SVR(),
        LinearRegression(),
        GradientBoostingRegressor(),
        DummyRegressor(),
        LogisticRegression(),
        GaussianNB()
    ]

In [16]:
vendasK = vendas.copy()
vendasK['mes'] = pd.DatetimeIndex(vendasK.ds).month
vendasK['ano'] = pd.DatetimeIndex(vendasK.ds).year

In [17]:
X = vendasK[['mes', 'ano']]
y = vendasK.y
Xtreino, Xteste, ytreino, yteste = train_test_split(X, y, test_size=0.3, random_state=123)

In [30]:
reg = []
mae = []
sco = []
for regressor in regressors:
    modelo = RandomForestRegressor()
    modelo.fit(Xtreino, np.array(ytreino))
    sco.append(modelo.score(Xtreino, ytreino))
    previsao = modelo.predict(Xteste)
    mae.append(round(mean_absolute_error(yteste, previsao), 2))
    reg.append(regressor)

In [31]:
meuMae = pd.DataFrame(columns=['Regressor', 'mae', 'score'])
meuMae['Regressor'] = reg
meuMae['mae'] = mae
meuMae['score'] = sco

In [32]:
meuMae = meuMae.sort_values(by='score', ascending=False)
meuMae

Unnamed: 0,Regressor,mae,score
2,"SVR(C=1.0, cache_size=200, coef0=0.0, degree=3...",0.0,1.0
4,"GradientBoostingRegressor(alpha=0.9, ccp_alpha...",0.0,1.0
6,"LogisticRegression(C=1.0, class_weight=None, d...",0.0,1.0
7,"GaussianNB(priors=None, var_smoothing=1e-09)",0.0,1.0
0,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.97,0.999954
3,"LinearRegression(copy_X=True, fit_intercept=Tr...",0.97,0.999954
5,"DummyRegressor(constant=None, quantile=None, s...",2.22,0.99987
1,"RandomForestRegressor(bootstrap=True, ccp_alph...",2.64,0.999837


In [33]:
f'Best Regressor: {meuMae["Regressor"].values[0]}'

"Best Regressor: SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"

In [41]:
model = RandomForestRegressor() # OR meuMae["Regressor"].values[0]

In [42]:
model.fit(X, y)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [43]:
pVendas = vendasK.copy()
rMes = range(1, 13)
year = 2020
for m in rMes:
  valFeatures = [m, year]
  predict = float(model.predict([valFeatures]))
  pVendas = pVendas.append({'mes': m, 'ano': year, 'y': predict}, ignore_index=True)


In [44]:
pVendas['ds'] = pVendas.apply(lambda x: f'{int(x.ano)}-{int(x.mes)}-28', axis=1)

In [45]:
pVendas['ds'] = pVendas.ds.astype('datetime64[ns]')

In [46]:
pVendas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ds      252 non-null    datetime64[ns]
 1   y       252 non-null    float64       
 2   mes     252 non-null    float64       
 3   ano     252 non-null    float64       
dtypes: datetime64[ns](1), float64(3)
memory usage: 8.0 KB


In [47]:
x = pVendas.ds
real = pVendas.y[pVendas.ano < 2020]
pred = pVendas[pVendas.ano == 2020]

fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=real,
                    mode='lines+markers',
                    name='Realizado'))

fig.add_trace(go.Scatter(x=pred.ds, y=pred.y,
                    mode='lines+markers',
                    name='Previsao'))
fig.update_layout(
    title="Historico + Previsao de Vendas")

fig.show()