<a href="https://colab.research.google.com/github/italomarcelogit/python.free/blob/master/Facebook_Prophet_x_Outros_MLs_Sales.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Libraries**

In [1]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
from fbprophet.plot import plot_components_plotly, plot_plotly
import plotly.graph_objects as go

# **Data**

**Gerando dados de venda onde:**


*   de janeiro a abril, o acumulado de venda é de 500 (k, m, etc)
*   o restante dos meses, o acumulado é de 1000 (k, m, etc)



In [2]:
vendas = pd.DataFrame()
for y in range(2000, 2020):
  for m in range(1,13):
    if m < 5:
      i = 500
    else:
      i = 1000
    s = {'ds': f'{y}-{m}-28', 'y': i}
    vendas = vendas.append(s, ignore_index=True)

In [3]:
vendas.y.sum()

200000.0

In [4]:
database = pd.DataFrame()
database['ds'] = vendas.ds.astype('datetime64[ns]')
database['y'] = vendas.y

In [5]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=database.ds, y=database.y,
                    mode='lines+markers',
                    name='total vendas'))

fig.update_layout(
    title="Histórico de Vendas")

# **Predictions - fbProphet**

**Instantiating a new Prophet object**

In [6]:
m = Prophet()


In [7]:
m.fit(database)

INFO:numexpr.utils:NumExpr defaulting to 2 threads.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<fbprophet.forecaster.Prophet at 0x7f5e5a85f4e0>

In [8]:
f = m.make_future_dataframe(periods=12, freq='M')

In [9]:
p = m.predict(f)

In [10]:
# baseTeste = pd.DataFrame(p[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])
# baseTeste['y'] = database.y
# baseTeste.tail(12)
pd.DataFrame(p[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]).tail(12)

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
240,2019-12-31,983.030021,982.121244,983.998398
241,2020-01-31,494.298636,493.278749,495.352753
242,2020-02-29,498.960203,497.850114,499.923994
243,2020-03-31,510.547953,509.511885,511.659477
244,2020-04-30,515.407882,514.314509,516.519477
245,2020-05-31,1007.448434,1006.324645,1008.620353
246,2020-06-30,1009.680431,1008.425735,1010.819352
247,2020-07-31,1011.434289,1010.222052,1012.799623
248,2020-08-31,1006.214712,1004.941659,1007.561679
249,2020-09-30,999.118176,997.711783,1000.652817


In [11]:
gp = plot_plotly(m, p)
gp.update_layout(
    title="Previsao de Vendas")

In [12]:
gp = plot_components_plotly(m, p)
gp.update_layout(
    title="Detalhes da Previsao de Vendas")

# **Prediction Scikit-Learn**

In [13]:
# ML Algorithms sklearn
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVR
from sklearn.naive_bayes import GaussianNB
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

In [14]:
vendasK = vendas.copy()
vendasK['mes'] = pd.DatetimeIndex(vendasK.ds).month
vendasK['ano'] = pd.DatetimeIndex(vendasK.ds).year

In [15]:
X = vendasK[['mes', 'ano']]
y = vendasK.y
Xtreino, Xteste, ytreino, yteste = train_test_split(X, y, test_size=0.3, random_state=123)

In [16]:
regressors = [
        DecisionTreeRegressor(),
        RandomForestRegressor(),
        SVR(),
        LinearRegression(),
        GradientBoostingRegressor(),
        DummyRegressor(),
        LogisticRegression(),
        GaussianNB()
    ]

In [17]:
reg = []
mae = []
sco = []
for regressor in regressors:
    modelo = RandomForestRegressor()
    modelo.fit(Xtreino, np.array(ytreino))
    sco.append(modelo.score(Xtreino, ytreino))
    previsao = modelo.predict(Xteste)
    mae.append(round(mean_absolute_error(yteste, previsao), 2))
    reg.append(regressor)

In [18]:
meuMae = pd.DataFrame(columns=['Regressor', 'mae', 'score'])
meuMae['Regressor'] = reg
meuMae['mae'] = mae
meuMae['score'] = sco

In [19]:
meuMae = meuMae.sort_values(by='score', ascending=False)
meuMae

Unnamed: 0,Regressor,mae,score
0,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.0,1.0
1,"RandomForestRegressor(bootstrap=True, ccp_alph...",0.0,1.0
2,"SVR(C=1.0, cache_size=200, coef0=0.0, degree=3...",0.0,1.0
3,"LinearRegression(copy_X=True, fit_intercept=Tr...",0.0,1.0
4,"GradientBoostingRegressor(alpha=0.9, ccp_alpha...",0.0,1.0
5,"DummyRegressor(constant=None, quantile=None, s...",0.0,1.0
6,"LogisticRegression(C=1.0, class_weight=None, d...",0.0,1.0
7,"GaussianNB(priors=None, var_smoothing=1e-09)",0.0,1.0


In [20]:
f'Best Regressor: {meuMae["Regressor"].values[0]}'

"Best Regressor: DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,\n                      max_features=None, max_leaf_nodes=None,\n                      min_impurity_decrease=0.0, min_impurity_split=None,\n                      min_samples_leaf=1, min_samples_split=2,\n                      min_weight_fraction_leaf=0.0, presort='deprecated',\n                      random_state=None, splitter='best')"

In [21]:
model = meuMae["Regressor"].values[1]

In [22]:
model.fit(X, y)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [23]:
# baseTeste = X_test.copy()
# baseTeste['y'] = y_test
# baseTeste = baseTeste.reset_index()
# baseTeste = baseTeste.drop('index', axis=1)
# pred = []
# for row in baseTeste.index:
#   m = baseTeste.mes.iloc[row]
#   y = baseTeste.ano.iloc[row]
#   valFeatures = [m, y]
#   pred.append(model.predict([valFeatures])[0])

#   # print(m, y, baseTeste.y.iloc[row], pred[i])
# baseTeste['previsao'] = pred
# baseTeste = baseTeste.sort_values(by=['ano','mes'])
# baseTeste = baseTeste.reset_index()
# baseTeste = baseTeste.drop('index', axis=1)
# baseTeste

In [24]:
pVendas = vendasK.copy()
rMes = range(1, 13)
year = 2020
for m in rMes:
  valFeatures = [m, year]
  predict = float(model.predict([valFeatures]))
  pVendas = pVendas.append({'mes': m, 'ano': year, 'y': predict}, ignore_index=True)


In [25]:
pVendas['ds'] = pVendas.apply(lambda x: f'{int(x.ano)}-{int(x.mes)}-28', axis=1)

In [26]:
pVendas['ds'] = pVendas.ds.astype('datetime64[ns]')

In [27]:
pVendas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ds      252 non-null    datetime64[ns]
 1   y       252 non-null    float64       
 2   mes     252 non-null    float64       
 3   ano     252 non-null    float64       
dtypes: datetime64[ns](1), float64(3)
memory usage: 8.0 KB


In [28]:
import plotly.graph_objects as go

x = pVendas.ds
real = pVendas.y[pVendas.ano < 2020]
pred = pVendas[pVendas.ano == 2020]

fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=real,
                    mode='lines+markers',
                    name='Realizado'))

fig.add_trace(go.Scatter(x=pred.ds, y=pred.y,
                    mode='lines+markers',
                    name='Previsao'))
fig.update_layout(
    title="Histórico + Previsao de Vendas")

fig.show()