In [148]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


import pmdarima as pm
from prophet import Prophet
from xgboost import XGBRegressor

from warnings import filterwarnings
filterwarnings('ignore')

In [173]:
mmm = pd.DataFrame()
mmm = pd.concat([mmm]+[pd.read_excel(os.path.join('dados', 'mmm', arquivo)) for arquivo in os.listdir('dados/mmm')]).reset_index(drop=True)

etapas = pd.DataFrame()
etapas = pd.concat([etapas]+[pd.read_excel(os.path.join('dados', 'etapas', arquivo)) for arquivo in os.listdir('dados/etapas')]).reset_index(drop=True).rename(columns={'uasg':'codigo'})

da = pd.DataFrame()
da = pd.concat([da]+[pd.read_excel(os.path.join('dados', 'da', arquivo)) for arquivo in os.listdir('dados/da')]).reset_index(drop=True)

In [151]:
mmm.codigo.value_counts().describe()

count    247.000000
mean      50.842105
std       32.580035
min        1.000000
25%       13.500000
50%       70.000000
75%       80.000000
max       80.000000
Name: count, dtype: float64

In [152]:
da.groupby(['mes', 'ano']).codigo.count().describe()

count     80.000000
mean     357.900000
std       38.667554
min      102.000000
25%      334.750000
50%      369.500000
75%      386.000000
max      404.000000
Name: codigo, dtype: float64

In [153]:
etapas[['mes', 'ano', 'uasg']].drop_duplicates().groupby(['mes', 'ano']).uasg.count().describe()

count     79.000000
mean     301.265823
std       16.608392
min      277.000000
25%      288.000000
50%      300.000000
75%      313.500000
max      335.000000
Name: uasg, dtype: float64

In [154]:
mmm_marinha = mmm.groupby(['ano', 'mes'])[[col for col in mmm.columns if col not in ['ano', 'mes', 'codigo', 'nome']]].sum().reset_index()

In [155]:
mmm_marinha['mes'] = [''.join([str(i[0]), '_', str(i[1])]) for i in zip(mmm_marinha.mes.values, mmm_marinha.ano.values)]
mmm_marinha.drop(columns=['ano'], inplace=True)
mmm_marinha = mmm_marinha.iloc[:-2]

In [156]:
mmm

Unnamed: 0,ano,mes,codigo,nome,balanco_paiol_mes_anterior,gen_depsubmrj_depnav_reg,gen_adq_form_extra_mb_licit1,gen_adq_form_extra_mb_licit2,gen_adq_form_extra_mb_slicit,remessa_recebida,remessa_expedida,vale_extra,termo_de_despesa,generos_consumidos,totais_balanco_paiol_receita,totais_balanco_paiol_despesa,saldo
0,2019,1,81200,1° BATALHAO DE OPERAÇOES LITORANEAS DE FUZILEI...,98180.15720,73516.14,42111.800,43376.544,1174.4000,0.0,0.0,2871.75,0.0,162351.16000,258359.04120,165222.91000,93136.13120
1,2019,1,88200,1º Batalhão de Operações Ribeirinhas,698734.61658,0.00,0.000,0.000,158691.8100,0.0,0.0,19335.01,0.0,168038.20800,857426.42658,187373.21800,670053.20858
2,2019,1,82200,2° BATALHÃO DE OPERAÇÕES LITORÂNEAS DE FUZILEI...,63728.75860,31178.35,54681.395,0.000,0.0000,0.0,0.0,5469.15,0.0,65744.78000,149588.50360,71213.93000,78374.57360
3,2019,1,84200,2º BATALHÃO DE OPERAÇÕES RIBEIRINHAS,171638.53500,54408.70,117189.000,0.000,6110.1599,0.0,0.0,4399.40,0.0,113672.31750,349346.39490,118071.71750,231274.67740
4,2019,1,83200,3° BATALHÃO DE OPERAÇÕES LITORÂNEAS DE FUZILEI...,94752.65847,9647.28,3400.210,0.000,505.5600,0.0,0.0,6570.20,0.0,12721.57279,108305.70847,19291.77279,89013.93568
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12553,2025,8,91536,Submarino Humaitá,877.08020,0.00,0.000,0.000,0.0000,0.0,0.0,0.00,0.0,0.00000,877.08020,0.00000,877.08020
12554,2025,8,91535,Submarino Riachuelo,7648.43200,0.00,0.000,0.000,0.0000,0.0,0.0,0.00,0.0,0.00000,7648.43200,0.00000,7648.43200
12555,2025,8,10100,TRIBUNAL MARÍTIMO,4763.40000,284.70,4313.600,4315.400,0.0000,0.0,0.0,0.00,0.0,0.00000,13677.10000,0.00000,13677.10000
12556,2025,9,88131,"NAVIO PATRULHA FLUVIAL ""RONDÔNIA""",0.00000,0.00,0.000,0.000,0.0000,0.0,0.0,0.00,0.0,0.00000,0.00000,0.00000,0.00000


In [157]:
mmm_receita_despesa = pd.merge(mmm, da, how='inner', on=['mes', 'ano', 'codigo'])

In [164]:
mmm_receita_despesa[abs((mmm_receita_despesa.despesa_autorizada_global - (mmm_receita_despesa.generos_consumidos + mmm_receita_despesa.vale_extra)) - mmm_receita_despesa.sobra_licita) < 1]

Unnamed: 0,ano,mes,codigo,nome,balanco_paiol_mes_anterior,gen_depsubmrj_depnav_reg,gen_adq_form_extra_mb_licit1,gen_adq_form_extra_mb_licit2,gen_adq_form_extra_mb_slicit,remessa_recebida,...,termo_de_despesa,generos_consumidos,totais_balanco_paiol_receita,totais_balanco_paiol_despesa,saldo,sigla,om_id,despesa_autorizada_global,sobra_licita,sobra_licita_ajustada
0,2019,1,81200,1° BATALHAO DE OPERAÇOES LITORANEAS DE FUZILEI...,98180.15720,73516.14,42111.8000,43376.544,1174.4000,0.00,...,0.0,162351.16000,258359.04120,165222.91000,93136.13120,GptFNRJ,10220,189626.0,24403.07,0.00
1,2019,1,88200,1º Batalhão de Operações Ribeirinhas,698734.61658,0.00,0.0000,0.000,158691.8100,0.00,...,0.0,168038.20800,857426.42658,187373.21800,670053.20858,1º BtlOpRib,10251,192259.9,4886.67,0.00
2,2019,1,82200,2° BATALHÃO DE OPERAÇÕES LITORÂNEAS DE FUZILEI...,63728.75860,31178.35,54681.3950,0.000,0.0000,0.00,...,0.0,65744.78000,149588.50360,71213.93000,78374.57360,2°BtlOpLitFN,10112,75417.0,4203.05,0.00
4,2019,1,84200,2º BATALHÃO DE OPERAÇÕES RIBEIRINHAS,171638.53500,54408.70,117189.0000,0.000,6110.1599,0.00,...,0.0,113672.31750,349346.39490,118071.71750,231274.67740,2º BtlOpRib,10302,120572.4,2500.68,0.00
5,2019,1,83200,3° BATALHÃO DE OPERAÇÕES LITORÂNEAS DE FUZILEI...,94752.65847,9647.28,3400.2100,0.000,505.5600,0.00,...,0.0,12721.57279,108305.70847,19291.77279,89013.93568,3°BtlOpLitFN,10395,33859.5,14567.74,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14090,2025,7,91535,Submarino Riachuelo,27408.97720,0.00,4607.1000,0.000,0.0000,0.00,...,0.0,11056.49000,32016.07720,24367.65000,7648.42720,SRiachuelo,14120,14670.0,3613.51,3613.51
14102,2025,8,80000,COMANDO DE OPERAÇÕES NAVAIS,42942.07960,48835.43,15137.8016,33032.340,4769.1902,0.00,...,0.0,75096.59000,144716.84140,82393.27000,62323.57140,ComOpNav,10137,100103.5,17710.24,17710.24
14106,2025,8,91619,CORVETA JULIO DE NORONHA,3144.12000,1906.24,0.0000,0.000,1482.6000,0.00,...,0.0,0.00000,6532.96000,0.00000,6532.96000,CvJNoronha,10225,0.0,0.00,0.00
14117,2025,8,51214,NAVIO HIDROCEANOGRÁFICO AMORIM DO VALLE,21534.21500,19056.02,2022.4300,129.500,4097.8360,804.38,...,0.0,0.00000,47644.38100,0.00000,47644.38100,NHoAValle,10024,0.0,0.00,0.00


In [168]:
mmm_receita_despesa[mmm_receita_despesa.codigo==80000]

Unnamed: 0,ano,mes,codigo,nome,balanco_paiol_mes_anterior,gen_depsubmrj_depnav_reg,gen_adq_form_extra_mb_licit1,gen_adq_form_extra_mb_licit2,gen_adq_form_extra_mb_slicit,remessa_recebida,...,termo_de_despesa,generos_consumidos,totais_balanco_paiol_receita,totais_balanco_paiol_despesa,saldo,sigla,om_id,despesa_autorizada_global,sobra_licita,sobra_licita_ajustada
54,2019,1,80000,COMANDO DE OPERAÇÕES NAVAIS,17232.72400,4407.42,8350.3280,0.0000,5923.1544,0.0,...,0.0,19196.77412,35913.62640,20471.48412,15442.14228,ComOpNav,10137,43372.0,22900.55,0.00
55,2019,1,80000,COMANDO DE OPERAÇÕES NAVAIS,17232.72400,4407.42,8350.3280,0.0000,5923.1544,0.0,...,0.0,19196.77412,35913.62640,20471.48412,15442.14228,ComOpNav,10137,60543.0,750.18,0.00
217,2019,2,80000,COMANDO DE OPERAÇÕES NAVAIS,15442.17536,7809.27,11877.4910,0.0000,27220.3437,0.0,...,0.0,42820.12210,62349.28006,47088.43210,15260.84796,ComOpNav,10137,55734.0,8645.58,0.00
218,2019,2,80000,COMANDO DE OPERAÇÕES NAVAIS,15442.17536,7809.27,11877.4910,0.0000,27220.3437,0.0,...,0.0,42820.12210,62349.28006,47088.43210,15260.84796,ComOpNav,10137,52128.0,1076.06,0.00
380,2019,3,80000,COMANDO DE OPERAÇÕES NAVAIS,15260.85100,8378.90,10830.5400,4601.5025,8617.3273,0.0,...,0.0,23620.65300,47689.12080,27911.73300,19777.38780,ComOpNav,10137,55886.0,27974.28,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13782,2025,6,80000,COMANDO DE OPERAÇÕES NAVAIS,18323.57000,42537.44,15940.0400,5082.9200,360.0000,0.0,...,0.0,68869.51100,82243.97000,70877.24100,11366.72900,ComOpNav,10137,105675.0,26505.12,26418.75
13973,2025,7,80000,COMANDO DE OPERAÇÕES NAVAIS,11366.74000,54831.99,19355.8840,31994.1690,2553.1600,0.0,...,0.0,75330.22000,120101.94300,77159.85000,42942.09300,ComOpNav,10137,110520.0,24261.87,24261.87
13974,2025,7,80000,COMANDO DE OPERAÇÕES NAVAIS,11366.74000,54831.99,19355.8840,31994.1690,2553.1600,0.0,...,0.0,75330.22000,120101.94300,77159.85000,42942.09300,ComOpNav,10137,101362.0,24202.15,24202.15
14102,2025,8,80000,COMANDO DE OPERAÇÕES NAVAIS,42942.07960,48835.43,15137.8016,33032.3400,4769.1902,0.0,...,0.0,75096.59000,144716.84140,82393.27000,62323.57140,ComOpNav,10137,100103.5,17710.24,17710.24


In [166]:
mmm_receita_despesa.codigo.value_counts()

codigo
71000    257
80000    180
62000    177
62500    177
64000    177
        ... 
11129      1
11131      1
11132      1
11134      1
84120      1
Name: count, Length: 239, dtype: int64

In [178]:
etapas[etapas.codigo_etapa.isin([102, 104])]

Unnamed: 0,mes,ano,codigo,codigo_etapa,quantidade
204,1,2019,51207,102,129.0
206,1,2019,51207,104,138.0
214,1,2019,51210,102,1227.0
216,1,2019,51210,104,1345.0
341,1,2019,65705,102,2.0
...,...,...,...,...,...
105384,7,2025,91613,104,261.0
105388,7,2025,91615,102,1185.0
105390,7,2025,91615,104,637.0
105460,7,2025,95140,102,192.0


In [179]:
mmm_etapas = pd.merge(left=mmm, right=etapas, how='inner', on=['ano', 'mes', 'codigo'])

In [182]:
mmm_etapas[mmm_etapas.codigo_etapa.isin([103, 105])][['ano', 'mes', 'nome', 'codigo_etapa', 'quantidade']]

Unnamed: 0,ano,mes,nome,codigo_etapa,quantidade
1,2019,1,1° BATALHAO DE OPERAÇOES LITORANEAS DE FUZILEI...,103,19537.0
3,2019,1,1º Batalhão de Operações Ribeirinhas,103,11422.0
8,2019,1,2° BATALHÃO DE OPERAÇÕES LITORÂNEAS DE FUZILEI...,103,5767.0
9,2019,1,2° BATALHÃO DE OPERAÇÕES LITORÂNEAS DE FUZILEI...,105,42.0
12,2019,1,2º BATALHÃO DE OPERAÇÕES RIBEIRINHAS,103,8768.0
...,...,...,...,...,...
60071,2025,7,SUBMARINO TUPI,103,624.0
60074,2025,7,Submarino Humaitá,103,132.0
60077,2025,7,Submarino Riachuelo,103,206.0
60080,2025,7,TRIBUNAL MARÍTIMO,103,490.0


In [143]:
pd.DataFrame({
    'diferenca': mmm_receita_despesa.despesa_autorizada_global - (mmm_receita_despesa.generos_consumidos + mmm_receita_despesa.vale_extra),
    'sobra_licita': mmm_receita_despesa.sobra_licita
})

Unnamed: 0,diferenca,sobra_licita
0,24403.09000,24403.07
1,4886.68200,4886.67
2,4203.07000,4203.05
3,2500.68250,2500.68
4,14567.72721,14567.74
...,...,...
12279,0.00000,
12280,0.00000,0.00
12281,11299.59000,
12282,27553.50000,24798.15


In [89]:
da_marinha = da.groupby(['mes', 'ano']).despesa_autorizada_global.sum().reset_index()
da_marinha['mes'] = ['_'.join(['{:02d}'.format(i[0]), str(i[1])]) for i in zip(da_marinha.mes.values, da_marinha.ano.values)]
da_marinha = da_marinha.sort_values(by=['ano', 'mes']).iloc[:-2]
fig = px.line(
    da_marinha,
    x = 'mes',
    y = 'despesa_autorizada_global',
    title='Despesa Autorizada global'
)

fig.show()

In [None]:
def grafico_base(titulo):
    return px.line(
        mmm_marinha,
        x = 'mes',
        y = 'totais_balanco_paiol_despesa',
        labels = {
            'mes': 'Mês e ano',
            'totais_balanco_paiol_despesa': 'Totais das despesas'
        },
        title = titulo
    )

fig = grafico_base('Gastos com alimentação dos últimos cinco anos')

fig.update_traces(mode='lines+markers', line=dict(width=2))
fig.update_xaxes(tickangle=45)
fig.update_layout(
    template='plotly_white',
    hovermode='x unified'
    )

fig.show()

In [None]:
# Teste de estacionariedade

result = adfuller(mmm_marinha.totais_balanco_paiol_despesa)
print(f'ADF: {result[0]}, p-valor: {result[1]}')

ADF: -0.7929840099869884, p-valor: 0.8211742792131471


In [None]:
naive_forecast = mmm_marinha.totais_balanco_paiol_despesa.shift(1)
mae_naive = mean_absolute_error(mmm_marinha.totais_balanco_paiol_despesa.iloc[1:], naive_forecast.iloc[1:])
print('Baseline Naïve MAE:', mae_naive)

Baseline Naïve MAE: 1371822.7820545628


In [None]:
# SARIMA

train = mmm_marinha.iloc[:-12, :]
test = mmm_marinha.iloc[-12:, :]

model_auto = pm.auto_arima(train.totais_balanco_paiol_despesa, seasonal=True, m=12, stepwise=True, trace=True)
print(model_auto.summary())

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(1,0,1)[12] intercept   : AIC=2075.426, Time=0.17 sec
 ARIMA(0,1,0)(0,0,0)[12] intercept   : AIC=2081.293, Time=0.00 sec
 ARIMA(1,1,0)(1,0,0)[12] intercept   : AIC=2071.938, Time=0.03 sec
 ARIMA(0,1,1)(0,0,1)[12] intercept   : AIC=2073.317, Time=0.02 sec
 ARIMA(0,1,0)(0,0,0)[12]             : AIC=2079.434, Time=0.00 sec
 ARIMA(1,1,0)(0,0,0)[12] intercept   : AIC=2077.323, Time=0.01 sec
 ARIMA(1,1,0)(2,0,0)[12] intercept   : AIC=2073.498, Time=0.09 sec
 ARIMA(1,1,0)(1,0,1)[12] intercept   : AIC=2072.305, Time=0.05 sec
 ARIMA(1,1,0)(0,0,1)[12] intercept   : AIC=2072.411, Time=0.02 sec
 ARIMA(1,1,0)(2,0,1)[12] intercept   : AIC=2074.220, Time=0.10 sec
 ARIMA(0,1,0)(1,0,0)[12] intercept   : AIC=2073.821, Time=0.02 sec
 ARIMA(2,1,0)(1,0,0)[12] intercept   : AIC=2073.333, Time=0.04 sec
 ARIMA(1,1,1)(1,0,0)[12] intercept   : AIC=2073.515, Time=0.06 sec
 ARIMA(0,1,1)(1,0,0)[12] intercept   : AIC=2072.829, Time=0.03 sec
 ARIMA(2,1,1)(1,0,0

In [184]:
mmm

Unnamed: 0,ano,mes,codigo,nome,balanco_paiol_mes_anterior,gen_depsubmrj_depnav_reg,gen_adq_form_extra_mb_licit1,gen_adq_form_extra_mb_licit2,gen_adq_form_extra_mb_slicit,remessa_recebida,remessa_expedida,vale_extra,termo_de_despesa,generos_consumidos,totais_balanco_paiol_receita,totais_balanco_paiol_despesa,saldo
0,2019,1,81200,1° BATALHAO DE OPERAÇOES LITORANEAS DE FUZILEI...,98180.15720,73516.14,42111.800,43376.544,1174.4000,0.0,0.0,2871.75,0.0,162351.16000,258359.04120,165222.91000,93136.13120
1,2019,1,88200,1º Batalhão de Operações Ribeirinhas,698734.61658,0.00,0.000,0.000,158691.8100,0.0,0.0,19335.01,0.0,168038.20800,857426.42658,187373.21800,670053.20858
2,2019,1,82200,2° BATALHÃO DE OPERAÇÕES LITORÂNEAS DE FUZILEI...,63728.75860,31178.35,54681.395,0.000,0.0000,0.0,0.0,5469.15,0.0,65744.78000,149588.50360,71213.93000,78374.57360
3,2019,1,84200,2º BATALHÃO DE OPERAÇÕES RIBEIRINHAS,171638.53500,54408.70,117189.000,0.000,6110.1599,0.0,0.0,4399.40,0.0,113672.31750,349346.39490,118071.71750,231274.67740
4,2019,1,83200,3° BATALHÃO DE OPERAÇÕES LITORÂNEAS DE FUZILEI...,94752.65847,9647.28,3400.210,0.000,505.5600,0.0,0.0,6570.20,0.0,12721.57279,108305.70847,19291.77279,89013.93568
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12553,2025,8,91536,Submarino Humaitá,877.08020,0.00,0.000,0.000,0.0000,0.0,0.0,0.00,0.0,0.00000,877.08020,0.00000,877.08020
12554,2025,8,91535,Submarino Riachuelo,7648.43200,0.00,0.000,0.000,0.0000,0.0,0.0,0.00,0.0,0.00000,7648.43200,0.00000,7648.43200
12555,2025,8,10100,TRIBUNAL MARÍTIMO,4763.40000,284.70,4313.600,4315.400,0.0000,0.0,0.0,0.00,0.0,0.00000,13677.10000,0.00000,13677.10000
12556,2025,9,88131,"NAVIO PATRULHA FLUVIAL ""RONDÔNIA""",0.00000,0.00,0.000,0.000,0.0000,0.0,0.0,0.00,0.0,0.00000,0.00000,0.00000,0.00000


In [186]:
import ipywidgets as widgets
from IPython.display import display

w = widgets.Dropdown(
    options=['Addition', 'Multiplication', 'Subtraction', 'Division'],
    value='Addition',
    description='Task:',
)

def on_change(change):
    if change['type'] == 'change' and change['name'] == 'value':
        print("changed to %s" % change['new'])

w.observe(on_change)

display(w)

Dropdown(description='Task:', options=('Addition', 'Multiplication', 'Subtraction', 'Division'), value='Additi…

In [187]:
color = 'green' #@param ["red", "green", "blue"]
print(f"Selected color: {color}")

Selected color: green


In [36]:
sarima_model = SARIMAX(train.totais_balanco_paiol_despesa,
                       order=model_auto.order,
                       seasonal_order=model_auto.seasonal_order)

sarima_fit = sarima_model.fit(disp=False)

forecast_sarima = sarima_fit.get_forecast(steps=12)
pred_sarima = forecast_sarima.predicted_mean

mae_sarima = mean_absolute_error(test.totais_balanco_paiol_despesa, pred_sarima)
print('SARIMA MAE:', mae_sarima)



SARIMA MAE: 1582296.4842019656


In [37]:
fig = grafico_base('Previsão temporal com o algoritmo SARIMA')

fig.add_trace(
    go.Scatter(
        x = test.mes,
        y = pred_sarima
    )
)

fig.update_traces(mode='lines+markers', line=dict(width=2))
fig.update_xaxes(tickangle=45)
fig.update_layout(
    template='plotly_white',
    hovermode='x unified'
    )

fig.show()

In [None]:
# Previsão com o Prophet

prophet_df = mmm_marinha[['mes', 'totais_balanco_paiol_despesa']]
prophet_df.columns = ['ds', 'y']
prophet_df['ds'] = pd.to_datetime(prophet_df['ds'], format='%m_%Y')

model_prophet = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
model_prophet.fit(prophet_df)

future = model_prophet.make_future_dataframe(periods=12, freq='M')
forecast = model_prophet.predict(future)

forecast_test = forecast.set_index('ds').loc[pd.to_datetime(test.mes, format='%m_%Y')]
mae_prophet = mean_absolute_error(test['totais_balanco_paiol_despesa'], forecast_test['yhat'])
print('Prophet MAE', mae_prophet)

18:40:51 - cmdstanpy - INFO - Chain [1] start processing
18:40:52 - cmdstanpy - INFO - Chain [1] done processing


Prophet MAE 1378543.2568654409


In [39]:
fig = grafico_base('Previsão temporal com o algoritmo Prophet')

fig.add_trace(
    go.Scatter(
        x = test.mes,
        y = forecast_test.yhat
    )
)

fig.update_traces(mode='lines+markers', line=dict(width=2))
fig.update_xaxes(tickangle=45)
fig.update_layout(
    template='plotly_white',
    hovermode='x unified'
    )

fig.show()

In [None]:
# Regressão com XGBoost para séries temporais

xg_df = mmm_marinha[['mes', 'totais_balanco_paiol_despesa']]
xg_df['ano'] = xg_df['mes'].apply(lambda x: x.split('_')[-1]).astype(int)
xg_df['mes'] = xg_df['mes'].apply(lambda x: x.split('_')[0]).astype(int)
xg_df['lag1'] = xg_df['totais_balanco_paiol_despesa'].shift(1)
xg_df['lag3'] = xg_df['totais_balanco_paiol_despesa'].shift(3)
xg_df['lag6'] = xg_df['totais_balanco_paiol_despesa'].shift(6)
xg_df['rolling3'] = xg_df['totais_balanco_paiol_despesa'].rolling(3).mean()
xg_df['rolling6'] = xg_df['totais_balanco_paiol_despesa'].rolling(6).mean()

xg_df = xg_df.dropna().reset_index(drop=True)

In [41]:
train_xg = xg_df.iloc[:-12]
test_xg = xg_df.iloc[-12:]

X_train = train_xg.drop(columns=['totais_balanco_paiol_despesa'])
y_train = train_xg['totais_balanco_paiol_despesa']
X_test = test_xg.drop(columns=['totais_balanco_paiol_despesa'])
y_test = test_xg['totais_balanco_paiol_despesa']

xgb = XGBRegressor(
    n_estimators = 300,
    learning_rate = 0.05,
    max_depth = 5,
    subsample = 0.8,
    colsample_bytree = 0.8,
    random_state = 42
)

xgb.fit(X_train, y_train)

pred_xgb = xgb.predict(X_test)

mae_xgb = mean_absolute_error(y_test, pred_xgb)

print("XGBoost MAE:", mae_xgb)


XGBoost MAE: 1814813.112411666


In [42]:
fig = grafico_base('Previsão temporal com o algoritmo XGBoost')

fig.add_trace(
    go.Scatter(
        x = test.mes,
        y = pred_xgb
    )
)

fig.update_traces(mode='lines+markers', line=dict(width=2))
fig.update_xaxes(tickangle=45)
fig.update_layout(
    template='plotly_white',
    hovermode='x unified'
    )

fig.show()

In [43]:
hw_model = ExponentialSmoothing(
    train['totais_balanco_paiol_despesa'],
    trend='add',
    seasonal='add',
    seasonal_periods=12
).fit()

pred_hw = hw_model.forecast(12)

In [44]:
fig = grafico_base('Previsão temporal com o algoritmo ExponentialSmoothing')

fig.add_trace(
    go.Scatter(
        x = test.mes,
        y = pred_hw
    )
)

fig.update_traces(mode='lines+markers', line=dict(width=2))
fig.update_xaxes(tickangle=45)
fig.update_layout(
    template='plotly_white',
    hovermode='x unified'
    )

fig.show()

In [None]:
scaler = MinMaxScaler(feature_range=(0, 1))
despesas_scaled = scaler.fit_transform(mmm_marinha.totais_balanco_paiol_despesa.values.reshape(-1, 1))

def create_sequences(data, window=12):
    X, y = [], []
    for i in range(len(data)- window):
        X.append(data[i:i+window])
        y.append(data[i+window])
    return np.array(X), np.array(y)

X, y = create_sequences(despesas_scaled)


split = len(X) - 12
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
print('Shape treino:', X_train.shape, y_train.shape)

Shape treino: (55, 12, 1) (55, 1)


In [46]:
model = Sequential()
model.add(LSTM(64, activation='tanh', return_sequences=True, input_shape=(12, 1)))
model.add(Dropout(0.2))
model.add(LSTM(32, activation='tanh'))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mae')

history = model.fit(
    X_train, y_train,
    epochs = 200,
    batch_size = 4,
    validation_split = 0.1, 
    verbose = 1
)

Epoch 1/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.2467 - val_loss: 0.1305
Epoch 2/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.1683 - val_loss: 0.2326
Epoch 3/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.1610 - val_loss: 0.1421
Epoch 4/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.1315 - val_loss: 0.0670
Epoch 5/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.1298 - val_loss: 0.1068
Epoch 6/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.1297 - val_loss: 0.0861
Epoch 7/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.1283 - val_loss: 0.1027
Epoch 8/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.1323 - val_loss: 0.1590
Epoch 9/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━

In [47]:
y_pred = model.predict(X_test)

y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))
y_pred_inv = scaler.inverse_transform(y_pred)

mae_lstm = mean_absolute_error(y_test_inv, y_pred_inv)
print('LSTM MAE:', mae_lstm)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
LSTM MAE: 1737220.0047799994


In [48]:
fig = grafico_base('Previsão temporal com o algoritmo LSTM')

fig.add_trace(
    go.Scatter(
        x = test.mes,
        y = y_pred_inv.reshape(1, -1)[0]
    )
)

fig.update_traces(mode='lines+markers', line=dict(width=2))
fig.update_xaxes(tickangle=45)
fig.update_layout(
    template='plotly_white',
    hovermode='x unified'
    )

fig.show()

In [49]:
def create_sequences_multistep(data, window=12, horizon=12):
    X, y = [], []
    for i in range(len(data) - window - horizon + 1):
        X.append(data[i:i+window])
        y.append(data[i+window:i+window+horizon].flatten())
    return np.array(X), np.array(y)

window = 12
horizon = 12
X, y = create_sequences_multistep(despesas_scaled, window, horizon)

split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
print('X_train shape:', X_train.shape)
print('y_train, shape:', y_train.shape)

X_train shape: (44, 12, 1)
y_train, shape: (44, 12)


In [50]:
model = Sequential()
model.add(LSTM(64, activation="tanh", return_sequences=True, input_shape=(window, 1)))
model.add(Dropout(0.2))
model.add(LSTM(32, activation="tanh"))
model.add(Dense(horizon))  # saída com 12 valores (multi-step)

model.compile(optimizer="adam", loss="mse")

history = model.fit(
    X_train, y_train,
    epochs=300,
    batch_size=4,
    validation_split=0.1,
    verbose=1
)

Epoch 1/300
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - loss: 0.1682 - val_loss: 0.2689
Epoch 2/300
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.1084 - val_loss: 0.1282
Epoch 3/300
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0656 - val_loss: 0.0741
Epoch 4/300
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0490 - val_loss: 0.0362
Epoch 5/300
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0448 - val_loss: 0.0282
Epoch 6/300
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0435 - val_loss: 0.0580
Epoch 7/300
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0452 - val_loss: 0.0233
Epoch 8/300
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0399 - val_loss: 0.0510
Epoch 9/300
[1m10/10[0m [32m━━━━━━━━━━━━━━━━

In [51]:
y_pred = model.predict(X_test)

y_test_inv = scaler.inverse_transform(y_test)
y_pred_inv = scaler.inverse_transform(y_pred)

mae_lstm_multi = mean_absolute_error(y_test_inv.flatten(), y_pred_inv.flatten())
print('LSTM Multi-step MAE:', mae_lstm_multi)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
LSTM Multi-step MAE: 1976546.0605149998


In [None]:
last_input_dates = pd.to_datetime(mmm_marinha.mes, format='%m_%Y').iloc[-(window + horizon):-horizon]
future_dates = pd.date_range(start = pd.to_datetime(mmm_marinha.mes, format='%m_%Y').iloc[-horizon], periods=horizon, freq='M')

In [53]:
fig = grafico_base('Previsão temporal com o algoritmo LSTM Multi-step')

fig.add_trace(
    go.Scatter(
        x = test.mes,
        y = y_pred_inv.flatten()
    )
)

fig.update_traces(mode='lines+markers', line=dict(width=2))
fig.update_xaxes(tickangle=45)
fig.update_layout(
    template='plotly_white',
    hovermode='x unified'
    )

fig.show()

In [54]:
df['mes/ano'] = ['/'.join([str(i[0]), str(i[1])]) for i in zip(df.mes, df.ano)]

plt.figure(figsize=(100, 20))
px.line(df, x='mes/ano', y='saldo', color='nome')

NameError: name 'df' is not defined