<img src="img/logo-pirelli-dsa.png" align="left"/>
<br>

# Projeção de Produção - Modelo Preditivo Dados 2018
***
**Integrantes Grupo:**
<br>Danillo Silva
<br>Diego Dantas
<br>Gustavo Sainatto
<br>Nestor Soken
<br>Rubia Quinteiro Nierotka

Travou em 2018-04-22 23:00:00

### Bibliotecas:
***

In [1]:
import pandas as pd
import numpy as np
import datetime as dt

from statsmodels.tsa.stattools import adfuller
from sklearn import metrics

from math import sqrt
from math import pi

from pmdarima.arima import auto_arima

### Carregar Base de Dados Historicos:
***

In [2]:
df_base = pd.read_parquet('prod_hist.parquet')
df_base = df_base[['EVS_END', 'MCH_NUMBER', 'EVS_REAL_QTY']]

df_base.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3693833 entries, 0 to 3693832
Data columns (total 3 columns):
EVS_END         datetime64[ns]
MCH_NUMBER      object
EVS_REAL_QTY    float64
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 112.7+ MB


### Definir Funcao de Teste Para o Modelo:
***

In [3]:
def forecast_2018(interval, df_base):
    turnos = 3
    tempo_turno = (8 * 60)

    turno_atual = pd.to_datetime(interval)
    turno_passado = turno_atual - dt.timedelta(minutes=(tempo_turno * turnos))
    turno_futuro = turno_atual + dt.timedelta(minutes=tempo_turno)

    print('Processando Turno : {}'.format(turno_atual))

    df_series = df_base.loc[(df_base['EVS_END'] >= turno_passado) & (df_base['EVS_END'] < turno_atual)]

    df_series = df_series.groupby(by=['MCH_NUMBER', pd.Grouper(key='EVS_END', freq='5Min')]).sum()
    df_series = df_series.unstack('MCH_NUMBER')
    df_series.fillna(value=0, inplace=True)
    df_series.columns = df_series.columns.droplevel()

    adf = dict(zip(df_series.columns.tolist(),
                   [True if (adfuller(df_series[col])[1] < 0.05) else False for col in df_series.columns.tolist()]))

    model ={}
    for i in range(df_series.columns.size):
        maq = df_series.columns[i]
        
        print(maq)
        
        arima_model = auto_arima(df_series[maq], seasonal=True, stationary=adf[maq],trace=False,
                                 error_action='ignore',suppress_warnings=True, stepwise=True)
        
        model[maq] = [arima_model]

    df_model =  pd.DataFrame.from_dict(model, orient='index', columns=['model'])
    period = pd.date_range(start= turno_atual, end= turno_futuro, freq='5Min')

    pred = {}
    for i in range(df_model.shape[0]):
        pred[df_model.index[i]] = df_model['model'][i].predict(n_periods=period.size)

    df_forecast =  pd.DataFrame.from_dict(pred)
    df_forecast.index = period

    final = {}
    
    df_real = df_base.loc[(df_base['EVS_END'] >= turno_atual) & (df_base['EVS_END'] < turno_futuro)]

    df_real = df_real.groupby(by=['MCH_NUMBER', pd.Grouper(key='EVS_END', freq='5Min')]).sum()
    df_real = df_real.unstack('MCH_NUMBER')
    df_real.fillna(value=0, inplace=True)
    df_real.columns = df_real.columns.droplevel()
    
    for i in range(df_forecast.columns.size):
        try:
            final[df_forecast.columns[i]] = [turno_atual,
                                             df_forecast[df_forecast.columns[i]].tolist(),
                                             df_real[df_forecast.columns[i]].tolist()]
        except:
            final[df_forecast.columns[i]] = [turno_atual, 0, 0]

    df_final = pd.DataFrame.from_dict(final, orient='index', columns=['DATA', 'FORECAST', 'REAL'])
       
    return df_final

In [4]:

#pd.to_datetime(dt.datetime(2018,3,5,7))

data_inicio = pd.to_datetime(dt.datetime(2018,3,5,7))
data_final =  pd.to_datetime(dt.datetime(2018,12,31,23))

interval = pd.date_range(start= data_inicio,
                         end=   data_final,
                         freq='480Min').astype(str).tolist()

In [5]:

result =[]
for i in range(len(interval)):
    result.append(forecast_2018(interval[i],df_base))

Processando Turno : 2018-03-05 07:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-05 15:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-05 23:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-06 07:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-06 15:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-06 23:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-07 07:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-07 15:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-07 23:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01


V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-28 23:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-29 07:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-29 15:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-29 23:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-30 07:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-30 15:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-30 23:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-03-31 07:00:00
AM1
AM2
AM3
AM4
TL1
TR1
V01
V02
V03
V04
V06
V07
V09
V10
V11
V12
Processando Turno : 2018-03-31 15:00:00
AM1


AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-04-21 15:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-04-21 23:00:00
AM1
AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-04-22 07:00:00
AM1


  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


AM2
AM3
AM4
TL1
TR1
TR2
V01
V02
V03
V04
V05
V06
V07
V08
V09
V10
V11
V12
Processando Turno : 2018-04-22 15:00:00
V01


  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


V02
V03
V06
V07
V12
Processando Turno : 2018-04-22 23:00:00


ValueError: Length mismatch: Expected axis has 0 elements, new values have 97 elements

In [None]:
df_final = pd.concat(result)

In [None]:
df_final[df_final['FORECAST']==0]

In [None]:
df_final.to_parquet('results_2018.parquet')