<img src="img/logo-pirelli-dsa.png" align="left"/>
<br>

# Projeção de Produção - Modelo Preditivo Dados 2018
***
**Integrantes do Grupo:**
<br>Danillo Silva
<br>Diego Dantas
<br>Gustavo Sainatto
<br>Nestor Soken
<br>Rubia Quinteiro Nierotka

**Observação:** a execução travou ao processar o turno de 2018-06-23 07:00:00.

### Bibliotecas:
***

In [1]:
import pandas as pd
import numpy as np
import datetime as dt

from statsmodels.tsa.stattools import adfuller
from sklearn import metrics

from math import sqrt
from math import pi

from pmdarima.arima import auto_arima

### Carregar Base de Dados Históricos:
***

In [2]:
# Load the production history and keep only the three columns that
# forecast_2018 reads (EVS_END, MCH_NUMBER) or aggregates (EVS_REAL_QTY).
colunas_usadas = ['EVS_END', 'MCH_NUMBER', 'EVS_REAL_QTY']
df_base = pd.read_parquet('prod_hist.parquet')[colunas_usadas]

# Show dtypes, row count and memory footprint of the loaded frame.
df_base.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3693833 entries, 0 to 3693832
Data columns (total 3 columns):
EVS_END         datetime64[ns]
MCH_NUMBER      object
EVS_REAL_QTY    float64
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 112.7+ MB


### Definir Função de Teste Para o Modelo:
***

In [3]:
def _aggregate_by_machine(df_base, start, end):
    """Sum the numeric columns per machine in 5-minute buckets for start <= EVS_END < end.

    Returns a frame indexed by bucket timestamp with one column per MCH_NUMBER.
    Machine/bucket combinations missing after the pivot are filled with 0.
    """
    window = df_base.loc[(df_base['EVS_END'] >= start) & (df_base['EVS_END'] < end)]
    grouped = window.groupby(by=['MCH_NUMBER', pd.Grouper(key='EVS_END', freq='5min')]).sum()
    wide = grouped.unstack('MCH_NUMBER').fillna(value=0)
    # Drop the outer column level (the aggregated column name) so that the
    # columns are just the machine identifiers.
    wide.columns = wide.columns.droplevel()
    return wide


def _fallback_frame(machines, turno_atual):
    """Build the 'no forecast available' result: one row [turno_atual, 0, 0] per machine."""
    rows = {maq: [turno_atual, 0, 0] for maq in machines}
    return pd.DataFrame.from_dict(rows, orient='index', columns=['DATA', 'FORECAST', 'REAL'])


def forecast_2018(interval, df_base):
    """Fit one ARIMA model per machine on the previous 24 h and forecast the next shift.

    Parameters
    ----------
    interval : str or datetime-like
        Start of the shift to forecast; parsed with ``pd.to_datetime``.
    df_base : pandas.DataFrame
        Event history with columns ``EVS_END`` (datetime), ``MCH_NUMBER``
        and ``EVS_REAL_QTY``.

    Returns
    -------
    pandas.DataFrame
        Indexed by machine, with columns ``DATA`` (the shift start),
        ``FORECAST`` and ``REAL``. On success ``FORECAST`` and ``REAL`` hold
        lists of 5-minute values; whenever a step fails for the shift (or the
        machine has no real data) they hold the scalar 0 instead.

    Notes
    -----
    The best-effort fallbacks of the original are kept, but they catch
    ``Exception`` rather than everything, so ``KeyboardInterrupt`` can stop
    a long batch run.
    """
    turnos = 3
    tempo_turno = 8 * 60  # shift length in minutes

    turno_atual = pd.to_datetime(interval)
    turno_passado = turno_atual - dt.timedelta(minutes=tempo_turno * turnos)
    turno_futuro = turno_atual + dt.timedelta(minutes=tempo_turno)

    # Training data: the three shifts immediately before the one being forecast.
    df_series = _aggregate_by_machine(df_base, turno_passado, turno_atual)
    machines = df_series.columns.tolist()

    # ADF test per machine: p-value < 0.05 is treated as "already stationary".
    # If the test fails for ANY machine, the whole shift falls back to zeros.
    try:
        adf = {maq: bool(adfuller(df_series[maq])[1] < 0.05) for maq in machines}
    except Exception:
        return _fallback_frame(machines, turno_atual)

    # NOTE(review): seasonal=True is passed without a seasonal period `m`;
    # with pmdarima's default m this is presumably non-seasonal in practice — confirm.
    models = {}
    for maq in machines:
        models[maq] = auto_arima(df_series[maq], seasonal=True, stationary=adf[maq], trace=False,
                                 error_action='ignore', suppress_warnings=True, stepwise=True)

    # NOTE(review): both endpoints are included here, so this yields one more
    # 5-minute step than the half-open real-data window below — confirm that
    # downstream code expects FORECAST to be one element longer than a full shift.
    period = pd.date_range(start=turno_atual, end=turno_futuro, freq='5min')

    pred = {maq: model.predict(n_periods=period.size) for maq, model in models.items()}
    df_forecast = pd.DataFrame.from_dict(pred)

    # Fails when there is nothing to index (e.g. no machines in the training
    # window); in that case return the zero rows for whatever columns exist.
    try:
        df_forecast.index = period
    except Exception:
        return _fallback_frame(df_forecast.columns, turno_atual)

    # Actual production observed during the forecast shift.
    df_real = _aggregate_by_machine(df_base, turno_atual, turno_futuro)

    final = {}
    for maq in df_forecast.columns:
        try:
            final[maq] = [turno_atual, df_forecast[maq].tolist(), df_real[maq].tolist()]
        except Exception:
            # Typically the machine has no rows in the forecast shift.
            final[maq] = [turno_atual, 0, 0]

    return pd.DataFrame.from_dict(final, orient='index', columns=['DATA', 'FORECAST', 'REAL'])

In [4]:

# First and last shift starts to evaluate; with an 8-hour step from 07:00
# the shifts begin at 07:00, 15:00 and 23:00.
data_inicio = pd.to_datetime(dt.datetime(year=2018, month=3, day=5, hour=7))
data_final = pd.to_datetime(dt.datetime(year=2018, month=12, day=31, hour=23))

# One entry per shift, stored as strings; forecast_2018 parses each one
# back into a timestamp itself.
inicios_turno = pd.date_range(start=data_inicio, end=data_final, freq='480Min')
interval = inicios_turno.astype(str).tolist()

In [5]:
# Run the forecast for every shift, collecting one result frame per shift.
start_time = pd.Timestamp.today()

result = []
total_turnos = len(interval)
for posicao, turno in enumerate(interval, start=1):

    s = pd.Timestamp.today()
    result.append(forecast_2018(turno, df_base))
    f = pd.Timestamp.today()

    # Per-shift progress line with the time spent on this shift only.
    print('Processado Turno {}/{}: {} em {}'.format(posicao, total_turnos, turno, f - s))

# Total wall-clock time of the whole run. The original printed f - s, which
# is only the duration of the last shift and is undefined for an empty list.
print("Executed in: {}".format(pd.Timestamp.today() - start_time))

Processado Turno 1/906: 2018-03-05 07:00:00 em 0 days 00:00:18.580237
Processado Turno 2/906: 2018-03-05 15:00:00 em 0 days 00:00:18.620055
Processado Turno 3/906: 2018-03-05 23:00:00 em 0 days 00:00:19.592401
Processado Turno 4/906: 2018-03-06 07:00:00 em 0 days 00:00:18.931125
Processado Turno 5/906: 2018-03-06 15:00:00 em 0 days 00:00:16.100899
Processado Turno 6/906: 2018-03-06 23:00:00 em 0 days 00:00:23.620058
Processado Turno 7/906: 2018-03-07 07:00:00 em 0 days 00:00:18.954896
Processado Turno 8/906: 2018-03-07 15:00:00 em 0 days 00:00:19.152804
Processado Turno 9/906: 2018-03-07 23:00:00 em 0 days 00:00:14.841160
Processado Turno 10/906: 2018-03-08 07:00:00 em 0 days 00:00:18.130943
Processado Turno 11/906: 2018-03-08 15:00:00 em 0 days 00:00:23.582092
Processado Turno 12/906: 2018-03-08 23:00:00 em 0 days 00:00:22.354340
Processado Turno 13/906: 2018-03-09 07:00:00 em 0 days 00:00:18.488630
Processado Turno 14/906: 2018-03-09 15:00:00 em 0 days 00:00:18.584559
Processado Turn

Processado Turno 117/906: 2018-04-12 23:00:00 em 0 days 00:00:27.173125
Processado Turno 118/906: 2018-04-13 07:00:00 em 0 days 00:00:18.989557
Processado Turno 119/906: 2018-04-13 15:00:00 em 0 days 00:00:16.865572
Processado Turno 120/906: 2018-04-13 23:00:00 em 0 days 00:00:20.042448
Processado Turno 121/906: 2018-04-14 07:00:00 em 0 days 00:00:20.221759
Processado Turno 122/906: 2018-04-14 15:00:00 em 0 days 00:00:18.033138
Processado Turno 123/906: 2018-04-14 23:00:00 em 0 days 00:00:14.133377
Processado Turno 124/906: 2018-04-15 07:00:00 em 0 days 00:00:19.195367
Processado Turno 125/906: 2018-04-15 15:00:00 em 0 days 00:00:16.047386
Processado Turno 126/906: 2018-04-15 23:00:00 em 0 days 00:00:20.056012
Processado Turno 127/906: 2018-04-16 07:00:00 em 0 days 00:00:19.676157
Processado Turno 128/906: 2018-04-16 15:00:00 em 0 days 00:00:21.679384
Processado Turno 129/906: 2018-04-16 23:00:00 em 0 days 00:00:20.686640
Processado Turno 130/906: 2018-04-17 07:00:00 em 0 days 00:00:17

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


Processado Turno 145/906: 2018-04-22 07:00:00 em 0 days 00:00:16.166267


  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


Processado Turno 146/906: 2018-04-22 15:00:00 em 0 days 00:00:01.103125
Processado Turno 147/906: 2018-04-22 23:00:00 em 0 days 00:00:00.033589


  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


Processado Turno 148/906: 2018-04-23 07:00:00 em 0 days 00:00:02.431693
Processado Turno 149/906: 2018-04-23 15:00:00 em 0 days 00:00:10.855241
Processado Turno 150/906: 2018-04-23 23:00:00 em 0 days 00:00:16.804398
Processado Turno 151/906: 2018-04-24 07:00:00 em 0 days 00:00:19.498647
Processado Turno 152/906: 2018-04-24 15:00:00 em 0 days 00:00:13.216186
Processado Turno 153/906: 2018-04-24 23:00:00 em 0 days 00:00:21.702924
Processado Turno 154/906: 2018-04-25 07:00:00 em 0 days 00:00:24.117344
Processado Turno 155/906: 2018-04-25 15:00:00 em 0 days 00:00:20.340232
Processado Turno 156/906: 2018-04-25 23:00:00 em 0 days 00:00:13.322891
Processado Turno 157/906: 2018-04-26 07:00:00 em 0 days 00:00:19.939678
Processado Turno 158/906: 2018-04-26 15:00:00 em 0 days 00:00:23.570855
Processado Turno 159/906: 2018-04-26 23:00:00 em 0 days 00:00:15.241226
Processado Turno 160/906: 2018-04-27 07:00:00 em 0 days 00:00:18.434902
Processado Turno 161/906: 2018-04-27 15:00:00 em 0 days 00:00:24

Processado Turno 262/906: 2018-05-31 07:00:00 em 0 days 00:00:17.386056
Processado Turno 263/906: 2018-05-31 15:00:00 em 0 days 00:00:25.879202
Processado Turno 264/906: 2018-05-31 23:00:00 em 0 days 00:00:18.829166
Processado Turno 265/906: 2018-06-01 07:00:00 em 0 days 00:00:21.771081
Processado Turno 266/906: 2018-06-01 15:00:00 em 0 days 00:00:19.328084
Processado Turno 267/906: 2018-06-01 23:00:00 em 0 days 00:00:21.092012
Processado Turno 268/906: 2018-06-02 07:00:00 em 0 days 00:00:20.016873
Processado Turno 269/906: 2018-06-02 15:00:00 em 0 days 00:00:23.131812
Processado Turno 270/906: 2018-06-02 23:00:00 em 0 days 00:00:19.712869
Processado Turno 271/906: 2018-06-03 07:00:00 em 0 days 00:00:25.217989
Processado Turno 272/906: 2018-06-03 15:00:00 em 0 days 00:00:20.074605
Processado Turno 273/906: 2018-06-03 23:00:00 em 0 days 00:00:22.996140
Processado Turno 274/906: 2018-06-04 07:00:00 em 0 days 00:00:19.209388
Processado Turno 275/906: 2018-06-04 15:00:00 em 0 days 00:00:22

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


Processado Turno 337/906: 2018-06-25 07:00:00 em 0 days 00:00:05.999554
Processado Turno 338/906: 2018-06-25 15:00:00 em 0 days 00:00:10.815277
Processado Turno 339/906: 2018-06-25 23:00:00 em 0 days 00:00:18.057980
Processado Turno 340/906: 2018-06-26 07:00:00 em 0 days 00:00:20.712764
Processado Turno 341/906: 2018-06-26 15:00:00 em 0 days 00:00:17.245444
Processado Turno 342/906: 2018-06-26 23:00:00 em 0 days 00:00:19.925506
Processado Turno 343/906: 2018-06-27 07:00:00 em 0 days 00:00:18.627417
Processado Turno 344/906: 2018-06-27 15:00:00 em 0 days 00:00:17.041508
Processado Turno 345/906: 2018-06-27 23:00:00 em 0 days 00:00:24.242003
Processado Turno 346/906: 2018-06-28 07:00:00 em 0 days 00:00:19.364596
Processado Turno 347/906: 2018-06-28 15:00:00 em 0 days 00:00:20.758474
Processado Turno 348/906: 2018-06-28 23:00:00 em 0 days 00:00:24.890283
Processado Turno 349/906: 2018-06-29 07:00:00 em 0 days 00:00:18.411138
Processado Turno 350/906: 2018-06-29 15:00:00 em 0 days 00:00:16

Processado Turno 451/906: 2018-08-02 07:00:00 em 0 days 00:00:19.055421
Processado Turno 452/906: 2018-08-02 15:00:00 em 0 days 00:00:23.536279
Processado Turno 453/906: 2018-08-02 23:00:00 em 0 days 00:00:16.645078
Processado Turno 454/906: 2018-08-03 07:00:00 em 0 days 00:00:18.778079
Processado Turno 455/906: 2018-08-03 15:00:00 em 0 days 00:00:20.986969
Processado Turno 456/906: 2018-08-03 23:00:00 em 0 days 00:00:23.390084
Processado Turno 457/906: 2018-08-04 07:00:00 em 0 days 00:00:22.826010
Processado Turno 458/906: 2018-08-04 15:00:00 em 0 days 00:00:19.099547
Processado Turno 459/906: 2018-08-04 23:00:00 em 0 days 00:00:15.825057
Processado Turno 460/906: 2018-08-05 07:00:00 em 0 days 00:00:21.266917
Processado Turno 461/906: 2018-08-05 15:00:00 em 0 days 00:00:25.943196
Processado Turno 462/906: 2018-08-05 23:00:00 em 0 days 00:00:26.025278
Processado Turno 463/906: 2018-08-06 07:00:00 em 0 days 00:00:18.445443
Processado Turno 464/906: 2018-08-06 15:00:00 em 0 days 00:00:23

Processado Turno 565/906: 2018-09-09 07:00:00 em 0 days 00:00:21.140312
Processado Turno 566/906: 2018-09-09 15:00:00 em 0 days 00:00:21.442525
Processado Turno 567/906: 2018-09-09 23:00:00 em 0 days 00:00:16.631852
Processado Turno 568/906: 2018-09-10 07:00:00 em 0 days 00:00:16.309592
Processado Turno 569/906: 2018-09-10 15:00:00 em 0 days 00:00:18.734290
Processado Turno 570/906: 2018-09-10 23:00:00 em 0 days 00:00:23.786519
Processado Turno 571/906: 2018-09-11 07:00:00 em 0 days 00:00:26.005654
Processado Turno 572/906: 2018-09-11 15:00:00 em 0 days 00:00:24.781480
Processado Turno 573/906: 2018-09-11 23:00:00 em 0 days 00:00:18.538540
Processado Turno 574/906: 2018-09-12 07:00:00 em 0 days 00:00:17.730001
Processado Turno 575/906: 2018-09-12 15:00:00 em 0 days 00:00:15.849410
Processado Turno 576/906: 2018-09-12 23:00:00 em 0 days 00:00:21.553929
Processado Turno 577/906: 2018-09-13 07:00:00 em 0 days 00:00:17.601185
Processado Turno 578/906: 2018-09-13 15:00:00 em 0 days 00:00:22

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


Processado Turno 667/906: 2018-10-13 07:00:00 em 0 days 00:00:08.476760
Processado Turno 668/906: 2018-10-13 15:00:00 em 0 days 00:00:27.797232
Processado Turno 669/906: 2018-10-13 23:00:00 em 0 days 00:00:13.968310
Processado Turno 670/906: 2018-10-14 07:00:00 em 0 days 00:00:19.957922
Processado Turno 671/906: 2018-10-14 15:00:00 em 0 days 00:00:24.646416
Processado Turno 672/906: 2018-10-14 23:00:00 em 0 days 00:00:20.309414
Processado Turno 673/906: 2018-10-15 07:00:00 em 0 days 00:00:25.821258
Processado Turno 674/906: 2018-10-15 15:00:00 em 0 days 00:00:21.434855
Processado Turno 675/906: 2018-10-15 23:00:00 em 0 days 00:00:19.922285
Processado Turno 676/906: 2018-10-16 07:00:00 em 0 days 00:00:16.721960
Processado Turno 677/906: 2018-10-16 15:00:00 em 0 days 00:00:18.157767
Processado Turno 678/906: 2018-10-16 23:00:00 em 0 days 00:00:23.738713
Processado Turno 679/906: 2018-10-17 07:00:00 em 0 days 00:00:22.508862
Processado Turno 680/906: 2018-10-17 15:00:00 em 0 days 00:00:20

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


Processado Turno 769/906: 2018-11-16 07:00:00 em 0 days 00:00:01.093387
Processado Turno 770/906: 2018-11-16 15:00:00 em 0 days 00:00:04.817914
Processado Turno 771/906: 2018-11-16 23:00:00 em 0 days 00:00:11.097264
Processado Turno 772/906: 2018-11-17 07:00:00 em 0 days 00:00:14.208402
Processado Turno 773/906: 2018-11-17 15:00:00 em 0 days 00:00:12.744708
Processado Turno 774/906: 2018-11-17 23:00:00 em 0 days 00:00:16.487670
Processado Turno 775/906: 2018-11-18 07:00:00 em 0 days 00:00:19.094383
Processado Turno 776/906: 2018-11-18 15:00:00 em 0 days 00:00:16.734290
Processado Turno 777/906: 2018-11-18 23:00:00 em 0 days 00:00:15.296801
Processado Turno 778/906: 2018-11-19 07:00:00 em 0 days 00:00:14.741102
Processado Turno 779/906: 2018-11-19 15:00:00 em 0 days 00:00:16.589561
Processado Turno 780/906: 2018-11-19 23:00:00 em 0 days 00:00:15.423889
Processado Turno 781/906: 2018-11-20 07:00:00 em 0 days 00:00:16.705359
Processado Turno 782/906: 2018-11-20 15:00:00 em 0 days 00:00:15

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


Processado Turno 856/906: 2018-12-15 07:00:00 em 0 days 00:00:13.713876
Processado Turno 857/906: 2018-12-15 15:00:00 em 0 days 00:00:17.213384
Processado Turno 858/906: 2018-12-15 23:00:00 em 0 days 00:00:16.455861
Processado Turno 859/906: 2018-12-16 07:00:00 em 0 days 00:00:16.529657
Processado Turno 860/906: 2018-12-16 15:00:00 em 0 days 00:00:16.420428
Processado Turno 861/906: 2018-12-16 23:00:00 em 0 days 00:00:16.387919
Processado Turno 862/906: 2018-12-17 07:00:00 em 0 days 00:00:17.197931
Processado Turno 863/906: 2018-12-17 15:00:00 em 0 days 00:00:22.215675
Processado Turno 864/906: 2018-12-17 23:00:00 em 0 days 00:00:15.614883
Processado Turno 865/906: 2018-12-18 07:00:00 em 0 days 00:00:19.518838
Processado Turno 866/906: 2018-12-18 15:00:00 em 0 days 00:00:18.762960
Processado Turno 867/906: 2018-12-18 23:00:00 em 0 days 00:00:20.964722
Processado Turno 868/906: 2018-12-19 07:00:00 em 0 days 00:00:17.372984
Processado Turno 869/906: 2018-12-19 15:00:00 em 0 days 00:00:17

In [20]:
# Stack the per-shift result frames into a single table; the index holds the
# machine identifiers and therefore repeats across shifts.
FINAL = pd.concat(result)

In [23]:
# Persist the combined results so the long-running forecast loop does not
# have to be executed again to inspect them.
FINAL.to_pickle('Results_Proj_2018.pkl')

In [27]:
# Sanity check: (rows, columns) of the combined results table.
FINAL.shape

(16960, 3)