In [30]:
import pandas as pd
import numpy as np
from numpy import mean
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from pmdarima.arima import auto_arima
import fbprophet as fb

In [3]:
# Load the per-technician ticket counts (one row per technician per date)
# from the CSV in the working directory, then display the frame.
top_technician_agg = pd.read_csv(filepath_or_buffer="top_technician_agg.csv")
top_technician_agg

Unnamed: 0,assigned_to,sys_created_on,ticket_counts
0,"Bender, Brendt",2018-01-14,27
1,"Bender, Brendt",2018-01-21,63
2,"Bender, Brendt",2018-01-28,61
3,"Bender, Brendt",2018-02-04,75
4,"Bender, Brendt",2018-02-11,66
5,"Bender, Brendt",2018-02-18,59
6,"Bender, Brendt",2018-02-25,77
7,"Bender, Brendt",2018-03-04,72
8,"Bender, Brendt",2018-03-11,72
9,"Bender, Brendt",2018-03-18,81


In [4]:
# Distinct technician names, in order of first appearance in the data.
top_tech_list = top_technician_agg['assigned_to'].unique().tolist()
top_tech_list

['Bender, Brendt',
 'Blount, Dustin',
 'Cook, Stephanie',
 'Dakin, Timothy',
 'Dressler, Derek',
 'Hilliard, Felix',
 'Jackson, Tucker',
 'Kester, Kenny',
 'McDowell, Joe',
 'Muthuraman, Vairakumar',
 'Pappas, Gillian',
 'Patel, Jighnesh',
 'Poe, William',
 'Pronschinske, Cole',
 'Rasmussen, Ranee',
 'Tapia Velazco, Manuel',
 'Velazquez, Nilda',
 'Walley, Miah',
 'Weber, James',
 'Zellmer, Jamie']

In [5]:
# Exponential smoothing (additive trend).
#
# For every technician: hold out the last `n_holdout` observations, fit the
# model on the rest, forecast the held-out span and record MAE / MSE.
# Outputs:
#   result - each technician's history with a `pred_ticket_counts` column
#            (NaN outside the held-out span); also written to CSV.
#   score  - one row per technician with the error metrics. This cell
#            (re)creates `score`; later model cells add their rows to it.

n_holdout = 5  # trailing observations reserved for evaluation

# Collect per-technician pieces in lists and build the frames once at the
# end: DataFrame.append was removed in pandas 2.0 and is quadratic in a loop.
result_frames = []
score_records = []

for technician in top_tech_list:
    track = top_technician_agg[top_technician_agg.assigned_to == technician]
    track = track[['sys_created_on', 'ticket_counts']]

    track = track.set_index(['sys_created_on'])
    track.index = pd.to_datetime(track.index)

    train = track[:-n_holdout]
    test = track[-n_holdout:]

    model = ExponentialSmoothing(train['ticket_counts'], trend='add').fit()
    forecast = model.forecast(len(test))

    # Label the forecast with the held-out dates explicitly (as the ARIMA cell
    # does) instead of relying on the index statsmodels infers for the
    # forecast, so the merge below always lines up with the test rows.
    pred = pd.DataFrame({
        'sys_created_on': test.index,
        'pred_ticket_counts': np.asarray(forecast),
    })

    score_records.append({
        'assigned_to': technician,
        'mae': mean_absolute_error(test['ticket_counts'], pred['pred_ticket_counts']),
        'mse': mean_squared_error(test['ticket_counts'], pred['pred_ticket_counts']),
        'model': 'Exponential Smoothening',
    })

    track = track.reset_index()
    track = pd.merge(track, pred, on='sys_created_on', how='left')
    track['assigned_to'] = technician
    result_frames.append(track)

# Fall back to empty frames when there are no technicians, matching what the
# append-based version produced.
result = pd.concat(result_frames) if result_frames else pd.DataFrame()
score = pd.DataFrame(score_records)
result.to_csv('Exp_smoothening_result.csv', index=False)




In [6]:
# Display the `score` frame (error metrics per technician and model).
score

Unnamed: 0,assigned_to,mae,model,mse
0,"Bender, Brendt",4.2,Exponential Smoothening,19.0
0,"Blount, Dustin",6.984617,Exponential Smoothening,78.559679
0,"Cook, Stephanie",3.536179,Exponential Smoothening,19.687016
0,"Dakin, Timothy",3.824176,Exponential Smoothening,21.64656
0,"Dressler, Derek",13.239598,Exponential Smoothening,302.295755
0,"Hilliard, Felix",19.491917,Exponential Smoothening,554.611184
0,"Jackson, Tucker",27.800001,Exponential Smoothening,791.000039
0,"Kester, Kenny",11.129438,Exponential Smoothening,167.144184
0,"McDowell, Joe",5.957397,Exponential Smoothening,45.083524
0,"Muthuraman, Vairakumar",7.867319,Exponential Smoothening,114.237551


In [7]:
# ARIMA forecasting (non-seasonal, order chosen by auto_arima).
#
# For every technician: hold out the last `n_holdout` observations, let
# auto_arima select and fit a model on the rest, forecast the held-out span
# and record MAE / MSE.
# Outputs:
#   result - each technician's history with a `pred_ticket_counts` column
#            (NaN outside the held-out span); also written to CSV.
#   score  - the existing `score` frame with this model's rows added.

n_holdout = 5  # trailing observations reserved for evaluation

# Collect per-technician pieces in lists and build the frames once at the
# end: DataFrame.append was removed in pandas 2.0 and is quadratic in a loop.
result_frames = []
score_records = []

for technician in top_tech_list:
    track = top_technician_agg[top_technician_agg.assigned_to == technician]
    track = track[['sys_created_on', 'ticket_counts']]

    track = track.set_index(['sys_created_on'])
    track.index = pd.to_datetime(track.index)

    train = track[:-n_holdout]
    test = track[-n_holdout:]

    # auto_arima returns the selected model already fitted on `train`, so no
    # separate model.fit(...) call is needed.
    model = auto_arima(train['ticket_counts'], seasonal=False)

    # predict() may return an ndarray or a Series depending on the pmdarima
    # version; np.asarray keeps only the values and the held-out dates are
    # attached explicitly.
    pred = pd.DataFrame({
        'sys_created_on': test.index,
        'pred_ticket_counts': np.asarray(model.predict(n_periods=len(test))),
    })

    score_records.append({
        'assigned_to': technician,
        'mae': mean_absolute_error(test['ticket_counts'], pred['pred_ticket_counts']),
        'mse': mean_squared_error(test['ticket_counts'], pred['pred_ticket_counts']),
        'model': 'Arima',
    })

    track = track.reset_index()
    track = pd.merge(track, pred, on='sys_created_on', how='left')
    track['assigned_to'] = technician
    result_frames.append(track)

result = pd.concat(result_frames) if result_frames else pd.DataFrame()

# Drop any rows a previous run of this cell added, so re-running the cell
# does not duplicate the ARIMA scores.
if 'model' in score.columns:
    score = score[score['model'] != 'Arima']
score = pd.concat([score, pd.DataFrame(score_records)], ignore_index=True)

result.to_csv('Arima_result.csv', index=False)


  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


In [8]:
# Display the `result` frame most recently built by a forecasting cell
# (history plus `pred_ticket_counts`, which is NaN outside the held-out rows).
result

Unnamed: 0,sys_created_on,ticket_counts,pred_ticket_counts,assigned_to
0,2018-01-14,27,,"Bender, Brendt"
1,2018-01-21,63,,"Bender, Brendt"
2,2018-01-28,61,,"Bender, Brendt"
3,2018-02-04,75,,"Bender, Brendt"
4,2018-02-11,66,,"Bender, Brendt"
5,2018-02-18,59,,"Bender, Brendt"
6,2018-02-25,77,,"Bender, Brendt"
7,2018-03-04,72,,"Bender, Brendt"
8,2018-03-11,72,,"Bender, Brendt"
9,2018-03-18,81,,"Bender, Brendt"


In [20]:
# Moving-window average for a single technician (walk-forward validation).
#
# Each of the last 5 observations is predicted as the mean of the `window`
# most recent known values; after each prediction the true value is revealed
# and joins the history.
X = top_technician_agg[top_technician_agg.assigned_to == 'Bender, Brendt'].ticket_counts
window = 5

# Work on a plain positional list. Indexing the Series with X[i] is
# label-based here (the filtered frame keeps its original integer labels), so
# it is only correct when this technician's rows happen to start at label 0.
values = X.tolist()

# Slicing (rather than computing len(X) - 15 by hand) cannot produce negative
# indices that wrap around when the series is shorter than 15 observations.
history = values[-15:-5]
test = values[-5:]
predictions = list()

# walk forward over time steps in test
for obs in test:
    yhat = mean(history[-window:])
    predictions.append(yhat)
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
error = mean_squared_error(test, predictions)

predicted=4.600000, expected=6.000000
predicted=4.600000, expected=6.000000
predicted=5.400000, expected=3.000000
predicted=4.800000, expected=6.000000
predicted=4.400000, expected=5.000000


In [23]:
# Moving-window average forecasting (walk-forward validation).
#
# For every technician: each of the last `n_holdout` observations is
# predicted as the mean of the `window` most recent known values; after each
# prediction the true value is revealed and joins the history.
# Outputs:
#   result - each technician's history with a `pred_ticket_counts` column
#            (NaN outside the held-out span); also written to CSV.
#   score  - the existing `score` frame with this model's rows added.

window = 5     # number of most recent observations averaged per prediction
n_holdout = 5  # trailing observations reserved for evaluation

# Collect per-technician pieces in lists and build the frames once at the
# end: DataFrame.append was removed in pandas 2.0 and is quadratic in a loop.
result_frames = []
score_records = []

for technician in top_tech_list:
    track = top_technician_agg[top_technician_agg.assigned_to == technician]
    track = track[['sys_created_on', 'ticket_counts']]

    track = track.set_index(['sys_created_on'])
    track.index = pd.to_datetime(track.index)

    # `train` is not used by this model; it is kept because it is a
    # notebook-level name that other cells may display.
    train = track[:-n_holdout]
    test_meta = track[-n_holdout:]

    # Plain positional list: integer indexing on a Series with a
    # DatetimeIndex relies on a deprecated positional fallback.
    values = track['ticket_counts'].tolist()

    # Only the last `window` training values are ever averaged, so seeding the
    # history with exactly those gives the same predictions as a longer seed.
    # Slicing also avoids negative-index wrap-around on short series.
    history = values[-(n_holdout + window):-n_holdout]
    test = values[-n_holdout:]
    predictions = list()

    # walk forward over time steps in test
    for obs in test:
        predictions.append(mean(history[-window:]))
        history.append(obs)

    pred = pd.DataFrame({
        'sys_created_on': test_meta.index,
        'pred_ticket_counts': predictions,
    })

    score_records.append({
        'assigned_to': technician,
        'mae': mean_absolute_error(test, predictions),
        'mse': mean_squared_error(test, predictions),
        'model': 'Window moving Average',
    })

    track = track.reset_index()
    track = pd.merge(track, pred, on='sys_created_on', how='left')
    track['assigned_to'] = technician
    result_frames.append(track)

result = pd.concat(result_frames) if result_frames else pd.DataFrame()

# Drop any rows a previous run of this cell added, so re-running the cell
# does not duplicate the moving-average scores.
if 'model' in score.columns:
    score = score[score['model'] != 'Window moving Average']
score = pd.concat([score, pd.DataFrame(score_records)], ignore_index=True)

result.to_csv('Moving_Average_result.csv', index=False)


In [26]:
# Write the scores, ordered by technician, to CSV without the row index.
score.sort_values(by='assigned_to').to_csv(path_or_buf='Model_score_technician.csv', index=False)

In [33]:
# Show the scores ordered by technician so each technician's models sit
# next to each other for comparison.
score.sort_values(by='assigned_to')

Unnamed: 0,assigned_to,mae,model,mse
0,"Bender, Brendt",4.2,Exponential Smoothening,19.0
0,"Bender, Brendt",4.2,Arima,19.0
0,"Bender, Brendt",1.4,Window moving Average,2.296
0,"Blount, Dustin",6.984617,Exponential Smoothening,78.559679
0,"Blount, Dustin",6.984615,Arima,78.559632
0,"Blount, Dustin",4.28,Window moving Average,48.76
0,"Cook, Stephanie",3.536179,Exponential Smoothening,19.687016
0,"Cook, Stephanie",15.046722,Arima,271.81954
0,"Cook, Stephanie",2.84,Window moving Average,15.4
0,"Dakin, Timothy",3.824176,Exponential Smoothening,21.64656


In [32]:
# Display `train`. In the cells visible here it is only assigned inside the
# per-technician forecasting loops, so its contents depend on which cell ran
# last and which technician that loop ended on.
# NOTE(review): hidden-state dependency — confirm this inspection cell is
# still wanted, or build the slice explicitly for the technician of interest.
train

Unnamed: 0_level_0,ticket_counts
sys_created_on,Unnamed: 1_level_1
2018-01-14,27
2018-01-21,63
2018-01-28,61
2018-02-04,75
2018-02-11,66
2018-02-18,59
2018-02-25,77
2018-03-04,72
2018-03-11,72
2018-03-18,81
