# notebook to run current models

In [1]:
import os
import numpy as np
import pandas as pd

In [5]:
df = pd.read_csv(os.path.join('.', 'Aggregated Data.csv'))
df.head()

Unnamed: 0,Time_Code,Year,Month,LSCA,OA,SCA,TRA,Total
0,1,2017,10,0,1,1,0,2
1,2,2017,11,0,1,0,0,1
2,3,2017,12,0,1,1,0,2
3,4,2018,1,0,3,1,0,4
4,5,2018,2,0,0,1,0,1


### add a total_assessment column to be consistent with farzad

In [6]:
df['Total_Assessment'] = df['Total']
df.head()

Unnamed: 0,Time_Code,Year,Month,LSCA,OA,SCA,TRA,Total,Total_Assessment
0,1,2017,10,0,1,1,0,2,2
1,2,2017,11,0,1,0,0,1,1
2,3,2017,12,0,1,1,0,2,2
3,4,2018,1,0,3,1,0,4,4
4,5,2018,2,0,0,1,0,1,1


# Linear Regression

In [7]:
X = df.Time_Code.values
y = (df['Total_Assessment']).values
X = X.reshape(-1,1)

In [8]:
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import metrics

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=42)
reg_all = linear_model.LinearRegression()
reg_all.fit(X_train, y_train)
print('-------------------------------------------')
from sklearn import metrics 
print('Root Mean Squared Error-Test:', np.sqrt(metrics.mean_squared_error(y_test, reg_all.predict(X_test))))
mse = metrics.mean_squared_error(y_test, reg_all.predict(X_test))
print('Mean Squared Error-Test:',mse)

-------------------------------------------
Root Mean Squared Error-Test: 1.9643992648584025
Mean Squared Error-Test: 3.8588644717762324


In [12]:
# fit final model (Predictions-LR)
from sklearn import linear_model
X = df.Time_Code.values
y = (df.Total_Assessment).values
X = X.reshape(-1,1)
model = linear_model.LinearRegression()

model.fit(X, y)
Xnew = [47,48,49,50,51]
ynew = model.predict(X)
for i in range(len(Xnew)):
    print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))

X=47, Predicted=1.0851063829787244
X=48, Predicted=1.2339808818994766
X=49, Predicted=1.382855380820229
X=50, Predicted=1.5317298797409813
X=51, Predicted=1.6806043786617337


In [14]:
df.tail()

Unnamed: 0,Time_Code,Year,Month,LSCA,OA,SCA,TRA,Total,Total_Assessment
41,42,2021,3,1,1,5,0,7,7
42,43,2021,4,3,3,3,0,9,9
43,44,2021,5,0,0,4,0,4,4
44,45,2021,6,0,0,5,0,7,7
45,46,2021,7,1,8,1,0,10,10


# Arima

In [15]:
from statsmodels.tsa.arima.model import ARIMA

In [16]:
# split into train and test sets
X = df.Total_Assessment.values
size = int(len(X) * 0.7)
train, test = X[0:size], X[size:len(X)]
history = [x for x in train]

In [18]:
predictions = list()
# walk-forward validation
for t in range(len(test)):
    model = ARIMA(history, order=(1,1,1))
    model_fit = model.fit()
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = test[t]
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))

predicted=5.531750, expected=11.000000
predicted=7.968834, expected=5.000000
predicted=7.071921, expected=5.000000
predicted=6.064003, expected=5.000000
predicted=5.724131, expected=6.000000
predicted=5.837680, expected=7.000000
predicted=6.192300, expected=7.000000
predicted=6.419885, expected=7.000000
predicted=6.584250, expected=9.000000
predicted=7.394523, expected=7.000000
predicted=7.190952, expected=9.000000
predicted=7.822318, expected=4.000000
predicted=6.698219, expected=7.000000
predicted=6.739981, expected=10.000000


In [19]:
# evaluate forecasts
mse = np.sqrt(metrics.mean_squared_error(test, predictions))
print('Test MSE: %.3f' % mse)


Test MSE: 2.408


In [20]:
# create a differenced series (Prediction-ARIMA)
def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return np.array(diff)

In [21]:
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]

In [22]:
# seasonal difference
X = df.Total_Assessment.values
Months_in_year = 12
differenced = difference(X, Months_in_year)

# fit model
model = ARIMA(differenced, order=(1,1,1))
model_fit = model.fit()

# multi-step out-of-sample forecast
forecast = model_fit.forecast(steps=5)

# invert the differenced forecast to something usable
history = [x for x in X]
month = 1
for yhat in forecast:
    inverted = inverse_difference(history, yhat, Months_in_year)
    print('month %d: %f' % (month, inverted))
    history.append(inverted)
    month += 1

month 1: 6.669962
month 2: 6.679078
month 3: 7.679053
month 4: 8.679053
month 5: 8.679053
