In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the raw readings, parse the Date column, and keep only the rows dated 2021.
# A forward slash is used in the path: the original 'datasets\e...' literal relied on
# the invalid escape sequence '\e' and only resolved on Windows.
df = pd.read_csv('datasets/electricity_data.csv')
df['Date'] = pd.to_datetime(df['Date'])
# Chained reset_index returns a fresh frame with a 0..n-1 index instead of mutating in place.
df = df[df['Date'].dt.year == 2021].reset_index(drop=True)

In [None]:
df.head(3)

In [None]:
df.tail()

In [None]:
df.shape

In [None]:
import math
import statsmodels.api as sm
import statsmodels.tsa.api as smt
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
import matplotlib.pyplot as plt

# Consumption series as a NumPy array, plus its base-10 logarithm.
actual_vals = df['Total_Consumption'].values
actual_log = np.log10(actual_vals)

# The final 80 observations are held out as the test window; everything
# before them is the training window. Both are also kept on the log10 scale.
train = actual_vals[:-80]
test = actual_vals[-80:]
train_log = np.log10(train)
test_log = np.log10(test)

# SARIMAX orders: (p, d, q) and (P, D, Q, s) with a seasonal period of 7 observations.
my_order = (1, 1, 1)
my_seasonal_order = (1, 1, 1, 7)

In [None]:
# Walk-forward validation on the log10 scale: refit on everything seen so far,
# forecast a single step, then reveal the true observation before the next step.
history = list(train_log)
predictions = []   # forecasts converted back to the original scale
predict_log = []   # forecasts on the log10 scale
for obs in test_log:
    model = sm.tsa.SARIMAX(
        history,
        order=my_order,
        seasonal_order=my_seasonal_order,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    predict_log.append(output[0])
    yhat = 10 ** output[0]   # undo the log10 transform
    predictions.append(yhat)
    history.append(obs)

# Hold-out actuals against the one-step-ahead predictions.
figsize = (12, 7)
plt.figure(figsize=figsize)
plt.plot(test, label='Actuals')
plt.plot(predictions, color='red', label='Predicted')
plt.legend(loc='upper right')
plt.show()

In [None]:
# Last 80 rows of df paired with the walk-forward predictions.
# .copy() makes df_preds an independent frame, so adding a column is not a
# write onto a slice of df (the pattern behind pandas' SettingWithCopyWarning).
df_preds = df.iloc[-80:].copy()
df_preds['Predicted_Consumption'] = predictions

In [None]:
import plotly.graph_objects as go
import plotly.offline as py

In [None]:
# Interactive line chart of actual versus predicted consumption over the hold-out rows.
# Each trace gets its own legend name; previously both were labelled 'lines' and
# could not be told apart in the legend.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_preds['Date'], y=df_preds['Total_Consumption'],
                    mode='lines',
                    name='Actual consumption'))
# add_trace returns the figure, so this last expression also renders it in the cell output.
fig.add_trace(go.Scatter(x=df_preds['Date'], y=df_preds['Predicted_Consumption'],
                    mode='lines',
                    name='Predicted consumption'))

In [None]:
# Calendar days of the out-of-sample forecast window; both endpoints are included.
new_dates = pd.date_range(
    start='2021/07/31',
    end='2021/09/30',
    freq='D',
)

In [None]:
# Refit on the complete series (original scale, no log transform) and forecast
# the window described by new_dates.
# NOTE: this rebinds train, my_order and my_seasonal_order, which the validation
# cells also read; re-run the setup cell before re-running the validation.
train = df.Total_Consumption.values

my_order = (1, 1, 1)
# NOTE(review): the seasonal period here is 62, whereas the validation model uses 7.
# 62 is also the forecast horizon, so this may be a mix-up; confirm it is intended.
my_seasonal_order = (1, 1, 1, 62)

model = sm.tsa.SARIMAX(train, order=my_order, seasonal_order=my_seasonal_order,enforce_stationarity=False,enforce_invertibility=False)
# disp=0 silences the optimiser's convergence output, matching the validation fit.
model_fit = model.fit(disp=0)
# One forecast step per entry of new_dates, so the forecast and its dates cannot drift apart.
output = model_fit.forecast(len(new_dates))

In [None]:
# Placeholder actuals for the 62 forecast days: NaN, because no observation exists yet.
# (Despite the name, the list holds NaNs, not zeros.)
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
listofzeros = [np.nan] * 62

In [None]:
# Rows for the forecast window: date, missing actual value, forecast value.
data_new = dict(
    Date=new_dates,
    Total_Consumption=listofzeros,
    Predicted_Consumption=output,
)

df_new_preds = pd.DataFrame(data=data_new)

In [None]:
# Stack the hold-out rows on top of the forecast-window rows under a fresh 0..n-1 index.
df_all = pd.concat(
    (df_preds, df_new_preds),
    axis=0,
    ignore_index=True,
)

In [None]:
df_all.head(2)

In [None]:
# Despite the column name, this is the signed residual (actual minus predicted),
# not an absolute error; it is NaN wherever the actual value is missing.
df_all['MAE'] = df_all['Total_Consumption'].sub(df_all['Predicted_Consumption'])

# Threshold: 5

In [None]:
# Rows whose actual-minus-predicted error reaches the threshold of 5, re-indexed from 0.
df_anoms = df_all.loc[df_all['MAE'] >= 5].reset_index(drop=True)

In [None]:
df_anoms.tail(2)

In [None]:
# Portion of the error that lies above the threshold of 5.
df_anoms['extra_MAE'] = df_anoms['MAE'].sub(5)

In [None]:
# Scale the above-threshold error by 9.
# NOTE(review): 9 is presumably a per-unit rate for excess consumption; confirm.
df_anoms['extra_MAE_cost'] = df_anoms['extra_MAE'].mul(9)

In [None]:
# Sum of extra_MAE_cost per calendar month of the anomaly rows.
df_anoms['extra_MAE_cost'].groupby(df_anoms['Date'].dt.month).sum()

In [None]:
# Monthly sum of Total_Consumption over df_all, scaled by 7 and shown with a 0..n-1 index.
# NOTE(review): 7 is presumably a per-unit rate; confirm.
df_all['Total_Consumption'].groupby(df_all['Date'].dt.month).sum().reset_index(drop=True).mul(7)

In [None]:
# NOTE(review): hand-entered ratio. The operands are presumably an anomaly cost and a
# total cost read off the two preceding cell outputs; confirm, and prefer computing
# the ratio from the data so it cannot go stale.
180/2300 # 8-15%

# Threshold: 10

In [None]:
# Rows whose actual-minus-predicted error reaches the threshold of 10, re-indexed from 0.
df_anoms = df_all.loc[df_all['MAE'] >= 10].reset_index(drop=True)

In [None]:
df_anoms.tail(2)

In [None]:
# Portion of the error that lies above the threshold of 10.
df_anoms['extra_MAE'] = df_anoms['MAE'].sub(10)

In [None]:
# Scale the above-threshold error by 9.
# NOTE(review): 9 is presumably a per-unit rate for excess consumption; confirm.
df_anoms['extra_MAE_cost'] = df_anoms['extra_MAE'].mul(9)

In [None]:
# Sum of extra_MAE_cost per calendar month of the anomaly rows.
df_anoms['extra_MAE_cost'].groupby(df_anoms['Date'].dt.month).sum()

In [None]:
# Monthly sum of Total_Consumption over df_all, scaled by 7 and shown with a 0..n-1 index.
# NOTE(review): 7 is presumably a per-unit rate; confirm.
df_all['Total_Consumption'].groupby(df_all['Date'].dt.month).sum().reset_index(drop=True).mul(7)

In [None]:
# NOTE(review): hand-entered ratio. The operands are presumably an anomaly cost and a
# total cost read off the two preceding cell outputs; confirm, and prefer computing
# the ratio from the data so it cannot go stale.
638/8862 # 8-15%

# Threshold: 12

In [None]:
# Rows whose actual-minus-predicted error reaches the threshold of 12, re-indexed from 0.
df_anoms = df_all.loc[df_all['MAE'] >= 12].reset_index(drop=True)

In [None]:
df_anoms.tail(2)

In [None]:
# Portion of the error that lies above the threshold of 12.
df_anoms['extra_MAE'] = df_anoms['MAE'].sub(12)

In [None]:
# Scale the above-threshold error by 9.
# NOTE(review): 9 is presumably a per-unit rate for excess consumption; confirm.
df_anoms['extra_MAE_cost'] = df_anoms['extra_MAE'].mul(9)

In [None]:
# Sum of extra_MAE_cost per calendar month of the anomaly rows.
df_anoms['extra_MAE_cost'].groupby(df_anoms['Date'].dt.month).sum()

In [None]:
# Monthly sum of Total_Consumption over df_all, scaled by 7 and shown with a 0..n-1 index.
# NOTE(review): 7 is presumably a per-unit rate; confirm.
df_all['Total_Consumption'].groupby(df_all['Date'].dt.month).sum().reset_index(drop=True).mul(7)

In [None]:
# NOTE(review): hand-entered ratio, and the same 180/2300 that appears in the
# "Threshold: 5" section. It looks like it was not updated for the threshold of 12;
# verify against the two preceding cell outputs or compute it from the data.
180/2300 # 8-15%