In [None]:
!pip install pmdarima

In [None]:
import numpy as np
import math
import pandas as pd
from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Mount Google Drive at /gdrive (Colab-only API); the CSV paths used below live under this mount point.
from google.colab import drive
drive.mount('/gdrive')
# Make the mount point the working directory.
%cd /gdrive

In [None]:
# Read job_df.csv from the mounted drive; the first CSV column becomes the index.
# NOTE(review): job_df is not referenced again in the cells visible here - confirm it is still needed.
job_path = '/gdrive/My Drive/Colab_Dataset/job_df.csv'
job_df = pd.read_csv(job_path, index_col=[0])

In [None]:
# Read the energy-use table that the rest of the notebook analyses;
# the first CSV column becomes the index (converted to datetimes in the next cell).
jobs_path = '/gdrive/My Drive/Colab_Dataset/Done and Exit Jobs DF/jobs_energy_use.csv'
jobs_df = pd.read_csv(jobs_path, index_col=[0])

In [None]:
# Parse the index labels into datetimes so the frame can be plotted and modelled as a time series.
jobs_df.index = pd.to_datetime(jobs_df.index)

In [None]:
# Print column names, dtypes and non-null counts as a quick sanity check of the load.
jobs_df.info()

# **Time Series Decomposition**

In [None]:
# Additive decomposition of the active-job energy series into trend,
# seasonal and residual components, using a cycle length of 4 observations.
# `period=` replaces the older `freq=` keyword, which current statsmodels
# releases no longer accept for seasonal_decompose.
result = seasonal_decompose(
    jobs_df['Active Resource Consumption Energy'],
    model='additive',
    period=4,
)
plt.rcParams["figure.figsize"] = (14,7)
# The padded print acts as a centred caption above the decomposition figure.
print('                           Active Resource Consumption Energy - Time Series Decomposition')
result.plot()
plt.show()

In [None]:
# Additive decomposition of the energy-waste series into trend, seasonal
# and residual components, using a cycle length of 4 observations.
# `period=` replaces the older `freq=` keyword, which current statsmodels
# releases no longer accept for seasonal_decompose.
result = seasonal_decompose(
    jobs_df['Energy Waste'],
    model='additive',
    period=4,
)
plt.rcParams["figure.figsize"] = (14,7)
# The padded print acts as a centred caption above the decomposition figure.
print('                                         Energy Waste - Time Series Decomposition')
result.plot()
plt.show()

In [None]:
from matplotlib import pyplot
from pylab import rcParams
import matplotlib.pyplot as plt


# Line chart of the 'Energy Waste' column against the frame's index,
# drawn through the Axes object that DataFrame.plot() returns.
rcParams.update({'figure.figsize': (12, 7)})
waste_ax = jobs_df[['Energy Waste']].plot()
waste_ax.set_title('Energy Waste during Job Execution')
waste_ax.legend(loc='upper right')
waste_ax.set_xlabel('Time (Monthly)')
waste_ax.set_ylabel('Values')

In [None]:
from matplotlib import pyplot
from pylab import rcParams
import matplotlib.pyplot as plt

# Line chart of the 'Active Resource Consumption Energy' column against the
# frame's index, drawn through the Axes object that DataFrame.plot() returns.
rcParams.update({'figure.figsize': (12, 7)})
active_ax = jobs_df[['Active Resource Consumption Energy']].plot()
active_ax.set_title('Active Resource Consumption Energy during Jobs Execution')
active_ax.legend(loc='upper right')
active_ax.set_xlabel('Time (Monthly)')
active_ax.set_ylabel('Values')

# **ARIMA Modelling for Energy Consumption by Active Jobs**




In [None]:
# Keep the original column name: every later cell still indexes
# jobs_df['Active Resource Consumption Energy'], so renaming it in place made
# those cells raise KeyError. The underscore spelling is exposed as an extra
# alias column instead, so either name can be used.
jobs_df['Active_Resource_Consumption_Energy'] = jobs_df['Active Resource Consumption Energy']


In [None]:
# Plot the active-job energy series on a 15 x 7.5 figure with rotated x tick labels and a grid.
fig, ax = plt.subplots(figsize=(15, 7.5))
ax.plot(jobs_df['Active Resource Consumption Energy'])
ax.set_title('Active Resource Consumption Energy')
ax.set_ylabel('Joules')
ax.set_xlabel('Timeline')
plt.xticks(rotation=90)  # acts on the current axes, i.e. `ax`
ax.grid(True)
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.stattools import adfuller
from tqdm import tqdm_notebook

In [None]:
# Partial-autocorrelation and autocorrelation plots of the active-job energy series.
# Trailing ';' keeps the returned figure objects out of the cell output.
active_energy = jobs_df['Active Resource Consumption Energy']
plot_pacf(active_energy);
plot_acf(active_energy);

In [None]:
# Augmented Dickey-Fuller test on the active-job energy series; the first two
# entries of the result are reported as the test statistic and the p-value.
ad_fuller_result = adfuller(jobs_df['Active Resource Consumption Energy'])
adf_statistic, adf_p_value = ad_fuller_result[0], ad_fuller_result[1]
print(f'ADF Statistic: {adf_statistic}')
print(f'p-value: {adf_p_value}')

In [None]:
def optimize_SARIMA(parameters_list, d, D, s, exog):
    """
    Fit one SARIMAX model per parameter combination and rank the fits by AIC.

    Parameters
    ----------
    parameters_list : iterable of (p, q, P, Q) tuples
        Candidate non-seasonal AR/MA orders (p, q) and seasonal AR/MA
        orders (P, Q).
    d : int
        Non-seasonal integration order.
    D : int
        Seasonal integration order.
    s : int
        Length of the season.
    exog : array-like
        The series the models are fitted to. Despite the parameter name it
        is handed to SARIMAX as its first positional argument (the modelled
        series), not as an exogenous regressor.

    Returns
    -------
    pandas.DataFrame
        Columns '(p,q)x(P,Q)' and 'AIC', sorted by ascending AIC (lower is
        better). Combinations whose fit raised are left out; if nothing
        could be fitted the frame is empty but still has both columns.
    """
    results = []

    for param in tqdm_notebook(parameters_list):
        try:
            model = SARIMAX(
                exog,
                order=(param[0], d, param[1]),
                seasonal_order=(param[2], D, param[3], s),
            ).fit(disp=-1)
        except Exception:
            # Best-effort grid search: skip combinations that cannot be
            # fitted. `Exception` (not a bare except) keeps Ctrl-C /
            # kernel interrupts working during a long search.
            continue

        results.append([param, model.aic])

    # Passing the column names to the constructor also works when `results`
    # is empty, where assigning `.columns` afterwards would raise.
    result_df = pd.DataFrame(results, columns=['(p,q)x(P,Q)', 'AIC'])

    # Sort in ascending order, lower AIC is better.
    return result_df.sort_values(by='AIC', ascending=True).reset_index(drop=True)

In [None]:
# Fit SARIMA(0,1,2)x(0,1,2,4) to the active-job energy series.
# `disp=False` silences the optimizer's convergence output; the original
# spelling `dis=-1` does not match any fit option, so it had no effect.
# NOTE(review): the orders are hard-coded here and optimize_SARIMA is never
# called in the visible cells - confirm where (0,1,2)x(0,1,2,4) came from.
best_model = SARIMAX(
    jobs_df['Active Resource Consumption Energy'],
    order=(0, 1, 2),
    seasonal_order=(0, 1, 2, 4),
).fit(disp=False)
print(best_model.summary())

In [None]:
# Draw the fitted model's diagnostic plots on a 15 x 12 figure; the trailing ';' hides the figure repr.
best_model.plot_diagnostics(figsize=(15,12));

In [None]:
# Store the in-sample fitted values next to the observations.
jobs_df['arima_model'] = best_model.fittedvalues
# NOTE(review): the original carried a commented-out line blanking the first
# five fitted values - confirm whether start-up values should be excluded.

# Predict positions n .. n+8, i.e. the 9 steps after the last observation.
n_obs = jobs_df.shape[0]
forecast = best_model.predict(start=n_obs, end=n_obs + 8)

# Fitted values followed by the forecast, as one series for plotting.
# pd.concat replaces Series.append, which was removed in pandas 2.0.
forecast = pd.concat([jobs_df['arima_model'], forecast])

plt.figure(figsize=(15, 7.5))
plt.plot(forecast, color='r', label='model')
# Shade the out-of-sample (forecast) region.
plt.axvspan(jobs_df.index[-1], forecast.index[-1], alpha=0.5, color='lightgrey')
plt.plot(jobs_df['Active Resource Consumption Energy'], label='actual')
plt.legend()
plt.title('Active Resource Consumption Energy Forecast')
plt.xlabel('Timeline')
plt.show()

In [None]:
# Display the in-sample fitted values that the forecast cell stored in jobs_df.
jobs_df['arima_model']

In [None]:
from sklearn import metrics

# In-sample error of the fitted values against the observed series:
# MAE, then MSE, then RMSE (square root of the MSE).
observed_energy = jobs_df['Active Resource Consumption Energy']
fitted_energy = jobs_df['arima_model']

print(metrics.mean_absolute_error(observed_energy, fitted_energy))

energy_mse = metrics.mean_squared_error(observed_energy, fitted_energy)
print("\n", energy_mse)

print("\n", np.sqrt(energy_mse))


# **ARIMA Modelling for Energy Waste by Exit Jobs**

In [None]:
# Work on an independent copy of the 'Energy Waste' column. Later cells
# assign into `data`; without .copy() those writes go through a slice of
# jobs_df and trigger pandas' SettingWithCopyWarning.
data = jobs_df[['Energy Waste']].copy()

In [None]:
# Plot the energy-waste series on a 15 x 7.5 figure with rotated x tick labels and a grid.
fig, ax = plt.subplots(figsize=(15, 7.5))
ax.plot(data['Energy Waste'])
ax.set_title('Energy Waste')
ax.set_ylabel('')
ax.set_xlabel('Timeline')
plt.xticks(rotation=90)  # acts on the current axes, i.e. `ax`
ax.grid(True)
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.stattools import adfuller
from tqdm import tqdm_notebook

In [None]:
# Partial-autocorrelation and autocorrelation plots of the energy-waste series.
# Trailing ';' keeps the returned figure objects out of the cell output.
waste_series = data['Energy Waste']
plot_pacf(waste_series);
plot_acf(waste_series);

In [None]:
# Augmented Dickey-Fuller test on the energy-waste series; the first two
# entries of the result are reported as the test statistic and the p-value.
ad_fuller_result = adfuller(data['Energy Waste'])
adf_statistic, adf_p_value = ad_fuller_result[0], ad_fuller_result[1]
print(f'ADF Statistic: {adf_statistic}')
print(f'p-value: {adf_p_value}')

In [None]:
# Log-transform 'Energy Waste' and take the first difference, then drop the
# first row, which has no predecessor and is therefore NaN.
# The result is built as a new frame straight from jobs_df, so the cell does
# not assign into a slice and gives the same `data` every time it is re-run.
# NOTE(review): np.log yields -inf/NaN for zero or negative values - confirm
# 'Energy Waste' is strictly positive.
energy_waste_log_diff = np.log(jobs_df[['Energy Waste']]).diff()
data = energy_waste_log_diff.drop(energy_waste_log_diff.index[0])


In [None]:
# Plot the transformed series produced by the log + first-difference step.
# The title now names the plotted quantity; the previous text referred to a
# different dataset (Johnson & Johnson quarterly EPS).
plt.figure(figsize=[15, 7.5]); # Set dimensions for figure
plt.plot(data['Energy Waste'] )
plt.title("Log Difference of Energy Waste")
plt.show()

In [None]:
# Seasonal differencing at lag 4.
data['Energy Waste'] = data['Energy Waste'].diff(4)
# The first 4 rows have no lag-4 predecessor and become NaN; drop them so the
# error-metric cell (sklearn rejects NaN input) and the model see only
# observed values. .copy() keeps `data` an independent frame for later writes.
data = data.dropna(subset=['Energy Waste']).copy()


In [None]:
# Plot the series after the additional lag-4 seasonal differencing.
# The title now names the plotted quantity; the previous text referred to a
# different dataset (Johnson & Johnson quarterly EPS).
plt.figure(figsize=[15, 7.5]); # Set dimensions for figure
plt.plot(data['Energy Waste'] )
plt.title("Seasonal (Lag-4) Difference of Log-Differenced Energy Waste")
plt.show()

In [None]:
# NOTE(review): this re-defines the optimize_SARIMA helper that already exists
# in the "Energy Consumption by Active Jobs" section; one definition is enough.
def optimize_SARIMA(parameters_list, d, D, s, exog):
    """
    Fit one SARIMAX model per parameter combination and rank the fits by AIC.

    Parameters
    ----------
    parameters_list : iterable of (p, q, P, Q) tuples
        Candidate non-seasonal AR/MA orders (p, q) and seasonal AR/MA
        orders (P, Q).
    d : int
        Non-seasonal integration order.
    D : int
        Seasonal integration order.
    s : int
        Length of the season.
    exog : array-like
        The series the models are fitted to. Despite the parameter name it
        is handed to SARIMAX as its first positional argument (the modelled
        series), not as an exogenous regressor.

    Returns
    -------
    pandas.DataFrame
        Columns '(p,q)x(P,Q)' and 'AIC', sorted by ascending AIC (lower is
        better). Combinations whose fit raised are left out; if nothing
        could be fitted the frame is empty but still has both columns.
    """
    results = []

    for param in tqdm_notebook(parameters_list):
        try:
            model = SARIMAX(
                exog,
                order=(param[0], d, param[1]),
                seasonal_order=(param[2], D, param[3], s),
            ).fit(disp=-1)
        except Exception:
            # Best-effort grid search: skip combinations that cannot be
            # fitted. `Exception` (not a bare except) keeps Ctrl-C /
            # kernel interrupts working during a long search.
            continue

        results.append([param, model.aic])

    # Passing the column names to the constructor also works when `results`
    # is empty, where assigning `.columns` afterwards would raise.
    result_df = pd.DataFrame(results, columns=['(p,q)x(P,Q)', 'AIC'])

    # Sort in ascending order, lower AIC is better.
    return result_df.sort_values(by='AIC', ascending=True).reset_index(drop=True)

In [None]:
# Fit SARIMA(0,1,2)x(0,1,2,4) to the transformed energy-waste series.
# `disp=False` silences the optimizer's convergence output; the original
# spelling `dis=-1` does not match any fit option, so it had no effect.
# NOTE(review): data['Energy Waste'] has already been log-differenced and
# seasonally differenced in the cells above, yet d=1 and D=1 make the model
# difference it again - confirm this is intended (otherwise fit the
# untransformed series, or use d=0 and D=0 here).
best_model = SARIMAX(
    data['Energy Waste'],
    order=(0, 1, 2),
    seasonal_order=(0, 1, 2, 4),
).fit(disp=False)
print(best_model.summary())

In [None]:
# Draw the fitted model's diagnostic plots on a 15 x 12 figure; the trailing ';' hides the figure repr.
best_model.plot_diagnostics(figsize=(15,12));

In [None]:
# Store the in-sample fitted values next to the (transformed) observations.
data['arima_model'] = best_model.fittedvalues
# NOTE(review): the original carried a commented-out line blanking the first
# five fitted values - confirm whether start-up values should be excluded.

# Predict positions n .. n+8, i.e. the 9 steps after the last observation.
n_obs = data.shape[0]
forecast = best_model.predict(start=n_obs, end=n_obs + 8)

# Fitted values followed by the forecast, as one series for plotting.
# pd.concat replaces Series.append, which was removed in pandas 2.0.
forecast = pd.concat([data['arima_model'], forecast])

plt.figure(figsize=(15, 7.5))
plt.plot(forecast, color='r', label='model')
# Shade the out-of-sample (forecast) region.
plt.axvspan(data.index[-1], forecast.index[-1], alpha=0.5, color='lightgrey')
plt.plot(data['Energy Waste'], label='actual')
plt.legend()
plt.title('Energy Waste Forecast')
plt.xlabel('Timeline')
plt.show()

In [None]:
from sklearn import metrics



# In-sample error of the fitted values: MAE, then MSE, then RMSE.
# Only rows where both the observed and the fitted value are present are
# scored: the lag-4 differencing step leaves NaNs in 'Energy Waste', and
# sklearn's metric functions raise on NaN input.
# Note that the errors are in the units of the transformed (differenced
# log) series, not of the original energy values.
scored = data[['Energy Waste', 'arima_model']].dropna()

print(metrics.mean_absolute_error(scored['Energy Waste'], scored['arima_model']))

waste_mse = metrics.mean_squared_error(scored['Energy Waste'], scored['arima_model'])
print("\n", waste_mse)

print("\n", np.sqrt(waste_mse))