# Time Series Forecasting Using ARIMA For Gas Levels In Mines

In [1]:
import pandas as pd
from pandas import Series
from matplotlib import pyplot
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
from math import sqrt
import numpy as np
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
from matplotlib import style

## Declaring the Prediction & Difference Functions

In [2]:
def predict(coeff, o_val):
    """Return the weighted sum of the most recent values in ``o_val``.

    ``coeff[0]`` multiplies the last element of ``o_val``, ``coeff[1]`` the
    one before it, and so on.  In this notebook ``coeff`` holds the AR (or
    MA) weights and ``o_val`` the differenced series (or the residuals).

    An empty ``coeff`` yields ``0.0``; indexing raises ``IndexError`` when
    ``coeff`` is longer than ``o_val``.
    """
    total = 0.0
    for lag, weight in enumerate(coeff, start=1):
        # lag 1 is the newest observation, i.e. o_val[-1]
        total += weight * o_val[-lag]
    return total

def difference(data):
    """Return the first-order differences of ``data`` as a NumPy array.

    Element ``k`` of the result is ``data[k + 1] - data[k]``, so the output
    is one element shorter than the input (empty for inputs of length 0 or
    1).  Differencing is applied here to make the series stationary, which
    the ARMA part of the model relies on.
    """
    steps = [data[pos] - data[pos - 1] for pos in range(1, len(data))]
    return np.array(steps)

## ARIMA prediction model

In [None]:
# Import CH4 data; the first CSV column (the timestamps) becomes the index.
ch4 = pd.read_csv('ch4.csv', index_col=0)

# Store the timestamps in time_index
time_index = ch4.index

series = ch4['CH4_ppm']
gas_level = series.values

# Number of trailing observations held out for walk-forward testing.
test_size = 400
if len(gas_level) <= test_size:
    # Without this check a short file gives a zero/negative split point and
    # fails later with an unrelated-looking error.
    raise ValueError(
        'ch4.csv has %d rows; more than %d are required to leave training data'
        % (len(gas_level), test_size)
    )

# Determine the size of the training dataset
size = len(gas_level) - test_size

# Set the prediction times: one 5-second step per test sample, starting at
# the timestamp of the first test sample.
time_pred = (pd.date_range(time_index[size], periods=len(gas_level) - size, freq="5S")).time

# Separate the training and test datasets
train, test = gas_level[0:size], gas_level[size:]
o_val = list(train)   # growing history: training data plus every observation seen so far
predictions = list()  # one-step-ahead forecasts
ppm = list()          # observed test values, in order

# Walk-forward validation: refit ARIMA(p=1, d=1, q=1) on the full history,
# forecast one step, then append the true observation to the history.
for obs in test:
    # The "no trend" option is a model-constructor argument for SARIMAX
    # (fit() does not take a trend parameter), so it is set here.
    model = SARIMAX(o_val, order=(1, 1, 1), trend='n')
    model_fit = model.fit(disp=False)
    # Assign the weights of the AR and MA models
    ar_coef, ma_coef = model_fit.arparams, model_fit.maparams
    resid = model_fit.resid  # Residuals of the fitted model
    diff = difference(o_val)
    # ARIMA(1,1,1) one-step forecast: last level + AR term on the
    # differenced series + MA term on the residuals.
    y_t = o_val[-1] + predict(ar_coef, diff) + predict(ma_coef, resid)
    predictions.append(y_t)
    ppm.append(obs)
    o_val.append(obs)
    print('predicted_gas_value=%.4f, actual_gas_value=%.4f' % (y_t, obs))

rmse = sqrt(mean_squared_error(test, predictions))  # Root Mean Square Error
print('Test RMSE: %.3f' % rmse)

# Frame of forecasts indexed by prediction time.  It is built from
# `predictions` (not the observed `ppm` values) so that the "predicted"
# curve and any exported CSV really contain the model output.
pred_columns = {'time': time_pred, 'CH4_ppm': predictions}
pred_data = pd.DataFrame(pred_columns)
pred_data.set_index('time', inplace=True)


# Re-index the raw data by time of day so it shares an x-axis with pred_data.
ch4.index = pd.DatetimeIndex(ch4.index.values, freq='5S')
ch4.index = ch4.index.time

## Plotting Predicted vs Actual Data

In [None]:
plt.figure(figsize=(20, 10))

# Plot the recorded CH4 levels (whole series, indexed by time of day)
plt.plot(ch4.index.values, ch4.values, label='past')

# Plot the prediction frame built in the previous cell.  The frame is named
# `pred_data`; the name `data` is never defined and raised NameError here.
plt.plot(pred_data.index, pred_data['CH4_ppm'], label='predicted')

# Slant the time tick labels so they do not overlap
plt.gcf().autofmt_xdate()

# Optional: save the recorded prediction data to a csv file.
#pred_data.to_csv('ch4_pred.csv',  float_format='%g')

# Plot legend and show figure
plt.legend()
plt.show()