In [1]:
import numpy as np
import pandas as pd
import math

import statsmodels.api as sm
import statsmodels.tsa.api as smt
import statsmodels.formula.api as smf
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt
%matplotlib widget
import itertools
import warnings
warnings.filterwarnings('ignore')

In [2]:
from pathlib import Path

# Candidate locations of the tab-separated SCADA export.
colab_path = 'ScadaData.txt'
local_path = 'C:\\Users\\hanna\\Desktop\\data\\ScadaData.txt'

# Use the first candidate that exists so the notebook runs both locally and on
# Colab without editing this cell. If neither exists, fall back to local_path
# so that read_csv reports the missing file.
scada_path = next(
    (candidate for candidate in (local_path, colab_path) if Path(candidate).exists()),
    local_path,
)

scada_data = pd.read_csv(scada_path, delimiter='\t',  parse_dates=True)
scada_data['dtTimeStamp'] = pd.to_datetime(scada_data['dtTimeStamp'])

# Average both signals per timestamp in a single groupby pass.
# NOTE(review): WindDirectionMean is averaged arithmetically; if it is an angle
# in degrees this mishandles the 0/360 wrap-around -- confirm whether a
# circular mean is needed.
per_timestamp_means = scada_data.groupby('dtTimeStamp')[['WindSpeedMean', 'WindDirectionMean']].mean()
average_mean_wind_speeds = per_timestamp_means[['WindSpeedMean']]
average_mean_wind_direction = per_timestamp_means[['WindDirectionMean']]
data = pd.concat([average_mean_wind_speeds, average_mean_wind_direction], axis=1)

# Later cells read `index.freq`, so a regular sampling interval is required.
# infer_freq returns None for irregular timestamps; fail with a clear message
# instead of an obscure error from asfreq.
inferred_freq = pd.infer_freq(data.index)
if inferred_freq is None:
    raise ValueError(
        "Could not infer a regular sampling frequency from 'dtTimeStamp'; "
        "check the data for gaps or duplicated timestamps."
    )
data = data.asfreq(inferred_freq)

In [3]:
from statsmodels.tsa.arima_model import ARIMA
import datetime as dt


def predict(history):
    """Fit an ARIMA(1, 1, 0) model on ``history`` and return a scalar forecast.

    Parameters
    ----------
    history
        Observations passed straight to ``ARIMA`` as the endogenous series.

    Returns
    -------
    The first element of the flattened first entry of ``forecast()``'s
    output, with ``forecast()`` called using its default arguments.
    """
    fitted = ARIMA(history, order=(1, 1, 0)).fit(disp=0)  # disp=0 is passed to fit() unchanged
    forecast_output = fitted.forecast()
    point_forecasts = forecast_output[0]
    return point_forecasts.flatten()[0]


# Walk-forward evaluation on the first 500 samples: the first 5% seed the
# history, then each remaining row is predicted from everything seen so far
# and appended to the history afterwards.
X = data.iloc[0:500]
size = int(len(X) * 0.05)
train, test = X.iloc[0:size], X.iloc[size:len(X)]
columns = ['WindDirectionMean', 'WindSpeedMean']

# Work on a copy: `history` is enlarged row by row below, and without the copy
# it would be the very same object as `train`, so `train` would silently grow
# to contain every test observation.
history = train.copy()

# Each prediction is stored one step before the timestamp it targets (index
# shifted by -1). dtype=float keeps the columns numeric instead of object.
predictions = pd.DataFrame(
    index=test.index.shift(-1),
    columns=[x+"Prediction" for x in columns],
    dtype=float,
)

# Wall-clock duration of each step (both columns), one entry per test row.
times = []
for i, row in test.iterrows():
    start = dt.datetime.now()
    for col_name in columns:
        # Predict first, then reveal the true value. dropna() removes the NaN
        # left in a column whose value for row `i` has not been written yet.
        predictions.loc[i-predictions.index.freq, col_name+'Prediction'] = predict(history[col_name].dropna())
        history.loc[i, col_name] = row[col_name]
    # Stop the clock once per row, after every column has been handled.
    end = dt.datetime.now()
    times.append(end-start)


In [4]:
# Attach the per-step fit/predict duration to each prediction row.
predictions['time'] = times

# Position of the dashed marker for each row: the row's index moved back one
# sampling step, plus the time the step took.
previous_step = predictions.index.shift(-1)
predictions["vertical_lines"] = previous_step + predictions['time']
print(predictions.head(5))

# Leading windows used by the plotting cell.
history_plot = history.iloc[:60]
predictions_plot = predictions.iloc[:36]

                    WindDirectionMeanPrediction WindSpeedMeanPrediction  \
dtTimeStamp                                                               
2019-08-01 04:00:00                      83.075                 5.58516   
2019-08-01 04:10:00                     83.2096                 5.44098   
2019-08-01 04:20:00                     86.4926                 5.37849   
2019-08-01 04:30:00                     88.4138                 5.46719   
2019-08-01 04:40:00                     89.2418                 5.58845   

                                      time             vertical_lines  
dtTimeStamp                                                            
2019-08-01 04:00:00 0 days 00:00:00.193499 2019-08-01 03:50:00.193499  
2019-08-01 04:10:00 0 days 00:00:00.069863 2019-08-01 04:00:00.069863  
2019-08-01 04:20:00 0 days 00:00:00.065071 2019-08-01 04:10:00.065071  
2019-08-01 04:30:00 0 days 00:00:00.051406 2019-08-01 04:20:00.051406  
2019-08-01 04:40:00 0 days 00:00:00.070108

In [15]:
from datetime import datetime
import matplotlib.dates as mdates

# Shaded span marking the initial training window on both panels.
TRAINING_SPAN_START = datetime(2019, 8, 1, 0)
TRAINING_SPAN_END = datetime(2019, 8, 1, 3, 50)


def _draw_prediction_panel(ax, column, ylabel, tick_labels, tick_rotation,
                           text_y, line_alpha, line_label):
    """Draw one panel of the prediction timeline onto ``ax``.

    Plots ``history_plot[column]`` as a line and the matching
    ``<column>Prediction`` values from ``predictions_plot`` as red dots,
    shades the training span, and adds a dashed vertical line plus a rotated
    text label at every entry of ``predictions_plot['vertical_lines']``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    column : name of the measured column in ``history_plot``.
    ylabel : y-axis label for this panel.
    tick_labels : labels for the x ticks placed at every ``history_plot`` index.
    tick_rotation : rotation passed to the tick labels, or None for the default.
    text_y : y position (data coordinates) of the rotated text labels.
    line_alpha : opacity of the dashed vertical lines.
    line_label : legend label shared by the dashed vertical lines.
    """
    ax.plot(history_plot[column], color='blue', label='Real data')
    ax.scatter(x=predictions_plot.index, y=predictions_plot[column + 'Prediction'],
               c='red', marker="o", s=6, label='Predicted data')

    ax.set_xticks(history_plot.index)
    if tick_rotation is None:
        ax.set_xticklabels(tick_labels)
    else:
        ax.set_xticklabels(tick_labels, rotation=tick_rotation)

    ax.axvspan(TRAINING_SPAN_START, TRAINING_SPAN_END, facecolor='yellow', alpha=0.1,
               label='Initial training period')
    ax.grid()

    for moment in predictions_plot["vertical_lines"]:
        ax.axvline(x=moment, linestyle='--', alpha=line_alpha, label=line_label)
        # Label text is the seconds.milliseconds part of the marker timestamp.
        ax.text(moment, text_y, moment.strftime("%S.%f")[:-3]+'s', rotation=90,
                verticalalignment='center', alpha=0.7)

    ax.set_ylabel(ylabel)


plt.rcParams["date.autoformatter.minute"] = "%m-%d %H:%M:%S"

# Create both axes up front and call show() once at the end, so the two panels
# always land in the same figure regardless of the active backend.
fig, (direction_ax, speed_ax) = plt.subplots(2, 1, figsize=(20, 7))

# Top panel: wind direction. Its y label now names the plotted quantity
# (the direction and speed labels were previously swapped).
_draw_prediction_panel(
    direction_ax, 'WindDirectionMean', 'Wind Direction',
    tick_labels=[], tick_rotation=None,
    text_y=68, line_alpha=0.5, line_label='Prediction made for next period',
)
direction_ax.set_title('Model Prediction Timeline')

# Every dashed line carries the same label; keep one legend entry per label.
handles, labels = direction_ax.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
direction_ax.legend(by_label.values(), by_label.keys(), loc='best')

# Bottom panel: wind speed, with the time-of-day tick labels.
_draw_prediction_panel(
    speed_ax, 'WindSpeedMean', 'Wind Speed',
    tick_labels=[ i.strftime("%H:%M:%S") for i in history_plot.index ], tick_rotation='vertical',
    text_y=3, line_alpha=0.7, line_label='Prediction made for next 10 period',
)
speed_ax.set_xlabel('Time')

plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …