In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [2]:
scada_data = pd.read_csv("C:\\Users\\hanna\\Desktop\\data\\Events.csv", delimiter=',',  parse_dates=True)


In [3]:
scada_data.drop(columns=['timestamp ($ts)_DateTime','payload_quality_String', 'payload_timestamp_DateTime', 'payload_unit_String', 'payload_quantity_String'], inplace=True)
scada_data.columns
scada_data.rename(columns={'timestamp  (UTC+00:00) Local - Europe/London: GMT_DateTime':'timestamp',
                          'apiKey_String': 'turbine', 'measurement_String': 'measurement', 'payload_value_Double' : 'value'}, inplace=True)
scada_data['timestamp'] = pd.to_datetime(scada_data['timestamp'])

In [4]:
wind_speed = scada_data[scada_data['measurement'] == 'Windspeed'][["timestamp","turbine","value"]]
wind_speed.rename(columns={"value": "wind_speed"}, inplace=True)
wind_speed.reset_index(inplace=True, drop=True)
wind_direction = scada_data[scada_data['measurement'] == 'WindDirection'][["timestamp","turbine","value"]]
wind_direction.rename(columns={"value": "wind_direction"}, inplace=True)
wind_direction.reset_index(inplace=True, drop=True)
wind_speed['wind_direction'] = wind_direction['wind_direction']
data = wind_speed.copy()
data

Unnamed: 0,timestamp,turbine,wind_speed,wind_direction
0,2020-12-26 11:55:38.012,nor_var_t01,7.0,170.300003
1,2020-12-26 11:55:38.012,nor_var_t03,7.4,160.300003
2,2020-12-26 11:55:38.012,nor_var_t05,5.7,166.300003
3,2020-12-26 11:55:38.012,nor_var_t06,8.2,170.500000
4,2020-12-26 11:55:38.013,nor_var_t07,9.6,171.600006
...,...,...,...,...
604368,2021-01-25 11:55:10.250,nor_var_t05,1.3,37.400002
604369,2021-01-25 11:55:10.250,nor_var_t06,2.2,33.700001
604370,2021-01-25 11:55:10.250,nor_var_t07,2.2,28.000000
604371,2021-01-25 11:55:10.250,nor_var_t08,1.4,34.299999


In [5]:
data_rs = data.groupby(['timestamp']).mean()
data_rs = data_rs.resample('2min').mean()
data_rs = data_rs.asfreq(pd.infer_freq(data_rs.index))
data_rs

Unnamed: 0_level_0,wind_speed,wind_direction
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-12-26 11:54:00,7.254167,170.058335
2020-12-26 11:56:00,7.825000,168.118255
2020-12-26 11:58:00,7.050992,166.044046
2020-12-26 12:00:00,7.476667,172.162857
2020-12-26 12:02:00,7.546508,172.160397
...,...,...
2021-01-25 11:46:00,1.961701,84.096259
2021-01-25 11:48:00,1.857143,70.270636
2021-01-25 11:50:00,1.898849,72.101151
2021-01-25 11:52:00,1.898571,68.825953


In [6]:
from statsmodels.tsa.arima_model import ARIMA
import datetime as dt


def predict(history):
    model = ARIMA(history, order=(1,1,0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0].flatten()[0]
    return yhat


X = data_rs.iloc[0:500]
size = int(len(X) * 0.05)
train, test = X.iloc[0:size], X.iloc[size:len(X)]
columns = ['wind_speed', 'wind_direction']
history = train
predictions = pd.DataFrame(index=test.index.shift(-1), columns=[x+"Prediction" for x in columns])
times = []
for i, row in test.iterrows():
    start = dt.datetime.now()
    for col_name in columns:
        predictions.loc[i-predictions.index.freq, col_name+'Prediction'] = predict(history[col_name].tail(size).dropna())
        history.loc[i, col_name] = row[col_name]
        end = dt.datetime.now()
    times.append(end-start)




In [7]:
plt.figure(figsize=(20,5))
plt.plot(history['wind_speed'], color='blue', label='real')
plt.plot(predictions['wind_speedPrediction'], color='red', label='prediction')
plt.legend(loc='best')
plt.title('Real vs Predictions')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Real vs Predictions')

In [8]:
from sklearn.metrics import mean_squared_error
combined_df = pd.concat([history,predictions], axis=1).dropna()
error = mean_squared_error(combined_df['wind_speed'], combined_df['wind_speedPrediction'])
print(error)

0.12743045876739165


In [9]:
plt.figure(figsize=(20,5))
plt.plot(history['wind_direction'], color='blue', label='real')
plt.plot(predictions['wind_directionPrediction'], color='red', label='prediction')
plt.legend(loc='best')
plt.title('Real vs Predictions')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Real vs Predictions')

In [10]:
from sklearn.metrics import mean_squared_error
combined_df = pd.concat([history,predictions], axis=1).dropna()
error = mean_squared_error(combined_df['wind_direction'], combined_df['wind_directionPrediction'])
print(error)

1.21629639842871


In [11]:
print((history['wind_speed'] - history['wind_speed'].shift(1)).median())
print((history['wind_direction'] - history['wind_direction'].shift(-1)).mean())

print((predictions['wind_speedPrediction'] - predictions['wind_speedPrediction'].shift(1)).median())
print((predictions['wind_directionPrediction'] - predictions['wind_directionPrediction'].shift(-1)).mean())

0.06515874408540334
0.014127464366026686
0.00526356365779268
0.032806035129279175


In [68]:
from datetime import datetime, timedelta

predictions["vertical_lines"] = predictions.index
predictions["vertical_lines"] = predictions["vertical_lines"].dt.ceil('10min')

history_plot = history[24:70]
predictions_plot = predictions[(predictions.index.minute+2)%10==0][0:9]


print(history_plot.head(5))
print(predictions_plot.head(5))

                     wind_speed  wind_direction
timestamp                                      
2020-12-26 12:42:00    6.458286      178.922572
2020-12-26 12:44:00    7.147917      171.529166
2020-12-26 12:46:00    7.260238      172.902144
2020-12-26 12:48:00    7.102449      174.795170
2020-12-26 12:50:00    7.565714      177.712039
                    wind_speedPrediction wind_directionPrediction  \
timestamp                                                           
2020-12-26 12:48:00              7.09157                  174.762   
2020-12-26 12:58:00              7.74909                  187.137   
2020-12-26 13:08:00              8.70015                  186.267   
2020-12-26 13:18:00              9.36285                  183.415   
2020-12-26 13:28:00              8.76551                  177.985   

                         vertical_lines  
timestamp                                
2020-12-26 12:48:00 2020-12-26 12:50:00  
2020-12-26 12:58:00 2020-12-26 13:00:00  
2020-12-26 1

In [130]:
from datetime import datetime
import matplotlib.dates as mdates

blue_lines = [ i for i in history_plot.index if i.minute % 10 == 0]
red_lines = [ i for i in history_plot.index if (i.minute+4) % 10 == 0]
prediction_ticks = [ i for i in history_plot.index if (i.minute+2) % 10 == 0]
ticks = blue_lines+red_lines+prediction_ticks
l = [i.strftime("%H:%M:%S") if (i in ticks) else "" for i in history_plot.index]

In [132]:

plt.figure(figsize=(12,8))
plt.subplot(2, 1, 1)
plt.plot(history_plot['wind_direction'], color='blue', label='Real data')
#plt.scatter(y=history_plot['wind_direction'], x=history_plot.index, c='blue', marker="o", s=20, label='Real data')

plt.scatter(y=predictions_plot['wind_directionPrediction'], x=predictions_plot.index, c='red', marker="o", s=20, label='Predicted data')

plt.xticks(history_plot.index, rotation='vertical', labels=[])
#plt.axvspan(datetime(2020,12,26,13,44), datetime(2020,12,26,14,6), facecolor='yellow', alpha=0.1, label='Initial training period')
plt.title('Prediction to Decision Timeline')


for d in blue_lines:
    plt.axvline(x=d, linestyle='--', alpha=0.7, label='Turbine Status Change', c='green')
    

for d in red_lines:
    plt.axvline(x=d, linestyle='--', alpha=0.7, label='Prediction Made', c='red')

for d in prediction_ticks:
    plt.axvline(x=d, linestyle='--', alpha=0.7, label='Send command')

plt.ylabel('Wind Direction')

for i, row in history_plot.iterrows():
    if i in prediction_ticks:
        plt.annotate(round(row['wind_direction'], 2), xy=(i, row['wind_direction']+0.4), size=8, c='blue', weight='bold')
        plt.annotate(round(predictions_plot.loc[i]['wind_directionPrediction'], 2), xy=(i, row['wind_direction']+0.7), size=8, c='red', weight='bold')
    if i in blue_lines:
        plt.annotate(round(row['wind_direction'], 2), xy=(i, row['wind_direction']+0.2), size=8, c='blue', weight='bold')


plt.subplot(2, 1, 2)
plt.plot(history_plot['wind_speed'], color='blue', label='Real data')
#plt.scatter(y=history_plot['wind_speed'], x=history_plot.index, c='blue', marker="o", s=20, label='Real data')
plt.scatter(y=predictions_plot['wind_speedPrediction'], x=predictions_plot.index, c='red', marker="o", s=20, label='Predicted data')
plt.xticks(history_plot.index, rotation='vertical', labels=l)
#plt.axvspan(datetime(2020,12,26,13,44), datetime(2020,12,26,14,6), facecolor='yellow', alpha=0.1, label='Initial training period')


plt.rcParams["date.autoformatter.minute"] = "%m-%d %H:%M:%S"

for d in blue_lines:
    plt.axvline(x=d, linestyle='--', alpha=0.7, label='Turbine Status Change', c='green')

for d in red_lines:
    plt.axvline(x=d, linestyle='--', alpha=0.7, label='Prediction Made', c='red')

for d in prediction_ticks:
    plt.axvline(x=d, linestyle='--', alpha=0.7, label='Send command')

# for d in predictions_plot.index:
#     plt.axvline(x=d, linestyle='--', alpha=0.7, label='Prediction Made')

plt.xlabel('Time of Day')
plt.ylabel('Wind Speed')

# for i, row in predictions_plot.iterrows():
#     plt.annotate(round(row['wind_speedPrediction'], 2), xy=(i, row['wind_speedPrediction']), size=8, c='red')

for i, row in history_plot.iterrows():
    if i in prediction_ticks:
        plt.annotate(round(row['wind_speed'], 2), xy=(i, row['wind_speed']-0.15), size=8, c='blue', weight='bold')
        plt.annotate(round(predictions_plot.loc[i]['wind_speedPrediction'], 2), xy=(i, row['wind_speed']-0.25), size=8, c='red', weight='bold')
    
    if i in blue_lines:
        plt.annotate(round(row['wind_speed'], 2), xy=(i, row['wind_speed']+0.05), size=8, c='blue', weight='bold')

handles, labels = plt.gca().get_legend_handles_labels()
by_label = dict(zip(labels, handles))
plt.legend(by_label.values(), by_label.keys(), loc='upper left', bbox_to_anchor=(1, -0.05))
plt.tight_layout()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …