# ODE - turbine wind speed predictor

https://charlies-organization-9.gitbook.io/charlies-organization/v/wedowind-open-data-exploration/challenges/images-and-media

https://app.gitbook.com/o/WlpzH68zwmW5fTMYLtJp/s/L1RZyE8JUkAQQmXGYjqX/charlies-attempt-s

The first step is to use the data from January to create some wind speed prediction models.

We can then use the February data to make the predictions for the challenge for comparison with others.


In [1]:
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
from sklearn import linear_model 

In [2]:
scada = pd.read_excel("January.xlsx",sheet_name="January", header=[0, 1],index_col=[0])

scada_nogaps = scada.dropna()

windspeed_predictions = None
result_statistics = None

## Straight average (mean)
Taking a straight average (mean) of the wind speed at the other turbines to predict the wind speed at the target turbine.

In [3]:
method_mean = scada["Wind speed (m/s)"].drop(columns=["Kelmarsh 1"]).mean(axis=1)
method_mean.to_csv("method_mean.csv")

rms = np.sqrt(np.mean((scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_mean)**2))
mae = np.mean(abs(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_mean))
bias = np.mean(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_mean)

print(rms, mae, bias)

windspeed_predictions = method_mean
windspeed_predictions.name = "mean"
windspeed_predictions = pd.DataFrame(windspeed_predictions)

0.8096136869105809 0.5849670873851651 0.03691628609048692


## Straight average (median)
Taking a straight average (median) of the wind speed at the other turbines to predict the wind speed at the target turbine.

In [4]:
method_median = scada["Wind speed (m/s)"].drop(columns=["Kelmarsh 1"]).median(axis=1)
method_median.to_csv("method_median.csv")

rms = np.sqrt(np.mean((scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_median)**2))
mae = np.mean(abs(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_median))
bias = np.mean(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_median)

print(rms, mae, bias)

windspeed_predictions["median"] = method_median

0.849889805479719 0.5889206010334975 -0.05393827287710014


## Max of other turbines
Taking the max wind speed from the other turbines to predict the wind speed at the target turbine.

In [5]:
method_max = scada["Wind speed (m/s)"].drop(columns=["Kelmarsh 1"]).max(axis=1)
method_max.to_csv("method_max.csv")

rms = np.sqrt(np.mean((scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_max)**2))
mae = np.mean(abs(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_max))
bias = np.mean(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_max)

print(rms, mae, bias)

windspeed_predictions["max"] = method_max

1.0403927641643507 0.781155356869386 -0.7463764114883302


## Polynomial fit with neighbouring turbine's wind speed
Predicting the wind speed through fitting a polynomial between the wind speed at the target turbine and a neighbouring wind turbine

In [6]:
mymodel = np.poly1d(np.polyfit(scada_nogaps["Wind speed (m/s)"]["Kelmarsh 2"] , scada_nogaps["Wind speed (m/s)"]["Kelmarsh 1"] , 4))
method_poly = mymodel(scada["Wind speed (m/s)"]["Kelmarsh 2"])
method_poly = pd.Series(method_poly,index=scada.index)
method_poly.to_csv("method_poly.csv")

rms = np.sqrt(np.mean((scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_poly)**2))
mae = np.mean(abs(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_poly))
bias = np.mean(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_poly)

print(rms, mae, bias)  

windspeed_predictions["poly"] = method_poly

0.7785792035039318 0.5653278252056716 0.00038281698381861725


## Multiple linear regression using wind speed
Utilising the wind speed from all remaining turbines to run a multilinear fit to predict the wind speed at the target turbine.

In [7]:
X = scada_nogaps["Wind speed (m/s)"].drop(columns=["Kelmarsh 1"])
y = scada_nogaps["Wind speed (m/s)"]["Kelmarsh 1"]

regr = linear_model.LinearRegression()
regr.fit(X, y) 

method_multilinearWS = regr.predict(scada_nogaps["Wind speed (m/s)"].drop(columns=["Kelmarsh 1"]))
method_multilinearWS = pd.Series(method_multilinearWS,index=scada_nogaps.index)
method_multilinearWS = method_multilinearWS.reindex(scada.index)
method_multilinearWS.to_csv("method_multilinearWS.csv")

rms = np.sqrt(np.mean((scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_multilinearWS)**2))
mae = np.mean(abs(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_multilinearWS))
bias = np.mean(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_multilinearWS)

print(rms, mae, bias)

windspeed_predictions["multilinearWS"] = method_multilinearWS

0.7336880532242803 0.5212941849162054 -7.028750083957825e-16


## Multiple linear regression using all variables
Utilising the variables from all remaining turbines to run a multilinear fit to predict the wind speed at the target turbine.

In [8]:
column_names = [' '.join(words) for words in scada_nogaps.drop(columns="Kelmarsh 1",level=1).columns]
X = scada_nogaps.drop(columns="Kelmarsh 1",level=1)
X.columns=column_names

y = scada_nogaps["Wind speed (m/s)"]["Kelmarsh 1"]

regr = linear_model.LinearRegression()
regr.fit(X, y)

method_multilinear = regr.predict(X)
method_multilinear = pd.Series(method_multilinear,index=scada_nogaps.index)
method_multilinear = method_multilinear.reindex(scada.index)
method_multilinear.to_csv("method_multilinear.csv")

rms = np.sqrt(np.mean((scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_multilinear)**2))
mae = np.mean(abs(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_multilinear))
bias = np.mean(scada["Wind speed (m/s)"]["Kelmarsh 1"] - method_multilinear)

print(rms, mae, bias)

windspeed_predictions["multilinear"] = method_multilinear

0.7198744571877352 0.5078898355781948 -2.3003182092952884e-16


# Results

In [9]:
windspeed_predictions.to_csv("all_methods.csv")

rms = np.sqrt(np.mean((windspeed_predictions.sub(scada["Wind speed (m/s)"]["Kelmarsh 1"],axis=0))**2,axis=0))
mae = np.mean(abs(windspeed_predictions.sub(scada["Wind speed (m/s)"]["Kelmarsh 1"],axis=0)),axis=0)
bias = np.mean(windspeed_predictions.sub(scada["Wind speed (m/s)"]["Kelmarsh 1"],axis=0),axis=0)

print(pd.DataFrame.from_dict({"RMS":rms,"MAE":mae, "bias":bias}))

                    RMS       MAE          bias
mean           0.809614  0.584967 -3.691629e-02
median         0.849890  0.588921  5.393827e-02
max            1.040393  0.781155  7.463764e-01
poly           0.778579  0.565328 -3.828170e-04
multilinearWS  0.733688  0.521294  7.028750e-16
multilinear    0.719874  0.507890  2.300318e-16
