In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)

data = pd.read_excel("WindPark VI.xlsx", index_col = 0, parse_dates = True)

# Multivariate Time Series Forecasting using Support Vector Regression

In [2]:
#a function to clean and prepare the data set
def clean_data(df_):
    
    df = df_.copy()
    
    all_cols = df.columns
    
    #creating a list for each category of variables
    active_power_avg = ['WTG 1-Active power avg [kW]', 'WTG 10-Active power avg [kW]', 
       'WTG 11-Active power avg [kW]', 'WTG 2-Active power avg [kW]', 
       'WTG 3-Active power avg [kW]', 'WTG 4-Active power avg [kW]',
       'WTG 5-Active power avg [kW]', 'WTG 6-Active power avg [kW]',
       'WTG 7-Active power avg [kW]', 'WTG 8-Active power avg [kW]',
       'WTG 9-Active power avg [kW]']

    wind_speed_avg = ['WTG 1-Wind speed avg [m/s]', 'WTG 10-Wind speed avg [m/s]', 
       'WTG 11-Wind speed avg [m/s]', 'WTG 2-Wind speed avg [m/s]', 
       'WTG 3-Wind speed avg [m/s]', 'WTG 4-Wind speed avg [m/s]',
       'WTG 5-Wind speed avg [m/s]', 'WTG 6-Wind speed avg [m/s]',
       'WTG 7-Wind speed avg [m/s]', 'WTG 8-Wind speed avg [m/s]',
       'WTG 9-Wind speed avg [m/s]']

    wind_direction_avg = ['WTG 1-Wind direction avg [°]', 'WTG 10-Wind direction avg [°]', 
       'WTG 11-Wind direction avg [°]', 'WTG 2-Wind direction avg [°]', 
       'WTG 3-Wind direction avg [°]', 'WTG 4-Wind direction avg [°]',
       'WTG 5-Wind direction avg [°]', 'WTG 6-Wind direction avg [°]',
       'WTG 7-Wind direction avg [°]', 'WTG 8-Wind direction avg [°]',
       'WTG 9-Wind direction avg [°]']

    ambient_temperature_avg = ['WTG 1-Ambient temperature avg [°C]', 'WTG 10-Ambient temperature avg [°C]', 
       'WTG 11-Ambient temperature avg [°C]', 'WTG 2-Ambient temperature avg [°C]', 
       'WTG 3-Ambient temperature avg [°C]', 'WTG 4-Ambient temperature avg [°C]',
       'WTG 5-Ambient temperature avg [°C]', 'WTG 6-Ambient temperature avg [°C]',
       'WTG 7-Ambient temperature avg [°C]', 'WTG 8-Ambient temperature avg [°C]',
       'WTG 9-Ambient temperature avg [°C]']
    
    #using the interpolate function to remove null values
    df = df.interpolate()
    
    #setting boundaries for the values of each category of variables, with the clip function
    df[active_power_avg] = df[active_power_avg].clip(0, 850)
    df[wind_speed_avg] = df[wind_speed_avg].clip(0, 30)
    df[wind_direction_avg] = df[wind_direction_avg].clip(0, 360)
    df[ambient_temperature_avg] = df[ambient_temperature_avg].clip(-10, 40)
    
    #creating new columns with the sum or average, depending on the category of variables
    df["Total Active power avg [kW]"] = df[active_power_avg].sum(axis = 1)
    df["Wind speed avg [m/s]"] = df[wind_speed_avg].mean(axis = 1)
    df["Ambient temperature avg [°C]"] = df[ambient_temperature_avg].mean(axis = 1)
    df["Wind direction avg [°]"] = df[wind_direction_avg].mean(axis = 1)
    
    #dropping the original columns
    df.drop(all_cols, axis = 1, inplace = True)
    
    #setting an hourly frequency to the time series
    df = df.resample('H').mean()
    
    return df

In [3]:
data = clean_data(data)
data.head()

Unnamed: 0_level_0,Total Active power avg [kW],Wind speed avg [m/s],Ambient temperature avg [°C],Wind direction avg [°]
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-01 00:00:00,1372.666667,5.936364,-4.060606,32.151515
2016-01-01 01:00:00,664.116667,4.743939,-4.045455,26.448485
2016-01-01 02:00:00,13.583333,2.871212,-3.757576,67.734848
2016-01-01 03:00:00,176.716667,3.675758,-3.939394,60.374242
2016-01-01 04:00:00,64.116667,3.280303,-3.863636,69.712121


First of all, we clean the data set, using the `clean_data()` function, which is based on what we did on the Exploratory Data Analysis part of the project.

In [4]:
def time_lags(df_, steps):
    
    df = df_.copy()
    cols = df.columns
        
    for col in cols:
        for i in range(1, steps + 1):
            df[col + f" (t-{str(i)})"] = df[col].shift(i)
            
    df.dropna(inplace = True)        
            
    return df

In [261]:
cols = ["Wind speed avg [m/s]", "Ambient temperature avg [°C]",
        "Wind direction avg [°]"]

lags = 5

var_model = {}
data1 = {}
train = {}
test = {}

train_x = {}
train_y = {}
test_x = {}
test_y = {}

#creating univariate series forecasts for each independent variable,
#to use their predictions as input for the multivariate series forecast
for col in cols:
    data1[col] = data.loc[:, [col]]
    data1[col] = time_lags(data1[col], lags)
    train[col], test[col] = data1[col]["2016":"2017"], data1[col]["2018"]
    
    train_x[col] = train[col].iloc[:, 1:].values
    train_y[col] = train[col].iloc[:, 0].values
    
    test_x[col] = test[col].iloc[:, 1:].values
    test_y[col] = test[col].iloc[:, 0].values
    
    var_model[col] = LinearRegression()
    var_model[col].fit(train_x[col], train_y[col])
    
    
data_power = time_lags(data, lags)
train_power, test_power = data_power["2016":"2017"], data_power["2018"]

train_x_power = train_power.iloc[:, 1:].values
train_y_power = train_power.iloc[:, 0].values
    
test_x_power = test_power.iloc[:, 1:].values
test_y_power = test_power.iloc[:, 0].values
    
var_model_power = LinearRegression()
var_model_power.fit(train_x_power, train_y_power)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [257]:
steps = 12

X = np.array([[3319.900000, 2295.800000, 1091.416667, 565.950000, 294.016667]])
p = np.array([])

for i in range(0, steps):
    prediction = model.predict(X)
    p = np.append(p, prediction)
    X = np.delete(X, -1)
    X = np.insert(X, 0, prediction)
    X = np.reshape(X, (1, -1))

actual = data1.iloc[16:28, 0].values
print(np.sqrt(mean_squared_error(actual, p)))

AttributeError: 'dict' object has no attribute 'predict'