# Task-2: Wind Power Forecasting With Multiple Targets
Author: Karan Singh

In [1]:
import numpy as np
import pandas as pd
import random

In [2]:
labels = pd.read_parquet(r'../Case_Interview_-_Multitarget_Power_Forecasting/Case Interview - Multitarget Power Forecasting/data/labels.parquet', engine='pyarrow')
weather = pd.read_parquet(r'../Case_Interview_-_Multitarget_Power_Forecasting/Case Interview - Multitarget Power Forecasting/data/weather_forecast.parquet', engine='pyarrow')

In [3]:
# Ensure that the labels are in 15 minute resolution
labels_index = pd.date_range(start=labels.index.min(), end=labels.index.max(), freq='15min')
labels.index = labels_index

In [4]:
# Add features -> part of day & Month
# Could temperature play a part? Is it cooler in the evenings/night?  
# Can the months represent seasons?    

conditions = [(0 <= labels.index.hour) & (labels.index.hour < 6), 
            (6 <= labels.index.hour) & (labels.index.hour < 12), 
            (12 <= labels.index.hour) & (labels.index.hour < 18), 
            (18 <= labels.index.hour) & (labels.index.hour < 24)]
choices = [0, 1, 2, 3]
labels['Day_part'] = np.select(condlist=conditions, choicelist=choices, default=np.nan)
labels['Month'] = labels.index.month
labels = labels[['Day_part', 'Month', 'power_1', 'power_2', 'power_3']]

In [5]:
labels.head()

Unnamed: 0,Day_part,Month,power_1,power_2,power_3
2019-01-01 00:00:00,0.0,1,48637.0,51637.0,54637.0
2019-01-01 00:15:00,0.0,1,52357.0,55357.0,58357.0
2019-01-01 00:30:00,0.0,1,54317.0,57317.0,60317.0
2019-01-01 00:45:00,0.0,1,54220.5,57220.5,60220.5
2019-01-01 01:00:00,0.0,1,51680.0,54680.0,57680.0


In [6]:
weather.head()

Unnamed: 0,Generation_Date,Forecast_Date,U,V,ws,Direction
1,2019-06-25,2019-06-25 01:00:00,1.861707,0.195582,1.871952,264.002783
2,2019-06-25,2019-06-25 02:00:00,1.257695,0.570511,1.381043,245.600179
3,2019-06-25,2019-06-25 03:00:00,1.883554,0.701067,2.009794,249.584514
4,2019-06-25,2019-06-25 04:00:00,2.664914,0.169917,2.670325,266.351713
5,2019-06-25,2019-06-25 05:00:00,2.855113,-0.255226,2.866498,275.108242


In [7]:
# Drop Generation_Date and make Forecast_Date the index 
weather = weather.drop('Generation_Date', axis=1).set_index('Forecast_Date')

In [8]:
weather.head()

Unnamed: 0_level_0,U,V,ws,Direction
Forecast_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-06-25 01:00:00,1.861707,0.195582,1.871952,264.002783
2019-06-25 02:00:00,1.257695,0.570511,1.381043,245.600179
2019-06-25 03:00:00,1.883554,0.701067,2.009794,249.584514
2019-06-25 04:00:00,2.664914,0.169917,2.670325,266.351713
2019-06-25 05:00:00,2.855113,-0.255226,2.866498,275.108242


In [9]:
# Merge the labels and weather data
data = pd.merge(left=labels, right=weather, left_index=True, right_index=True, how='left')

In [10]:
data.head()

Unnamed: 0,Day_part,Month,power_1,power_2,power_3,U,V,ws,Direction
2019-01-01 00:00:00,0.0,1,48637.0,51637.0,54637.0,,,,
2019-01-01 00:15:00,0.0,1,52357.0,55357.0,58357.0,,,,
2019-01-01 00:30:00,0.0,1,54317.0,57317.0,60317.0,,,,
2019-01-01 00:45:00,0.0,1,54220.5,57220.5,60220.5,,,,
2019-01-01 01:00:00,0.0,1,51680.0,54680.0,57680.0,,,,


In [11]:
# Keep values from the start date of weather 
data = data[np.where(data.index == weather.index.min())[0][0]:]

In [12]:
# Fill the hourly values forward to quarter hour 
data['U'] = data['U'].fillna(method='ffill')
data['V'] = data['V'].fillna(method='ffill')
data['ws'] = data['ws'].fillna(method='ffill')
data['Direction'] = data['Direction'].fillna(method='ffill')

In [13]:
# Rearrange so that power is the last column
data = data[['Day_part', 'Month', 'U', 'V', 'ws', 'Direction', 'power_1','power_2','power_3']]

In [14]:
data.head()

Unnamed: 0,Day_part,Month,U,V,ws,Direction,power_1,power_2,power_3
2019-06-25 01:00:00,0.0,6,1.861707,0.195582,1.871952,264.002783,4686.2,7686.2,10686.2
2019-06-25 01:15:00,0.0,6,1.861707,0.195582,1.871952,264.002783,4047.9,7047.9,10047.9
2019-06-25 01:30:00,0.0,6,1.861707,0.195582,1.871952,264.002783,3251.0,6251.0,9251.0
2019-06-25 01:45:00,0.0,6,1.861707,0.195582,1.871952,264.002783,2244.5,5244.5,8244.5
2019-06-25 02:00:00,0.0,6,1.257695,0.570511,1.381043,245.600179,2224.6,5224.6,8224.6


#### Use Random Forest Regression to impute missing power values

In [15]:
missing_powers_mask = (data['power_1'].isnull()) & (data['power_2'].isnull()) & (data['power_3'].isnull())   
data.loc[missing_powers_mask].shape[0]

341

In [16]:
powers_present_mask = (data['power_1'].notnull()) & (data['power_2'].notnull()) & (data['power_3'].notnull())
data[powers_present_mask]
rf_inputs = data[powers_present_mask].drop(['power_1', 'power_2', 'power_3'], axis=1)
rf_labels = data[powers_present_mask][['power_1','power_2', 'power_3']]

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

In [18]:
X_train, X_test, y_train, y_test = train_test_split(rf_inputs, rf_labels, test_size=0.20)

In [19]:
regr = MultiOutputRegressor(RandomForestRegressor(n_estimators=50))
regr.fit(X_train, y_train)
regr.score(X_test, y_test)

0.9702429823830423

In [20]:
# Get the values that are missing
pred_data = data.loc[missing_powers_mask].drop(['power_1', 'power_2', 'power_3'], axis=1)

In [21]:
predictions = regr.predict(pred_data)

In [22]:
for ind, pred in zip(data[missing_powers_mask].index, predictions):
    data.loc[ind,'power_1'] = pred[0]
    data.loc[ind,'power_2'] = pred[1]
    data.loc[ind,'power_3'] = pred[2]

In [23]:
data.isna().sum()

Day_part     0
Month        0
U            0
V            0
ws           0
Direction    0
power_1      0
power_2      0
power_3      0
dtype: int64

### Model Building

In [24]:
np.set_printoptions(suppress=True)

In [25]:
# Date to split the training and testing
index_1Aug2020 = np.where(data.index == '2020-08-01 00:00:00')[0][0] 

training_data = data[:index_1Aug2020] 
testing_data = data[index_1Aug2020:]

In [26]:
# Ensure that training data starts at 00:15:00 and ends at 00:00:00
training_data = training_data.loc['2019-06-26 00:15:00':'2020-07-30 00:00:00']

In [27]:
# Remove the labels for X
training_X = training_data.iloc[:,:6].values

In [28]:
# Split the training data into chunks of 96
X = np.array(np.split(training_X, training_X.shape[0]//96))
X.shape

(400, 96, 6)

In [29]:
training_data.head()

Unnamed: 0,Day_part,Month,U,V,ws,Direction,power_1,power_2,power_3
2019-06-26 00:15:00,0.0,6,2.202267,1.753324,2.814982,231.475115,7021.0,10021.0,13021.0
2019-06-26 00:30:00,0.0,6,2.202267,1.753324,2.814982,231.475115,6050.6,9050.6,12050.6
2019-06-26 00:45:00,0.0,6,2.202267,1.753324,2.814982,231.475115,7160.7,10160.7,13160.7
2019-06-26 01:00:00,0.0,6,1.593115,0.233521,1.610139,261.660883,6447.0,9447.0,12447.0
2019-06-26 01:15:00,0.0,6,1.593115,0.233521,1.610139,261.660883,5422.2,8422.2,11422.2


In [30]:
# Assign power_1 to y_1 and split it into 96 daily values 
y_1 = training_data['power_1'].values
y_1 = np.array(np.split(y_1, y_1.shape[0]/96))
y_1.shape

(400, 96)

In [31]:
# Assign power_2 to y_2 and split it into 96 daily values 
y_2 = training_data['power_2'].values
y_2 = np.array(np.split(y_2, y_2.shape[0]/96))
y_2.shape

(400, 96)

In [32]:
# Assign power_3 to y_3 and split it into 96 daily values 
y_3 = training_data['power_3'].values
y_3 = np.array(np.split(y_3, y_3.shape[0]/96))
y_3.shape

(400, 96)

In [33]:
# Flatten X
n_input = X.shape[1] * X.shape[2]
X = X.reshape((X.shape[0], n_input))
X.shape 

(400, 576)

In [34]:
# Set the output dimension
n_output = y_1.shape[1]

In [35]:
# Train test split 20%
X_train = X[:320]
X_test = X[320:]

y_train_1 = y_1[:320]
y_test_1 = y_1[320:]

y_train_2 = y_2[:320]
y_test_2 = y_2[320:]

y_train_3 = y_3[:320]
y_test_3 = y_3[320:]

In [36]:
from sklearn.preprocessing import MinMaxScaler
sc_X = MinMaxScaler(feature_range=(0, 1))

In [37]:
# Normalize the data
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

In [38]:
def norm_mean_abs_error(y_true, y_pred):
    ''' Custom loss function '''
    return 100*((np.sum(np.abs(y_true - y_pred))/y_true.size)/106400)

In [39]:
from keras.models import Sequential
from keras.layers import Dense
import keras
from keras import layers
import tensorflow as tf
from keras.layers import Layer

## Custom layer

In [40]:
class FiLMLayer(layers.Layer):
    def __init__(self, gamma, beta, switch):
      super(FiLMLayer, self).__init__()

      self.gamma = gamma
      self.beta = beta
      self.switch = switch

    def call(self, input):
      if self.switch == 1:
        return tf.math.multiply(self.gamma[0], input) + self.beta[0]
      elif self.switch == 2:
        return tf.math.multiply(self.gamma[1], input) + self.beta[1]
      else: 
        return tf.math.multiply(self.gamma[2], input) + self.beta[2]
      

In [41]:
class MLPBlock(Sequential):
    def __init__(self):
        super(MLPBlock, self).__init__()
        self.switch = tf.Variable(1)
        self.gamma = tf.Variable(initial_value=tf.random.uniform(shape=(3,200), minval=0, maxval=1, dtype=tf.dtypes.float32), trainable=True) 
        self.beta = tf.Variable(initial_value=tf.random.uniform(shape=(3,200), minval=0, maxval=1, dtype=tf.dtypes.float32), trainable=True)

In [42]:
mlp = MLPBlock()

In [43]:
mlp.add(Dense(200, activation='relu', input_dim=n_input, name="Dense1"))
mlp.add(FiLMLayer(mlp.gamma, mlp.beta, mlp.switch))
mlp.add(Dense(200, activation='relu', input_dim=n_input, name="Dense2"))
mlp.add(Dense(n_output))

In [44]:
# y = mlp(X_train)

In [45]:
mlp.summary()

Model: "mlp_block"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Dense1 (Dense)              (None, 200)               115400    
                                                                 
 fi_lm_layer (FiLMLayer)     (None, 200)               1201      
                                                                 
 Dense2 (Dense)              (None, 200)               40200     
                                                                 
 dense (Dense)               (None, 96)                19296     
                                                                 
Total params: 176,097
Trainable params: 176,097
Non-trainable params: 0
_________________________________________________________________


In [46]:
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
# mse = tf.keras.losses.MeanSquaredError()
loss_metric = tf.keras.metrics.Mean()

In [47]:
def tf_nmae(y_true, y_pred):
    abs_diff = tf.abs(y_true - y_pred)
    return 100*(tf.reduce_sum(abs_diff)/y_true.shape[0])/106400

In [48]:
# Supress warnings
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [49]:
epochs = 10

# Iterate over epochs.
for epoch in range(epochs):
    print("Start of epoch %d" % (epoch,))

    with tf.GradientTape() as tape:
        mlp.switch.assign(1)
        y_pred1 = mlp(X_train)
        loss = tf_nmae(y_true=y_train_1, y_pred=y_pred1)
        
        mlp.switch.assign(2)
        y_pred2 = mlp(X_train)
        loss += tf_nmae(y_true=y_train_2, y_pred=y_pred2)
        
        mlp.switch.assign(3)
        y_pred3 = mlp(X_train)
        loss += tf_nmae(y_true=y_train_3, y_pred=y_pred3)
        
    print(f"Loss:{loss}")
    tf.cast(loss, dtype=tf.float32, name=None)
    grads = tape.gradient(loss, mlp.trainable_weights)
    optimizer.apply_gradients(zip(grads, mlp.trainable_weights))

    loss_metric(loss)

Start of epoch 0
Loss:7585.283203125
Start of epoch 1
Loss:7585.232421875
Start of epoch 2
Loss:7585.177734375
Start of epoch 3
Loss:7585.115234375
Start of epoch 4
Loss:7585.0380859375
Start of epoch 5
Loss:7584.9453125
Start of epoch 6
Loss:7584.83251953125
Start of epoch 7
Loss:7584.697265625
Start of epoch 8
Loss:7584.537109375
Start of epoch 9
Loss:7584.3466796875


Make a prediction model with different

In [70]:
# class FiLMLayer2(Layer):
#     def __init__(self, **kwargs):
#       super(FiLMLayer2, self).__init__(**kwargs)

#       self.switch = tf.Variable(1)
    
#     def build(self, input_shape):
#         self.gamma = self.add_weight(name="gamma_weight", shape=((3, input_shape)), initializer='random_normal', trainable=True)
#         self.beta = self.add_weight(name="beta_weight", shape=((3, input_shape)), initializer='random_normal', trainable=True)
#         super(FiLMLayer2, self).build(input_shape)

#     def call(self, input):
#       if self.switch == 1:
#         return tf.math.multiply(self.gamma[0], input) + self.beta[0]
#       elif self.switch == 2:
#         return tf.math.multiply(self.gamma[1], input) + self.beta[1]
#       else: 
#         return tf.math.multiply(self.gamma[2], input) + self.beta[2]

In [51]:
class FL(Layer):
    def __init__(self,**kwargs):
        super(FL,self).__init__(**kwargs)

        self.switch = tf.Variable(1)

    def build(self, input_shape):
        self.gamma = self.add_weight(name="gamma_weight", shape=(3, 200), initializer='random_normal', trainable=True)
        self.beta = self.add_weight(name="beta_weight", shape=(3, 200), initializer='random_normal', trainable=True)
        super(FL, self).build(input_shape)
    
    def call(self, input):
        if self.switch == 1:
            return tf.math.multiply(self.gamma[0], input) + self.beta[0]
        elif self.switch == 2:
            return tf.math.multiply(self.gamma[1], input) + self.beta[1]
        else:
            return tf.math.multiply(self.gamma[2], input) + self.beta[2]


In [52]:
# model = Sequential()
# model.add(Dense(200, activation='relu', input_dim=n_input, name='Dense1'))
# model.add(FL())
# model.add(Dense(200, activation='relu'))
# model.add(Dense(n_output))
# model.compile(loss='mse', optimizer='adam')
# model.fit(X_train, y_train, epochs=20, batch_size=10, verbose=1)

In [53]:
inputLayer = keras.Input(shape=(n_input))
dense_layer2= Dense(200, activation='relu', name="Dense1")(inputLayer)
film_layer = FL()(dense_layer2)
dense_layer2 = Dense(200, activation='relu', name="Dense2")(film_layer)
outputLayer = Dense(n_output)(dense_layer2) 

In [54]:
model = keras.Model(inputLayer,outputLayer)

In [55]:
model.layers[2].switch

<tf.Variable 'Variable:0' shape=() dtype=int32, numpy=1>

In [56]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 576)]             0         
                                                                 
 Dense1 (Dense)              (None, 200)               115400    
                                                                 
 fl (FL)                     (None, 200)               1201      
                                                                 
 Dense2 (Dense)              (None, 200)               40200     
                                                                 
 dense_1 (Dense)             (None, 96)                19296     
                                                                 
Total params: 176,097
Trainable params: 176,097
Non-trainable params: 0
_________________________________________________________________


In [57]:
epochs = 10

# Iterate over epochs.
for epoch in range(epochs):
    print("Start of epoch %d" % (epoch,))

    with tf.GradientTape() as tape:
        model.layers[2].switch.assign(1)
        y_pred1 = model(X_train)
        loss = tf_nmae(y_true=y_train_1, y_pred=y_pred1)
        
        model.layers[2].switch.assign(2)
        y_pred2 = model(X_train)
        loss += tf_nmae(y_true=y_train_2, y_pred=y_pred2)
        
        model.layers[2].switch.assign(3)
        y_pred3 = model(X_train)
        loss += tf_nmae(y_true=y_train_3, y_pred=y_pred3)
        
    print(f"Loss:{loss}")
    # tf.cast(loss, dtype=tf.float32, name=None)
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    loss_metric(loss)

Start of epoch 0
Loss:7585.28271484375
Start of epoch 1
Loss:7585.2822265625
Start of epoch 2
Loss:7585.28125
Start of epoch 3
Loss:7585.2783203125
Start of epoch 4
Loss:7585.27685546875
Start of epoch 5
Loss:7585.2734375
Start of epoch 6
Loss:7585.26953125
Start of epoch 7
Loss:7585.2646484375
Start of epoch 8
Loss:7585.2578125
Start of epoch 9
Loss:7585.25


In [58]:
model.layers[2].switch.assign(1)
testPredict1 = model(X_test)        

In [59]:
tf_nmae(y_true=y_test_1, y_pred=testPredict1).numpy()

2028.5663

## Make Predictions on Testing Data

In [60]:
class CustomModel(keras.Model):
    def train_step(self, data):
        if len(data) == 4:
            x,y1,y2,y3 = data

        with tf.GradientTape() as tape:
            self.layers[2].switch.assign(1)
            y_pred1 = self(X_train, training=True)
            loss = tf_nmae(y_true=y_train_1, y_pred=y_pred1)
            
            self.layers[2].switch.assign(2)
            y_pred2 = model(X_train)
            loss += tf_nmae(y_true=y_train_2, y_pred=y_pred2)
            
            self.layers[2].switch.assign(3)
            y_pred3 = model(X_train)
            loss += tf_nmae(y_true=y_train_3, y_pred=y_pred3)
        
        print(f"Loss:{loss}")
        # tf.cast(loss, dtype=tf.float32, name=None)
        grads = tape.gradient(loss, self.trainable_weights)
        optimizer.apply_gradients(zip(grads, self.trainable_weights))

        loss_metric(loss)


In [61]:
test_data.shape

(8736, 9)

In [62]:
test_data_split = np.array(np.split(test_data , test_data.shape[0]//96))

In [64]:
test_data_split.shape

(91, 96, 9)

In [63]:
# Flatten test_data_split
n_input2 = test_data_split.shape[1] * test_data_split.shape[2]
test_data_flat = test_data_split.reshape((test_data_split.shape[0], n_input2))
test_data_flat.shape

(91, 864)

In [65]:
# Normalize the data
scaler_test = MinMaxScaler(feature_range=(0, 1))
test_data_scaled = sc_X.fit_transform(test_data_flat)

In [68]:
test_data_scaled.shape

(91, 864)

In [69]:
model.predict(test_data_scaled)

ValueError: in user code:

    File "c:\Users\singh\OneDrive\Documents\Job\Jungle\task\code\env\lib\site-packages\keras\engine\training.py", line 1845, in predict_function  *
        return step_function(self, iterator)
    File "c:\Users\singh\OneDrive\Documents\Job\Jungle\task\code\env\lib\site-packages\keras\engine\training.py", line 1834, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\singh\OneDrive\Documents\Job\Jungle\task\code\env\lib\site-packages\keras\engine\training.py", line 1823, in run_step  **
        outputs = model.predict_step(data)
    File "c:\Users\singh\OneDrive\Documents\Job\Jungle\task\code\env\lib\site-packages\keras\engine\training.py", line 1791, in predict_step
        return self(x, training=False)
    File "c:\Users\singh\OneDrive\Documents\Job\Jungle\task\code\env\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\singh\OneDrive\Documents\Job\Jungle\task\code\env\lib\site-packages\keras\engine\input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 576), found shape=(None, 864)
