In [12]:
import torch
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import time
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from typing import Iterable
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
from joblib import dump, load

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor
import sys

In [2]:
# Site identifier. NOTE(review): not referenced by any other visible cell; presumably
# corresponds to the "C_" prefix of the instanceID values -- confirm before relying on it.
site = "C"

In [5]:
# Read the fine-tuning dataset from the working directory; the first CSV column
# is used as the DataFrame index rather than as a data column.
data_finetune = pd.read_csv(filepath_or_buffer="data_finetune.csv", index_col=0)

In [6]:
# Preview the loaded frame (pandas abbreviates the middle rows of a long frame).
data_finetune

Unnamed: 0,ts,Month,Day,Hour,instanceID,Wind_speed,TI,Temperature,Power
958671,2020-11-13 00:30:00,11,13,0,C_WTG01,6.343173,15.312233,9.355000,496.738776
682500,2020-08-13 16:40:00,8,13,16,C_WTG01,3.461359,17.919733,23.900000,42.106780
827043,2020-09-30 11:50:00,9,30,11,C_WTG01,3.595130,10.394158,10.966667,58.858180
278880,2020-04-02 05:20:00,4,2,5,C_WTG01,16.338689,15.963571,6.796667,2043.203491
794451,2020-09-19 17:10:00,9,19,17,C_WTG01,6.913757,8.228349,16.663334,614.239596
...,...,...,...,...,...,...,...,...,...
580775,2020-07-11 01:10:00,7,11,1,C_WTG21,6.650944,14.949392,10.298334,830.078456
122912,2020-02-10 15:20:00,2,10,15,C_WTG21,6.982645,19.118705,2.593333,825.983393
11129,2020-01-04 16:10:00,1,4,16,C_WTG21,6.100355,13.664154,7.396552,653.990937
855014,2020-10-09 17:40:00,10,9,17,C_WTG21,4.106625,28.984064,7.183333,166.245071


In [7]:
# Restore the two fitted scalers persisted with joblib.
# In the fine-tuning loop scaler1 transforms the input feature columns and
# scaler2 transforms the target column.
# NOTE(review): joblib.load unpickles the file -- only load files from a trusted source.
scaler1 = load(filename='scaler1.bin')
scaler2 = load(filename='scaler2.bin')

In [8]:
# Number of distinct turbine IDs in the fine-tuning data (missing IDs are not counted).
turbine_count = data_finetune['instanceID'].nunique(dropna=True)

### Define network

In [9]:
class Net(nn.Module):
    def __init__(self, dims: Iterable[int], output_activation: nn.Module = None):
        """Creates a fully connected network with ReLUs between layers

        The last linear layer is followed by ``output_activation`` only when one
        is supplied; by default the output is left un-activated.

        :param dims (Iterable[int]): layer sizes in the form of (IN_SIZE, HIDDEN_SIZE,
            HIDDEN_SIZE2, ..., OUT_SIZE). Any iterable is accepted (tuple, list,
            generator, ...); it needs at least two entries.
        :param output_activation (nn.Module): activation *class* (or zero-argument factory)
            that is instantiated and appended after the last layer, e.g. ``nn.Sigmoid``.
            ``None`` (default) appends nothing.
        """
        super().__init__()
        # Materialise once: the annotation promises Iterable, but the sizes are
        # indexed below, which a generator/iterator would not support.
        dims = tuple(dims)
        self.input_size = dims[0]
        self.out_size = dims[-1]
        self.layers = self.make_seq(dims, output_activation)

    @staticmethod
    def make_seq(dims: Iterable[int], output_activation: nn.Module) -> nn.Module:
        """Creates the sequential stack: Linear -> ReLU for every hidden layer, then a final Linear

        :param dims (Iterable[int]): layer sizes in the form of (IN_SIZE, HIDDEN_SIZE,
            HIDDEN_SIZE2, ..., OUT_SIZE)
        :param output_activation (nn.Module): activation class (or zero-argument factory)
            instantiated after the last layer; falsy values (e.g. ``None``) add nothing
        :return (nn.Module): ``nn.Sequential`` holding the created layers
        """
        # Also accept one-shot iterables when called directly.
        dims = tuple(dims)
        mods = []

        # Hidden layers: pair each size with its successor, excluding the output size.
        # Module order must stay Linear, ReLU, Linear, ... so that state_dict keys
        # ("layers.0.weight", "layers.2.weight", ...) keep matching saved checkpoints.
        for in_features, out_features in zip(dims[:-2], dims[1:-1]):
            mods.append(nn.Linear(in_features, out_features))
            mods.append(nn.ReLU())

        # Output layer (no ReLU after it).
        mods.append(nn.Linear(dims[-2], dims[-1]))
        if output_activation:
            mods.append(output_activation())
        return nn.Sequential(*mods)

    def forward(self, x: Tensor) -> Tensor:
        """Computes a forward pass through the network

        :param x (torch.Tensor): input tensor to feed into the network
        :return (torch.Tensor): output computed by the network
        """
        # Feedforward
        return self.layers(x)


In [10]:
def train(X, y, quantile, net, lr, batch_size, epoch):
    """Fit ``net`` to one quantile of ``y`` given ``X`` using the pinball (quantile) loss.

    The loss per element is ``max(q * r, (q - 1) * r)`` with ``r = y - net(X)``,
    averaged over the mini-batch and minimised with Adam.

    :param X: array-like of inputs; converted with ``torch.Tensor(X)``
    :param y: array-like of targets; converted with ``torch.Tensor(y)``. A target
        whose shape differs from the network output but has the same number of
        elements (e.g. ``(N,)`` vs ``(N, 1)``) is reshaped to the output shape.
    :param quantile: quantile level ``q`` used in the pinball loss
    :param net: ``nn.Module`` to train; it is updated in place
    :param lr: Adam learning rate
    :param batch_size: mini-batch size for the shuffled ``DataLoader``
    :param epoch: number of full passes over the data
    :return: tuple ``(net, net.state_dict())`` -- the same module object and its state dict
    """
    # Wrap the arrays so they can be served as shuffled mini-batches.
    dataset = TensorDataset(Tensor(X), Tensor(y))
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    optimizer = optim.Adam(net.parameters(), lr=lr)

    for _ in range(epoch):
        for X_batch, y_batch in loader:
            output = net(X_batch)

            # Guard against silent broadcasting: (N,) - (N, 1) would produce an
            # (N, N) residual and a wrong loss. Only realign when the element
            # counts agree, so genuinely broadcast-compatible targets still work.
            if y_batch.shape != output.shape and y_batch.numel() == output.numel():
                y_batch = y_batch.reshape(output.shape)

            residual = y_batch - output
            # Pinball loss: under-prediction is weighted by q, over-prediction by (1 - q).
            loss = torch.max(quantile * residual, (quantile - 1) * residual).mean()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    return net, net.state_dict()

### Fine tuning

Use a very low learning rate at this stage because the fine-tuning dataset is very small: large weight updates would make the network overfit very quickly.

In [13]:
# load hyperparameters
# Paths are resolved against the working directory, the same way the CSV and the
# scaler files are loaded. The earlier `sys.path[0] + '/...'` used the import search
# path as a data directory: when sys.path[0] is the empty string (which Python uses
# to mean "current directory") the result was '/hparams_finetune.pth', i.e. the
# filesystem root, and any sys.path.insert(0, ...) silently redirected the load.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
model = torch.load('hparams_finetune.pth')

# load the pretrained weights (a dict of state dicts keyed by 'median', 'UQ', 'LQ',
# as consumed by the fine-tuning loop)
pretrain = torch.load('pretrain.pth')

In [14]:
# Inspect the loaded checkpoint: a dict whose values are parameter state dicts
# (keys such as 'layers.0.weight', 'layers.0.bias').
pretrain

{'median': OrderedDict([('layers.0.weight',
               tensor([[-0.6934, -0.1782,  0.1139],
                       [ 0.1297, -0.1491,  0.2334],
                       [-0.2782,  0.1037, -0.0160],
                       [-0.4199,  0.0642,  0.3406],
                       [-0.5982, -0.0845,  0.0386],
                       [ 0.0336,  0.0705,  0.2021],
                       [-0.4348, -0.0809, -0.0234],
                       [-0.2826, -0.1212,  0.3681],
                       [-0.3156,  0.2249,  0.1606],
                       [ 0.5576,  0.0397, -0.0183],
                       [-0.2828,  0.1926,  0.1895],
                       [ 0.3320,  0.0260, -0.0412],
                       [-0.5285,  0.1406, -0.2217],
                       [ 0.3128, -0.2808, -0.1001],
                       [ 0.3667, -0.0926,  0.0011],
                       [-0.2467, -0.3039,  0.2554]])),
              ('layers.0.bias',
               tensor([ 0.2494, -0.1198, -0.5109, -0.1858,  0.6825, -0.3242,  0.2908, -0.

In [15]:
# Quantile levels: the median plus the 2.5% / 97.5% quantiles, which together
# bound a nominal 95% band.
q_lower, q_median, q_upper = 0.025, 0.5, 0.975

# Hyperparameters read from the loaded `model` dict.
dims, lr, epoch = (model[key] for key in ('dims', 'lr', 'epoch'))
# Cast explicitly to int. NOTE(review): presumably stored as a non-int numeric -- confirm.
batch_size = int(model['batch_size'])

In [16]:
# Echo the hyperparameters used for fine-tuning, one per line.
print(
    *(f"{label} {value!s}" for label, value in (("lr:", lr), ("batch size:", batch_size), ("epoch:", epoch))),
    sep="\n",
)

lr: 0.001
batch size: 500
epoch: 20


In [17]:
%%time
#################################################### training ######################################################### 

# Seed torch so the DataLoader shuffling inside train() -- and therefore the
# fine-tuned weights -- are reproducible across Restart & Run All.
torch.manual_seed(0)

turbines = data_finetune.instanceID.unique()

# One list per quantile; entry i holds the fine-tuned state dict of turbines[i].
median_state_dict_all = []
UQ_state_dict_all = []
LQ_state_dict_all = []

# (key into `pretrain`, quantile level, list that collects the result).
# A single table replaces three copy-pasted create/load/train sequences.
quantile_runs = (
    ('median', q_median, median_state_dict_all),
    ('UQ', q_upper, UQ_state_dict_all),
    ('LQ', q_lower, LQ_state_dict_all),
)

for ID in turbines:

    # select data based on turbine ID
    data_temp = data_finetune[data_finetune['instanceID'] == ID]

    # normalize data: positions 5:-1 are the feature columns and the last column
    # is the target (Wind_speed, TI, Temperature -> Power in the frame preview)
    X = scaler1.transform(data_temp.iloc[:, 5:-1])
    y = scaler2.transform(data_temp.iloc[:, -1:])

    for pretrain_key, quantile, collected in quantile_runs:
        # start every turbine/quantile from the corresponding pretrained weights
        net_temp = Net(dims=dims)
        net_temp.load_state_dict(pretrain[pretrain_key])

        # fine-tune on this turbine's data only
        _, state_dict = train(X=X, y=y, quantile=quantile, net=net_temp,
                              lr=lr, batch_size=batch_size, epoch=epoch)
        collected.append(state_dict)

    print('Done', ID)
    

Done C_WTG01
Done C_WTG02
Done C_WTG03
Done C_WTG04
Done C_WTG05
Done C_WTG06
Done C_WTG07
Done C_WTG08
Done C_WTG09
Done C_WTG10
Done C_WTG11
Done C_WTG12
Done C_WTG13
Done C_WTG14
Done C_WTG15
Done C_WTG16
Done C_WTG17
Done C_WTG18
Done C_WTG19
Done C_WTG20
Done C_WTG21
Wall time: 2min 3s


### Save trained network

In [18]:
# Key each fine-tuned state dict by its turbine ID; both sequences were built in
# the same turbine order, so pairing them positionally is correct.
median_state_dict_all_zip = {turbine: weights for turbine, weights in zip(turbines, median_state_dict_all)}
UQ_state_dict_all_zip = {turbine: weights for turbine, weights in zip(turbines, UQ_state_dict_all)}
LQ_state_dict_all_zip = {turbine: weights for turbine, weights in zip(turbines, LQ_state_dict_all)}

In [19]:
# save trained network
# One file per quantile, each holding a {turbine ID: state dict} mapping.
# Paths are relative to the working directory, the same way the input CSV and the
# scaler files are read. The earlier `sys.path[0] + '/...'` used the import search
# path as an output directory: with sys.path[0] == '' (Python's spelling of
# "current directory") it resolved to the filesystem root.

torch.save(median_state_dict_all_zip, 'median.pth')
torch.save(UQ_state_dict_all_zip, 'UQ.pth')
torch.save(LQ_state_dict_all_zip, 'LQ.pth')