In [6]:
import torch
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import time
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from typing import Iterable
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
from joblib import dump, load

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor
import sys

In [7]:
# identifier of the site this notebook is run for
site = "A"

In [8]:
# read the pretraining samples; the first CSV column is used as the row index
data_pretrain = pd.read_csv("data_pretrain.csv", index_col=0)

In [9]:
# display the loaded pretraining frame as the cell output
data_pretrain

Unnamed: 0,ts,Month,Day,Hour,Wind_speed,TI,Temperature,Power
167553,2020-07-12 22:10:00,7,12,22,6.131801,11.858708,12.950720,444.45150
49567,2020-02-27 08:50:00,2,27,8,10.811110,15.976056,1.169232,1954.85700
141167,2020-06-12 09:10:00,6,12,9,9.529058,13.774205,10.015760,1854.28800
25166,2020-01-30 03:00:00,1,30,3,9.971829,14.215015,7.121192,1821.84900
59366,2020-03-09 17:00:00,3,9,17,9.988299,14.935616,4.491905,1945.55200
...,...,...,...,...,...,...,...,...
96843,2020-04-22 02:00:00,4,22,2,7.039987,9.846843,6.488026,681.20100
23171,2020-01-27 19:30:00,1,27,19,4.644200,12.817917,2.705695,183.79530
204842,2020-08-25 02:00:00,8,25,2,6.133950,11.147895,10.880120,416.12640
266398,2020-11-04 07:50:00,11,4,7,7.128852,13.091645,5.901052,830.52080


In [5]:
# load the two previously saved normalization objects
# (scaler1 is applied to the input columns, scaler2 to the target column below)
# NOTE(review): joblib's load unpickles the file, so only load files from a trusted source
scaler1, scaler2 = (load(scaler_file) for scaler_file in ('scaler1.bin', 'scaler2.bin'))

### Define network

In [6]:
class Net(nn.Module):
    """Feed-forward stack of ``nn.Linear`` layers with a ReLU after every hidden layer."""

    def __init__(self, dims: Iterable[int], output_activation: nn.Module = None):
        """Creates a network using ReLUs between layers and an optional activation at the end

        :param dims (Iterable[int]): sizes in the form of (IN_SIZE, HIDDEN_SIZE, HIDDEN_SIZE2,
            ..., OUT_SIZE) for dimensionalities of layers; any iterable with at least two
            entries is accepted (it is materialised once into a tuple)
        :param output_activation (nn.Module): PyTorch activation class (it is called with no
            arguments to build the module) to use after last layer; a falsy value such as
            None leaves the last layer linear
        :raises ValueError: if dims contains fewer than two sizes
        """
        super().__init__()
        # materialise first: dims may be a one-shot iterator and is read several times
        dims = self._as_layer_sizes(dims)
        self.input_size = dims[0]
        self.out_size = dims[-1]
        self.layers = self.make_seq(dims, output_activation)

    @staticmethod
    def _as_layer_sizes(dims: Iterable[int]) -> tuple:
        """Returns dims as a tuple after checking it describes at least one layer

        :param dims (Iterable[int]): layer sizes, input size first and output size last
        :return (tuple): the same sizes as an indexable tuple
        :raises ValueError: if fewer than two sizes are given
        """
        sizes = tuple(dims)
        if len(sizes) < 2:
            raise ValueError(
                "dims must contain at least an input and an output size, got %d value(s)"
                % len(sizes)
            )
        return sizes

    @staticmethod
    def make_seq(dims: Iterable[int], output_activation: nn.Module) -> nn.Module:
        """Creates a sequential network using ReLUs between layers and an optional activation
        at the end

        :param dims (Iterable[int]): sizes in the form of (IN_SIZE, HIDDEN_SIZE, HIDDEN_SIZE2,
            ..., OUT_SIZE) for dimensionalities of layers
        :param output_activation (nn.Module): PyTorch activation class (called with no
            arguments) to use after last layer; a falsy value adds nothing
        :return (nn.Module): return created sequential layers
        :raises ValueError: if dims contains fewer than two sizes
        """
        dims = Net._as_layer_sizes(dims)
        mods = []

        # every layer except the last one is followed by a ReLU
        for in_size, out_size in zip(dims[:-2], dims[1:-1]):
            mods.append(nn.Linear(in_size, out_size))
            mods.append(nn.ReLU())

        # the last layer stays linear unless an output activation is requested
        mods.append(nn.Linear(dims[-2], dims[-1]))
        if output_activation:
            mods.append(output_activation())
        return nn.Sequential(*mods)

    def forward(self, x: Tensor) -> Tensor:
        """Computes a forward pass through the network

        :param x (torch.Tensor): input tensor to feed into the network
        :return (torch.Tensor): output computed by the network
        """
        # Feedforward
        return self.layers(x)


In [7]:
def train(X, y, quantile, net, lr, batch_size, epoch):
    """Trains ``net`` in place on (X, y) with the quantile (pinball) loss and Adam

    The loss for one sample is max(q * r, (q - 1) * r) with r = y - net(X), averaged over
    the mini-batch.

    :param X: array-like of inputs, one row per sample
    :param y: array-like of targets with one row per sample; a 1-D array is treated as a
        single target column so that it lines up with the network output instead of
        broadcasting against it
    :param quantile (float): quantile level q used in the loss
    :param net (nn.Module): network to optimise; its parameters are updated in place
    :param lr (float): learning rate for the Adam optimizer
    :param batch_size (int): number of samples per mini-batch
    :param epoch (int): number of passes over the data
    :return: tuple (net, net.state_dict()) with the same network object that was passed in
    """
    # convert with the default floating dtype, which is what the legacy Tensor(...) call produced
    dtype = torch.get_default_dtype()
    features = torch.as_tensor(X, dtype=dtype)
    targets = torch.as_tensor(y, dtype=dtype)
    if targets.ndim == 1:
        # (N,) minus an (N, 1) output would broadcast to (N, N) and silently train on the
        # wrong residuals, so give 1-D targets an explicit column axis
        targets = targets.unsqueeze(-1)

    # mini-batches are reshuffled on every epoch
    loader = DataLoader(TensorDataset(features, targets), batch_size=batch_size, shuffle=True)

    # define optimizer
    optimizer = optim.Adam(net.parameters(), lr=lr)

    for _ in range(epoch):
        for X_batch, y_batch in loader:
            residual = y_batch - net(X_batch)
            # pinball loss: element-wise maximum of the two linear branches
            loss = torch.max(quantile * residual, (quantile - 1) * residual).mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    return net, net.state_dict()

### Pretraining

In [10]:
# load hyperparameters
# os.path.join is used instead of string concatenation: when sys.path[0] is the empty
# string (which Python uses when no script directory is available), this resolves to
# 'hparams.pth' in the working directory rather than '/hparams.pth' at the filesystem root
# NOTE(review): torch.load unpickles the file, so only load files from a trusted source
model = torch.load(os.path.join(sys.path[0], 'hparams.pth'))

In [9]:
# quantile levels: the median plus the 2.5% / 97.5% levels (bounds of a 95% interval)
q_lower, q_median, q_upper = 0.025, 0.5, 0.975

# hyperparameters read from the loaded dictionary
dims, lr, epoch = model['dims'], model['lr'], model['epoch']
# the stored batch size is cast to int before it is passed on to the data loader
batch_size = int(model['batch_size'])

In [10]:
# echo the hyperparameters that will be used for training
for label, value in (("lr:", lr), ("batch size:", batch_size), ("epoch:", epoch)):
    print(label, value)

lr: 0.005
batch size: 1000
epoch: 20


In [11]:
# one independently initialised network per quantile, all with the same layer sizes
# NOTE(review): no random seed is set in the visible cells, so the initial weights
# differ from run to run
net_median, net_upper, net_lower = (Net(dims=dims) for _ in range(3))

In [12]:
# preview the columns from position 4 up to (but not including) the last one;
# the same slice is passed to scaler1 in the normalization cell
data_pretrain.iloc[:, 4:-1]

Unnamed: 0,Wind_speed,TI,Temperature
167553,6.131801,11.858708,12.950720
49567,10.811110,15.976056,1.169232
141167,9.529058,13.774205,10.015760
25166,9.971829,14.215015,7.121192
59366,9.988299,14.935616,4.491905
...,...,...,...
96843,7.039987,9.846843,6.488026
23171,4.644200,12.817917,2.705695
204842,6.133950,11.147895,10.880120
266398,7.128852,13.091645,5.901052


In [13]:
# inputs: the columns from position 4 up to (but not including) the last one
features_pretrain = data_pretrain.iloc[:, 4:-1]
# target: the last column; the -1: slice keeps it as a one-column frame
target_pretrain = data_pretrain.iloc[:, -1:]

# apply the previously loaded scalers (inputs with scaler1, target with scaler2)
X_pretrain = scaler1.transform(features_pretrain)
y_pretrain = scaler2.transform(target_pretrain)

In [14]:
%%time
# Train the three quantile networks on the same data with the same settings
train_kwargs = dict(X=X_pretrain, y=y_pretrain, lr=lr, batch_size=batch_size, epoch=epoch)

net_median, median_state_dict = train(quantile=q_median, net=net_median, **train_kwargs)
net_upper, UQ_state_dict = train(quantile=q_upper, net=net_upper, **train_kwargs)
net_lower, LQ_state_dict = train(quantile=q_lower, net=net_lower, **train_kwargs)

Wall time: 4min 6s


### Save trained network

In [15]:
# collect the trained weights of the three networks under one key per quantile
pretrain_state_dict = {
    key: trained_net.state_dict()
    for key, trained_net in (('median', net_median), ('UQ', net_upper), ('LQ', net_lower))
}

In [16]:
# Saving is currently disabled; uncomment the line below to write the weights to disk.
# torch.save(pretrain_state_dict, sys.path[0] + '/pretrain.pth')