In [None]:
#    Copyright 2020 Arkadip Bhattacharya

#    Licensed under the Apache License, Version 2.0 (the "License");
#    you may not use this file except in compliance with the License.
#    You may obtain a copy of the License at

#        http://www.apache.org/licenses/LICENSE-2.0

#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.

### Wind speed Prediction

This notebook contains all necessary codes to Train a `Deep Learning Model` and Validate it.

The Model is based on `Pytorch`

In [1]:
## Importing all the necessary Libraries along with some self-made functions

%load_ext autoreload
%autoreload 2

import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import dataloader
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from preprocessing import WindSpeedTimeSeriesDataset, ComposeTransform, ToTensor

In [2]:
# Reading the Dataset

dataset = pd.read_csv('./dataset-hourly.csv')
dataset.head()

Unnamed: 0,time,air_temperature_mean,pressure,wind_direction,wind_speed
0,2010-01-01 00:00:00,-1.8,997.0,59,3.8
1,2010-01-01 01:00:00,-2.1,996.0,58,3.5
2,2010-01-01 02:00:00,-2.1,996.0,49,3.7
3,2010-01-01 03:00:00,-2.2,996.0,49,3.5
4,2010-01-01 04:00:00,-2.2,996.0,49,3.5


In [3]:
# Normalizing te data using the MinMaxScaler of sklearn

scaler = MinMaxScaler()
dataset[['air_temperature_mean',
         'pressure',
         'wind_direction',
         'wind_speed']] = scaler.fit_transform(dataset[['air_temperature_mean',
                                                        'pressure', 
                                                        'wind_direction', 
                                                        'wind_speed']])
dataset.head()

Unnamed: 0,time,air_temperature_mean,pressure,wind_direction,wind_speed
0,2010-01-01 00:00:00,0.260788,0.407895,0.163889,0.333333
1,2010-01-01 01:00:00,0.255159,0.394737,0.161111,0.307018
2,2010-01-01 02:00:00,0.255159,0.394737,0.136111,0.324561
3,2010-01-01 03:00:00,0.253283,0.394737,0.136111,0.307018
4,2010-01-01 04:00:00,0.253283,0.394737,0.136111,0.307018


In [4]:
# Spliting the Dataset into Train Set, Test Set and Validation Set

trainset, testset = train_test_split(dataset, test_size = 0.1)
trainset, valset = train_test_split(trainset, test_size = 0.1)

In [5]:
# A Example Training Attributes

trainset.iloc[0]

time                    2011-12-18 13:00:00
air_temperature_mean               0.363977
pressure                           0.473684
wind_direction                     0.697222
wind_speed                          0.54386
Name: 17197, dtype: object

In [6]:
# Creating the Time Series dataset Generators with window_size=6
# Using the ComposeTranform to Tranform each Time Series matrics into a Torch.Tensor

train_dataset = WindSpeedTimeSeriesDataset(trainset, window_size=6, transform=ComposeTransform([ToTensor()]))
test_dataset = WindSpeedTimeSeriesDataset(testset, window_size=6, transform=ComposeTransform([ToTensor()]))
val_dataset = WindSpeedTimeSeriesDataset(valset, window_size=6, transform=ComposeTransform([ToTensor()]))

In [10]:
# The first Time Series features matix
# Each row corresponding to a different times
# Each variables in a time signifies a feature

print('Train[0]:',train_dataset[0][0],'\n\nTest[0]:',train_dataset[0][1])

Train[0]: tensor([[0.3640, 0.4737, 0.6972, 0.5439],
        [0.6867, 0.7237, 0.8889, 0.3947],
        [0.3077, 0.6184, 0.0528, 0.3421],
        [0.7111, 0.6447, 0.7722, 0.3158],
        [0.6848, 0.6184, 0.9361, 0.2456],
        [0.4409, 0.7105, 0.7750, 0.3509]], dtype=torch.float64) 

Test[0]: tensor([0.3070], dtype=torch.float64)


In [12]:
print('Train[1]:',train_dataset[1][0],'\n\nTest[1]:',train_dataset[1][1])

Train[1]: tensor([[0.6867, 0.7237, 0.8889, 0.3947],
        [0.3077, 0.6184, 0.0528, 0.3421],
        [0.7111, 0.6447, 0.7722, 0.3158],
        [0.6848, 0.6184, 0.9361, 0.2456],
        [0.4409, 0.7105, 0.7750, 0.3509],
        [0.2495, 0.8816, 0.5111, 0.3070]], dtype=torch.float64) 

Test[1]: tensor([0.1930], dtype=torch.float64)


In [None]:
# Genarating Pytorch DataLoader for the Training and Testing of the Model
# Setting the Batch size

batch_size = 64

trainloader = dataloader.DataLoader(train_dataset, batch_size, shuffle = True)
valloader = dataloader.DataLoader(val_dataset, batch_size, shuffle = False)
testloader = dataloader.DataLoader(test_dataset, batch_size, shuffle = False)

In [None]:
# Checking the Size of the Features and Label matrix
# Features Size = (Batch_size, Time_steps, no_of_features)
# Leabels Size = (Batch_size, Time_steps, no_of_labels)

f, l = next(iter(trainloader))

print(f'Features Size: {f.shape}')
print(f'Labels Size: {l.shape}')

In [None]:
# Checking for the GPU for using Hardware Accelerations
# Set the `cuda` and `device` variables accordingly to use the correct device

cuda = torch.cuda.is_available()
device = torch.device('cuda') if cuda else torch.device('cpu')
if cuda:
    print("Device Count:", torch.cuda.device_count())
    print("Device:", torch.cuda.get_device_name())
    print("Device Capability:", torch.cuda.get_device_capability())

In [None]:
# Importing the Model from model.py
# Setting the Model hyperparameters and create an instance of it
# Send the model to the appropriate device

from model import Model
model = Model(input_size = 2,
              lstm_input_size = 128,
              lstm_hidden_size = 256,
              time_series = 6,
              lstm_num_layers = 1,
              cuda=cuda)
model.to(device)
print(model)

In [None]:
# Setting Up training criteria

from torch import nn
from torch.optim import Adam

optimizer = Adam(params=model.parameters(), lr= 0.001)
criterion = nn.SmoothL1Loss()

In [None]:
# loading the pretrained model

model_name = 'lstm-v1'
model.load_state_dict(torch.load(os.path.join('model', model_name, 'model.pt'), map_location=device))
optimizer.load_state_dict(torch.load(os.path.join('model', model_name, 'optim.pt'), map_location=device))

In [None]:
# Set the Model Optimizer, Using 'Adam'
# Set the Loss Function, Using 'SmoothL1Loss'
# Running the training routing


epochs = 40
trainlosses, testlosses = model.fit(trainloader = trainloader,
                                    validationloader = valloader,
                                    criterion = criterion,
                                    optimizer = optimizer,
                                    epochs = epochs, 
                                    val_every = 20)

In [None]:
# Ploting the Loss stats
%matplotlib qt

plt.plot(trainlosses, label = 'Training Losses')
plt.plot(testlosses, label = 'Testing Losses')
plt.ylabel('Losses')
plt.xlabel('Batch * Epoch')
plt.legend()
plt.title('Losses Graph')
plt.show()

In [None]:
# Running the model test
# Ruturns the result and actual arrayes

result, actual = model.test(testloader)

In [None]:
# Ploting the Validation Graph

# Just some random checking
%matplotlib qt
val_from = 100
val_to = 200

plt.plot(result[val_from:val_to], label = 'Result')
plt.plot(actual[val_from:val_to], label = 'Actual')
plt.ylabel('Wind Speed (Normalized)')
#plt.xticks(range(val_from, val_to))
plt.xlabel('Time Range')
plt.legend()
plt.title('Model Output Validation')
plt.show()

In [None]:
# Saving the Model and the summary
model_name = '03.c-c-f-l-f--SmoothL1-ADAM'

model.save_dict(model_name, save_optim=True)
model.save_summary(model_name, testloader)

In [None]:
mape = torch.Tensor([0])

In [None]:
model.to(device)
for f, l in testloader:
    mape += torch.mean(torch.abs(l.type(torch.FloatTensor) - model.forward(f.type(torch.FloatTensor))) / l.type(torch.FloatTensor))

In [None]:
model(f.type(torch.FloatTensor).cuda()).shape

In [None]:
l.data.numpy()