In [1]:
import numpy as np
import matplotlib.pylab as plt
from math import *
#from sklearn.models import train_test_split
from sklearn.metrics import mean_squared_error as mse
from sklearn import svm
import pandas as pd
import seaborn as sns
from pylab import rcParams
from matplotlib import rc
from mpl_toolkits.mplot3d import Axes3D
import time
%matplotlib notebook

In [2]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
RANDOM_SEED = 42
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
HAPPY_COLORS_PALETTE = ['#01BEFE', '#FFDD00', '#FF7D00', '#FF006D', '#ADFF02', '#8F00FF']
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
rcParams['figure.figsize'] = 8, 4


In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
from sklearn.metrics import mean_squared_error as mse

### April Data

In [5]:
df = pd.read_csv('inputs/SVMD_1.csv')

In [6]:
df.shape

(1440, 4)

In [7]:
import torch
import gc
import torch.nn as nn
from tqdm import tqdm_notebook as tqdm
from torch.utils.data import Dataset, DataLoader

In [8]:
orig_df = pd.read_csv('inputs/DS1_1440.csv')

In [9]:
imfs = np.array([df[col].values for col in df.columns])

In [10]:
error = orig_df.wind_speed.values - imfs.sum(axis=0)

In [11]:
error.shape

(1440,)

In [12]:
tot_decomp = np.concatenate([imfs, error.reshape(1, -1)], axis=0)

In [13]:
tot_decomp.shape

(5, 1440)

In [14]:
class WindDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, idx):
        item = self.x[idx]
        label = self.y[idx]
        
        return item, label

In [15]:
class CNNLSTMForecast(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(CNNLSTMForecast, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv1d(in_channels=input_size, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            # nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1, stride=1)
        )
        self.lstm = nn.LSTM(input_size=128, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        #cnn takes input of shape (batch_size, channels, seq_len)
        x = x.permute(0, 2, 1)
        out = self.cnn(x)
        # lstm takes input of shape (batch_size, seq_len, input_size)
        out = out.permute(0, 2, 1)
        out, _ = self.lstm(out)
        out = self.fc(out[:, -1, :])
        return out

In [26]:
from processing import create_features, split_data

In [27]:
window_size = 5

In [20]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [21]:
global tot_valid_loss

In [22]:
def Train(model, train_loader, optimizer, criterion):
        
    running_loss = .0
    
    model.train()
    
    for idx, (inputs,labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        preds = model(inputs)
        loss = criterion(preds,labels)
        loss.backward()
        optimizer.step()
        running_loss += loss
        

    
    # print(f'train_loss {train_loss}')
    
def Valid(model, valid_loader, optimizer, criterion):
    running_loss = .0
    
    model.eval()
    
    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(valid_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            preds = model(inputs)
            loss = criterion(preds,labels)
            running_loss += loss


In [23]:
NUM_EPOCHS = 200

In [24]:
all_predctions = []

In [30]:
input_size = 5 #number of features
hidden_size = 200 #number of features in hidden state
num_layers = 1 #number of stacked lstm layers

num_classes = 1 #number of output classes 
seq_length = 1

In [32]:
for i in range(len(tot_decomp)):

    model = CNNLSTMForecast(input_size, hidden_size, num_layers, num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
    criterion = nn.MSELoss()

    train_len = int(.6*tot_decomp.shape[1])
    train_data, val_data, test_data = split_data(tot_decomp[i, :], train_len)
    train_data = np.float32(train_data)
    val_data = np.float32(val_data)
    test_data = np.float32(test_data)

    xtrain, ytrain = create_features(train_data, window_size)
    xval, yval = create_features(val_data, window_size)
    xtest, ytest = create_features(test_data, window_size)

    train = WindDataset(xtrain.reshape(xtrain.shape[0], xtrain.shape[1], 1), ytrain)
    valid = WindDataset(xval.reshape(xval.shape[0], xval.shape[1], 1), yval)
    test = WindDataset(xtest.reshape(xtest.shape[0], xtest.shape[1], 1), ytest)
    train_loader = torch.utils.data.DataLoader(train, batch_size=1, shuffle=False)
    valid_loader = torch.utils.data.DataLoader(train, batch_size=1, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False)


    for epoch in range(NUM_EPOCHS):
        # print('epochs {}/{}'.format(epoch+1,epochs))
        Train(model, train_loader, optimizer, criterion)
        Valid(model, valid_loader, optimizer, criterion)
        gc.collect()

    predictions = []
    model.eval()
    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(test_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            for input in inputs:
                preds = model.forward(input)
                
                predictions.append(preds.item())
    all_predctions.append(predictions)
            

RuntimeError: Given groups=1, weight of size [64, 5, 3], expected input[1, 1, 5] to have 5 channels, but got 1 channels instead

In [None]:
ytes.shape

(1435,)

In [None]:
_, _, test = split_data(orig_df.wind_speed.values, train_len)
_, ytes = create_features(test, window_size)

all_preds = np.array([pred for pred in all_predctions]).sum(axis=0)
mse(all_preds, ytes)


0.5172060386573049