In [2]:
import numpy as np
import matplotlib.pylab as plt
from math import *
#from sklearn.models import train_test_split
from sklearn.metrics import mean_squared_error as mse
from sklearn import svm
import pandas as pd
import seaborn as sns
from pylab import rcParams
from matplotlib import rc
from mpl_toolkits.mplot3d import Axes3D
import time
%matplotlib notebook

In [3]:
# Render figures inline (this cell runs after the `%matplotlib notebook` call in the import cell)
# and ask the inline backend for high-resolution ("retina") images.
%matplotlib inline
%config InlineBackend.figure_format='retina'
# Shared seed constant for reproducible runs.
RANDOM_SEED = 42
# Global seaborn theme: white grid, slightly enlarged fonts.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
# Custom colour cycle; set_palette below replaces the 'muted' palette chosen in sns.set above.
HAPPY_COLORS_PALETTE = ['#01BEFE', '#FFDD00', '#FF7D00', '#FF006D', '#ADFF02', '#8F00FF']
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
# Default figure size as (width, height) in inches.
rcParams['figure.figsize'] = 8, 4


In [4]:
import torch
import gc
import torch.nn as nn
from tqdm import tqdm_notebook as tqdm
from torch.utils.data import Dataset, DataLoader

In [5]:
import warnings
warnings.filterwarnings('ignore')

In [6]:
from sklearn.metrics import mean_squared_error as mse

### April Data

In [7]:
# Load the April decomposition table; every column is stacked into `imfs` as one component below.
df = pd.read_csv('inputs/SVMD_1.csv')

In [8]:
df.shape

(1440, 4)

In [9]:
# Load the April original series; its `wind_speed` column is the signal the components are compared against.
orig_df = pd.read_csv('inputs/DS1_1440.csv')

In [10]:
# One row per column of the decomposition table.
component_rows = [df[name].values for name in df.columns]
imfs = np.array(component_rows)

In [11]:
# Residual: what remains of the original wind_speed series after subtracting the
# element-wise sum of all component rows in `imfs`.
error = orig_df.wind_speed.values - imfs.sum(axis=0)

In [12]:
error.shape

(1440,)

In [13]:
# Append the residual as one extra row beneath the component rows.
residual_row = error.reshape(1, -1)
tot_decomp = np.concatenate((imfs, residual_row), axis=0)

In [14]:
tot_decomp.shape

(5, 1440)

In [15]:
class WindDataset(Dataset):
    """Pairs each entry of ``x`` with the entry of ``y`` at the same index.

    ``x`` and ``y`` are stored as given (no copy, no conversion); the dataset
    length is ``len(x)``.
    """

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        # Returned as an (item, label) tuple.
        return self.x[idx], self.y[idx]

In [16]:
class CNNLSTMForecast(nn.Module):
    """Two Conv1d+ReLU stages feeding an LSTM, followed by a linear read-out.

    Args:
        input_size: number of input channels of the first Conv1d, i.e. the
            size of the last axis of the tensor passed to ``forward``.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: output width of the final linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        conv_stack = [
            nn.Conv1d(in_channels=input_size, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            # kernel_size=1 / stride=1 pooling leaves the sequence length unchanged.
            nn.MaxPool1d(kernel_size=1, stride=1),
        ]
        self.cnn = nn.Sequential(*conv_stack)
        self.lstm = nn.LSTM(
            input_size=128,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a 3-D batch ``x`` to a ``(batch, num_classes)`` tensor."""
        # Conv1d wants (batch, channels, seq_len), so swap the last two axes.
        conv_in = x.permute(0, 2, 1)
        conv_out = self.cnn(conv_in)
        # The batch_first LSTM wants (batch, seq_len, features): swap back.
        lstm_in = conv_out.permute(0, 2, 1)
        lstm_out, _ = self.lstm(lstm_in)
        # Only the last time step is passed to the linear head.
        last_step = lstm_out[:, -1, :]
        return self.fc(last_step)

In [17]:
from processing import create_features, split_data

In [18]:
window_size = 5

In [19]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [20]:
# NOTE(review): a `global` statement at module (cell) level has no effect, since names bound
# here are already global. `tot_valid_loss` is never assigned or read in the visible cells.
global tot_valid_loss

In [21]:
def Train(model, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader`` and return the mean batch loss.

    Args:
        model: ``nn.Module`` to optimise; it must own at least one parameter,
            because batches are moved to the device of its first parameter.
        train_loader: iterable yielding ``(inputs, labels)`` tensor pairs.
        optimizer: optimizer bound to ``model``'s parameters.
        criterion: callable ``criterion(preds, labels)`` returning a scalar
            loss tensor.

    Returns:
        float: average of the per-batch loss values, or ``nan`` when the
        loader yields no batches.
    """
    # Follow the model's own device rather than a notebook-level global.
    model_device = next(model.parameters()).device
    running_loss = 0.0
    n_batches = 0

    model.train()

    for inputs, labels in train_loader:
        inputs = inputs.to(model_device)
        labels = labels.to(model_device)
        optimizer.zero_grad()
        preds = model(inputs)
        loss = criterion(preds, labels)
        loss.backward()
        optimizer.step()
        # .item() yields a plain float, so the running total does not keep
        # every batch's autograd graph alive.
        running_loss += loss.item()
        n_batches += 1

    return running_loss / n_batches if n_batches else float("nan")
    
def Valid(model, valid_loader, optimizer, criterion):
    """Score ``model`` on ``valid_loader`` without updating it; return the mean batch loss.

    Args:
        model: ``nn.Module`` to evaluate; it must own at least one parameter,
            because batches are moved to the device of its first parameter.
        valid_loader: iterable yielding ``(inputs, labels)`` tensor pairs.
        optimizer: unused; accepted so existing call sites keep working.
        criterion: callable ``criterion(preds, labels)`` returning a scalar
            loss tensor.

    Returns:
        float: average of the per-batch loss values, or ``nan`` when the
        loader yields no batches.
    """
    # Follow the model's own device rather than a notebook-level global.
    model_device = next(model.parameters()).device
    running_loss = 0.0
    n_batches = 0

    model.eval()

    # No gradients are produced here, so there is nothing for the optimizer to clear.
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs = inputs.to(model_device)
            labels = labels.to(model_device)
            preds = model(inputs)
            running_loss += criterion(preds, labels).item()
            n_batches += 1

    return running_loss / n_batches if n_batches else float("nan")


In [22]:
NUM_EPOCHS = 200

In [23]:
all_predctions = []

In [24]:
# Conv1d input channels. The model's forward() swaps the last two axes of each
# (batch, 1, n_features) sample, so this has to equal the sample's last axis,
# presumably the look-back window produced by create_features (TODO confirm).
# Tied to window_size so the two cannot drift apart.
input_size = window_size
hidden_size = 200  # size of the LSTM hidden state
num_layers = 1  # number of stacked LSTM layers

num_classes = 1  # width of the final linear layer (a single forecast value)
seq_length = 1  # not referenced by any cell visible in this notebook

In [25]:
# Start from an empty collector so re-running this cell never appends a second
# set of forecasts on top of the first.
all_predctions = []

# Seed model initialisation so repeated runs start from the same weights.
torch.manual_seed(RANDOM_SEED)

# Loop-invariant: every row of tot_decomp uses the same split point and loss.
train_len = int(.6*tot_decomp.shape[1])
criterion = nn.MSELoss()

for i in range(len(tot_decomp)):
    # A fresh model and optimizer for each row of tot_decomp.
    model = CNNLSTMForecast(input_size, hidden_size, num_layers, num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

    train_data, val_data, test_data = split_data(tot_decomp[i, :], train_len)
    train_data = np.float32(train_data)
    val_data = np.float32(val_data)
    test_data = np.float32(test_data)

    xtrain, ytrain = create_features(train_data, window_size)
    xval, yval = create_features(val_data, window_size)
    xtest, ytest = create_features(test_data, window_size)

    # Insert a singleton middle axis: (n_samples, 1, n_features).
    train = WindDataset(xtrain.reshape(xtrain.shape[0], 1, xtrain.shape[1]), ytrain)
    valid = WindDataset(xval.reshape(xval.shape[0], 1, xval.shape[1]), yval)
    test = WindDataset(xtest.reshape(xtest.shape[0], 1, xtest.shape[1]), ytest)
    train_loader = torch.utils.data.DataLoader(train, batch_size=1, shuffle=False)
    # Fixed: this loader used to wrap `train`, so "validation" re-scored the training set.
    valid_loader = torch.utils.data.DataLoader(valid, batch_size=1, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False)

    for epoch in range(NUM_EPOCHS):
        Train(model, train_loader, optimizer, criterion)
        Valid(model, valid_loader, optimizer, criterion)
        gc.collect()

    # Forecast the test split one sample at a time (batch_size=1, so .item() is valid).
    predictions = []
    model.eval()
    with torch.no_grad():
        for inputs, _ in test_loader:
            preds = model(inputs.to(device))
            predictions.append(preds.item())
    all_predctions.append(predictions)
            

In [26]:
# Rebuild the test targets from the undecomposed series using the same split
# point and window as the per-component models.
_, _, test = split_data(orig_df.wind_speed.values, train_len)
_, ytes = create_features(test, window_size)

# The combined forecast is the element-wise sum of the per-component forecasts.
all_preds = np.array(all_predctions).sum(axis=0)
# mean_squared_error's signature is (y_true, y_pred).
mse(ytes, all_preds)


0.9079519441232528

In [27]:
all_preds

array([11.01087415, 11.38266122, 11.8337919 , 11.39067348, 10.86771159,
       10.46304697, 10.31153654, 10.70922196, 10.03271736, 10.46446239,
       10.4551545 ,  9.78595465,  9.54066101,  9.1264707 ,  9.05753418,
        8.85437032,  8.73123191,  8.88283913,  8.48610983,  8.01797293,
        7.63595568,  7.70193258,  7.93584391,  7.93049605,  7.95274138,
        8.26728433,  7.95152839,  8.20119161,  8.19619446,  8.08709554,
        8.10952662,  8.84816869,  8.72161406,  8.59169206,  9.69968739,
        9.97348009,  9.74186297,  9.2085916 ,  9.56506838, 10.05344355,
        9.98819032,  9.56720677,  9.4275665 ,  9.91812546,  9.94274881,
        9.92414685, 10.18293482, 10.71238156, 11.02542123, 10.84765381,
       10.1239273 , 10.54459451, 10.53103498, 10.41320439, 10.51785363,
       10.30417887, 10.48530903, 10.20941931, 10.16596554, 10.70308118,
        9.43420567,  9.24152778,  9.36340097,  9.66387448,  9.48352171,
        9.54472854,  9.34079918,  9.24320376,  9.22756653,  9.71

In [28]:
import pickle

In [29]:
# Write with 'wb' rather than 'ab': in append mode every re-run stacks another
# pickle onto the file and a single pickle.load() would return the oldest one.
# The context manager closes the file even if dump() raises.
with open('svmd_cnn_lstm_1', 'wb') as file:
    pickle.dump(all_preds, file)

### May Data

In [30]:
# Load the May decomposition table; this rebinds `df`, replacing the April frame.
df = pd.read_csv('inputs/SVMD_2.csv')

In [31]:
df.shape

(1488, 6)

In [32]:
# Load the May original series; this rebinds `orig_df`, replacing the April frame.
# NOTE(review): the file name says 1448, but the recorded shapes below show 1488 samples.
# Confirm the name is intentional and not a typo for 1488.
orig_df = pd.read_csv('inputs/DS2_1448.csv')

In [33]:
# One row per column of the decomposition table.
component_rows = [df[name].values for name in df.columns]
imfs = np.array(component_rows)

In [34]:
# Residual: what remains of the original wind_speed series after subtracting the
# element-wise sum of all component rows in `imfs`.
error = orig_df.wind_speed.values - imfs.sum(axis=0)

In [35]:
error.shape

(1488,)

In [36]:
# Append the residual as one extra row beneath the component rows.
residual_row = error.reshape(1, -1)
tot_decomp = np.concatenate((imfs, residual_row), axis=0)

In [37]:
tot_decomp.shape

(7, 1488)

In [38]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [39]:
def Train(model, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader`` and return the mean batch loss.

    Args:
        model: ``nn.Module`` to optimise; it must own at least one parameter,
            because batches are moved to the device of its first parameter.
        train_loader: iterable yielding ``(inputs, labels)`` tensor pairs.
        optimizer: optimizer bound to ``model``'s parameters.
        criterion: callable ``criterion(preds, labels)`` returning a scalar
            loss tensor.

    Returns:
        float: average of the per-batch loss values, or ``nan`` when the
        loader yields no batches.
    """
    # Follow the model's own device rather than a notebook-level global.
    model_device = next(model.parameters()).device
    running_loss = 0.0
    n_batches = 0

    model.train()

    for inputs, labels in train_loader:
        inputs = inputs.to(model_device)
        labels = labels.to(model_device)
        optimizer.zero_grad()
        preds = model(inputs)
        loss = criterion(preds, labels)
        loss.backward()
        optimizer.step()
        # .item() yields a plain float, so the running total does not keep
        # every batch's autograd graph alive.
        running_loss += loss.item()
        n_batches += 1

    return running_loss / n_batches if n_batches else float("nan")
    
def Valid(model, valid_loader, optimizer, criterion):
    """Score ``model`` on ``valid_loader`` without updating it; return the mean batch loss.

    Args:
        model: ``nn.Module`` to evaluate; it must own at least one parameter,
            because batches are moved to the device of its first parameter.
        valid_loader: iterable yielding ``(inputs, labels)`` tensor pairs.
        optimizer: unused; accepted so existing call sites keep working.
        criterion: callable ``criterion(preds, labels)`` returning a scalar
            loss tensor.

    Returns:
        float: average of the per-batch loss values, or ``nan`` when the
        loader yields no batches.
    """
    # Follow the model's own device rather than a notebook-level global.
    model_device = next(model.parameters()).device
    running_loss = 0.0
    n_batches = 0

    model.eval()

    # No gradients are produced here, so there is nothing for the optimizer to clear.
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs = inputs.to(model_device)
            labels = labels.to(model_device)
            preds = model(inputs)
            running_loss += criterion(preds, labels).item()
            n_batches += 1

    return running_loss / n_batches if n_batches else float("nan")


In [40]:
NUM_EPOCHS = 200

In [41]:
all_predctions_2 = []

In [42]:
# Conv1d input channels. The model's forward() swaps the last two axes of each
# (batch, 1, n_features) sample, so this has to equal the sample's last axis,
# presumably the look-back window produced by create_features (TODO confirm).
# Tied to window_size so the two cannot drift apart.
input_size = window_size
hidden_size = 200  # size of the LSTM hidden state
num_layers = 1  # number of stacked LSTM layers

num_classes = 1  # width of the final linear layer (a single forecast value)
seq_length = 1  # not referenced by any cell visible in this notebook

In [43]:
# Start from an empty collector so re-running this cell never appends a second
# set of forecasts on top of the first.
all_predctions_2 = []

# Seed model initialisation so repeated runs start from the same weights.
torch.manual_seed(RANDOM_SEED)

# Loop-invariant: every row of tot_decomp uses the same split point and loss.
train_len = int(.6*tot_decomp.shape[1])
criterion = nn.MSELoss()

for i in range(len(tot_decomp)):
    # A fresh model and optimizer for each row of tot_decomp.
    model = CNNLSTMForecast(input_size, hidden_size, num_layers, num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

    train_data, val_data, test_data = split_data(tot_decomp[i, :], train_len)
    train_data = np.float32(train_data)
    val_data = np.float32(val_data)
    test_data = np.float32(test_data)

    xtrain, ytrain = create_features(train_data, window_size)
    xval, yval = create_features(val_data, window_size)
    xtest, ytest = create_features(test_data, window_size)

    # Insert a singleton middle axis: (n_samples, 1, n_features).
    train = WindDataset(xtrain.reshape(xtrain.shape[0], 1, xtrain.shape[1]), ytrain)
    valid = WindDataset(xval.reshape(xval.shape[0], 1, xval.shape[1]), yval)
    test = WindDataset(xtest.reshape(xtest.shape[0], 1, xtest.shape[1]), ytest)
    train_loader = torch.utils.data.DataLoader(train, batch_size=1, shuffle=False)
    # Fixed: this loader used to wrap `train`, so "validation" re-scored the training set.
    valid_loader = torch.utils.data.DataLoader(valid, batch_size=1, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False)

    for epoch in range(NUM_EPOCHS):
        Train(model, train_loader, optimizer, criterion)
        Valid(model, valid_loader, optimizer, criterion)
        gc.collect()

    # Forecast the test split one sample at a time (batch_size=1, so .item() is valid).
    predictions = []
    model.eval()
    with torch.no_grad():
        for inputs, _ in test_loader:
            preds = model(inputs.to(device))
            predictions.append(preds.item())
    all_predctions_2.append(predictions)
            

In [44]:
# Rebuild the test targets from the undecomposed series using the same split
# point and window as the per-component models.
_, _, test = split_data(orig_df.wind_speed.values, train_len)
_, ytes = create_features(test, window_size)

# The combined forecast is the element-wise sum of the per-component forecasts.
all_preds_2 = np.array(all_predctions_2).sum(axis=0)
# mean_squared_error's signature is (y_true, y_pred).
mse(ytes, all_preds_2)


0.8076012489401859

In [45]:
import pickle

In [61]:
# Write with 'wb' rather than 'ab': in append mode every re-run stacks another
# pickle onto the file and a single pickle.load() would return the oldest one.
# The context manager closes the file even if dump() raises.
with open('svmd_cnnlstm_2', 'wb') as file:
    pickle.dump(all_preds_2, file)

In [50]:
len(all_preds)

283