## Setup

Here, we'll import some basic libraries, check that CUDA is enabled, and mount your Google Drive so the notebook can read and write files in your account.

In [1]:
import torch
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


In [None]:
# Report the installed PyTorch build and whether a CUDA device is visible.
for caption, value in (('Version', torch.__version__),
                       ('CUDA enabled:', torch.cuda.is_available())):
    print(caption, value)

# Expected output (the exact version string will vary):
# Version 1.7.0+cu101
# CUDA enabled: True

In [None]:
# Mount Google Drive at /gdrive and list the mount point's contents.
from google.colab import drive
drive.mount('/gdrive')
!ls /gdrive

In [None]:
import os
BASE_PATH = '/gdrive/My Drive/colab_files/project'
if not os.path.exists(BASE_PATH):
    os.makedirs(BASE_PATH)
DATA_PATH = '/content/'

if not os.path.exists(os.path.join(DATA_PATH, 'harry_potter.txt')):
    os.chdir(BASE_PATH)
    !wget https://courses.cs.washington.edu/courses/cse599g1/19au/files/homework3.tar.gz
    !tar -zxvf homework3.tar.gz
    !rm homework3.tar.gz
    !cp pt_util.py /content
os.chdir('/content')

## Preparing the Data

The dataset we're using is the Jena Climate dataset from the Max Planck Institute for Biogeochemistry.

In [None]:
from zipfile import ZipFile
import torchtext.utils as utils
import pt_util

# Download the zipped Jena Climate CSV and unpack it into the current
# working directory.
url = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip"

path = utils.download_from_url(url)

# Context manager so the archive handle is closed once extraction finishes
# (it was previously left open for the lifetime of the kernel).
with ZipFile(path) as file:
    file.extractall()

# Name of the CSV that the archive is expected to contain.
csv_path = "jena_climate_2009_2016.csv"

In [7]:
import pickle

# Fraction of rows (taken from the start of the frame) used for training.
TRAIN_SPLIT = 0.7
# Fraction of rows directly after the training rows used for the eval split;
# every remaining row goes to the test split.
EVAL_SPLIT = 0.1

def prepare_data(data_path):
  """Clean the climate CSV, engineer features, and pickle the three splits.

  Steps performed on the CSV at ``data_path``:
    * keep every sixth row starting at row 5 (one sample per hour),
    * replace negative wind speeds with 0,
    * replace wind speed/direction with x/y wind-vector components,
    * replace the 'Date Time' column with daily and yearly sine/cosine signals,
    * split and standardize via ``normalize``.

  The resulting frames are written to DATA_PATH as
  ``jena_{train,eval,test}_climate_data.pkl``, each a dict with a single
  ``'data'`` entry.

  Args:
    data_path: Path of the CSV file to read.
  """
  # Each sample is in ten minute intervals, so select one every hour for hour intervals.
  # .copy() makes the subsample an independent frame so the column
  # assignments below never write into a slice of the full-resolution data.
  df = pd.read_csv(data_path)[5::6].copy()

  # Negative wind speeds are treated as invalid and replaced with 0.
  wv = df['wv (m/s)']
  df['wv (m/s)'] = wv.where(wv >= 0, 0)

  mv = df['max. wv (m/s)']
  df['max. wv (m/s)'] = mv.where(mv >= 0, 0)

  wv = df.pop('wv (m/s)')
  mv = df.pop('max. wv (m/s)')
  wd = df.pop('wd (deg)')

  # Direction in radians, then speed * (cos, sin) gives the wind vector.
  wr = wd * np.pi / 180
  df['wx (m/s)'] = wv * np.cos(wr)
  df['wy (m/s)'] = wv * np.sin(wr)

  df['max. wx (m/s)'] = mv * np.cos(wr)
  df['max. wy(m/s)'] = mv * np.sin(wr)

  date_time = pd.to_datetime(df.pop('Date Time'), format='%d.%m.%Y %H:%M:%S')

  # Seconds since the epoch, mapped onto circles with a period of one day and
  # one year.
  timestamp_s = date_time.map(pd.Timestamp.timestamp)
  day = 24 * 60 * 60
  year = 365.2425 * day

  df['Day x'] = np.cos(timestamp_s * (2 * np.pi / day))
  df['Day y'] = np.sin(timestamp_s * (2 * np.pi / day))
  df['Year x'] = np.cos(timestamp_s * (2 * np.pi / year))
  df['Year y'] = np.sin(timestamp_s * (2 * np.pi / year))

  train_df, eval_df, test_df = normalize(df)
  # Write each split through a context manager so the file is flushed and
  # closed before the next cell reads it back.
  for split_name, split_df in (('train', train_df), ('eval', eval_df), ('test', test_df)):
    out_path = DATA_PATH + 'jena_{}_climate_data.pkl'.format(split_name)
    with open(out_path, 'wb') as out_file:
      pickle.dump({'data': split_df}, out_file)

def normalize(data):
  """Split ``data`` by row order into train/eval/test and standardize each part.

  The first TRAIN_SPLIT fraction of rows is the training split, the next
  EVAL_SPLIT fraction is the eval split, and the rest is the test split.
  Mean and standard deviation are taken from the training rows only and are
  reused for the other two splits.

  Args:
    data: DataFrame to split and scale.

  Returns:
    Tuple ``(train_df, eval_df, test_df)`` of standardized DataFrames.
  """
  n_rows = data.shape[0]
  train_end = int(TRAIN_SPLIT * n_rows)
  eval_end = train_end + int(EVAL_SPLIT * n_rows)

  parts = (data[:train_end], data[train_end: eval_end], data[eval_end:])

  # Statistics come from the training rows only.
  center = parts[0].mean()
  spread = parts[0].std()

  train_df, eval_df, test_df = [(part - center) / spread for part in parts]
  return train_df, eval_df, test_df

# Build the train/eval/test pickles from the extracted CSV.
prepare_data(csv_path)
  

## Defining the dataset

The data has been cleaned, but now it needs to be turned into a set of inputs and a set of labels. 

In [8]:
# Make dataset

import torch.nn as nn
from torchvision import datasets
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset

class WeatherDataset(Dataset):
  """Sliding-window dataset over a pickled, preprocessed weather DataFrame.

  Sample ``i`` pairs rows ``[i, i + input_length)`` (all columns) with rows
  ``[i + offset, i + offset + input_length)`` restricted to ``output_labels``.

  Args:
    data_file: Path to a pickle holding a dict whose ``'data'`` entry is a
      pandas DataFrame.
    offset: Number of rows between the start of an input window and the
      start of its label window.
    input_length: Number of rows in each input window and each label window.
    output_labels: Column names used as labels; defaults to every column.
  """

  def __init__(self, data_file, offset, input_length, output_labels=None):
    super(WeatherDataset, self).__init__()

    # NOTE: pickle.load can execute arbitrary code; only open files that this
    # notebook wrote itself.
    with open(data_file, 'rb') as data_pkl:
      dataset = pickle.load(data_pkl)
    self.df = dataset['data']

    self.input_length = input_length
    self.offset = offset

    if output_labels is None:
      output_labels = list(self.df.columns)

    # The last valid start row is n_rows - offset - input_length, so there are
    # (n_rows - offset - input_length + 1) windows. The previous range() bound
    # omitted the +1 and dropped the final window. Clamp at zero for frames
    # that are too short to hold a single window.
    num_samples = max(0, self.df.shape[0] - offset - input_length + 1)

    # Row positions of every input window, shape (num_samples, input_length).
    # One fancy-indexing gather replaces a Python loop that built a DataFrame
    # slice per window.
    window_rows = np.arange(num_samples)[:, None] + np.arange(input_length)[None, :]

    features = self.df.to_numpy()
    targets = self.df[output_labels].to_numpy()
    self.inputs = features[window_rows]
    self.labels = targets[window_rows + offset]

  def __len__(self):
    """Number of (input, label) windows."""
    return self.inputs.shape[0]

  def __getitem__(self, idx):
    """Return the ``idx``-th (input, label) pair as float32 tensors."""
    return torch.from_numpy(self.inputs[idx]).float(), torch.from_numpy(self.labels[idx]).float()


## BaseModel

In [93]:

class BaseModel(nn.Module):
  """Parameter-free baseline that echoes one input feature as its prediction.

  For an input of shape ``(..., features)`` the prediction is the feature at
  ``feature_index`` with a trailing axis of size 1, i.e. ``(..., 1)``. With
  the default index 1 this repeats the column that the evaluation cell plots
  as the temperature (presumably 'T (degC)' -- confirm against the column
  order produced by the preprocessing step).

  Args:
    feature_index: Position along the last axis of the feature to echo.
  """

  def __init__(self, feature_index=1):
    super(BaseModel, self).__init__()
    self.feature_index = feature_index

  def forward(self, x):
    # Index the LAST axis (the feature axis) and keep a trailing axis of size
    # 1 so the result lines up element-wise with (batch, time, 1) labels. The
    # previous `np.squeeze(x)[:, 1].flatten()` picked time step 1 for any
    # batch larger than one and returned a flat vector that either failed or
    # silently broadcast inside mse_loss.
    return x[..., self.feature_index].unsqueeze(-1)

  def inference(self, x, temperature=1):
    """Return the baseline prediction divided by ``temperature`` (floored at 1e-20)."""
    x = self.forward(x)
    x = x / max(temperature, 1e-20)
    return x

  def loss(self, prediction, label, reduction='mean'):
    """Mean-squared error between ``prediction`` and ``label``."""
    return F.mse_loss(prediction, label, reduction=reduction)

  def evaluate(self, x):
    """Prediction for ``x``; identical to ``forward``."""
    return self.forward(x)

In [91]:

def train(model, device, train_loader, epoch, log_interval):
  """Run one pass over ``train_loader`` and return the mean batch loss.

  No gradients are computed and no optimizer step is taken; the function only
  measures the loss of ``model`` on the training data.

  Args:
    model: Module exposing ``loss(prediction, label)``.
    device: Device the batches are moved to.
    train_loader: Iterable of ``(data, label)`` batches with a ``dataset`` attribute.
    epoch: Epoch number, used only in the log line.
    log_interval: Print a progress line every this many batches.

  Returns:
    Mean of the per-batch losses.
  """
  # Local import: in notebook order this cell runs before the cell that
  # imports tqdm at module level.
  import tqdm

  model.train()
  losses = []

  for batch_idx, (data, label) in enumerate(tqdm.tqdm(train_loader)):
    data = data.to(device)
    label = label.to(device)
    output = model(data)
    loss = model.loss(output, label)
    losses.append(loss.item())
    if batch_idx % log_interval == 0:
      # Progress is batch_idx / number_of_batches (it was multiplied before),
      # and the format string now closes the '[' it opens.
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_idx * len(data), len(train_loader.dataset),
          100. * batch_idx / len(train_loader), loss.item()))
  return np.mean(losses)

def test(model, device, test_loader):
  """Return the average per-batch loss of ``model`` over ``test_loader``.

  Args:
    model: Module exposing ``loss(prediction, label)``.
    device: Device the batches are moved to.
    test_loader: Sized iterable of ``(data, label)`` batches.

  Returns:
    Sum of the batch losses divided by the number of batches.
  """
  # Local import: in notebook order this cell runs before the cell that
  # imports tqdm at module level.
  import tqdm

  model.eval()
  test_loss = 0

  # Evaluation only, so skip autograd bookkeeping.
  with torch.no_grad():
    for data, label in tqdm.tqdm(test_loader):
      data = data.to(device)
      label = label.to(device)
      output = model(data)
      test_loss += model.loss(output, label).item()

  test_loss /= len(test_loader)
  print('\nTest set: Average loss: {:.4f} \n'.format(test_loss))
  return test_loss
    
       


In [None]:
def main():
    """Measure the baseline model on the train/test splits and plot its losses.

    Returns:
        (model, device): the BaseModel instance and the torch.device used.
    """
    import multiprocessing
    import traceback

    INPUT_LENGTH = 12
    OFFSET = 12
    BATCH_SIZE = 128
    TEST_BATCH_SIZE = 128
    EPOCHS = 10
    USE_CUDA = True
    PRINT_INTERVAL = 10

    data_train = WeatherDataset(DATA_PATH + 'jena_train_climate_data.pkl', OFFSET, INPUT_LENGTH,
                                output_labels=['T (degC)'])
    data_test = WeatherDataset(DATA_PATH + 'jena_test_climate_data.pkl', OFFSET, INPUT_LENGTH,
                               output_labels=['T (degC)'])

    use_cuda = USE_CUDA and torch.cuda.is_available()

    device = torch.device("cuda" if use_cuda else "cpu")
    print('Using device', device)
    num_workers = multiprocessing.cpu_count()
    print('num workers:', num_workers)

    # Worker processes and pinned memory are only requested when CUDA is used.
    kwargs = {'num_workers': num_workers,
              'pin_memory': True} if use_cuda else {}

    train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE,
                                               shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size=TEST_BATCH_SIZE,
                                              shuffle=True, **kwargs)

    model = BaseModel().to(device)

    start_epoch = 0
    train_losses = []
    test_losses = []
    # Test loss before the loop, recorded as the first point of the curve.
    test_loss = test(model, device, test_loader)
    test_losses.append((start_epoch, test_loss))

    try:
        for epoch in range(start_epoch, EPOCHS + 1):
            train_loss = train(model, device, train_loader, epoch, PRINT_INTERVAL)
            test_loss = test(model, device, test_loader)
            train_losses.append((epoch, train_loss))
            test_losses.append((epoch, test_loss))
    except KeyboardInterrupt:
        print('Interrupted')
    except Exception:
        # Best effort: report the failure, then still plot and return the model.
        traceback.print_exc()

    # Plot whatever was collected. Each curve is guarded because an
    # interruption during the first epoch leaves train_losses empty and
    # unpacking zip(*[]) raises. Returning after (not inside) a `finally`
    # keeps errors raised while plotting visible.
    fig, (axis1, axis2) = plt.subplots(2)
    for axis, history, title in ((axis1, train_losses, "Train loss"),
                                 (axis2, test_losses, "Test loss")):
        if history:
            ep, val = zip(*history)
            axis.plot(ep, val)
        axis.set(title=title, xlabel="Epoch", ylabel="Error")
    fig.tight_layout()
    plt.show()
    return model, device

base_model, device = main()


## LSTM

In [53]:

class LSTMNet(nn.Module):
  """Stacked LSTM followed by a linear decoder applied at every time step.

  Inputs are batch-first: ``(batch, time, input_size)``. The output has shape
  ``(batch, time, output_size)``.

  Args:
    hidden_dim: Width of the LSTM hidden and cell state.
    input_size: Number of features per time step.
    output_size: Number of values predicted per time step.
    num_layers: Number of stacked LSTM layers.
  """

  def __init__(self, hidden_dim, input_size, output_size, num_layers=4):
    super(LSTMNet, self).__init__()

    self.input_size = input_size
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.lstm = nn.LSTM(input_size, hidden_dim, num_layers=num_layers, batch_first=True)
    self.decoder = nn.Linear(hidden_dim, output_size)
    self.best_loss = 2**32

  def forward(self, x, hidden=None):
    """Return ``(prediction, (h_n, c_n))`` for a batch-first input."""
    x, hidden = self.lstm(x, hidden)
    x = self.decoder(x)
    return x, hidden

  def init_hidden(self, num_layers, batch_size):
    """Return zero ``(hidden, cell)`` states of shape (num_layers, batch_size, hidden_dim).

    The states are allocated next to the model's weights (same device and
    dtype) instead of unconditionally calling ``.cuda()``, so the model also
    runs when no GPU is available.
    """
    reference = next(self.parameters())
    shape = (num_layers, batch_size, self.hidden_dim)
    return reference.new_zeros(shape), reference.new_zeros(shape)

  def inference(self, x, hidden_state=None, temperature=1):
    """Forward pass whose prediction is divided by ``temperature`` (floored at 1e-20)."""
    x, hidden = self.forward(x, hidden_state)
    x = x / max(temperature, 1e-20)
    return x, hidden

  def loss(self, prediction, label, reduction='mean'):
    """Mean-squared error between ``prediction`` and ``label``."""
    loss_val = F.mse_loss(prediction, label, reduction=reduction)
    return loss_val

  def evaluate(self, x):
    """Predict for ``x`` starting from a zero state; returns only the prediction."""
    hidden = self.init_hidden(self.num_layers, x.shape[0])
    pred, _ = self.forward(x, hidden)
    return pred




In [54]:
import tqdm
import math


def repackage_hidden(h):
    """Detach a hidden state from its autograd history.

    ``h`` is either a tensor or an iterable of (possibly nested) hidden
    states; iterables are rebuilt as tuples of detached tensors.
    """
    if not isinstance(h, torch.Tensor):
        return tuple(map(repackage_hidden, h))
    return h.detach()

def train(model, device, optimizer, train_loader, lr, epoch, log_interval):
    """Train a recurrent model for one epoch and return the mean batch loss.

    Args:
        model: Module exposing ``num_layers``, ``init_hidden`` and ``loss``,
            and returning ``(output, hidden)`` when called.
        device: Device the batches are moved to.
        optimizer: Optimizer that updates ``model``.
        train_loader: Iterable of ``(data, label)`` batches with a ``dataset`` attribute.
        lr: Learning rate to use for this epoch.
        epoch: Epoch number, used only in the log line.
        log_interval: Print a progress line every this many batches.

    Returns:
        Mean of the per-batch losses.
    """
    # Apply the scheduled learning rate. `lr` used to be accepted but never
    # reached the optimizer, so the decay computed by the caller had no effect.
    for group in optimizer.param_groups:
        group['lr'] = float(lr)

    model.train()
    losses = []

    for batch_idx, (data, label) in enumerate(tqdm.tqdm(train_loader)):
        data, label = data.to(device), label.to(device)
        # A fresh state is requested for every batch.
        hidden = model.init_hidden(model.num_layers, data.shape[0])
        if hidden is not None:
            hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = model.loss(output, label)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    return np.mean(losses)


def test(model, device, test_loader):
    model.eval()
    test_loss = 0

    with torch.no_grad():
        for batch_idx, (data, label) in enumerate(test_loader):
            hidden = model.init_hidden(model.num_layers, data.shape[0])
            data, label = data.to(device), label.to(device)
            output, hidden = model(data, hidden)
            loss = model.loss(output, label, reduction='mean').item()
            test_loss += loss

    test_loss /= len(test_loader)

    print('\nTest set: Average loss: {:.4f} \n'.format(
        test_loss))
    return test_loss

In [None]:
def main():
    """Train and validate the LSTM forecaster, plot the loss curves, and return it.

    Returns:
        (model, device): the LSTMNet instance and the torch.device it lives on.
    """
    import multiprocessing
    import traceback

    INPUT_LENGTH = 12
    OFFSET = 12
    BATCH_SIZE = 128
    INPUT_SIZE = 19
    HIDDEN_SIZE = 512
    TEST_BATCH_SIZE = 128
    EPOCHS = 10
    OUTPUT_SIZE = 1
    LEARNING_RATE = 0.002
    WEIGHT_DECAY = 0.0005
    USE_CUDA = True
    PRINT_INTERVAL = 10

    data_train = WeatherDataset(DATA_PATH + 'jena_train_climate_data.pkl', OFFSET, INPUT_LENGTH,
                                output_labels=['T (degC)'])
    data_test = WeatherDataset(DATA_PATH + 'jena_test_climate_data.pkl', OFFSET, INPUT_LENGTH,
                               output_labels=['T (degC)'])

    use_cuda = USE_CUDA and torch.cuda.is_available()

    device = torch.device("cuda" if use_cuda else "cpu")
    print('Using device', device)
    num_workers = multiprocessing.cpu_count()
    print('num workers:', num_workers)

    # Worker processes and pinned memory are only requested when CUDA is used.
    kwargs = {'num_workers': num_workers,
              'pin_memory': True} if use_cuda else {}

    train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE,
                                               shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size=TEST_BATCH_SIZE,
                                              shuffle=True, **kwargs)

    model = LSTMNet(HIDDEN_SIZE, INPUT_SIZE, OUTPUT_SIZE).to(device)

    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    start_epoch = 0
    train_losses = []
    test_losses = []
    # Loss of the untrained model, recorded as the first point of the curve.
    test_loss = test(model, device, test_loader)
    test_losses.append((start_epoch, test_loss))

    try:
        for epoch in range(start_epoch, EPOCHS + 1):
            # Step schedule: the rate is multiplied by 0.25 every 6 epochs and
            # handed to train().
            lr = LEARNING_RATE * np.power(0.25, (int(epoch / 6)))
            train_loss = train(model, device, optimizer, train_loader, lr, epoch, PRINT_INTERVAL)
            test_loss = test(model, device, test_loader)
            train_losses.append((epoch, train_loss))
            test_losses.append((epoch, test_loss))
    except KeyboardInterrupt:
        print('Interrupted')
    except Exception:
        # Best effort: report the failure, then still plot and return the model.
        traceback.print_exc()

    # Plot whatever was collected. Each curve is guarded because an
    # interruption during the first epoch leaves train_losses empty and
    # unpacking zip(*[]) raises. Returning after (not inside) a `finally`
    # keeps errors raised while plotting visible.
    fig, (axis1, axis2) = plt.subplots(2)
    for axis, history, title in ((axis1, train_losses, "Train loss"),
                                 (axis2, test_losses, "Test loss")):
        if history:
            ep, val = zip(*history)
            axis.plot(ep, val)
        axis.set(title=title, xlabel="Epoch", ylabel="Error")
    fig.tight_layout()
    plt.show()
    return model, device

lstm_model, device = main()


## GRU

In [56]:

class GRUNet(nn.Module):
  """Stacked GRU followed by a linear decoder producing one value per time step.

  Inputs are batch-first: ``(batch, time, input_size)``. The output has shape
  ``(batch, time, 1)``.

  Args:
    input_size: Number of features per time step.
    feature_size: Width of the GRU hidden state.
    num_layers: Number of stacked GRU layers.
  """

  def __init__(self, input_size, feature_size, num_layers=4):
    super(GRUNet, self).__init__()

    self.feature_size = feature_size
    self.input_size = input_size
    self.num_layers = num_layers
    self.gru = nn.GRU(self.input_size, self.feature_size, num_layers=self.num_layers,
                      batch_first=True)
    self.decoder = nn.Linear(self.feature_size, 1)
    self.best_loss = 2**32

  def init_hidden(self, num_layers, batch_size):
    """Return a zero hidden state of shape (num_layers, batch_size, feature_size).

    The state is allocated next to the model's weights (same device and
    dtype) instead of unconditionally calling ``.cuda()``, so the model also
    runs when no GPU is available.
    """
    reference = next(self.parameters())
    return reference.new_zeros((num_layers, batch_size, self.feature_size))

  def forward(self, x, hidden_state=None):
    """Return ``(prediction, h_n)`` for a batch-first input."""
    x, hidden = self.gru(x, hidden_state)
    x = self.decoder(x)
    return x, hidden

  def inference(self, x,  temperature=1):
    """Return predictions for ``x`` as a column vector divided by ``temperature``.

    Previously this reshaped the input to width 1 (which does not match
    ``input_size``) and then called ``.view`` on the ``(output, hidden)``
    tuple returned by ``forward``, so it could not run. The prediction is now
    unpacked from the tuple and only the output is flattened to ``(-1, 1)``.
    """
    out, _ = self.forward(x)
    out = out.reshape(-1, 1)
    return out / max(temperature, 1e-20)

  def loss(self, prediction, label, reduction='mean'):
    """Mean-squared error after removing size-1 axes from both arguments."""
    # torch.squeeze is the tensor-native form of the np.squeeze call used before.
    loss_val = F.mse_loss(torch.squeeze(prediction), torch.squeeze(label), reduction=reduction)
    return loss_val

  def evaluate(self, x):
    """Predict for ``x`` starting from a zero state; returns only the prediction."""
    hidden = self.init_hidden(self.num_layers, x.shape[0])
    pred, _ = self.forward(x, hidden)
    return pred



In [62]:

import tqdm
import math


def repackage_hidden(h):
    """Detach a hidden state from its autograd history.

    ``h`` is either a tensor or an iterable of (possibly nested) hidden
    states; iterables are rebuilt as tuples of detached tensors.
    """
    if not isinstance(h, torch.Tensor):
        return tuple(map(repackage_hidden, h))
    return h.detach()

def train(model, device, optimizer, train_loader, lr, epoch, log_interval):
    """Train a recurrent model for one epoch and return the mean batch loss.

    Args:
        model: Module exposing ``num_layers``, ``init_hidden`` and ``loss``,
            and returning ``(output, hidden)`` when called.
        device: Device the batches are moved to.
        optimizer: Optimizer that updates ``model``.
        train_loader: Iterable of ``(data, label)`` batches with a ``dataset`` attribute.
        lr: Learning rate to use for this epoch.
        epoch: Epoch number, used only in the log line.
        log_interval: Print a progress line every this many batches.

    Returns:
        Mean of the per-batch losses.
    """
    # Apply the scheduled learning rate. `lr` used to be accepted but never
    # reached the optimizer, so the decay computed by the caller had no effect.
    for group in optimizer.param_groups:
        group['lr'] = float(lr)

    model.train()
    losses = []
    for batch_idx, (data, label) in enumerate(tqdm.tqdm(train_loader)):
        data, label = data.to(device), label.to(device)
        # Ask for as many layers as the model has; the literal 4 used before
        # only matched the default num_layers.
        hidden = model.init_hidden(model.num_layers, data.shape[0])
        if hidden is not None:
            hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = model.loss(output, label)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    return np.mean(losses)


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for batch_idx, (data, label) in enumerate(test_loader):
            hidden = model.init_hidden(4, data.shape[0])
            data, label = data.to(device), label.to(device)
            output, hidden = model(data, hidden)
            loss = model.loss(output, label, reduction='mean').item()
            test_loss += loss

    test_loss /= len(test_loader)

    print('\nTest set: Average loss: {:.4f} \n'.format(
        test_loss))
    return test_loss

In [None]:

def main():
    """Train and validate the GRU forecaster, plot the loss curves, and return it.

    Returns:
        (model, device): the GRUNet instance and the torch.device it lives on.
    """
    import multiprocessing
    import traceback

    INPUT_LENGTH = 12
    OFFSET = 12
    BATCH_SIZE = 128
    INPUT_SIZE = 19
    FEATURE_SIZE = 512
    TEST_BATCH_SIZE = 128
    EPOCHS = 10
    LEARNING_RATE = 0.002
    WEIGHT_DECAY = 0.0005
    USE_CUDA = True
    PRINT_INTERVAL = 10

    data_train = WeatherDataset(DATA_PATH + 'jena_train_climate_data.pkl', OFFSET, INPUT_LENGTH,
                                output_labels=['T (degC)'])
    data_test = WeatherDataset(DATA_PATH + 'jena_test_climate_data.pkl', OFFSET, INPUT_LENGTH,
                               output_labels=['T (degC)'])

    use_cuda = USE_CUDA and torch.cuda.is_available()

    device = torch.device("cuda" if use_cuda else "cpu")
    print('Using device', device)
    num_workers = multiprocessing.cpu_count()
    print('num workers:', num_workers)

    # Worker processes and pinned memory are only requested when CUDA is used.
    kwargs = {'num_workers': num_workers,
              'pin_memory': True} if use_cuda else {}

    train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE,
                                               shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size=TEST_BATCH_SIZE,
                                              shuffle=True, **kwargs)

    model = GRUNet(INPUT_SIZE, FEATURE_SIZE).to(device)

    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    start_epoch = 0
    train_losses = []
    test_losses = []
    # Loss of the untrained model, recorded as the first point of the curve.
    test_loss = test(model, device, test_loader)
    test_losses.append((start_epoch, test_loss))

    try:
        for epoch in range(start_epoch, EPOCHS + 1):
            # Step schedule: the rate is multiplied by 0.25 every 6 epochs and
            # handed to train().
            lr = LEARNING_RATE * np.power(0.25, (int(epoch / 6)))
            train_loss = train(model, device, optimizer, train_loader, lr, epoch, PRINT_INTERVAL)
            test_loss = test(model, device, test_loader)
            train_losses.append((epoch, train_loss))
            test_losses.append((epoch, test_loss))
    except KeyboardInterrupt:
        print('Interrupted')
    except Exception:
        # Best effort: report the failure, then still plot and return the model.
        traceback.print_exc()

    # Plot whatever was collected. Each curve is guarded because an
    # interruption during the first epoch leaves train_losses empty and
    # unpacking zip(*[]) raises. Returning after (not inside) a `finally`
    # keeps errors raised while plotting visible.
    fig, (axis1, axis2) = plt.subplots(2)
    for axis, history, title in ((axis1, train_losses, "Train loss"),
                                 (axis2, test_losses, "Test loss")):
        if history:
            ep, val = zip(*history)
            axis.plot(ep, val)
        axis.set(title=title, xlabel="Epoch", ylabel="Error")
    fig.tight_layout()
    # plt.show() matches the baseline and LSTM cells.
    plt.show()
    return model, device

gru_model, device = main()


## Transformer 

In [65]:
class TransformerNet(nn.Module):
  """Causal Transformer encoder with a linear decoder, one value per time step.

  Inputs are batch-first, ``(batch, time, input_size)``, like the other
  models in this notebook; the output has shape ``(batch, time, 1)``.

  Args:
    input_size: Number of features per time step (used as ``d_model``).
    num_layers: Number of encoder layers.
    dropout: Dropout probability inside each encoder layer.
  """

  def __init__(self, input_size, num_layers=4, dropout=0.1):
    super(TransformerNet, self).__init__()

    self.input_size = input_size
    self.num_layers = num_layers

    self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.input_size, nhead=1, dropout=dropout)
    self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=self.num_layers)
    self.decoder = nn.Linear(self.input_size, 1)
    self.best_loss = 2**32

    self.decoder.bias.data.zero_()
    self.decoder.weight.data.uniform_(-0.1, 0.1)

  @staticmethod
  def _causal_mask(size, device):
    """Additive attention mask: 0 on/below the diagonal, -inf above it."""
    return torch.triu(torch.full((size, size), float('-inf'), device=device), diagonal=1)

  def forward(self, x, mask=None):
    """Encode a batch-first input and decode every time step.

    Args:
      x: Tensor of shape ``(batch, time, input_size)``.
      mask: Optional ``(time, time)`` additive attention mask. When it is
        missing or not of that shape, a causal mask for the actual sequence
        length is used instead. The training cells of this notebook size
        their mask by the batch dimension, which does not describe attention
        over time steps.
    """
    seq_len = x.shape[1]
    if mask is None or tuple(mask.shape) != (seq_len, seq_len):
      mask = self._causal_mask(seq_len, x.device)
    else:
      mask = mask.to(x.device)

    # The encoder was built without batch_first, so it expects
    # (time, batch, features). Feeding batch-first data unchanged made every
    # sample attend to OTHER samples of the (shuffled) batch instead of to
    # its own earlier time steps.
    encoded = self.transformer(x.transpose(0, 1), mask)
    return self.decoder(encoded.transpose(0, 1))

  def inference(self, x, mask, temperature=1):
    """Return predictions as a column vector divided by ``temperature``.

    The input is no longer reshaped to width 1 (that never matched
    ``input_size``); only the output is flattened to ``(-1, 1)``.
    """
    out = self.forward(x, mask).reshape(-1, 1)
    return out / max(temperature, 1e-20)

  def loss(self, prediction, label, reduction='mean'):
    """Mean-squared error after removing size-1 axes from both arguments."""
    # torch.squeeze is the tensor-native form of the np.squeeze call used before.
    loss_val = F.mse_loss(torch.squeeze(prediction), torch.squeeze(label), reduction=reduction)
    return loss_val

  def evaluate(self, x):
    """Predict for ``x`` with a causal mask built on ``x``'s own device."""
    return self.forward(x)


In [64]:
import tqdm
import math


def repackage_hidden(h):
    """Detach a hidden state from its autograd history.

    ``h`` is either a tensor or an iterable of (possibly nested) hidden
    states; iterables are rebuilt as tuples of detached tensors.
    """
    if not isinstance(h, torch.Tensor):
        return tuple(map(repackage_hidden, h))
    return h.detach()

def train(model, device, optimizer, train_loader, lr, epoch, log_interval, batch_size):
    """Train a masked model for one epoch and return the mean batch loss.

    Args:
        model: Module called as ``model(data, mask)`` and exposing ``loss``.
        device: Device the batches and the mask are moved to.
        optimizer: Optimizer that updates ``model``.
        train_loader: Iterable of ``(data, label)`` batches with a ``dataset`` attribute.
        lr: Learning rate to use for this epoch.
        epoch: Epoch number, used only in the log line.
        log_interval: Print a progress line every this many batches.
        batch_size: Nominal batch size; the mask is built at this size.

    Returns:
        Mean of the per-batch losses.
    """
    # Apply the scheduled learning rate. `lr` used to be accepted but never
    # reached the optimizer, so the decay computed by the caller had no effect.
    for group in optimizer.param_groups:
        group['lr'] = float(lr)

    model.train()
    losses = []
    # Additive causal mask (0 on/below the diagonal, -inf above). Built with
    # torch.triu so it does not depend on
    # nn.Transformer.generate_square_subsequent_mask being callable on the
    # class (presumably an instance method in older PyTorch releases --
    # TODO confirm for the targeted version).
    # NOTE(review): the mask is sized by the batch dimension, while
    # nn.TransformerEncoder defaults to sequence-first input -- confirm which
    # layout the model expects.
    full_mask = torch.triu(torch.full((batch_size, batch_size), float('-inf')),
                           diagonal=1).to(device)
    for batch_idx, (data, label) in enumerate(tqdm.tqdm(train_loader)):
        data, label = data.to(device), label.to(device)
        # Slice per batch WITHOUT rebinding full_mask: shrinking the shared
        # mask for one short batch used to leave it too small for any full
        # batch that followed.
        mask = full_mask[:data.shape[0], :data.shape[0]]
        optimizer.zero_grad()
        output = model(data, mask)
        loss = model.loss(output, label)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    return np.mean(losses)


def test(model, device, test_loader, batch_size):
    model.eval()
    test_loss = 0
    mask = nn.Transformer.generate_square_subsequent_mask(batch_size).to(device)
    with torch.no_grad():
        for batch_idx, (data, label) in enumerate(test_loader):
            data, label = data.to(device), label.to(device)
            if data.shape[0] != batch_size:
              mask = mask[:data.shape[0], :data.shape[0]]
            output = model(data, mask)
            loss = model.loss(output, label, reduction='mean').item()
            test_loss += loss

    test_loss /= len(test_loader)

    print('\nTest set: Average loss: {:.4f} \n'.format(
        test_loss))
    return test_loss

In [None]:
def main():
    """Train and validate the Transformer forecaster, plot the loss curves, and return it.

    Returns:
        (model, device): the TransformerNet instance and the torch.device it lives on.
    """
    import multiprocessing
    import traceback

    INPUT_LENGTH = 12
    OFFSET = 12
    NUM_LAYERS = 4
    BATCH_SIZE = 128
    INPUT_SIZE = 19
    TEST_BATCH_SIZE = 128
    EPOCHS = 10
    LEARNING_RATE = 0.002
    WEIGHT_DECAY = 0.0005
    USE_CUDA = True
    PRINT_INTERVAL = 10

    data_train = WeatherDataset(DATA_PATH + 'jena_train_climate_data.pkl', OFFSET, INPUT_LENGTH,
                                output_labels=['T (degC)'])
    data_test = WeatherDataset(DATA_PATH + 'jena_test_climate_data.pkl', OFFSET, INPUT_LENGTH,
                               output_labels=['T (degC)'])

    use_cuda = USE_CUDA and torch.cuda.is_available()

    device = torch.device("cuda" if use_cuda else "cpu")
    print('Using device', device)
    num_workers = multiprocessing.cpu_count()
    print('num workers:', num_workers)

    # Worker processes and pinned memory are only requested when CUDA is used.
    kwargs = {'num_workers': num_workers,
              'pin_memory': True} if use_cuda else {}

    train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE,
                                               shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size=TEST_BATCH_SIZE,
                                              shuffle=True, **kwargs)

    model = TransformerNet(INPUT_SIZE, num_layers=NUM_LAYERS).to(device)

    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    start_epoch = 0
    train_losses = []
    test_losses = []
    # Loss of the untrained model, recorded as the first point of the curve.
    test_loss = test(model, device, test_loader, TEST_BATCH_SIZE)
    test_losses.append((start_epoch, test_loss))

    try:
        for epoch in range(start_epoch, EPOCHS + 1):
            # Step schedule: the rate is multiplied by 0.25 every 6 epochs and
            # handed to train().
            lr = LEARNING_RATE * np.power(0.25, (int(epoch / 6)))
            train_loss = train(model, device, optimizer, train_loader, lr,
                               epoch, PRINT_INTERVAL, BATCH_SIZE)
            test_loss = test(model, device, test_loader, TEST_BATCH_SIZE)
            train_losses.append((epoch, train_loss))
            test_losses.append((epoch, test_loss))
    except KeyboardInterrupt:
        print('Interrupted')
    except Exception:
        # Best effort: report the failure, then still plot and return the model.
        traceback.print_exc()

    # Plot whatever was collected. Each curve is guarded because an
    # interruption during the first epoch leaves train_losses empty and
    # unpacking zip(*[]) raises. Returning after (not inside) a `finally`
    # keeps errors raised while plotting visible.
    fig, (axis1, axis2) = plt.subplots(2)
    for axis, history, title in ((axis1, train_losses, "Train loss"),
                                 (axis2, test_losses, "Test loss")):
        if history:
            ep, val = zip(*history)
            axis.plot(ep, val)
        axis.set(title=title, xlabel="Epoch", ylabel="Error")
    fig.tight_layout()
    # plt.show() matches the baseline and LSTM cells.
    plt.show()
    return model, device

transformer_model, device = main()


## Neural Net

In [68]:

class NeuralNet(nn.Module):
  """Fully connected network applied to the last axis of its input.

  Three hidden ``Linear`` + ``ReLU`` layers of width ``hidden_size`` are
  followed by a ``Linear`` layer with a single output, so an input of shape
  ``(..., input_size)`` yields ``(..., 1)``.

  Args:
    input_size: Number of input features.
    hidden_size: Width of each hidden layer.
  """

  def __init__(self, input_size, hidden_size):
    super(NeuralNet, self).__init__()
    self.linear_stack = nn.Sequential(
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, 1)
    )

  def forward(self, x):
    """Apply the layer stack to ``x``."""
    return self.linear_stack(x)

  def loss(self, prediction, label, reduction='mean'):
    """Mean-squared error between ``prediction`` and ``label``."""
    # `reduction` must be passed by keyword: the third positional parameter
    # of F.mse_loss is the deprecated `size_average`, so a positional
    # 'sum' or 'none' was silently turned into 'mean'.
    return F.mse_loss(prediction, label, reduction=reduction)

  def evaluate(self, x):
    """Prediction for ``x``; identical to ``forward``."""
    return self.forward(x)


In [69]:
import tqdm
import math


def train(model, device, optimizer, train_loader, lr, epoch, log_interval):
    """Train a feed-forward model for one epoch and return the mean batch loss.

    Args:
        model: Module called as ``model(data)`` and exposing ``loss``.
        device: Device the batches are moved to.
        optimizer: Optimizer that updates ``model``.
        train_loader: Iterable of ``(data, label)`` batches with a ``dataset`` attribute.
        lr: Learning rate to use for this epoch.
        epoch: Epoch number, used only in the log line.
        log_interval: Print a progress line every this many batches.

    Returns:
        Mean of the per-batch losses.
    """
    # Apply the scheduled learning rate. `lr` used to be accepted but never
    # reached the optimizer, so the decay computed by the caller had no effect.
    for group in optimizer.param_groups:
        group['lr'] = float(lr)

    model.train()
    losses = []

    for batch_idx, (data, label) in enumerate(tqdm.tqdm(train_loader)):
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = model.loss(output, label)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    return np.mean(losses)


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for batch_idx, (data, label) in enumerate(test_loader):
            data, label = data.to(device), label.to(device)
            output = model(data)
            loss = model.loss(output, label, reduction='mean').item()
            test_loss += loss

    test_loss /= len(test_loader)

    print('\nTest set: Average loss: {:.4f} \n'.format(
        test_loss))
    return test_loss

In [None]:

def main():
    """Train and validate the fully connected forecaster, plot the loss curves, and return it.

    Returns:
        (model, device): the NeuralNet instance and the torch.device it lives on.
    """
    import multiprocessing
    import traceback

    INPUT_LENGTH = 12
    OFFSET = 12
    BATCH_SIZE = 128
    INPUT_SIZE = 19
    HIDDEN_SIZE = 512
    TEST_BATCH_SIZE = 128
    EPOCHS = 10
    LEARNING_RATE = 0.002
    WEIGHT_DECAY = 0.0005
    USE_CUDA = True
    PRINT_INTERVAL = 10

    data_train = WeatherDataset(DATA_PATH + 'jena_train_climate_data.pkl', OFFSET, INPUT_LENGTH,
                                output_labels=['T (degC)'])
    data_test = WeatherDataset(DATA_PATH + 'jena_test_climate_data.pkl', OFFSET, INPUT_LENGTH,
                               output_labels=['T (degC)'])

    use_cuda = USE_CUDA and torch.cuda.is_available()

    device = torch.device("cuda" if use_cuda else "cpu")
    print('Using device', device)
    num_workers = multiprocessing.cpu_count()
    print('num workers:', num_workers)

    # Worker processes and pinned memory are only requested when CUDA is used.
    kwargs = {'num_workers': num_workers,
              'pin_memory': True} if use_cuda else {}

    train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE,
                                               shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size=TEST_BATCH_SIZE,
                                              shuffle=True, **kwargs)

    model = NeuralNet(INPUT_SIZE, HIDDEN_SIZE).to(device)

    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    start_epoch = 0
    train_losses = []
    test_losses = []
    # Loss of the untrained model, recorded as the first point of the curve.
    test_loss = test(model, device, test_loader)
    test_losses.append((start_epoch, test_loss))

    try:
        for epoch in range(start_epoch, EPOCHS + 1):
            # Step schedule: the rate is multiplied by 0.25 every 6 epochs and
            # handed to train().
            lr = LEARNING_RATE * np.power(0.25, (int(epoch / 6)))
            train_loss = train(model, device, optimizer, train_loader, lr,
                               epoch, PRINT_INTERVAL)
            test_loss = test(model, device, test_loader)
            train_losses.append((epoch, train_loss))
            test_losses.append((epoch, test_loss))
    except KeyboardInterrupt:
        print('Interrupted')
    except Exception:
        # Best effort: report the failure, then still plot and return the model.
        traceback.print_exc()

    # Plot whatever was collected. Each curve is guarded because an
    # interruption during the first epoch leaves train_losses empty and
    # unpacking zip(*[]) raises. Returning after (not inside) a `finally`
    # keeps errors raised while plotting visible.
    fig, (axis1, axis2) = plt.subplots(2)
    for axis, history, title in ((axis1, train_losses, "Train loss"),
                                 (axis2, test_losses, "Test loss")):
        if history:
            ep, val = zip(*history)
            axis.plot(ep, val)
        axis.set(title=title, xlabel="Epoch", ylabel="Error")
    fig.tight_layout()
    # plt.show() matches the baseline and LSTM cells.
    plt.show()
    return model, device

neural_model, device = main()


## Evaluate Models

In [None]:
# Window geometry and sampling for the qualitative evaluation below.
eval_offset = 12
eval_input = 12
USE_CUDA = True
sample_size = 1  # number of evaluation windows drawn per batch

data_eval = WeatherDataset(DATA_PATH + 'jena_eval_climate_data.pkl', eval_offset, eval_input,
                           output_labels=['T (degC)'])

# Fall back to the CPU when CUDA is not available.
use_cuda = USE_CUDA and torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
print('Using device', device)

import multiprocessing
num_workers = multiprocessing.cpu_count()
print('num workers:', num_workers)

# Worker processes and pinned memory are only requested when CUDA is used.
kwargs = {}
if use_cuda:
    kwargs = {'num_workers': num_workers, 'pin_memory': True}

eval_loader = torch.utils.data.DataLoader(data_eval, shuffle=True,
                                          batch_size=sample_size, **kwargs)


def make_plot_data(input, target, input_labels, pred, input_length, offset):
  """Build the x/y series used to plot one evaluation sample.

  Args:
    input: Unused; kept for interface compatibility.
    target: Tensor of true label values for the target window.
    input_labels: Tensor with the plotted feature over the input window.
    pred: Unused; kept for interface compatibility.
    input_length: Number of time steps in the input (and target) window.
    offset: Number of steps between the start of the input window and the
      start of the target window.

  Returns:
    Tuple ``(pred_x, combined_x, combined_y)``: x positions of the target
    window, x positions of input followed by target, and the matching y
    values (input history followed by true targets).
  """
  # The dataset takes labels from rows [i + offset, i + offset + input_length),
  # so relative to the input start the target window begins at `offset`.
  # Starting at `input_length + offset` drew the targets one whole input
  # window too late.
  pred_x = np.arange(offset, offset + input_length)
  combined_x = np.concatenate([np.arange(input_length), pred_x])
  combined_y = np.append(input_labels.cpu(), target.cpu()).flatten()

  return pred_x, combined_x, combined_y


def plot_data(pred_x, pred_y, target, combined_x,
              combined_y, title, xlabel, ylabel):
  """Draw one evaluation sample: true targets, predictions and the joined series.

  ``target`` and ``pred_y`` are tensors that are copied to the CPU before
  being scattered at ``pred_x``; ``combined_x``/``combined_y`` are drawn as a
  line. The figure is shown immediately.
  """
  actual = target.cpu()
  predicted = pred_y.cpu()

  plt.scatter(pred_x, actual, marker='o', label="Actual")
  plt.scatter(pred_x, predicted, marker='X', label="Prediction")
  plt.plot(combined_x, combined_y)

  # Annotate the axes, then render.
  plt.title(title)
  plt.xlabel(xlabel)
  plt.ylabel(ylabel)
  plt.legend()
  plt.show()


def evaluate_models(models, device, input_length, offset):
  """Plot and score every model on one batch drawn from ``eval_loader``.

  Args:
    models: Dict mapping a display name to a model exposing ``evaluate`` and ``loss``.
    device: Device the sample and the models are moved to.
    input_length: Number of time steps in each input window.
    offset: Steps between the start of the input and the start of the target.
  """
  data, label = next(iter(eval_loader))

  for i in range(data.shape[0]):
    input = data[i].to(device)
    target = label[i].to(device)
    # Column 1 of the input is plotted as the observed history on the
    # temperature axis.
    temp_y = input[:, 1]
    # Restore the leading batch axis of size 1 that indexing removed.
    input = input.view((1, input.shape[0], input.shape[1]))
    target = target.view((1, target.shape[0], target.shape[1]))
    with torch.no_grad():
      for key in models:
        # Make sure the model sits on the evaluation device and is in eval
        # mode: a model returned from an interrupted training run is still in
        # train mode, which would keep dropout active here.
        model = models[key].to(device)
        model.eval()
        pred = model.evaluate(input)
        print('Sample {}:\n'.format(i))
        pred_x, combined_x, combined_y = make_plot_data(input, target,
                                                      temp_y, pred, input_length, offset)
        plot_data(pred_x, pred, target, combined_x, combined_y, "Weather Prediction " + key,
                  "Hour", "Temperature (degC)")
        print(key + " Results")
        print('Eval loss: {:.4f} \n'.format(model.loss(pred, target)))
    
# Display name -> model. The baseline is created fresh here; the other models
# are the objects returned by the main() calls in the cells above.
models = {"Base": BaseModel(), "LSTM": lstm_model, "GRU": gru_model, "Transformer": transformer_model, 
          "Neural Net": neural_model}
evaluate_models(models, device, eval_input, eval_offset)
 
