In [None]:
# Exercise_2
# Simple regression on the WHO life expectancy data
# TODO**
# Names of group members: 
# Date:
# TODO**

In [None]:
#%% Load modules
import numpy as np
import pandas as pd
# import csv

# torch modules
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# plot module
import matplotlib.pyplot as plt

In [None]:
# %% CUDA for PyTorch
# Pick the compute device once, up front.
# With a CUDA-capable GPU the device is cuda:0 and device_num holds the number of visible GPUs;
# otherwise everything runs on the CPU (device_num = 0), which is sufficient for these exercises.
use_cuda = torch.cuda.is_available()

if use_cuda:
    device = torch.device("cuda:0")
    device_num = torch.cuda.device_count()
    print('Device:', device, '-- Number of devices:', device_num)
else:
    device = torch.device("cpu")
    device_num = 0
    print('No GPU available.')

In [None]:
# Mount Google Drive into the Colab file system at /content/drive
# (the data path and the figure output paths used in this notebook live below that mount point)
from google.colab import drive
# Variant that forces a fresh mount: drive.mount("/content/drive", force_remount=True)
drive.mount('/content/drive')
# Colab offers other ways to load data as well,
# see https://colab.research.google.com/notebooks/io.ipynb#scrollTo=u22w3BFiOveA

In [None]:
# %% read data
# Location of the WHO life expectancy CSV on the mounted Google Drive
data_path = '/content/drive/My Drive/bda_lab/ex2/data/life-expectancy-who/Life Expectancy Data.csv'

# Read the CSV sheet into a pandas DataFrame
df = pd.read_csv(data_path)

# Drop every row that contains at least one NaN
df = df.dropna()
# Alternative: keep every row that has at least 21 non-NaN entries
# df = df.dropna(thresh=21) # there are many nan in row 4 12 14, which can cause errors

# Get the values out of the pandas DataFrame as a numpy array
data = df.values
# data=df.to_numpy()

# Column names as a numpy string array, to see which columns have to be extracted as x and y.
# The builtin `str` is used as dtype: the `np.str` alias was deprecated in NumPy 1.20 and
# removed in NumPy 1.24, where `np.str` raises an AttributeError.
column_names = np.array(df.columns[:], dtype=str)

# TODO**
print('Dimension of the dataset:', )
# TODO**

In [None]:
# %% split in X and Y
# Extract the feature column(s) of `data` you want to use as input X
# and the target column as Y; both are converted to float32 numpy arrays.
# TODO** fill in the index expression inside data[...] for x and for y
x = np.array(data[], dtype=np.float32)
y = np.array(data[], dtype=np.float32)
# TODO**
# Print the resulting array shapes as a sanity check of the selection
print('x shape:', x.shape)
print('y shape:', y.shape)

In [None]:
# %% normalize X and Y by their maximum
# Each column is divided by its own maximum (axis=0), so with several features in X every
# feature is scaled individually and the largest value of each column becomes 1.
# The result lies in [0, 1] only if the data is non-negative.
# The scale factors are kept so values can be converted back to original units later.
scale_x = x.max(axis=0)
scale_y = y.max(axis=0)
print('Scale_x:',scale_x)
print('Scale_y:',scale_y)

x = x / scale_x
y = y / scale_y

In [None]:
# %% convert to torch tensors
# Turn the numpy arrays into float tensors.
X = torch.from_numpy(x).float()
Y = torch.from_numpy(y).float()

# A tensor with only one dimension gets an artificial second one ([10] -> [10, 1], so 1D -> 2D)
if X.dim() == 1:
    X = X.unsqueeze(1)
if Y.dim() == 1:
    Y = Y.unsqueeze(1)

In [None]:
# %% Split dataset in training, validation and test tensors
# TODO** choose the fraction of all samples that goes into each subset
# (presumably the three fractions should add up to 1 -- confirm with the exercise sheet)
prop_train = 
prop_val = 
prop_test = 
# TODO**

# Number of samples per subset: fraction * total number of rows in X, rounded to an integer
sample_num = {'all': X.shape[0], 
              'train': round(prop_train*X.shape[0]),
              'val': round(prop_val*X.shape[0]),
              'test': round(prop_test*X.shape[0])}

# Shuffle: draw all sample indices without replacement, i.e. a random permutation.
# No random seed is set in this cell, so the split differs between runs.
idx = np.random.choice(sample_num['all'], sample_num['all'], replace=False)
# Cut the shuffled index array into consecutive slices: train first, then val.
# The test slice takes every remaining index, so its actual size is
# all - train - val; sample_num['test'] is not used for slicing.
sample_idx = {'all': idx[:], 
              'train': idx[0:sample_num['train']],
              'val': idx[sample_num['train']:sample_num['train']+sample_num['val']],
              'test': idx[sample_num['train']+sample_num['val']:]}

# Create train data
X_train = X[sample_idx['train']]
Y_train = Y[sample_idx['train']]

# Create validation data
X_val = X[sample_idx['val']]
Y_val = Y[sample_idx['val']]

# Create test data
X_test = X[sample_idx['test']]
Y_test = Y[sample_idx['test']]


# %% Show data point
# Print the first ten training inputs and targets
print('Input of first ten train Sample:', X_train[0:10])
print('Target of first ten train Sample:', Y_train[0:10])

In [None]:
#%% class of neural network 'RegressNet'
# set up layer and architecture of network in constructor __init__
# define operations on layer in forward pass method
class RegressNet(nn.Module):
    """Fully connected regression network with two hidden layers.

    Layer sizes are inputSize -> 128 -> 32 -> outputSize. A ReLU follows each
    hidden layer; the output layer is linear (no activation).

    Args:
        inputSize: number of input features per sample.
        outputSize: number of values predicted per sample.
    """

    def __init__(self, inputSize, outputSize):
        super().__init__()
        self.fc1 = nn.Linear(in_features=inputSize, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=32)
        self.fc3 = nn.Linear(in_features=32, out_features=outputSize)

    def forward(self, x):
        """Run the forward pass and return the output of the last linear layer."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [None]:
#%% Specify network hyperparameter and create instance of RegressNet
# TODO** set the number of input features and the number of outputs of the network
# (presumably the second dimension of X and of Y -- confirm against your feature selection)
inputDim =
outputDim =

# Create instance of RegressNet: inputDim is passed as inputSize, outputDim as outputSize
net = RegressNet(inputDim, outputDim)
# TODO**
# Print the layer structure of the network
print(net)

In [None]:
#%% Send tensors and networks to GPU (if you have one which supports cuda) for faster computations
# Tensor.to() returns a tensor on the target device, so every result is assigned back.
X_train = X_train.to(device)
Y_train = Y_train.to(device)
X_val = X_val.to(device)
Y_val = Y_val.to(device)
X_test = X_test.to(device)
Y_test = Y_test.to(device)

# The network must live on the same device as its inputs.
# With more than one GPU the net is wrapped in nn.DataParallel first.
# For a module, .to(device) moves the parameters in place and returns the module itself,
# so `net.to(device)` needs no re-assignment; `net = RegressNet(...).to(device)` in a
# single expression gives the same result.
if device_num > 1:
    print("Let's use", device_num, "GPU's")
    net = nn.DataParallel(net)
net.to(device)

In [None]:
#%% Specify hyperparameter
# hyperparameters: num_epoch, learn_rate, loss_func, optimizer
# TODO** choose how many epochs to train (num_epoch) and the learning rate (learn_rate)
num_epoch = 
learn_rate = 
# TODO**
# Loss and optimizer: mean squared error as loss function,
# Adam over all parameters of the network with learning rate learn_rate
loss_func = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=learn_rate)

In [None]:
#%% Loss before training
# Baseline: predictions and loss on the train, validation and test data before training,
# i.e. with the randomly initialised weights.
# These forward passes are only used for reporting and plotting, never for backpropagation,
# so they run under torch.no_grad(): no autograd graph is built or kept in memory for them.
with torch.no_grad():
    Y_pred_train_before = net(X_train)
    loss_train_before = loss_func(Y_pred_train_before, Y_train)
    Y_pred_val_before = net(X_val)
    loss_val_before = loss_func(Y_pred_val_before, Y_val)
    Y_pred_test_before = net(X_test)
    loss_test_before = loss_func(Y_pred_test_before, Y_test)

In [None]:
#%% Training
# One optimisation step on the complete training set per epoch (no mini-batches),
# followed by a validation forward pass; both losses are plotted and printed every epoch.
plt.figure() # figure that collects the loss curve during training
# for loop over epochs
for epoch in range(num_epoch):
    # classical forward pass -> predict new output from train data
    Y_pred_train = net(X_train)
    # compute loss    
    loss_train = loss_func(Y_pred_train, Y_train)
    
    # Compute gradients
    # .backward() accumulates (adds) gradients into the .grad attribute of each parameter,
    # so the gradients of the previous step are cleared before the new backward pass.
    optimizer.zero_grad()
    # Note that after a .backward() call, another call is only possible after a new forward pass.
    loss_train.backward()
    # perform a parameter update based on the current gradient (stored in .grad attribute of a parameter)
    optimizer.step()
    
    # TODO**
    # forward pass for validation: predict on X_val and compute the loss against Y_val
    Y_pred_val = 
    loss_val = 
    # TODO**
    
    # plot train (blue) and val (red) loss of this epoch as one marker each
    plt.scatter(epoch, loss_train.data.item(), color='b', s=10, marker='o')    
    plt.scatter(epoch, loss_val.data.item(), color='r', s=10, marker='o')
    
    # print message with actual losses
    # NOTE(review): the percentage is computed from `epoch`, not `epoch+1`, so the last
    # epoch prints less than 100% although the counter shows num_epoch/num_epoch.
    print('Train Epoch: {}/{} ({:.0f}%)\ttrain_Loss: {:.6f}\tval_Loss: {:.6f}'.format(
    epoch+1, num_epoch, epoch/num_epoch*100, loss_train.item(), loss_val.item()))


# show training and validation loss    
# The two legend labels are matched to the plotted markers by order
# (presumably the train and val marker of the first epoch).
plt.legend(['train-loss','val-loss'])
plt.xlabel('epoch')
plt.ylabel('loss')
# The figure is written to the results folder on the mounted Google Drive
plt.savefig('/content/drive/My Drive/bda_lab/ex2/results/who_loss.png')
#plt.show()

# Compare the losses of the last epoch with the losses measured before training
print('Train loss before training was:', loss_train_before.item())
print('Train loss after training is:', loss_train.item())
print('Val loss before training was:', loss_val_before.item())
print('Val loss after training is:', loss_val.item())

In [None]:
#%% Pred vs. Ref Figure Train/Val set
# Plot the prediction against the reference for the train/val points.
# If the prediction equals the reference, the dots appear on the 'perfect model' line.
plt.figure()
plt.title('pred vs. ref: train/val points')
# Every artist carries its own label instead of passing a bare list to plt.legend().
# A label list is matched to the artists purely by order, and the old list started with
# 'perfect model' although the line was drawn last, which mislabels the legend on
# matplotlib versions that order legend handles by insertion.
# The line is drawn first so that it stays the first legend entry.
plt.plot((0,1),(0,1), color='k', label='perfect model')
plt.scatter(Y_train.cpu().numpy(), Y_pred_train.cpu().detach().numpy(), color='b', s=5, marker='o',
            label='train-sample after tr')
plt.scatter(Y_val.cpu().numpy(), Y_pred_val.cpu().detach().numpy(), color='r', s=5, marker='o',
            label='val-sample after tr')
plt.scatter(Y_val.cpu().numpy(), Y_pred_val_before.cpu().detach().numpy(), color='m', s=5, marker='^',
            label='val-sample before tr')
plt.xlabel('reference')
plt.ylabel('prediction')
plt.legend()
# Both axes are limited to (0, 1), the range of the perfect-model line
plt.xlim((0,1))
plt.ylim((0,1))
plt.savefig('/content/drive/My Drive/bda_lab/ex2/results/who_pred_vs_ref_val.png')

In [None]:
#%% Test results
# TODO**
# forward pass on the test data and loss against Y_test
# Y_pred_test is produced by net from X_test, which were both sent to `device`;
# the code below calls .cpu() before converting the prediction to numpy.
Y_pred_test = 
loss_test = 
# TODO**
print('Test loss before training was:', loss_test_before.item())
print('Test loss after training is:', loss_test.item())

# Print mean abs difference between prediction and reference.
# Multiplying by scale_y undoes the division of y by scale_y from the normalisation step.
print('Mean abs difference:', np.mean(abs(Y_pred_test.cpu().detach().numpy()-Y_test.cpu().numpy()), axis=0)*scale_y, 'years')

In [None]:
#%% Pred vs. Ref Figure Test set
# Plot the prediction against the reference for the test points.
# If the prediction equals the reference, the dots appear on the 'perfect model' line.
plt.figure()
plt.title('pred vs. ref: test points')
# Every artist carries its own label instead of passing a bare list to plt.legend().
# A label list is matched to the artists purely by order, and the old list started with
# 'perfect model' although the line was drawn last, which mislabels the legend on
# matplotlib versions that order legend handles by insertion.
# The line is drawn first so that it stays the first legend entry.
plt.plot((0,1),(0,1), color='k', label='perfect model')
plt.scatter(Y_test.cpu().numpy(), Y_pred_test.cpu().detach().numpy(), color='g', s=5, marker='o',
            label='test-sample after tr')
plt.scatter(Y_test.cpu().numpy(), Y_pred_test_before.cpu().detach().numpy(), color='m', s=5, marker='^',
            label='test-sample before tr')
plt.xlabel('reference')
plt.ylabel('prediction')
plt.legend()
# Both axes are limited to (0, 1), the range of the perfect-model line
plt.xlim((0,1))
plt.ylim((0,1))
plt.savefig('/content/drive/My Drive/bda_lab/ex2/results/who_pred_vs_ref_test.png')