# Top6

https://github.com/xduan7/UnoPytorch/blob/master/uno_pytorch.py<br>
https://github.com/xduan7/UnoPytorch/blob/master/networks/structures/response_net.py<br>
https://github.com/xduan7/UnoPytorch/blob/master/networks/functions/resp_func.py<br>
https://github.com/xduan7/UnoPytorch/blob/master/utils/datasets/drug_resp_dataset.py<br>

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import os
from pathlib import Path
import sys
from time import time
import numpy as np
import pandas as pd

import sklearn
from collections import OrderedDict
from sklearn.metrics import r2_score

import matplotlib.pyplot as plt
import matplotlib.cm as cm

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

SEED = None

In [2]:
datadir = Path('../data/raw')

## Create mini top6 dataset

In [3]:
# data = pd.read_parquet(datadir/'uniq.top6.reg.parquet', engine='auto', columns=None)
# data = data.sample(frac=1.0, axis=0, random_state=SEED).reset_index(drop=True)
# print(data.shape)

# col_idx = data.nunique(dropna=True).values == 1  # col indexes to drop
# data = data.iloc[:, ~col_idx]
# print(data.shape)

# def subset(data, s):
#     if s <= 1.0:
#         data_size = int(data.shape[0]*s)
#     return data[:data_size]

# data = subset(data, s=0.4)
# print(data.shape)
# data.to_csv(datadir/'uniq.top6.reg.mini.csv', index=False)

## Load and pre-process dataset

In [5]:
# Load data
data = pd.read_csv(datadir/'uniq.top6.reg.mini.csv')
print(data.shape)

(113260, 3765)


In [6]:
# Hold out 20% of the rows for testing. `random_state=SEED` ties the split to
# the notebook-wide seed: with SEED = None the split stays random on every run,
# with an integer SEED it becomes reproducible.
df_tr, df_te = train_test_split(data, test_size=0.2, random_state=SEED)
# Drop the shuffled original row labels so both frames are indexed from 0.
df_tr = df_tr.reset_index(drop=True)
df_te = df_te.reset_index(drop=True)
print(df_tr.shape)
print(df_te.shape)

(90608, 3765)
(22652, 3765)


In [7]:
# Split each frame into the regression target (first column) and the
# features (all remaining columns).
ytr, xtr = df_tr.iloc[:,0], df_tr.iloc[:,1:]
yte, xte = df_te.iloc[:,0], df_te.iloc[:,1:]

In [8]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split; the scaled arrays are wrapped
# back into float32 DataFrames that keep the original column names.
col_names = xtr.columns
scaler = StandardScaler()

xtr = pd.DataFrame(scaler.fit_transform(xtr), columns=col_names).astype(np.float32)
xte = pd.DataFrame(scaler.transform(xte), columns=col_names).astype(np.float32)

In [9]:
print(xtr.shape)
print(xte.shape)
print(ytr.shape)
print(yte.shape)

(90608, 3764)
(22652, 3764)
(90608,)
(22652,)


# Pytorch
https://nbviewer.jupyter.org/github/FraPochetti/KagglePlaygrounds/blob/master/NYC%20Taxi%20Fares%20Prediction.ipynb

In [10]:
import torch
from torch import nn, optim
from torch.optim import lr_scheduler

import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader

In [11]:
# Toy tensors for checking how `view` reshapes data.
# `view` keeps the row-major element order, so the 2x3 input becomes 3x2.
x =  torch.Tensor([[1, 2, 3], [1, 2, 3]]).view(-1, 2)
y =  torch.Tensor([[2, 1]]).view(2, -1)  # 1x2 row viewed as a 2x1 column

print('x.shape', x.shape)
print('y.shape', y.shape)

x.shape torch.Size([3, 2])
y.shape torch.Size([2, 1])


In [12]:
x

tensor([[1., 2.],
        [3., 1.],
        [2., 3.]])

In [13]:
y

tensor([[2.],
        [1.]])

In [14]:
torch.mm(x,y)

tensor([[4.],
        [7.],
        [7.]])

In [15]:
def get_model_device(model):
    """Return, as a string, the device that holds the model's first parameter."""
    first_param = next(iter(model.parameters()))
    return str(first_param.device)

In [16]:
def r2_torch(y_true, y_pred):
    """Coefficient of determination (R^2) computed with torch ops.

    Returns a 0-dim tensor equal to `1 - SS_res / (SS_tot + epsilon)`, where
    the small epsilon avoids division by zero when `y_true` is constant.
    """
    epsilon = 1e-7  # same epsilon value as used in TF
    residuals = y_true - y_pred
    deviations = y_true - torch.mean(y_true)
    ss_res = torch.sum(residuals ** 2)
    ss_tot = torch.sum(deviations ** 2)
    return 1 - ss_res / (ss_tot + epsilon)

In [17]:
# Convert pandas df to torch tensors
# xtr, ytr, xte, yte = map(torch.tensor(dtype=torch.float32), (xtr.values, ytr.values, xte.values, yte.values))

def np_to_tensor(a, dtype=torch.float32):
    """Build a new torch tensor from `a` with the requested dtype (float32 by default)."""
    tensor = torch.tensor(a, dtype=dtype)
    return tensor

xtr = np_to_tensor(xtr.values)
ytr = np_to_tensor(ytr.values)
xte = np_to_tensor(xte.values)
yte = np_to_tensor(yte.values)

print(type(xtr))
print(xtr.dtype)

<class 'torch.Tensor'>
torch.float32


## Define dataset

In [18]:
class Top6DataReg(Dataset):
    """Map-style dataset pairing a feature tensor with a regression target tensor.

    References:
        discuss.pytorch.org/t/data-processing-as-a-batch-way/14154
        github.com/utkuozbulak/pytorch-custom-dataset-examples#incorporating-pandas
        nbviewer.jupyter.org/github/FraPochetti/KagglePlaygrounds/blob/master/NYC%20Taxi%20Fares%20Prediction.ipynb
    """

    def __init__(self, xdata, ydata):
        """Store the features as given and the targets viewed as a single column.

        xdata: features, indexed as `xdata[idx, :]` (one sample per row).
        ydata: targets; must support `.view`, stored with shape (n_samples, 1).
        """
        self.x = xdata
        self.y = ydata.view(-1, 1)

    def __len__(self):
        """Number of samples, taken from the target column."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the `(features, target)` pair stored at position `idx`."""
        return self.x[idx, :], self.y[idx]

In [19]:
tr_ds = Top6DataReg(xdata=xtr, ydata=ytr)
te_ds = Top6DataReg(xdata=xte, ydata=yte)

## Define data loaders

In [20]:
# Define data loaders
batch_size = 32
num_workers = 1
# Training batches are reshuffled every epoch; the test loader keeps the dataset
# order and uses 4x larger batches (presumably because evaluation does not need
# to store gradients -- confirm against available GPU memory).
tr_loader_prms = {'batch_size': batch_size, 'shuffle': True, 'num_workers': num_workers}
te_loader_prms = {'batch_size': 4*batch_size, 'shuffle': False, 'num_workers': num_workers}

tr_loader = DataLoader(tr_ds, **tr_loader_prms)
te_loader = DataLoader(te_ds, **te_loader_prms)

In [21]:
# xb, yb = next(iter(tr_loader))
# print(xb.shape)
# print(yb.shape)
# print(xb[:2])
# print(yb[:2])

## Define network

In [22]:
# class TORCH_REGRESSOR(nn.Module):
#     def __init__(self, input_dim):
#         super().__init__()
#         self.fc1 = nn.Linear(input_dim, 1000)
#         self.fc2 = nn.Linear(1000, 1000)
#         self.fc3 = nn.Linear(1000, 500)
#         self.fc4 = nn.Linear(500, 250)
#         self.fc5 = nn.Linear(250, 125)
#         self.fc6 = nn.Linear(125, 60)
#         self.fc7 = nn.Linear(60, 30)
#         self.fc8 = nn.Linear(30, 1)
#         self.dropout = nn.Dropout(0.2)
        
#     def forward(self, x):
#         x = self.dropout(F.relu(self.fc1(x)))
#         x = self.dropout(F.relu(self.fc2(x)))
#         x = self.dropout(F.relu(self.fc3(x)))
#         x = self.dropout(F.relu(self.fc4(x)))
#         x = self.dropout(F.relu(self.fc5(x)))
#         x = self.dropout(F.relu(self.fc6(x)))
#         x = self.dropout(F.relu(self.fc7(x)))
#         x = F.relu(self.fc8(x))
#         return x

In [23]:
class TORCH_REGRESSOR(nn.Module):
    """Fully connected regressor: input_dim -> 1000 -> 500 -> 250 -> 60 -> 1.

    Every hidden layer is followed by ReLU and dropout (p=0.2). The single
    output unit goes through ReLU only, so predictions are never negative.
    """

    def __init__(self, input_dim):
        """Create the five linear layers and the shared dropout module."""
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=1000)
        self.fc2 = nn.Linear(in_features=1000, out_features=500)
        self.fc3 = nn.Linear(in_features=500, out_features=250)
        self.fc4 = nn.Linear(in_features=250, out_features=60)
        self.fc5 = nn.Linear(in_features=60, out_features=1)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Map a batch of feature rows to one non-negative prediction per row."""
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = self.dropout(F.relu(hidden(x)))
        # No dropout on the output layer.
        return F.relu(self.fc5(x))

### CUDA

In [24]:
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device("cpu")
# print(device)

In [25]:
# pytorch.org/docs/stable/cuda.html
# towardsdatascience.com/speed-up-your-algorithms-part-1-pytorch-56d8a4ae7051
# Report the CUDA setup. Querying the device name or the current device raises
# when no GPU is usable, so the details are only printed if CUDA is available.
print('is_available:  ', torch.cuda.is_available())
if torch.cuda.is_available():
    print('device_name:   ', torch.cuda.get_device_name(0))
    print('device_count:  ', torch.cuda.device_count())
    print('current_device:', torch.cuda.current_device())

is_available:   True
device_name:    GeForce RTX 2080 Ti
device_count:   4
current_device: 0


In [26]:
# Hard-coded GPU choice: 'cuda:3' requires at least four visible GPUs.
device = torch.device('cuda:3')
model = TORCH_REGRESSOR(input_dim=tr_ds.x.shape[1]).to(device=device) # send model to gpu/cpu device
print(get_model_device(model))
# `.to(device)` only moves the model's parameters; it does not change the
# current CUDA device, which is why this still reports 0. The current device
# changes via `torch.cuda.set_device(...)` or a `with torch.cuda.device(...)` block.
print('current_device:', torch.cuda.current_device())

cuda:3
current_device: 0


In [23]:
# Report GPU memory for the device the model was sent to. The statistics are
# per device, so `device` is passed explicitly instead of index 0.
# NOTE: newer PyTorch releases deprecate `memory_cached` in favor of
# `memory_reserved`; the old name is kept to match the installed version.
if device.type == 'cuda':
    print(get_model_device(model))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(device)/1024**3, 1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_cached(device)/1024**3, 1), 'GB')

cuda:2
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [24]:
# Choose cuda device with context manager
# The context manager changes the current CUDA device to 2 only inside the
# `with` block; afterwards the previous current device is restored.
# NOTE: the model is still moved to `device` because `.to(device=device)` is
# explicit -- the context does not affect where it goes. This statement also
# replaces any previously created `model` with a freshly initialized one.
with torch.cuda.device(2):
    print('\ncurrent_device:', torch.cuda.current_device())
    model = TORCH_REGRESSOR(input_dim=tr_ds.x.shape[1]).to(device=device)
print('current_device:', torch.cuda.current_device())


current_device: 2
current_device: 0


In [25]:
# Mean squared error averaged over the batch, minimized with SGD + momentum.
# The optimizer is bound to the parameters of the current `model` object, so it
# must be re-created whenever `model` is re-instantiated.
loss_fnc = nn.MSELoss(reduction='mean')
opt = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)  # pytorch.org/docs/stable/optim.html

### Try a single training iteration

In [26]:
# xb, yb = next(iter(tr_loader))
# xb = xb.to(device)
# yb = yb.to(device)
# print(xb.device)
# print(yb.device)

# # Forward
# opt.zero_grad()
# pred = model(xb)

# print(f'pred.shape {pred.shape}')
# print(f'yb.shape {yb.shape}')
# yb = yb.view(pred.shape)
# print(f'yb.shape {yb.shape}\n')

# print('pred:\n', pred[:3])
# print('yb:\n', yb[:3])
# pred = pred.type(torch.float32)
# print('pred:\n', pred[:3])

# # Backprop
# loss = loss_fnc(pred, yb)
# loss.backward() # compute loss gradients wrt to model parameters and inputs
# opt.step()      # update model parameters;  pytorch.org/docs/stable/optim.html

In [27]:
# print(f'pred.shape {pred.shape}')
# print(f'yb.shape   {yb.shape}\n')
# mae = torch.abs(pred - yb)
# r2_torch(y_true=yb, y_pred=pred)

## Training loop

In [26]:
# discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
# groups.google.com/forum/#!topic/torch7/CkB57025yRY
# torch.backends.cudnn.benchmark = True

In [27]:
def proc_batch(xb, yb, model, loss_fnc, opt=None):
    """Run the model on one batch and, if an optimizer is given, take one step.

    Args:
        xb: batch of inputs passed to `model`.
        yb: batch of targets passed to `loss_fnc` together with the predictions.
        model: callable producing predictions from `xb`.
        loss_fnc: callable `(pred, yb) -> loss`.
        opt: optimizer; when None only the forward pass is run.

    Returns:
        Tuple `(loss, pred)`.

    Gradients are NOT zeroed here; the caller has to call `opt.zero_grad()`
    before each batch.
    """
    pred = model(xb)
    loss = loss_fnc(pred, yb)

    # Forward-only mode (e.g. validation): nothing else to do.
    if opt is None:
        return loss, pred

    # Backward pass and parameter update.
    loss.backward()
    opt.step()
    return loss, pred

In [28]:
def calc_metrics(pred, yb, scores, val=False, metrics=None):
    """Compute the requested sklearn regression metrics and store them in `scores`.

    Args:
        pred: tensor of predictions.
        yb: tensor of true values.
        scores: dict updated in place with one entry per recognized metric.
        val: when exactly True, every key is prefixed with 'val_'.
        metrics: iterable of metric names; None is treated as "no metrics".
            Recognized names: 'mae' / 'mean_absolute_error', 'r2' / 'r2_score',
            'median_absolute_error', 'mean_squared_error'. Other names are
            ignored.

    Returns:
        The same `scores` dict that was passed in.
    """
    # `.numpy()` raises for tensors that require grad and for tensors on a GPU,
    # so drop the autograd history and copy to host memory first.
    pred = pred.detach().cpu().numpy()
    yb = yb.detach().cpu().numpy()
    prfx = 'val_' if val is True else ''

    # `metrics or ()` lets the default (None) behave like an empty list.
    for m in metrics or ():
        if m in ['mae', 'mean_absolute_error']:
            scores[prfx + 'mean_abs_err'] = sklearn.metrics.mean_absolute_error(yb, pred)

        elif m in ['r2', 'r2_score']:
            scores[prfx + 'r2'] = sklearn.metrics.r2_score(yb, pred)

        elif m in ['median_absolute_error']:
            scores[prfx + 'median_abs_err'] = sklearn.metrics.median_absolute_error(yb, pred)

        elif m in ['mean_squared_error']:
            scores[prfx + 'mean_squared_error'] = sklearn.metrics.mean_squared_error(yb, pred)

    return scores

In [29]:
def log_metrics(scores, ph):
    """Return `scores` as a keras-history-like dict, keyed by phase.

    Args:
        scores: mapping from metric name (e.g. 'loss', 'mae', 'r2') to value.
        ph: phase name; 'val' prefixes every key with 'val_', any other value
            leaves the keys unchanged.

    Returns:
        A new OrderedDict with the (possibly prefixed) keys in the order of
        `scores`; `scores` itself is not modified.
    """
    prefix = 'val_' if ph == 'val' else ''
    # Build the result from the argument instead of relying on notebook globals.
    return OrderedDict((prefix + name, value) for name, value in scores.items())

In [34]:
# def fit(model: nn.Module,
#         loss_fnc: 
#         opt: torch.optim,
#         tr_dl: torch.utils.data.DataLoader,
#         vl_dl: torch.utils.data.DataLoader=None,
#         epochs: int=1,
#         device: torch.device='cuda:0',
#         verbose: bool=True,
#         metrics=None) -> dict:
#     # github.com/stared/livelossplot/blob/master/examples/pytorch.ipynb
    
#     print(f'device: {device}')
#     model.to(device)  
#     logs = OrderedDict()
    
#     for ep in range(epochs):
#         ep_t0 = time()
        
#         model.train()
#         bt_loss, bt_mae, bt_r2 = 0, 0, 0
#         for xb, yb in tr_dl:
#             xb = xb.to(device)
#             yb = yb.to(device)
#             loss, pred = proc_batch(xb, yb, model, loss_fnc, opt=opt)
#             # logs = calc_metrics(yb, pred, logs, phase=False, metrics=metrics)
#             # logs = pd.DataFrame(logs)
            
#             pred, yb = pred.numpy(), yb.numpy()
#             bt_mae += sklearn.metrics.mean_absolute_error(yb, pred)
#             bt_r2 += sklearn.metrics.r2_score(yb, pred)
            
#         # logs = logs.sum(axis=0)/len(dl)
        
#         bt_loss /= len(dl)
#         bt_mae /= len(dl)
#         bt_r2 /= len(dl)

#         # Log metrics
#         prefix = ''
#         logs[prefix + 'loss'] = bt_loss
#         logs[prefix + 'mae'] = bt_mae
#         logs[prefix + 'r2'] = bt_r2        
        
#         if vl_dl is not None:
#             model.eval()
#             bt_loss, bt_mae, bt_r2 = 0, 0, 0
#             with torch.no_grad():
#                 for xb, yb in vl_dl:
#                     xb = xb.to(device)
#                     yb = yb.to(device)
#                     loss, pred = proc_batch(xb, yb, model, loss_fnc, opt=None)
#                     # logs = calc_metrics(yb, pred, logs, val=True, metrics=metrics)
                
#                     pred, yb = pred.numpy(), yb.numpy()
#                     bt_mae += sklearn.metrics.mean_absolute_error(yb, pred)
#                     bt_r2 += sklearn.metrics.r2_score(yb, pred)
  
#                 bt_loss /= len(dl)
#                 bt_mae /= len(dl)
#                 bt_r2 /= len(dl) 

#                 # Log metrics
#                 prefix = 'val_'
#                 logs[prefix + 'loss'] = bt_loss
#                 logs[prefix + 'mae'] = bt_mae
#                 logs[prefix + 'r2'] = bt_r2            
        
#         if verbose:
#             print(f'Epoch {ep+1}/{epochs}; ',
#                   f'{int(time()-ep_t0)}s; ',
#                   [f'{k}: {v:.3f}' for k, v in logs.items()])
        
#     return logs

In [27]:
# -------------------------------
# Using phases = ['train', 'val']
# -------------------------------
def _run_phase(model, loss_fnc, opt, dl, device, train):
    """Run one full pass over `dl` and return batch-averaged (loss, mae, r2).

    With `train=True` the model is put in training mode and `opt` takes one
    step per batch; otherwise the model is put in eval mode and gradient
    tracking is disabled.
    """
    if train:
        model.train()
    else:
        model.eval()

    sum_loss, sum_mae, sum_r2 = 0.0, 0.0, 0.0
    for xb, yb in dl:
        xb = xb.to(device)
        yb = yb.to(device)

        if train:
            # Gradients accumulate across backward() calls and proc_batch does
            # not clear them, so zero them before every optimization step.
            opt.zero_grad()

        with torch.set_grad_enabled(train):
            loss, pred = proc_batch(xb, yb, model, loss_fnc,
                                    opt=opt if train else None)

        # Metrics are stored as Python floats (.item()), so no autograd graph
        # is kept alive between batches.
        with torch.no_grad():
            sum_loss += loss.item()
            sum_mae += torch.mean(torch.abs(pred - yb)).item()
            sum_r2 += r2_torch(y_true=yb, y_pred=pred).item()

    n_batches = len(dl)
    return sum_loss / n_batches, sum_mae / n_batches, sum_r2 / n_batches


def fit(model: nn.Module,
        loss_fnc,
        opt: torch.optim.Optimizer,
        tr_dl: torch.utils.data.DataLoader,
        vl_dl: torch.utils.data.DataLoader=None,
        epochs: int=1,
        device: torch.device='cuda:0',
        verbose: bool=True,
        metrics=None) -> dict:
    """Train `model` for `epochs` epochs, optionally validating after each one.

    Args:
        model: network to train; it is moved to `device`.
        loss_fnc: callable `(pred, target) -> loss` tensor.
        opt: optimizer bound to `model`'s parameters.
        tr_dl: iterable of `(x, y)` training batches supporting `len()`.
        vl_dl: optional iterable of validation batches; None skips validation.
        epochs: number of passes over `tr_dl`.
        device: target device, given as a `torch.device` or a string such as
            'cuda:0' or 'cpu'.
        verbose: print one summary line per epoch.
        metrics: currently unused; loss, MAE and R^2 are always reported.

    Returns:
        OrderedDict with keys 'loss', 'mae', 'r2' (plus 'val_'-prefixed
        counterparts when `vl_dl` is given). Values are averages over the
        batches of the LAST epoch only; earlier epochs are overwritten.
    """
    # Local import so the cell does not depend on an extra top-of-file import.
    from contextlib import ExitStack

    device = torch.device(device)  # accept both strings and torch.device objects
    print(f'\ndevice: {device}')
    model.to(device)

    phases = ['train', 'val'] if vl_dl is not None else ['train']

    # Similar to keras `history`, but holding only the most recent epoch.
    logs = OrderedDict()

    with ExitStack() as stack:
        # The CUDA device context is only valid for CUDA devices; entering it
        # unconditionally makes CPU training fail.
        if device.type == 'cuda':
            stack.enter_context(torch.cuda.device(device))
            print('current_device:', torch.cuda.current_device())

        for ep in range(epochs):
            ep_t0 = time()

            for ph in phases:
                is_train = ph == 'train'
                dl = tr_dl if is_train else vl_dl
                ph_loss, ph_mae, ph_r2 = _run_phase(
                    model, loss_fnc, opt, dl, device, train=is_train)

                # Log metrics
                prefix = '' if is_train else 'val_'
                logs[prefix + 'loss'] = ph_loss
                logs[prefix + 'mae'] = ph_mae
                logs[prefix + 'r2'] = ph_r2

            if verbose:
                parts = [f'{k}: {v:.3f}' for k, v in logs.items()]
                print(f'Epoch {ep+1}/{epochs}; ',
                      f'{int(time()-ep_t0)}s; ',
                      *parts)
    return logs

In [28]:
epochs = 50

# Per-epoch history. Every entry is a plain Python float, so the lists can be
# plotted directly and do not keep GPU memory or autograd graphs alive.
tr_loss_list = []
tr_mae_list = []
tr_r2_list = []

te_loss_list = []
te_mae_list = []
te_r2_list = []

# Choose cuda device with context manager
with torch.cuda.device(device):
    print('\ncurrent_device:', torch.cuda.current_device())

    for ep in range(epochs):
        t0 = time()

        # Training loop
        model.train() # turns-on dropout for training
        tr_loss, tr_mae, tr_r2 = 0, 0, 0

        for xb, yb in tr_loader:
            xb = xb.to(device) # move data to gpu/cpu device
            yb = yb.to(device) # move data to gpu/cpu device

            # Feedforward
            pred = model(xb)
            loss = loss_fnc(pred, yb)

            # Backprop and optimization
            opt.zero_grad()
            loss.backward()   # compute loss gradients wrt to model parameters and inputs
            opt.step()  # update model parameters;  pytorch.org/docs/stable/optim.html

            # Compute metrics
            # .item() turns each one-element tensor into a Python number.
            # Accumulating the tensors themselves would chain the autograd
            # graphs of all batches together and keep them in memory.
            tr_loss += loss.item()
            tr_mae += torch.mean(torch.abs(pred-yb)).item()
            tr_r2 += r2_torch(y_true=yb, y_pred=pred).item()

        # Average the per-batch values over the epoch.
        tr_loss /= len(tr_loader)
        tr_mae /= len(tr_loader)
        tr_r2 /= len(tr_loader)

        tr_loss_list.append(tr_loss)
        tr_mae_list.append(tr_mae)
        tr_r2_list.append(tr_r2)

        del xb, yb


        # Validation loop
        model.eval()  # turn-off dropout in inference
        with torch.no_grad():
            te_loss, te_mae, te_r2 = 0, 0, 0

            for xb, yb in te_loader:
                xb = xb.to(device)
                yb = yb.to(device)

                # Feedforward
                pred = model(xb)
                loss = loss_fnc(pred, yb)

                # Compute metrics
                te_loss += loss.item()
                te_mae += torch.mean(torch.abs(pred-yb)).item()
                te_r2 += r2_torch(y_true=yb, y_pred=pred).item()

            te_loss /= len(te_loader)
            te_mae /= len(te_loader)
            te_r2 /= len(te_loader)

        te_loss_list.append(te_loss)
        te_mae_list.append(te_mae)
        te_r2_list.append(te_r2)

        del xb, yb

        print(f'Epoch {ep+1}/{epochs}; ',
              f'{int(time()-t0)}s; ',
              f'tr_loss: {tr_loss:.3f}; ',
              f'vl_loss: {te_loss:.3f}; ',
              f'tr_mae: {tr_mae:.3f}; ',
              f'vl_mae: {te_mae:.3f}; ',
              f'tr_r2: {tr_r2:.3f}; ',
              f'vl_r2: {te_r2:.3f}; ')


current_device: 2


NameError: name 'te_loss_list' is not defined

In [None]:
# print(f'Epoch {ep}/{epochs}; ',
#       f'{int(time()-t0):>10}s; ',
#       f'tr_loss: {tr_loss:.4f}; ',
#       f'vl_loss: {te_loss:.4f}; ',
#       f'tr_mae: {tr_mae:.4f}; ',
#       f'vl_mae: {te_mae:.4f}; ')

In [None]:
# Plot the per-epoch training and validation MAE collected by the training
# loop. The history lists are named `*_list`; `float(...)` also accepts
# one-element tensors, so entries that are still tensors are plotted as numbers.
plt.plot([float(v) for v in tr_mae_list]);
plt.plot([float(v) for v in te_mae_list]);

In [33]:
tr_loss

0.03142730247974396

In [44]:
# Scratch cell: check that `print` accepts an unpacked list next to other
# positional arguments (the pattern used for the per-epoch log line).
a = 1
b = 'hello'
l = list(range(5))
print(a, b, *l)

1 hello 0 1 2 3 4
