# Training of DNNs with different weights, predictor sets, and cost functions


## Load necessary packages

In [1]:
# Import necessary packages
import pandas as pd
import numpy as np
import math
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from torch.optim import lr_scheduler
import pytorchltr.loss
import time

# Import custom functions
import models

## Global definitions

In [None]:
# Change working directory
%cd '/path/to/working_directory'

In [20]:
# Define input data file
data_file = 'Sample_dat2.csv'

# Name of the label column in the data file
resp = 'Label'
mtype = ['full','weighted','ndcg']

# Possibilities are
# mtype[0]: 'full', 'eco' -> predictor sets
# mtype[1]: 'weighted', 'unweighted' -> observation weights
#                                       Note that option 'unweighted' expects a 'weighted' model object (output) from 
#                                       which training is continued
# mtype[2]: 'cel', 'ndcg' -> cost functions

# Fail early on a misspelled option instead of hitting an undefined name
# several cells further down.
mtype_options = (('full', 'eco'), ('weighted', 'unweighted'), ('cel', 'ndcg'))
if len(mtype) != len(mtype_options):
    raise ValueError('mtype must have exactly {} entries, got {}'.format(
        len(mtype_options), len(mtype)))
for position, (choice, allowed) in enumerate(zip(mtype, mtype_options)):
    if choice not in allowed:
        raise ValueError('mtype[{}] must be one of {}, got {!r}'.format(
            position, allowed, choice))

# Define Features
# Predictors shared by both predictor sets
feana = ['TRI','TAVE_Summer','PREC_Winter','PREC_Summer','FCF',
         'SoilR','SoilF',
         'ForestQ95','NDVI','NDVI_SD']

# The 'full' set additionally contains the two day-of-year terms
if mtype[0] == 'full':
    feana += ["Doy_sin","Doy_cos"]

# Define Output name
whole_mod='Model_' + mtype[0] + '_' + mtype[1] + '_' + mtype[2] + '.pth'

## Parameter dictionary for Fitting
# Main
params = {}

params['num_workers'] = 17
params['lr'] = {}
# Use the GPU when a CUDA-ready one is available, otherwise fall back to the CPU
params['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'
params['num_filts'] = 380
params['batch_size'] = 250

# Continued ('unweighted') training runs for twice as many epochs
if mtype[1] == 'unweighted':
    params['num_epochs'] = 100
else:
    params['num_epochs'] = 50

## Design DataLoader

In [21]:
class FullDataset(torch.utils.data.Dataset):
    """Tabular dataset holding the rows of one split ('Set' value) of a csv file."""

    def __init__(self, data_file, label, phase, feats):
        """
        Args:
            data_file (string): Path to the csv file with annotations.
            label: name of the column used as (integer) label
            phase: value of the 'Set' column selecting the rows to keep
                   (e.g. training or test data)
            feats: list of column names used as features
        """
        table = pd.read_csv(data_file, low_memory=False)

        # Keep only the requested split and renumber its rows 0..n-1 so that
        # positional indices from a sampler can be used with .loc
        split_rows = table.loc[table['Set'] == phase].reset_index(drop=True)

        self.feat_frame = split_rows[feats]
        self.labels = split_rows[label].astype(int)

    def __len__(self):
        """Number of rows in the selected split."""
        return len(self.feat_frame)

    def __getitem__(self, idx):
        """Return (features, label) of row(s) `idx` as numpy arrays."""
        index = idx.tolist() if torch.is_tensor(idx) else idx

        feats = np.array(self.feat_frame.loc[index, :])
        labels = np.array(self.labels.loc[index])

        return feats, labels

## Create the Datasets

In [22]:
# Build the training and the test dataset from the same file; they differ
# only in the value of the 'Set' column that is selected.
dase_train, dase_test = (
    FullDataset(data_file=data_file, label=resp, phase=split, feats=feana)
    for split in ('train', 'test')
)

# Number of observations per split, used to average loss and accuracy
dataset_sizes = {
    split: len(dataset)
    for split, dataset in (('train', dase_train), ('test', dase_test))
}

In [None]:
# Show the number of observations in the training and the test split
dataset_sizes

## Define balanced sampling for the DataLoader used for training

In [24]:
# Label of every training observation, in dataset order
target = dase_train.labels.to_numpy()

# Sampling weight per label: the inverse of the label's frequency in the training data
cls, class_sample_count = np.unique(target, return_counts=True)
cls = cls.astype(int)
weight = 1. / class_sample_count
weidict = dict(zip(cls, weight))

# Look up the weight of each single observation
samples_weight = torch.from_numpy(
    np.array([weidict[label] for label in target])
).double()

# Add definitions
params.update(
    sampler=WeightedRandomSampler(weights=samples_weight,
                                  num_samples=len(samples_weight)),
    num_classes=len(cls),
    freqs=1/samples_weight,  # back-transformed weights, i.e. the class counts
)

## Define dataloaders

In [25]:
# Settings shared by every loader
loader_kwargs = dict(batch_size=params['batch_size'],
                     num_workers=params['num_workers'])

# Only the training loader depends on the weighting scheme:
# 'weighted' draws observations through the class-balancing sampler,
# 'unweighted' simply shuffles the training data.
if mtype[1] == 'weighted':
    train_loader = DataLoader(dase_train,
                              shuffle=False,
                              sampler=params['sampler'],
                              **loader_kwargs)
elif mtype[1] == 'unweighted':
    train_loader = DataLoader(dase_train,
                              shuffle=True,
                              **loader_kwargs)
else:
    train_loader = None

# The test data are always iterated in their original order
if train_loader is not None:
    dataloaders = {'train': train_loader,
                   'test': DataLoader(dase_test,
                                      shuffle=False,
                                      **loader_kwargs)}

## Define the model

In [26]:
# Number of input features equals the number of selected predictors
params['num_feats'] = len(feana)

if mtype[1] == 'weighted':
    # Start from a freshly initialised network on the configured device
    model = models.SDMNet(in_features=params['num_feats'], num_classes=params['num_classes'], 
                      num_filts=params['num_filts']).to(params['device'])
elif mtype[1] == 'unweighted':
    # Continue training from the corresponding 'weighted' model saved earlier
    mod_file = whole_mod.replace('unweighted', 'weighted')
    # map_location puts the stored tensors on the configured device, so a model
    # saved on a GPU can also be resumed on a CPU-only machine (and vice versa).
    # torch.load unpickles the file: only load model files you created yourself.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which
    # rejects whole pickled models - pass weights_only=False there; confirm
    # against the installed version.
    model = torch.load(mod_file, map_location=params['device'])

## Define loss

In [27]:
# Map each cost-function option to the class implementing it
loss_classes = {'cel': nn.CrossEntropyLoss,
                'ndcg': pytorchltr.loss.LambdaNDCGLoss1}

# Instantiate the loss selected by mtype[2]
if mtype[2] in loss_classes:
    criterion = loss_classes[mtype[2]]()

## Define learning rates that change with epochs

In [28]:
# Set up the learning rates: 'loc_lr' is the same for both weighting schemes,
# 'base_lr' is ten times smaller when training is continued ('unweighted')
base_lr_options = {'weighted': 0.25e-2, 'unweighted': 0.25e-3}

if mtype[1] in base_lr_options:
    params['lr']['loc_lr'] = 0.5e-2
    params['lr']['base_lr'] = base_lr_options[mtype[1]]


## Define optimizer & scheduler

In [29]:
# One parameter group per sub-module, both trained with 'loc_lr'.
# Both groups carry an explicit "lr", so the optimizer-level lr ('base_lr')
# only serves as the default for groups that do not define one.
param_groups = [
    {"params": sub_module.parameters(), "lr": params['lr']['loc_lr']}
    for sub_module in (model.feats, model.class_emb)
]
optimizer = torch.optim.SGD(param_groups,
                            lr=params['lr']['base_lr'],
                            momentum=0.9)

# Define learning rate scheduler: halve the learning rates when the monitored
# value has not improved for 15 consecutive scheduler steps
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                           mode='min',
                                           factor=0.5,
                                           patience=15,
                                           threshold=0.0005,
                                           verbose=True)

## The main model-fitting function

In [30]:
def _ndcg_loss(outputs, labs, criterion, top_k):
    """Ranking loss of the true class against its strongest competitors.

    For every observation a ranking list is built whose first entry is the
    model output of the observed class, followed by the `top_k` largest
    outputs of all other classes. Only the first entry is marked as relevant.

    Args:
        outputs: model outputs, one row per observation and one column per class
        labs: observed class index of every observation
        criterion: ranking loss called as criterion(scores, relevance, n)
        top_k: maximum number of competing classes kept per observation

    Returns:
        Mean of the per-observation losses returned by `criterion`.
    """
    batch_size, num_classes = outputs.size()

    # There are only num_classes - 1 competitors; never ask for more, otherwise
    # the score and relevance tensors below would differ in width
    k = min(top_k, num_classes - 1)

    # Output of the observed class
    lab_prob = torch.gather(outputs, 1, labs.view(-1, 1))

    # Outputs of all remaining classes (observed class masked out)
    mask = torch.ones_like(outputs).scatter_(1, labs.unsqueeze(1), 0.)
    zero_prob = outputs[mask.bool()].view(batch_size, num_classes - 1)

    # Observed class first, then the k highest-scoring other classes
    srted, _ = torch.sort(zero_prob, descending=True)
    scores = torch.cat((lab_prob, srted[:, :k]), 1)

    # NOTE(review): n is set to k although each list holds k + 1 entries; this
    # is kept as in the original code - confirm against the padding semantics
    # of the ranking loss.
    n = torch.full((batch_size,), k, dtype=torch.long, device=outputs.device)

    relev = torch.zeros(batch_size, k + 1, device=outputs.device)
    relev[:, 0] = 1

    return criterion(scores, relev, n).mean()


# Define a function to set up the training process
def train_model(model, criterion, optimizer, params):
    """Train `model` and evaluate it on the test data after every epoch.

    Besides its arguments the function reads the module-level objects
    `dataloaders`, `dataset_sizes`, `scheduler` and `mtype`.

    Args:
        model: network to be trained (modified in place)
        criterion: loss matching mtype[2] ('cel' or 'ndcg')
        optimizer: optimizer holding the parameters of `model`
        params: dictionary with the entries 'num_epochs' and 'device';
                the optional entry 'ndcg_k' (default 500) sets the number of
                competing classes used by the 'ndcg' loss

    Returns:
        The trained model.

    Raises:
        ValueError: if mtype[2] is neither 'cel' nor 'ndcg'.
    """
    start = time.time()

    device = params['device']
    num_epochs = params['num_epochs']
    ndcg_k = params.get('ndcg_k', 500)

    loss_type = mtype[2]
    if loss_type not in ('cel', 'ndcg'):
        raise ValueError(
            "mtype[2] must be 'cel' or 'ndcg', got {!r}".format(loss_type))

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0  # exact count of correctly classified observations

            # Iterate over data.
            for feats, labels in dataloaders[phase]:
                feats = feats.to(device)
                labs = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; gradients are only tracked while training
                with torch.set_grad_enabled(is_train):

                    outputs = model(feats)

                    if loss_type == 'cel':
                        loss = criterion(outputs, labs)
                    else:
                        loss = _ndcg_loss(outputs, labs, criterion, ndcg_k)

                    # Predicted class = class with the largest output
                    _, pred_classes = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so that the epoch
                # loss is a per-observation average
                running_loss += loss.item() * feats.size(0)
                running_corrects += torch.sum(pred_classes == labs).item()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            # The learning rate schedule follows the loss on the test data
            if phase == 'test':
                scheduler.step(epoch_loss)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

    time_el = time.time() - start
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_el // 60, time_el % 60))

    return model

## Fit the model

In [None]:
# Run the training loop and keep the model object it returns
model = train_model(model, criterion, optimizer, params)

## Save

In [15]:
# Save the whole model object (not only its parameters) under the file name built in `whole_mod`
torch.save(model, whole_mod) 