# Deep Learning Models etc

In [1]:
from IPython.display import HTML
from tqdm.notebook import tqdm
import import_ipynb
import random
from utils import MyDS
import math, time, os
import pandas as pd
from matplotlib import pyplot as plt
from pyarrow.parquet import ParquetFile
import pyarrow as pa

importing Jupyter notebook from utils.ipynb


In [2]:
from matplotlib import pyplot as plt

In [3]:
import numpy as np
import torch
from torch import nn
from torch import optim
from IPython import display
import psutil, gc
# utils.hide_toggle('Imports 2')

In [4]:
import psutil
def get_memory_usage():
    """Describe current system memory usage as a one-line string.

    Reads a single snapshot from ``psutil.virtual_memory()`` and reports the
    used, total and available amounts, each divided by 1024**3 (i.e. in GiB)
    and left unrounded.
    """
    bytes_per_gib = 1024 ** 3
    snapshot = psutil.virtual_memory()
    used_gib = snapshot.used / bytes_per_gib
    total_gib = snapshot.total / bytes_per_gib
    available_gib = snapshot.available / bytes_per_gib
    return '{} of {}; {} available'.format(used_gib, total_gib, available_gib)

Compute accuracy of predictions.

In [None]:
def accuracy(Net,X_test,y_test,verbose=True):
    """Return the fraction of rows in ``X_test`` that ``Net`` classifies correctly.

    The predicted class of each row is the argmax over dimension 1 of
    ``Net(X_test)``; it is compared element-wise with ``y_test``.

    Parameters:
        Net: model to evaluate; called as ``Net(X_test)``.
        X_test: input batch; ``X_test.shape[0]`` is used as the sample count.
        y_test: tensor of target class indices, one per row of ``X_test``.
        verbose: when true, print the number of correct predictions and the
            sample count.

    Returns:
        ``correct / X_test.shape[0]`` as a Python float.

    The forward pass runs in eval mode and without gradient tracking, so no
    autograd graph is kept alive. The model's previous train/eval mode is
    restored afterwards (a model that was in eval mode stays in eval mode).
    """
    # Remember the caller's mode so evaluation does not silently flip it.
    was_training = getattr(Net, 'training', True)
    Net.eval()
    try:
        with torch.no_grad():
            y_pred = Net(X_test)
        predicted = torch.max(y_pred, 1)[1]
        correct = (predicted == y_test).float().sum().item()
    finally:
        Net.train(was_training)
    m = X_test.shape[0]
    if verbose: print(correct,m)
    return correct/m
# utils.hide_toggle('Function: accuracy')

In [None]:
def accuracy_variable(Net,data):
    """Return the mean per-batch accuracy of ``Net`` over ``data``.

    Parameters:
        Net: model passed through to ``accuracy``.
        data: iterable yielding ``(X, y)`` batches.

    Returns:
        The unweighted average of ``accuracy(Net, X, y)`` over all batches.
        Batches of different sizes therefore contribute equally.

    Raises:
        ZeroDivisionError: if ``data`` yields no batches.
    """
    step=0
    acc=0
    for (X,y) in data:
        # ``accuracy`` performs the forward pass itself; running Net(X) here
        # as well would only duplicate the computation.
        step+=1
        acc+=accuracy(Net,X,y,verbose=False)
    return acc/step

Generic training loop

In [None]:
def TrainVals(Net,X_vals,y_vals,epochs=20,lr=5e-2,Loss=nn.NLLLoss(),verbose=False,device='cpu',batch_size=32):
    """Train ``Net`` on in-memory arrays using consecutive mini-batches.

    Each epoch walks through ``X_vals`` / ``y_vals`` in order, in slices of
    ``batch_size`` rows (the last slice may be shorter), and takes one step of
    ``Net.optimizer`` per slice.

    Parameters:
        Net: model exposing an ``optimizer`` attribute.
        X_vals: array-like with a ``shape`` attribute; sliced along axis 0 and
            converted to a float32 tensor.
        y_vals: array-like of class labels; sliced like ``X_vals`` and
            converted to an int64 tensor.
        epochs: number of passes over the data.
        lr: unused; kept for interface compatibility (the learning rate is
            whatever ``Net.optimizer`` was built with).
        Loss: callable ``Loss(prediction, target)`` returning a scalar tensor.
        verbose: when true, also track training accuracy per batch and print a
            progress line plus memory usage after every epoch.
        device: device the model and every batch are moved to.
        batch_size: number of rows per mini-batch.

    Returns:
        ``(Net, losses, accs)`` where ``losses`` holds the mean batch loss of
        each epoch and ``accs`` the mean batch accuracy of each epoch (empty
        unless ``verbose`` is true).

    Raises:
        ValueError: if ``X_vals`` has no rows.
    """
    losses = []
    accs = []
    Net.to(device)
    N=X_vals.shape[0]
    if N==0:
        raise ValueError('X_vals contains no samples')
    for e in range(epochs):
        step=0
        tot_loss=0.0
        acc=0.0
        start_time = time.time()
        # Stride by batch_size so batches are disjoint and cover every row once.
        for start in range(0,N,batch_size):
            end=min(start+batch_size,N)
            X=torch.tensor(X_vals[start:end],dtype=torch.float32).to(device)
            y=torch.tensor(y_vals[start:end],dtype=torch.int64).to(device)
            y_pred = Net(X)
            loss = Loss(y_pred,y)
            Net.optimizer.zero_grad()
            loss.backward()
            Net.optimizer.step()
            step+=1
            # Accumulate a plain float so no autograd history is retained.
            tot_loss+=loss.item()
            if verbose: acc+=accuracy(Net,X,y,verbose=False)
        t = time.time()-start_time
        l = tot_loss/step
        losses += [l]
        if verbose:
            a = acc/step
            accs += [a]
            print('Epoch  % 2d Loss: %2.5e Accuracy: %2.5f Epoch Time: %2.5f' %(e,l,a,t))
            print(get_memory_usage())
            gc.collect()
            display.clear_output(wait=True)
    return Net,losses,accs
# utils.hide_toggle('Function Train')

Multi-layer perceptron with ReLU non-linearities; for classification or regression.

In [None]:
class MLP(nn.Module):
    """Fully connected network with ReLU activations between hidden layers.

    ``dims`` lists the layer widths from input to output. Every consecutive
    pair except the last becomes ``Linear`` + ``ReLU``; the last pair becomes
    a plain ``Linear`` followed by a task-specific head:

    * ``task='classification'``: ``LogSoftmax`` over dimension 1.
    * ``task='regression'``: an extra ``Linear(dims[-1], 1)``.
    * anything else: no extra head.

    An Adam optimizer over all parameters is created and stored as
    ``self.optimizer``.

    Parameters:
        dims: sequence of at least two layer widths.
        task: selects the output head as described above.
        lr: learning rate for the Adam optimizer.
        weight_decay: weight decay for the Adam optimizer.

    Raises:
        ValueError: if ``dims`` has fewer than two entries.
    """
    def __init__(self,dims=[5,3,2],task='classification',lr=1e-3,weight_decay=0):
        super(MLP,self).__init__()
        if len(dims)<2:
            raise ValueError('dims must contain at least an input and an output size')
        # Copy so the instance never aliases the shared default list.
        self.dims=list(dims)
        self.n = len(self.dims)-1
        self.task=task
        self.layers=nn.ModuleList()
        # Hidden layers: every consecutive pair of widths except the last one.
        for d_in,d_out in zip(self.dims[:-2],self.dims[1:-1]):
            self.layers.append(nn.Linear(d_in,d_out))
            self.layers.append(nn.ReLU())
        # Output layer, indexed from the end so a two-entry ``dims`` (no hidden
        # layers) works as well.
        self.layers.append(nn.Linear(self.dims[-2],self.dims[-1]))
        if task=='classification':
            self.layers.append(nn.LogSoftmax(dim=1))
        elif task=='regression':
            self.layers.append(nn.Linear(self.dims[-1],1))
        self.optimizer = optim.Adam(self.parameters(),lr=lr,weight_decay=weight_decay)
    def forward(self,x):
        """Apply every layer in ``self.layers`` in order and return the result."""
        for l in self.layers:
            x = l(x)
        return(x)
# utils.hide_toggle('Class MLP')

Recurrent network using RNN/LSTM

In [None]:
class SimpleRNN(nn.Module):
    """Single ReLU ``nn.RNN`` followed by a linear layer and log-softmax.

    ``forward`` returns the log-probabilities computed at the last time step
    only. The instance also owns an Adam optimizer (``self.optimizer``) built
    over its own parameters with learning rate ``lr``.
    """
    def __init__(self, input_size, hidden_size, output_size,lr):
        super().__init__()
        self.input_size=input_size
        # Sub-modules stored as attributes are registered automatically, so
        # their parameters are visible to ``self.parameters()`` below.
        self.rnn = nn.RNN(input_size, hidden_size, nonlinearity='relu', batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)
        self.logsoft = nn.LogSoftmax(dim=-1)
        self.optimizer = optim.Adam(self.parameters(),lr=lr)
    def forward(self, x):
        """Run the sequence through the RNN and score its final time step.

        When ``input_size`` is 1 a trailing feature dimension is appended to
        ``x`` first. No initial hidden state is supplied, and the final hidden
        state returned by the RNN is discarded.
        """
        sequence = x.unsqueeze(-1) if self.input_size==1 else x
        per_step_hidden, _ = self.rnn(sequence)
        per_step_log_probs = self.logsoft(self.linear(per_step_hidden))
        return per_step_log_probs[:,-1,:]

In [None]:
class SimpleLSTM(nn.Module):
    """Single ``nn.LSTM`` followed by a linear layer and log-softmax.

    ``forward`` returns the log-probabilities computed at the last time step.
    The instance owns an Adam optimizer (``self.optimizer``) over its own
    parameters with learning rate ``lr``.
    """
    def __init__(self, input_size, hidden_size, output_size, lr):
        super().__init__()
        self.input_size=input_size
        self.lstm = torch.nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, output_size)
        self.logsoft = nn.LogSoftmax(dim=-1)
        self.optimizer = optim.Adam(self.parameters(),lr=lr)
    def forward(self, x):
        """Return last-time-step log-probabilities for a batch of sequences.

        When ``input_size`` is 1 a trailing feature dimension is appended to
        ``x`` before it is fed to the LSTM.
        """
        if self.input_size==1: x=x.unsqueeze(-1)
        h = self.lstm(x)[0]
        x = self.linear(h)
        x = self.logsoft(x)
        x = x[:,-1,:]
        return x
    def get_states_across_time(self, x):
        """Collect the LSTM hidden and cell state after every time step.

        The sequence is fed one step at a time (dimension 1 of ``x``), with
        gradients disabled, carrying the state forward between steps.

        Parameters:
            x: batch of sequences. When ``input_size`` is 1, the 2-D form
                accepted by ``forward`` is also accepted here and gets a
                trailing feature dimension; 3-D input is used unchanged.

        Returns:
            ``(h, c)``: the per-step hidden states and cell states, each
            concatenated along dimension 0 in time order.
        """
        # Mirror forward(): accept the same feature-less layout for scalar
        # inputs, without touching input that already has a feature axis.
        if self.input_size==1 and x.dim()==2: x=x.unsqueeze(-1)
        h_c = None
        h_list, c_list = list(), list()
        with torch.no_grad():
            for t in range(x.size(1)):
                # Index with a list to keep the time dimension (length 1).
                h_c = self.lstm(x[:, [t], :], h_c)[1]
                h_list.append(h_c[0])
                c_list.append(h_c[1])
            h = torch.cat(h_list)
            c = torch.cat(c_list)
        return h, c

In [None]:
class Transformer(nn.Module):
    """Transformer-encoder sequence classifier.

    Pipeline of ``forward``: linear projection to ``hidden_size`` -> ReLU ->
    additive sinusoidal positional encoding -> ``nn.TransformerEncoder`` ->
    max over the time axis -> ReLU -> linear -> log-softmax.

    Parameters:
        input_size: number of features per time step.
        hidden_size: model width (``d_model``); also used as the encoder's
            feed-forward width. Must be divisible by ``nhead``.
        output_size: number of output classes.
        lr: learning rate of the Adam optimizer stored as ``self.optimizer``.
        num_layers: number of encoder layers.
        nhead: number of attention heads (default 8).
    """
    def __init__(self, input_size, hidden_size, output_size,lr,num_layers=1,nhead=8):
        super().__init__()
        self.input_size = input_size
        self.d_model = hidden_size
        self.linear_1 = nn.Linear(self.input_size, self.d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=self.d_model, nhead=nhead, dim_feedforward=hidden_size)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.relu = nn.ReLU()
        self.linear_2 = nn.Linear(self.d_model, output_size)
        self.logsoft = nn.LogSoftmax(dim=-1)
        self.optimizer = optim.Adam(self.parameters(),lr=lr)
    def _positional_encoding(self, seq_len, device, dtype):
        """Build a ``(seq_len, 1, d_model)`` sinusoidal position table on ``device``."""
        position = torch.arange(seq_len, device=device, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, self.d_model, 2, device=device, dtype=torch.float32)
            * (-math.log(10000.0) / self.d_model))
        angles = position * div_term
        pe = torch.zeros(seq_len, 1, self.d_model, device=device, dtype=torch.float32)
        pe[:, 0, 0::2] = torch.sin(angles)
        # For an odd d_model there is one fewer odd column than even columns.
        pe[:, 0, 1::2] = torch.cos(angles)[:, :self.d_model // 2]
        return pe.to(dtype)
    def forward(self, x):
        """Return log-probabilities for a batch-first batch of sequences.

        When ``input_size`` is 1 a trailing feature dimension is appended to
        ``x`` first. The input is then permuted to sequence-first layout for
        the encoder and transposed back before pooling over time.
        """
        if self.input_size==1: x=x.unsqueeze(-1)
        x = x.permute(1, 0, 2)
        x = self.linear_1(x)
        x = self.relu(x)
        # The table is sized to the actual sequence length and created on the
        # input's device, so any length and any device work.
        x = x + self._positional_encoding(x.size(0), x.device, x.dtype)
        x = self.transformer_encoder(x)
        x = x.transpose(1, 0)
        x = x.max(1)[0]
        x = self.relu(x)
        x = self.linear_2(x)
        x = self.logsoft(x)
        return x

In [None]:
def Train(Net,data,epochs=20,lr=5e-2,Loss=nn.NLLLoss(),verbose=False,device='cpu',
          val_ds=None,plot_accs=False,plot_losses=False):
    """Train ``Net`` on an iterable of batches, optionally plotting progress.

    Parameters:
        Net: model exposing an ``optimizer`` attribute.
        data: re-iterable yielding ``(X, y)`` tensor batches (iterated once
            per epoch).
        epochs: number of passes over ``data``.
        lr: unused; kept for interface compatibility (the learning rate is
            whatever ``Net.optimizer`` was built with).
        Loss: callable ``Loss(prediction, target)`` returning a scalar tensor.
        verbose: when true, track per-batch training accuracy and print a
            progress line after every epoch (clearing the previous output).
        device: device the model, the batches and the validation data are
            moved to.
        val_ds: optional object with ``samples`` and ``labels`` attributes,
            used only for the plots.
        plot_accs: with ``val_ds`` given, plot validation accuracy (red) and
            training accuracy (blue; only recorded when ``verbose``) after
            every epoch.
        plot_losses: with ``val_ds`` given, plot validation loss (red) and
            training loss (blue) after every epoch.

    Returns:
        ``(Net, losses, accs)``: the model, the mean batch loss per epoch, and
        the mean batch accuracy per epoch (empty unless ``verbose``).

    Raises:
        ValueError: if ``data`` yields no batches.
    """
    losses = []
    val_losses=[]
    accs = []
    val_accL=[]
    Net.to(device)
    # Move validation data to the training device once, and only if needed.
    use_val = val_ds is not None and (plot_accs or plot_losses)
    if use_val:
        val_X = torch.as_tensor(val_ds.samples).to(device)
        val_y = torch.as_tensor(val_ds.labels).to(device)
    for e in range(epochs):
        step=0
        tot_loss=0.0
        acc=0.0
        start_time = time.time()
        for (X,y) in data:
            X=X.to(device)
            y=y.to(device)
            y_pred = Net(X)
            loss = Loss(y_pred,y)
            Net.optimizer.zero_grad()
            loss.backward()
            Net.optimizer.step()
            step+=1
            # Accumulate a plain float so no autograd history is retained.
            tot_loss+=loss.item()
            if verbose: acc+=accuracy(Net,X,y,verbose=False)
        if step==0:
            raise ValueError('data yielded no batches')
        t = time.time()-start_time
        l = tot_loss/step
        losses += [l]
        if verbose:
            a = acc/step
            accs += [a]
            display.clear_output(wait=True)
            print('Epoch  % 2d Loss: %2.5e Accuracy: %2.5f Epoch Time: %2.5f' %(e,l,a,t))
        if plot_accs and val_ds is not None:
            val_accL+=[accuracy(Net,val_X,val_y,verbose=False)]
            plt.plot(np.array(val_accL),color='red')
            plt.plot(np.array(accs),color='blue')
            plt.show()
        if plot_losses and val_ds is not None:
            # Evaluate in eval mode without gradients, then restore the mode.
            was_training = Net.training
            Net.eval()
            try:
                with torch.no_grad():
                    val_loss = Loss(Net(val_X),val_y).item()
            finally:
                Net.train(was_training)
            val_losses+=[val_loss]
            plt.plot(val_losses,color='red')
            plt.plot(losses,color='blue')
            plt.show()
    return Net,losses,accs

In [None]:
def TrainInner(Net,data,epochs=20,lr=5e-2,Loss=nn.NLLLoss(),verbose=False,device='cpu',
                ds_test=None,checkpoint_path='/tmp//model.pth',optimizer=None):
    """Train ``Net`` with optional per-epoch validation and checkpointing.

    Parameters:
        Net: model to train; its ``optimizer`` attribute is used when
            ``optimizer`` is not given.
        data: re-iterable yielding ``(X, y)`` tensor batches (iterated once
            per epoch).
        epochs: number of passes over ``data``.
        lr: unused; kept for interface compatibility.
        Loss: callable ``Loss(prediction, target)`` returning a scalar tensor.
        verbose: when true, print validation accuracy (if available), the
            checkpoint location (if any) and a progress line every epoch.
        device: device the model, the batches and the validation data are
            moved to.
        ds_test: optional object with ``samples`` and ``labels`` attributes;
            when given, validation accuracy is computed after every epoch.
        checkpoint_path: where ``Net.state_dict()`` is saved after every
            epoch; ``None`` disables saving.
        optimizer: optimizer to step; defaults to ``Net.optimizer``.

    Returns:
        ``(Net, losses, accs, val_accs)``: the model, mean batch loss per
        epoch, mean batch training accuracy per epoch, and validation accuracy
        per epoch (empty when ``ds_test`` is ``None``).

    Raises:
        ValueError: if ``data`` yields no batches.
    """
    if optimizer is None: optimizer=Net.optimizer
    losses = []
    accs = []
    val_accs=[]
    Net.to(device)
    if ds_test is not None:
        # Keep validation tensors on the same device as the model.
        test_X = torch.as_tensor(ds_test.samples).to(device)
        test_y = torch.as_tensor(ds_test.labels).to(device)
    for e in range(epochs):
        step=0
        tot_loss=0.0
        acc=0.0
        start_time = time.time()
        for (X,y) in data:
            X=X.to(device)
            y=y.to(device)
            y_pred = Net(X)
            loss = Loss(y_pred,y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            step+=1
            # Accumulate a plain float so no autograd history is retained.
            tot_loss+=loss.item()
            acc+=accuracy(Net,X,y,verbose=False)
        if step==0:
            raise ValueError('data yielded no batches')
        t = time.time()-start_time
        l = tot_loss/step
        a = acc/step
        accs += [a]
        losses += [l]
        if ds_test is not None:
            val_acc=accuracy(Net,test_X,test_y,verbose=False)
            val_accs+=[val_acc]
            # Only report validation accuracy when there is one to report.
            if verbose: print(f'Validation accuracy {val_acc} after epoch {e}')
        if checkpoint_path is not None:
            if verbose: print(f'Saving model checkpoint after epoch {e} at {checkpoint_path}')
            torch.save(Net.state_dict(),checkpoint_path)
        if verbose:
            print('Epoch  % 2d Loss: %2.5e Accuracy: %2.5f Epoch Time: %2.5f' %(e,l,a,t))
    return Net,losses,accs,val_accs

In [None]:
class TrainingMonitor():
    """Persist training curves to CSV files so they can be plotted at any time.

    Three files are maintained inside ``dir``: ``losses.csv``, ``accs.csv``
    and ``val_accs.csv``. Each has the columns ``epoch`` and ``val`` and is
    rewritten in full on every ``append`` call.
    """
    def __init__(self,dir='/tmp/'):
        """Start a fresh monitor, deleting curve files left over in ``dir``."""
        self.dir=dir
        for name in ('losses.csv','accs.csv','val_accs.csv'):
            try:
                os.remove(os.path.join(self.dir,name))
            except FileNotFoundError:
                # Nothing to clean up on a first run.
                pass
        self.cols=['epoch','val']
        self.lossf=pd.DataFrame(columns=self.cols)
        self.accf=pd.DataFrame(columns=self.cols)
        self.val_accf=pd.DataFrame(columns=self.cols)
    def _extend(self,frame,e,values):
        """Return ``frame`` with one ``[e, value]`` row added per entry of ``values``."""
        values=list(values)
        if not values:
            return frame
        rows=pd.DataFrame([[e,v] for v in values],columns=self.cols)
        # Skip the concat while the accumulated frame is still empty.
        if frame.empty:
            return rows
        return pd.concat([frame,rows],ignore_index=True)
    def append(self,e,lossL=[],accL=[],val_accL=[]):
        """Record new values for epoch ``e`` and rewrite all three CSV files.

        Parameters:
            e: epoch label stored in the ``epoch`` column of every new row.
            lossL: loss values to add to ``losses.csv``.
            accL: training accuracies to add to ``accs.csv``.
            val_accL: validation accuracies to add to ``val_accs.csv``.
        """
        self.lossf=self._extend(self.lossf,e,lossL)
        self.lossf.to_csv(os.path.join(self.dir,'losses.csv'),index=False)
        self.accf=self._extend(self.accf,e,accL)
        self.accf.to_csv(os.path.join(self.dir,'accs.csv'),index=False)
        self.val_accf=self._extend(self.val_accf,e,val_accL)
        self.val_accf.to_csv(os.path.join(self.dir,'val_accs.csv'),index=False)
    @staticmethod
    def plot(dir='/tmp/',curve=['losses','accs','val_accs']):
        """Plot the ``val`` column of the requested curve files found in ``dir``.

        This is a static method: it reads the CSV files from the ``dir``
        argument (not from any instance's ``dir``), so it can be called as
        ``TrainingMonitor.plot()`` without a monitor object.

        Parameters:
            dir: directory containing the CSV files.
            curve: base names (without ``.csv``) of the curves to draw.
        """
        for c in curve:
            df=pd.read_csv(os.path.join(dir,c+'.csv'))
            plt.plot(df.val.values,label=c)
        plt.legend(loc="best")
        plt.show()

In [None]:
def TrainCSV(net,filename,feature_names,target='target',batch_size=32,row_batch_size=1000,epochs=20,
                Loss=nn.NLLLoss(),verbose=True,device='cpu',txfTarget=lambda x:int(x*4),debug=False,
                checkpoint_path='/tmp/model.pth',validation_data=None,optimizer=None,monitor=None):
    """Train ``net`` on a CSV file processed in chunks of rows.

    Every epoch the file is read in chunks of ``row_batch_size`` rows; the
    chunks are visited last-to-first and ``TrainInner`` is run for one epoch
    on each. The rows are expected to be pre-shuffled; only the order inside
    a chunk is shuffled here.

    NOTE: all chunks of the file are held in memory at once so they can be
    visited in reverse order.

    Parameters:
        net: model to train (passed to ``TrainInner``).
        filename: path of the CSV file.
        feature_names: columns used as model input.
        target: column holding the raw target value.
        batch_size: mini-batch size inside each chunk.
        row_batch_size: number of CSV rows per chunk.
        epochs: number of passes over the file.
        Loss: loss callable forwarded to ``TrainInner``.
        verbose: print validation accuracy and checkpoint messages per epoch.
        device: device forwarded to ``TrainInner``.
        txfTarget: maps each raw target value to an integer class label.
        debug: unused; kept for interface compatibility.
        checkpoint_path: where ``net.state_dict()`` is saved after each epoch;
            ``None`` disables saving.
        validation_data: optional table indexable by ``feature_names`` and
            ``target``; when omitted, no validation is performed.
        optimizer: optimizer forwarded to ``TrainInner``.
        monitor: optional object whose ``append(epoch, losses, accs,
            val_accs)`` is called after every chunk.

    Returns:
        The trained ``net``.
    """
    # Validation is optional: only build the dataset when data was supplied.
    ds_test=None
    if validation_data is not None:
        ds_test=MyDS(validation_data[feature_names].values,validation_data[target].values,task='regression')
        ds_test.labels=torch.LongTensor([txfTarget(l) for l in ds_test.labels])
    val_acc=None
    for e in range(epochs):
        chunksL=list(pd.read_csv(filename,chunksize=row_batch_size))
        for df in chunksL[::-1]:
            ds_train=MyDS(df[feature_names].values,df[target].values,task='regression')
            ds_train.labels=torch.LongTensor([txfTarget(l) for l in ds_train.labels])
            dsloader = torch.utils.data.DataLoader(dataset=ds_train,batch_size=batch_size,shuffle=True)
            net,losses,accs,val_accs=TrainInner(net,dsloader,epochs=1,verbose=False,ds_test=ds_test,
                               Loss=Loss,device=device,checkpoint_path=None,
                               optimizer=optimizer)
            if monitor is not None:
                monitor.append(e,losses,accs,val_accs)
        if ds_test is not None:
            val_acc=accuracy(net,ds_test.samples,ds_test.labels,verbose=False)
            if verbose: print(f'Validation accuracy {val_acc} after epoch {e}')
        if checkpoint_path is not None:
            if verbose: print(f'Saving model checkpoint after epoch {e} at {checkpoint_path}')
            torch.save(net.state_dict(),checkpoint_path)
    # The model has not changed since the last epoch's evaluation, so its
    # result is reported as the final one. Nothing is printed when no
    # validation ran (no validation data, or epochs == 0).
    if val_acc is not None:
        print(f'Final validation accuracy {val_acc} after epoch {e}')
    return net

In [None]:
def TrainParquet(net,filename,feature_names,target='target',batch_size=32,row_batch_size=1000,epochs=20,
                Loss=nn.NLLLoss(),verbose=True,device='cpu',txfTarget=lambda x:int(x*4),debug=False,
                checkpoint_path='/tmp/model.pth',validation_data=None,optimizer=None,monitor=None):
    """Train ``net`` on a Parquet file processed in batches of rows.

    Every epoch the file is read in record batches of ``row_batch_size`` rows
    (only the feature and target columns are loaded); the batches are visited
    last-to-first and ``TrainInner`` is run for one epoch on each. The rows
    are expected to be pre-shuffled; only the order inside a batch is
    shuffled here.

    NOTE: all record batches of the file are held in memory at once so they
    can be visited in reverse order.

    Parameters:
        net: model to train (passed to ``TrainInner``).
        filename: path of the Parquet file.
        feature_names: columns used as model input.
        target: column holding the raw target value.
        batch_size: mini-batch size inside each row batch.
        row_batch_size: number of rows per record batch.
        epochs: number of passes over the file.
        Loss: loss callable forwarded to ``TrainInner``.
        verbose: print validation accuracy and checkpoint messages per epoch.
        device: device forwarded to ``TrainInner``.
        txfTarget: maps each raw target value to an integer class label.
        debug: unused; kept for interface compatibility.
        checkpoint_path: where ``net.state_dict()`` is saved after each epoch;
            ``None`` disables saving.
        validation_data: optional table indexable by ``feature_names`` and
            ``target``; when omitted, no validation is performed.
        optimizer: optimizer forwarded to ``TrainInner``.
        monitor: optional object whose ``append(epoch, losses, accs,
            val_accs)`` is called after every row batch.

    Returns:
        The trained ``net``.
    """
    # Validation is optional: only build the dataset when data was supplied.
    ds_test=None
    if validation_data is not None:
        ds_test=MyDS(validation_data[feature_names].values,validation_data[target].values,task='regression')
        ds_test.labels=torch.LongTensor([txfTarget(l) for l in ds_test.labels])
    # list() lets feature_names be any sequence, not only a list.
    wanted_columns=list(feature_names)+[target]
    val_acc=None
    for e in range(epochs):
        pf=ParquetFile(filename)
        chunksL=[pa.Table.from_batches([chunk]).to_pandas()
                 for chunk in pf.iter_batches(batch_size=row_batch_size,columns=wanted_columns)]
        for df in chunksL[::-1]:
            ds_train=MyDS(df[feature_names].values,df[target].values,task='regression')
            ds_train.labels=torch.LongTensor([txfTarget(l) for l in ds_train.labels])
            dsloader = torch.utils.data.DataLoader(dataset=ds_train,batch_size=batch_size,shuffle=True)
            net,losses,accs,val_accs=TrainInner(net,dsloader,epochs=1,verbose=False,ds_test=ds_test,
                               Loss=Loss,device=device,checkpoint_path=None,
                               optimizer=optimizer)
            if monitor is not None:
                monitor.append(e,losses,accs,val_accs)
        if ds_test is not None:
            val_acc=accuracy(net,ds_test.samples,ds_test.labels,verbose=False)
            if verbose: print(f'Validation accuracy {val_acc} after epoch {e}')
        if checkpoint_path is not None:
            if verbose: print(f'Saving model checkpoint after epoch {e} at {checkpoint_path}')
            torch.save(net.state_dict(),checkpoint_path)
    # The model has not changed since the last epoch's evaluation, so its
    # result is reported as the final one. Nothing is printed when no
    # validation ran (no validation data, or epochs == 0).
    if val_acc is not None:
        print(f'Final validation accuracy {val_acc} after epoch {e}')
    return net