In [None]:
import math
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim.lr_scheduler as lr_scheduler
from torch import optim

In [None]:
def GEOdataLoader(path, bSize= 64):
    """
    Build the training and validation DataLoaders from .npy files.

    Parameters
    ----------
    path : str
        Parent folder containing X_tr.npy, Y_tr.npy, X_va.npy and Y_va.npy.
    bSize : int, optional
        Batch size used for both loaders (default 64).

    Returns
    -------
    tuple
        (trainLoader, valLoader). Both yield (inputs, targets) pairs of
        float32 tensors. The training loader reshuffles every epoch; the
        validation loader iterates in file order so validation metrics are
        reproducible from one run to the next.
    """
    def _load_dataset(x_file, y_file):
        # Arrays are cast to float32 to match the default dtype of nn.Linear.
        features= torch.from_numpy(np.load(os.path.join(path, x_file))).float()
        targets= torch.from_numpy(np.load(os.path.join(path, y_file))).float()
        return torch.utils.data.TensorDataset(features, targets)

    train_data= _load_dataset("X_tr.npy", "Y_tr.npy")
    val_data= _load_dataset("X_va.npy", "Y_va.npy")

    trainLoader= torch.utils.data.DataLoader(train_data, batch_size=bSize, shuffle=True)
    # Shuffling brings nothing at evaluation time and only makes results non-deterministic.
    valLoader= torch.utils.data.DataLoader(val_data, batch_size=bSize, shuffle=False)
    return (trainLoader, valLoader)

In [None]:
# Loss functions (criteria) used in the following experiments:
def tiltedLC(x, y, tau, h):
    """
    Tilted (quantile-weighted) log-cosh loss, averaged over all elements.

    Each error e = y - x contributes log(cosh(e)) scaled by `tau` when e > 0,
    by `1 - tau` when e < 0, and by 0.5 when e == 0.

    Parameters
    ----------
    x : torch.Tensor
        Predictions.
    y : torch.Tensor
        Targets, broadcastable against `x`.
    tau : float
        Tilt applied to positive errors; negative errors get `1 - tau`.
    h : float
        Accepted so every caller can pass the same argument list; it is not
        used by the computation.

    Returns
    -------
    torch.Tensor
        Scalar mean loss.
    """
    e= y-x # errors
    ind= (torch.sign(e)+1)/2 # 0 for e < 0, 0.5 at e == 0, 1 for e > 0
    quantFactor= (1-tau)*(1-ind) + tau*ind
    # log(cosh(e)) == |e| + log(1 + exp(-2|e|)) - log(2). Evaluating it this way
    # never exponentiates a positive number, so large errors no longer overflow
    # to inf the way torch.log(torch.cosh(e)) does in float32.
    abs_e= torch.abs(e)
    logCosh= abs_e + torch.log1p(torch.exp(-2.0*abs_e)) - math.log(2.0)
    return torch.mean(quantFactor*logCosh)

class TiltedLC(nn.Module):
    """Module wrapper exposing the functional `tiltedLC` loss as a criterion object."""

    def __init__(self):
        super().__init__()

    def forward(self, x, y, tau, h):
        """Forward the four arguments unchanged to `tiltedLC` and return its result."""
        loss= tiltedLC(x, y, tau, h)
        return loss


In [None]:
# Global configuration shared by every experiment below.
h= 0.4 # smoothing parameter handed to the log-cosh criterion
tau= 0.5 # tilt handed to the log-cosh criterion
device= ('cuda' if torch.cuda.is_available() else 'cpu')
# Dataset folder. It can be overridden with the LG_DATA_DIR environment variable so
# the notebook is not tied to a single machine; the original location is the default.
DATA_DIR= os.environ.get("LG_DATA_DIR", "/home/aryamanj/Downloads/LGdata")
trainLoader, valLoader= GEOdataLoader(DATA_DIR)
criterion1= TiltedLC()    # tilted log-cosh, called as criterion(outputs, labels, tau, h)
criterion2= nn.L1Loss()   # mean absolute error
criterion3= nn.MSELoss()  # mean squared error
N_EPOCHS= 500

In [None]:
# A new network class for LALR training, that supports returning penultimate activations
class LALRnetwork(nn.Module):
    """
    Two-hidden-layer tanh MLP with dropout that can also return its
    penultimate activations.

    Parameters
    ----------
    size1 : int
        Width of the first hidden layer.
    size2 : int
        Width of the second hidden layer.
    drop : float
        Dropout probability used after each hidden layer.
    in_features : int, optional
        Input width (default 943, the value previously hard-coded).
    out_features : int, optional
        Output width (default 4760, the value previously hard-coded).
    """
    def __init__(self, size1, size2, drop, in_features= 943, out_features= 4760):
        super().__init__()
        # Attribute names are kept as-is so previously saved state_dicts still load.
        self.l1= nn.Linear(in_features, size1)
        self.l2= nn.Dropout(p= drop)
        self.l3= nn.Linear(size1, size2)
        self.l4= nn.Dropout(p= drop)
        self.l5= nn.Linear(size2, out_features)

    def forward(self, x):
        """Return the network output: dropout then the final linear layer on top of `penU`."""
        return self.l5(self.l4(self.penU(x)))

    def penU(self, x):
        """
        Return the tanh activations of the second hidden layer.

        The layers are applied in the same order as in `forward`
        (linear -> tanh -> dropout -> linear -> tanh). Previously dropout was
        applied before each tanh here, which disagreed with `forward` whenever
        the module was in training mode; in eval mode the result is unchanged.
        """
        x= torch.tanh(self.l1(x))
        x= torch.tanh(self.l3(self.l2(x)))
        return x


In [None]:
# Instantiating the model, optimizer and loss/validation histories of every
# experiment: {CLR, LALR, LBFGS} training x {LC, L1, MSE} loss.

size1,size2= 300,300

def _newExperiment(optimizerClass):
    """Return a fresh (model, optimizer, lossList, valList) tuple for one experiment.

    The optimizer is bound to the parameters of the model created here, so each
    experiment trains its own network.
    """
    model= LALRnetwork(size1, size2, 0.1).to(device)
    optimizer= optimizerClass(model.parameters(), lr= 0.1)
    return model, optimizer, [], []

# NOTE(review): the LBFGS experiments were previously built with optim.Adam (and,
# like every other optimizer here, from the undefined `model_CLR_S`). They now use
# optim.LBFGS, which is what the variable names and the closure-based training
# loop indicate - confirm this is the intended optimizer and learning rate.

# LC initialisations
model_CLR_LC, optimizer_CLR_LC, lossList_CLR_LC, valList_CLR_LC= _newExperiment(optim.Adam)
model_LALR_LC, optimizer_LALR_LC, lossList_LALR_LC, valList_LALR_LC= _newExperiment(optim.Adam)
model_LBFGS_LC, optimizer_LBFGS_LC, lossList_LBFGS_LC, valList_LBFGS_LC= _newExperiment(optim.LBFGS)

# L1 initialisations
model_CLR_L1, optimizer_CLR_L1, lossList_CLR_L1, valList_CLR_L1= _newExperiment(optim.Adam)
model_LALR_L1, optimizer_LALR_L1, lossList_LALR_L1, valList_LALR_L1= _newExperiment(optim.Adam)
model_LBFGS_L1, optimizer_LBFGS_L1, lossList_LBFGS_L1, valList_LBFGS_L1= _newExperiment(optim.LBFGS)

# MSE initialisations:
model_CLR_MSE, optimizer_CLR_MSE, lossList_CLR_MSE, valList_CLR_MSE= _newExperiment(optim.Adam)
model_LALR_MSE, optimizer_LALR_MSE, lossList_LALR_MSE, valList_LALR_MSE= _newExperiment(optim.Adam)
# The LBFGS/MSE experiment is trained and saved further down but was never created.
model_LBFGS_MSE, optimizer_LBFGS_MSE, lossList_LBFGS_MSE, valList_LBFGS_MSE= _newExperiment(optim.LBFGS)


In [None]:
# Training loops:
def trainConstantLR(model, optimizer, criterion, tau, epochs, ls_list, valList, loss_name):
    """
    Training loop used for constantLR

    Iterates over the module-level `trainLoader` / `valLoader` and moves every
    batch to the module-level `device`.

    Parameters
    ----------
    model : nn.Module
        Network to train (updated in place).
    optimizer : torch.optim.Optimizer
        Optimizer stepping the parameters of `model`.
    criterion : callable
        Loss. Called as criterion(outputs, labels, tau, h) when
        `loss_name == "LC"`, otherwise as criterion(outputs, labels).
    tau : float
        Tilt forwarded to the "LC" criterion.
    epochs : int
        Number of passes over the training loader.
    ls_list : list
        Receives the mean training loss of each epoch (appended in place).
    valList : list
        Receives the mean validation loss of each epoch (appended in place).
    loss_name : str
        "LC" selects the four-argument criterion call; anything else the
        two-argument call.
    """
    def batch_loss(outputs, labels):
        # Only the tilted log-cosh criterion takes the extra (tau, h) arguments.
        if loss_name == "LC":
            return criterion(outputs, labels, tau, h)
        return criterion(outputs, labels)

    for epoch in range(epochs):
        epoch_loss= 0.0
        # training loop
        model.train()
        for inputs, labels in trainLoader:
            inputs= inputs.to(device)
            labels= labels.to(device)
            optimizer.zero_grad()
            loss= batch_loss(model(inputs), labels)
            loss.backward()
            optimizer.step()
            epoch_loss+= loss.item()
        train_avg= epoch_loss/len(trainLoader)
        ls_list.append(train_avg)

        # validation loop: no gradients are needed, so skip building the autograd graph
        val_loss= 0.0
        model.eval()
        with torch.no_grad():
            for inputs, labels in valLoader:
                inputs= inputs.to(device)
                labels= labels.to(device)
                val_loss+= batch_loss(model(inputs), labels).item()
        val_avg= val_loss/len(valLoader)
        valList.append(val_avg)
        print("Epoch: {} Training loss: {} Validation loss: {}".format(epoch, train_avg, val_avg))

def trainLBFGS(model, optimizer, criterion, tau, epochs, ls_list, valList, loss_name):
    """
    Training loop used for LBFGS and conjugate gradient training

    Same contract as the other training loops, except that the optimizer is
    stepped with a closure that re-evaluates the loss. Iterates over the
    module-level `trainLoader` / `valLoader` and uses the module-level `device`.

    Parameters
    ----------
    model : nn.Module
        Network to train (updated in place).
    optimizer : torch.optim.Optimizer
        Optimizer whose `step` accepts a closure and returns its loss.
    criterion : callable
        Loss. Called as criterion(outputs, labels, tau, h) when
        `loss_name == "LC"`, otherwise as criterion(outputs, labels).
    tau : float
        Tilt forwarded to the "LC" criterion.
    epochs : int
        Number of passes over the training loader.
    ls_list : list
        Receives the mean training loss of each epoch (appended in place).
    valList : list
        Receives the mean validation loss of each epoch (appended in place).
    loss_name : str
        "LC" selects the four-argument criterion call; anything else the
        two-argument call.
    """
    def batch_loss(outputs, labels):
        # Only the tilted log-cosh criterion takes (tau, h); the previous
        # `== "MSE"` test sent the L1 criterion down the four-argument path.
        if loss_name == "LC":
            return criterion(outputs, labels, tau, h)
        return criterion(outputs, labels)

    for epoch in range(epochs):
        epoch_loss= 0.0
        # training loop
        model.train()
        for inputs, labels in trainLoader:
            inputs= inputs.to(device)
            labels= labels.to(device)
            def closure():
                optimizer.zero_grad()
                loss= batch_loss(model(inputs), labels)
                loss.backward()
                return loss
            # step() hands back the loss computed by the closure; record it so the
            # training curve is no longer empty / reported as zero.
            step_loss= optimizer.step(closure)
            epoch_loss+= float(step_loss)
        train_avg= epoch_loss/len(trainLoader)
        ls_list.append(train_avg)

        # validation loop: uses the validation loader and the same criterion as
        # training, like the other training loops (it previously referenced an
        # undefined `testLoader` and a fixed sqrt(criterion2)).
        val_loss= 0.0
        model.eval()
        with torch.no_grad():
            for inputs, labels in valLoader:
                inputs= inputs.to(device)
                labels= labels.to(device)
                val_loss+= batch_loss(model(inputs), labels).item()
        val_avg= val_loss/len(valLoader)
        valList.append(val_avg)
        print("Epoch: {} Training loss: {} Validation loss: {}".format(epoch, train_avg, val_avg))

def trainLALR(model,optimizer, criterion,  tau, epochs, ls_list, valList, loss_name):
    """
    Training loop used for LALR training

    At the start of every epoch the learning rate returned by `computeLR` is
    installed in the optimizer's first parameter group. Iterates over the
    module-level `trainLoader` / `valLoader` and uses the module-level `device`.

    Parameters
    ----------
    model : nn.Module
        Network to train (updated in place); must provide `penU` for `computeLR`.
    optimizer : torch.optim.Optimizer
        Optimizer stepping the parameters of `model`.
    criterion : callable
        Loss. Called as criterion(outputs, labels, tau, h) when
        `loss_name == "LC"`, otherwise as criterion(outputs, labels).
    tau : float
        Tilt forwarded to the "LC" criterion.
    epochs : int
        Number of passes over the training loader.
    ls_list : list
        Receives the mean training loss of each epoch (appended in place).
    valList : list
        Receives the mean validation loss of each epoch (appended in place).
    loss_name : str
        Loss identifier, also forwarded to `computeLR` ("LC", "L1" or "MSE").
    """
    def batch_loss(outputs, labels):
        # Only the tilted log-cosh criterion takes the extra (tau, h) arguments.
        if loss_name == "LC":
            return criterion(outputs, labels, tau, h)
        return criterion(outputs, labels)

    for epoch in range(epochs):
        epoch_loss= 0.0
        # `loss_name` selects the formula; the previous code passed an undefined `ls`.
        # NOTE(review): bSize=16 is kept from the original call - confirm whether it
        # should instead match the batch size of trainLoader.
        lr_val= computeLR(model, loss_name, bSize=16)
        # Store a plain float so the optimizer never holds a (possibly CUDA) tensor as lr.
        optimizer.param_groups[0]['lr']= float(lr_val)
        # training loop
        model.train()
        for inputs, labels in trainLoader:
            inputs= inputs.to(device)
            labels= labels.to(device)
            optimizer.zero_grad()
            loss= batch_loss(model(inputs), labels)
            loss.backward()
            optimizer.step()
            epoch_loss+= loss.item()
        train_avg= epoch_loss/len(trainLoader)
        ls_list.append(train_avg)

        # validation loop: no gradients are needed, so skip building the autograd graph
        val_loss= 0.0
        model.eval()
        with torch.no_grad():
            for inputs, labels in valLoader:
                inputs= inputs.to(device)
                labels= labels.to(device)
                val_loss+= batch_loss(model(inputs), labels).item()
        val_avg= val_loss/len(valLoader)
        valList.append(val_avg)
        print("Epoch: {} Training Loss: {} Validation loss: {} LR: {}".format(epoch, train_avg, val_avg, optimizer.param_groups[0]['lr']))

In [None]:
# Learning rate computation functions:
def computeKa(x):
    """Return the largest vector norm found among the items of `x`.

    `x` is iterated along its first axis. The result is 0.0 when `x` is empty
    or when no item has a norm greater than zero.
    """
    largest= 0.0
    for row in x:
        rowNorm= torch.linalg.vector_norm(row)
        if rowNorm > largest:
            largest= rowNorm
    return largest

def computeLR(model, ls, bSize= 16):
    """
    Compute the adaptive learning rate for the coming epoch.

    Scans the module-level `trainLoader` once (model in eval mode, gradients
    disabled) and collects:
      * Kz  - largest penultimate activation returned by `model.penU`,
      * z_k - smallest network output,
      * Ka  - largest per-sample norm of the network outputs,
      * Y   - largest per-sample norm of the labels.
    All four are taken over the whole loader.

    Parameters
    ----------
    model : nn.Module
        Network exposing `penU`; it is left in eval mode on return.
    ls : str
        "LC", "L1" or "MSE"; selects the formula. Any other value gives 1.0.
    bSize : int, optional
        Divisor used in the formulas (default 16).

    Returns
    -------
    float
        1 / L, where L is the constant computed for `ls`; 0.1 when L is zero,
        negative or not finite.
    """
    Kz = 0.0
    Ka= 0.0
    Y= 0.0
    z_k= None
    model.eval()
    with torch.no_grad():
        for inputs, labels in trainLoader:
            inputs= inputs.to(device)
            labels= labels.to(device)
            op1= model.penU(inputs)
            op2= model(inputs)
            Kz= max(Kz, op1.max().item())
            # Track the minimum over every batch; previously only the last batch
            # (random, since the loader shuffles) decided the value used below.
            batchMin= op2.min().item()
            z_k= batchMin if z_k is None else min(z_k, batchMin)
            Ka= max(Ka, float(computeKa(op2)))
            # Y follows the label norms; it used to be updated only when the
            # *output* norm exceeded it.
            Y= max(Y, float(computeKa(labels)))

    LR= 1
    if ls == "LC":
        LR= (1/bSize)*math.tanh(-z_k)*Kz if z_k is not None else 0.0
    elif ls == "L1":
        LR= Kz/bSize
    elif ls == "MSE":
        LR= (1/bSize)*(Ka+Y)*Kz

    # A zero, negative or non-finite constant cannot be inverted into a usable
    # step size, so fall back to the default rate.
    if not (LR > 0) or not math.isfinite(LR):
        return 0.1
    return 1/LR

In [None]:
# ConstantLR training

In [None]:
# Constant learning rate, tilted log-cosh loss (closing parenthesis restored).
trainConstantLR(model_CLR_LC, optimizer_CLR_LC, criterion1, tau, N_EPOCHS, lossList_CLR_LC, valList_CLR_LC, "LC")

In [None]:

In [ ]:
# Constant learning rate, L1 loss.
trainConstantLR(
    model_CLR_L1,
    optimizer_CLR_L1,
    criterion2,
    tau,
    N_EPOCHS,
    lossList_CLR_L1,
    valList_CLR_L1,
    "L1",
)

In [None]:
# Constant learning rate, MSE loss (closing parenthesis restored).
trainConstantLR(model_CLR_MSE, optimizer_CLR_MSE, criterion3, tau, N_EPOCHS, lossList_CLR_MSE, valList_CLR_MSE, "MSE")

In [None]:
# LALR training

In [None]:
# Adaptive learning rate, tilted log-cosh loss.
trainLALR(
    model_LALR_LC,
    optimizer_LALR_LC,
    criterion1,
    tau,
    N_EPOCHS,
    lossList_LALR_LC,
    valList_LALR_LC,
    "LC",
)

In [None]:
# Adaptive learning rate, L1 loss.
trainLALR(
    model_LALR_L1,
    optimizer_LALR_L1,
    criterion2,
    tau,
    N_EPOCHS,
    lossList_LALR_L1,
    valList_LALR_L1,
    "L1",
)

In [None]:
# Adaptive learning rate, MSE loss (stray cell marker removed from the end of the call).
trainLALR(model_LALR_MSE, optimizer_LALR_MSE, criterion3, tau, N_EPOCHS, lossList_LALR_MSE, valList_LALR_MSE, "MSE")

In [None]:
# LBFGS training:

In [None]:
# Closure-based training, tilted log-cosh loss.
trainLBFGS(
    model_LBFGS_LC,
    optimizer_LBFGS_LC,
    criterion1,
    tau,
    N_EPOCHS,
    lossList_LBFGS_LC,
    valList_LBFGS_LC,
    "LC",
)

In [None]:
# Closure-based training, L1 loss.
trainLBFGS(
    model_LBFGS_L1,
    optimizer_LBFGS_L1,
    criterion2,
    tau,
    N_EPOCHS,
    lossList_LBFGS_L1,
    valList_LBFGS_L1,
    "L1",
)

In [None]:
# Closure-based training, MSE loss (stray cell marker removed from the end of the call).
trainLBFGS(model_LBFGS_MSE, optimizer_LBFGS_MSE, criterion3, tau, N_EPOCHS, lossList_LBFGS_MSE, valList_LBFGS_MSE, "MSE")

In [None]:
# Saving losses and models

In [None]:
# Persist the per-epoch training losses of every experiment as .npy files.
lossDir= "./D-GEX_checkpoint/LossLists"
os.makedirs(lossDir, exist_ok=True)  # np.save does not create missing folders

A= np.asarray(lossList_CLR_LC)
B= np.asarray(lossList_LALR_LC)
C= np.asarray(lossList_LBFGS_LC)

A_= np.asarray(lossList_CLR_L1)
B_= np.asarray(lossList_LALR_L1)
C_= np.asarray(lossList_LBFGS_L1)

A__= np.asarray(lossList_CLR_MSE)
B__= np.asarray(lossList_LALR_MSE)
C__= np.asarray(lossList_LBFGS_MSE)

# One file per experiment, named after the list it was built from.
for fileStem, values in (
    ("lossList_CLR_LC", A),
    ("lossList_LALR_LC", B),
    ("lossList_LBFGS_LC", C),
    ("lossList_CLR_L1", A_),
    ("lossList_LALR_L1", B_),
    ("lossList_LBFGS_L1", C_),
    ("lossList_CLR_MSE", A__),
    ("lossList_LALR_MSE", B__),
    ("lossList_LBFGS_MSE", C__),
):
    np.save(os.path.join(lossDir, fileStem + ".npy"), values)

In [None]:
# Persist the per-epoch validation losses of every experiment as .npy files.
valDir= "./D-GEX_checkpoint/ValLists"
os.makedirs(valDir, exist_ok=True)  # np.save does not create missing folders

A= np.asarray(valList_CLR_LC)
B= np.asarray(valList_LALR_LC)
C= np.asarray(valList_LBFGS_LC)

A_= np.asarray(valList_CLR_L1)
B_= np.asarray(valList_LALR_L1)
C_= np.asarray(valList_LBFGS_L1)

A__= np.asarray(valList_CLR_MSE)
B__= np.asarray(valList_LALR_MSE)
C__= np.asarray(valList_LBFGS_MSE)

# One file per experiment, named after the list it was built from.
for fileStem, values in (
    ("valList_CLR_LC", A),
    ("valList_LALR_LC", B),
    ("valList_LBFGS_LC", C),
    ("valList_CLR_L1", A_),
    ("valList_LALR_L1", B_),
    ("valList_LBFGS_L1", C_),
    ("valList_CLR_MSE", A__),
    ("valList_LALR_MSE", B__),
    ("valList_LBFGS_MSE", C__),
):
    np.save(os.path.join(valDir, fileStem + ".npy"), values)

In [None]:
# Persist the trained weights (state_dict only) of every experiment.
paramDir= "./D-GEX_checkpoint/model_params"
os.makedirs(paramDir, exist_ok=True)  # torch.save does not create missing folders

for fileStem, trainedModel in (
    ("CLR_LC", model_CLR_LC),
    ("LALR_LC", model_LALR_LC),
    ("LBFGS_LC", model_LBFGS_LC),
    ("CLR_L1", model_CLR_L1),
    ("LALR_L1", model_LALR_L1),
    ("LBFGS_L1", model_LBFGS_L1),
    ("CLR_MSE", model_CLR_MSE),
    ("LALR_MSE", model_LALR_MSE),
    ("LBFGS_MSE", model_LBFGS_MSE),
):
    torch.save(trainedModel.state_dict(), os.path.join(paramDir, fileStem + ".pt"))