In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.utils.data as data_utils
import time
import warnings


warnings.filterwarnings("ignore", category=UserWarning) # Prevents Tanh warning messages 

Scaler = StandardScaler()


In [2]:
def create_xy(dataset, attribute_columns, target_column, delim, split_ratio,ditch_head=False):
    with open(dataset, 'r') as f:
        lines = f.readlines()
    if ditch_head:
        lines = lines[1:]
    X = []
    Y = []
    for line in lines:
        while len(line) > 0 and line[-1] == "\n":
            line = line[:len(line)-1]
        split_array = line.split(delim)
        all_columns = []
        for value in split_array:
            if value !="" and value !=" ":
                all_columns.append(value)
        if len(all_columns)==0:
            break
        point = []
        for i in attribute_columns:
            point.append(float(all_columns[i]))
        try:
            Y.append(float(all_columns[target_column]))
            X.append(point)
        except:
            pass
    X_arr = np.asarray(X)
    Scaler.fit(X_arr)
    X_arr = Scaler.transform(X_arr)
    Y_arr = np.asarray(Y)
    thresh = np.median(Y_arr)
    Y_arr_binary = np.where(Y_arr<=0,0,1)
    unique, counts = np.unique(Y_arr_binary, return_counts=True)
    x_train, x_test, y_train, y_test = train_test_split(X_arr, Y_arr_binary, test_size = split_ratio)
    return x_train, x_test, y_train, y_test, Y_arr, X_arr, Y_arr_binary

In [3]:
torch.manual_seed(111)
class Network(nn.Module):
    def __init__(self, indim):
        super(Network,self).__init__()
        self.l1 = nn.Linear(indim,100)
        self.l2 = nn.Linear(100,50)
        self.l3 = nn.Linear(50,9)
    
    def forward(self,x):
        x = F.leaky_relu(self.l1(x))
        x = F.leaky_relu(self.l2(x))
        x = self.l3(x)
        return x
    
    def penultimate(self, x):
        op = F.leaky_relu(self.l1(x))
        op = F.leaky_relu(self.l2(op))
        return op


In [4]:
# Loss and Accuracy Computation functions

def cumLaplaceDistribution(y_pred,mean,standard_deviation,all_qs):
    term1 = ((1-all_qs) * (y_pred - mean))/standard_deviation
    term1.clamp_(max = 0) # Prevents NaN - Only one of term 1 or 2 is used, whichever is -ve
    lesser_term = all_qs * torch.exp(term1)
    term2 = (-1.0 * all_qs * (y_pred - mean))/standard_deviation
    term2.clamp_(max = 0) # Again, Prevents NaN
    greater_term = 1 - ((1-all_qs) * torch.exp(term2))
    mean_tensor = torch.ones_like(mean)
    y_mask = torch.div(y_pred,mean_tensor)
    y_mask[y_pred >= mean] = 1.0
    y_mask[y_pred < mean] = 0.0
    return ((1 - y_mask) * lesser_term )+  (y_mask * greater_term)


def logLikelihoodLoss(y_true,y_pred,mean,standard_deviation,all_qs):
    new_pred = y_pred
    prob = cumLaplaceDistribution(0.0,mean = new_pred,
                                  standard_deviation = standard_deviation,all_qs = all_qs)
    prob.clamp_(min = 1e-7,max = 1 - 1e-7)
    if_one = y_true * torch.log(1 - prob)
    if_zero = (1 - y_true) * torch.log(prob)
    final_loss = - 1 * torch.mean(if_one + if_zero)
    return final_loss

def customLoss(y_true, y_pred, mean, standard_deviation, all_qs, penalty):
    ind_losses = []
    for i,j in enumerate(all_qs):
        single_quantile_loss = logLikelihoodLoss(y_true[:,0],y_pred[:,i] ,
                                                 mean, standard_deviation, j)
        ind_losses.append(single_quantile_loss)
    zero = torch.Tensor([0]).to(device)
    dummy1 = y_pred[:,1:] - y_pred[:,:-1]
    dummy2 = penalty * torch.mean(torch.max(zero,-1.0 * dummy1))
    total_loss  = torch.mean(torch.stack(ind_losses)) +dummy2
    return total_loss

def customTestPred(y_pred,mean,standard_deviation,all_qs,batch_size = 1):
    acc = []
    cdfs = []
    val = (y_pred - mean)/standard_deviation 
    
    for xx in range(batch_size):
        if(y_pred < mean[xx]):
            lesser_term = all_qs * torch.exp((1.0 - all_qs) * torch.tensor(val[xx], dtype=torch.double)) 
            # Typecast above needed for some versions of torch
            lesser_term  = 1 - lesser_term
            cdfs.append(lesser_term.item())
            if(lesser_term.item() >= 0.5):
                acc.append([1])
            else:
                acc.append([0])
        
        elif(y_pred >= mean[xx]):
            greater_term = 1.0 - ((1.0-all_qs) * torch.exp(-1.0 * all_qs * torch.tensor(val[xx], dtype=torch.double)))
            # Typecast above needed for some versions of torch
            greater_term = 1 - greater_term
            cdfs.append(greater_term.item())
            if(greater_term.item() >= 0.5):
                acc.append([1])
            else:
                acc.append([0])
    return torch.Tensor(acc).to(device).reshape(-1,1),torch.Tensor(cdfs).to(device).reshape(-1,1)

def acc_tests(test_preds,test_labels):
    test_preds = np.array(test_preds).reshape(-1,1)
    test_labels = np.array(test_labels).reshape(-1,1)
    cdfs_acc,_ = customTestPred(0,test_preds,standard_deviation = 1,all_qs = torch.Tensor([0.5]),
                                batch_size = test_preds.shape[0])

    count = 0
    for i,j in zip(cdfs_acc,test_labels):
        if(i.item() == j[0]):
            count += 1
    return count/test_labels.shape[0]

In [5]:
def lr_schedule_combined_sgd(model, loader, batch_size):
    Kz = 0.0
    model.eval()
    with torch.no_grad():
        for i,j in enumerate(loader):
            inputs,labels = j[0],j[1]
            inputs = inputs.to(device)
            labels = labels.to(device)
            op = model.penultimate(inputs)
            activ = np.linalg.norm(op.detach().cpu().numpy())
            if activ > Kz:
                Kz = activ
    
    factor = 1    
    K_ = (factor * Kz) / (batch_size)
    lr = 1 / K_
    return lr

In [6]:
def train(model,optimizer,loader,epochs, verbose=False):
    train_preds_Q = []
    train_labels = []
    model.train()
    
    for i,j in enumerate(loader):
        inputs,labels = j[0],j[1]
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        op_qs = model(inputs)
        lossQ = customLoss(labels.reshape(-1,1),op_qs, mean_is,std_is,all_qs,penalty)
        lossQ.backward()
        optimizer.step()
        
        for lag in op_qs[:,4].detach().reshape(-1,1):
            train_preds_Q.append(lag.item())
        for lag in labels.reshape(-1,1):
            train_labels.append(lag.item())
            
    acc_is_Q = acc_tests(train_preds_Q,train_labels)
    
    if verbose:
        print("[%d/%d] Train Acc Q : %f "%(epochs,total_epochs,acc_is_Q))    
    return acc_is_Q

def train_adaptive_lr(model,optimizer,loader, epochs, verbose=False):
    train_preds_Q = []
    train_labels = []
    lr_val = lr_schedule_combined_sgd(model, loader, batch_is)
    optimizer.param_groups[0]['lr'] = lr_val
    model.train()
    for i,j in enumerate(loader):
        inputs,labels = j[0],j[1]
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        op_qs = model(inputs)
        lossQ = customLoss(labels.reshape(-1,1),op_qs, mean_is,std_is,all_qs,penalty)
        lossQ.backward()
        optimizer.step()
        for lag in op_qs[:,4].detach().reshape(-1,1):
            train_preds_Q.append(lag.item())
        for lag in labels.reshape(-1,1):
            train_labels.append(lag.item())
    
    acc_is_Q = acc_tests(train_preds_Q,train_labels)
    if verbose:
        print("[%d/%d] Train Acc Q : %f "%(epochs,total_epochs,acc_is_Q))
    return acc_is_Q

def test(model,loader,epochs,verbose=False):
    model.eval()
    test_preds_Q = []
    test_preds_bce = []
    test_labels = []
    with torch.no_grad():
        for i,j in enumerate(loader):
            inputs,labels = j[0],j[1]
            inputs = inputs.to(device)
            labels = labels.to(device)
            op_qs = model(inputs)
            
            for lag in op_qs[:,4].detach().reshape(-1,1):
                test_preds_Q.append(lag.item())
            for lag in labels.reshape(-1,1):
                test_labels.append(lag.item())
                
    acc_is_Q = acc_tests(test_preds_Q,test_labels)
    
    if verbose:
        print("[%d/%d] Test Acc Q : %f  "%(epochs,total_epochs,acc_is_Q))
    return acc_is_Q

In [7]:
def quantileCDF(x, tau):
    if x>0:
        return 1 - tau*np.exp((tau-1)*x)
    else:
        return (1 - tau)*np.exp(tau*x)

In [8]:
batch_is = 64
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.deterministic=True
print("Torch Device:",device)
torch.set_default_dtype(torch.double)

Torch Device: cpu


In [9]:
# General Control Parameters for the Quantile loss. Need not be changed
lr_is = 1e-1
mean_is = 0
std_is = 1
penalty = 1
alpha = 0.0

# Tau tensor
all_qs = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
all_qs = torch.Tensor(all_qs).to(device)
all_qs = all_qs.double()

In [10]:
# These control the dataset
dataset = '../Datasets/Classification/BankNote_Authentication.csv'
x_cols = list(range(4))
y_col = 4
separator = ","
remove_head = True
target_acc = 0.99

In [11]:
X_train,X_val,y_train,y_val, data_Y, data_X, all_classes = create_xy(dataset, x_cols, y_col, separator, 0.3,remove_head)
X_train = torch.Tensor(X_train)
y_train = torch.Tensor(y_train)
X_val = torch.Tensor(X_val)
y_val = torch.Tensor(y_val)
X_cov = torch.Tensor(data_X)
y_cov = torch.Tensor(data_Y)
cov_dataset = data_utils.TensorDataset(X_cov, y_cov)
cov_loader = data_utils.DataLoader(cov_dataset, batch_size =512, pin_memory=True,shuffle=False,num_workers = 1)

train_dataset = data_utils.TensorDataset(X_train, y_train)
test_dataset = data_utils.TensorDataset(X_val, y_val)
train_loader = data_utils.DataLoader(train_dataset, batch_size =128, pin_memory=True,shuffle=True,num_workers = 1)
test_loader = data_utils.DataLoader(test_dataset,batch_size =512,pin_memory=True,shuffle = False,num_workers = 1)

indim = X_train.shape[1]

model_fixed = Network(indim)
model_fixed = model_fixed.to(device)
optimizer_fixed = torch.optim.SGD(model_fixed.parameters(), lr = lr_is)

model_adapt = Network(indim)
model_adapt = model_adapt.to(device)
optimizer_adapt = torch.optim.SGD(model_adapt.parameters(), lr = lr_is)

acc_train_fixed = 0
acc_test_fixed = 0
acc_train_adapt = 0
acc_test_adapt = 0

epoch_count_fixed = 0
epoch_count_adapt = 0


epoch_count_adapt = 0
while (acc_train_adapt < target_acc):
    adapt_start = time.time()
    acc_train_adapt = train_adaptive_lr(model_adapt,optimizer_adapt,train_loader, epoch_count_adapt)
    adapt_end = time.time()
    if epoch_count_adapt==0:
        print("Epoch time: {:.4f}".format((adapt_end-adapt_start)/60))
    acc_test_adapt  = test(model_adapt,cov_loader,epoch_count_adapt)
    if epoch_count_adapt %50 == 0:
        print("Adapt epoch:", epoch_count_adapt+1, "{:.3f} {:.3f}".format(acc_train_adapt, acc_test_adapt))
    epoch_count_adapt +=1
print(acc_train_adapt, acc_test_adapt)

print()

while (acc_train_fixed < target_acc) and epoch_count_fixed<5000:
    fixed_start = time.time()
    acc_train_fixed = train(model_fixed,optimizer_fixed,train_loader, epoch_count_fixed)
    fixed_end = time.time()
    if epoch_count_fixed==0:
        print("Epoch time: {:.4f}".format((fixed_end-fixed_start)/60))
    acc_test_fixed  = test(model_fixed,cov_loader,epoch_count_fixed)
    if epoch_count_fixed %50 ==0:
        print("Fixed epoch:", epoch_count_fixed+1, "{:.3f} {:.3f}".format(acc_train_fixed, acc_test_fixed))
    epoch_count_fixed +=1
print(acc_train_fixed, acc_test_fixed)

Epoch time: 0.0253
Adapt epoch: 1 0.699 0.433
0.9916666666666667 0.9883381924198251

Epoch time: 0.0144
Fixed epoch: 1 0.640 0.638
Fixed epoch: 51 0.852 0.859
Fixed epoch: 101 0.904 0.910
Fixed epoch: 151 0.944 0.947
Fixed epoch: 201 0.957 0.959
Fixed epoch: 251 0.967 0.968
Fixed epoch: 301 0.973 0.973
Fixed epoch: 351 0.975 0.974
Fixed epoch: 401 0.976 0.976
Fixed epoch: 451 0.976 0.977
Fixed epoch: 501 0.977 0.977
Fixed epoch: 551 0.980 0.980
Fixed epoch: 601 0.983 0.984
Fixed epoch: 651 0.983 0.984
Fixed epoch: 701 0.983 0.984
Fixed epoch: 751 0.983 0.984
Fixed epoch: 801 0.983 0.984
Fixed epoch: 851 0.983 0.984
Fixed epoch: 901 0.988 0.988
Fixed epoch: 951 0.988 0.988
0.9916666666666667 0.9912536443148688


In [12]:
print("Adaptive iteration count:", epoch_count_adapt)
print("Fixed LR Iteration Count:", epoch_count_fixed)

Adaptive iteration count: 13
Fixed LR Iteration Count: 998


In [14]:
with torch.no_grad():
    all_preds = [[] for i in range(9)]
    test_labels = []
    for i,j in cov_loader:
        inputs,labels = i.to(device),j.to(device)
        op_qs = model_adapt(inputs)
        
        for itemset in op_qs.detach():
            for quant in range(9):
                all_preds[quant].append(itemset[quant].item())
            for lag in labels.reshape(-1,1):
                test_labels.append(lag.item())
    
    delta_total = [0,0,0,0,0]
    delta_misc = [0,0,0,0,0]
    for i in range(len(data_Y)):
        start = 4
        left = start
        right = start
        found = False
        count = 0
        medprob = quantileCDF(all_preds[start][i], 0.5)
        while (left>-1 and not found):
            q_left = all_preds[left][i]
            q_right = all_preds[right][i]
            p_left = quantileCDF(q_left, 0.5)
            p_right = quantileCDF(q_right, 0.5)
            left -=1
            right +=1
            if (q_left <= 0.5 and q_right>=0.5):
                found = True
            else:
                count +=1
        delta_total[count-1] +=1
        if (data_Y[i]==0 and medprob<=0.5) or (data_Y[i]==1 and medprob>0.5):
            correct_pred = True
        else:
            correct_pred = False
        if not correct_pred:
            delta_misc[count-1] +=1

In [17]:
delta_header = "Delta      |"
miscrate     = "Misc. Rate |"

for i in range(5):
    if delta_total[i] != 0:
        mr = delta_misc[i]/delta_total[i]
    else:
        mr = 0
    miscrate += "{:.2f}".format(mr) + " | "
    delta_header += "{:.2f}".format((0.1*(i+1)))+ " | "

In [18]:
print(delta_header)
print(miscrate)

Delta      |0.10 | 0.20 | 0.30 | 0.40 | 0.50 | 
Misc. Rate |0.72 | 0.17 | 0.07 | 0.02 | 0.00 | 
