This notebook contains code to compare the effect of label noise on BQR and BCE losses

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.utils.data as data_utils
import warnings

warnings.filterwarnings("ignore", category=UserWarning) 

Scaler = StandardScaler()

In [2]:
def create_xy(dataset, attribute_columns, target_column, delim, split_ratio, ditch_head=True):
    with open(dataset, 'r') as f:
        lines = f.readlines()
    if ditch_head:
        lines = lines[1:]
    X = []
    Y = []
    for line in lines:
        while len(line) > 0 and line[-1] == "\n":
            line = line[:len(line)-1]
        split_array = line.split(delim)
        all_columns = []
        for value in split_array:
            if value !="" and value !=" ":
                all_columns.append(value)
        if len(all_columns)==0:
            break
        point = []
        for i in attribute_columns:
            point.append(float(all_columns[i]))
        X.append(point)
        Y.append(float(all_columns[target_column]))
    X_arr = np.asarray(X)
    Scaler.fit(X_arr)
    X_arr = Scaler.transform(X_arr)
    Y_arr = np.asarray(Y)
    thresh = np.median(Y_arr)
    Y_arr_binary = np.where(Y_arr<=0,0,1)
    unique, counts = np.unique(Y_arr_binary, return_counts=True)
    x_train, x_test, y_train, y_test = train_test_split(X_arr, Y_arr_binary, test_size = split_ratio)
    return x_train, x_test, y_train, y_test, Y_arr, X_arr


In [3]:
# Flips 100*ratio% of the labels in target

def corrupt(target, ratio):
    result = target.copy()
    indices = np.random.choice(len(target),int(len(target)*ratio),replace=False)
    for i in indices:
        result[i] = np.abs(target[i]-1)    
    return result

In [4]:
torch.manual_seed(111)


class Network(nn.Module):
    def __init__(self, indim):
        super(Network,self).__init__()
        self.l1 = nn.Linear(indim,100)
        self.l2 = nn.Linear(100,10)
        self.l3 = nn.Linear(10,9)
    
    def forward(self,x):
        x = F.leaky_relu(self.l1(x))
        x = F.leaky_relu(self.l2(x))
        x = self.l3(x)
        return x


class BCENetwork(nn.Module):
    def __init__(self, indim):
        super(BCENetwork,self).__init__()
        self.l1 = nn.Linear(indim,100)
        self.l2 = nn.Linear(100,10)
        self.l3 = nn.Linear(10,1)
    
    def forward(self,x):
        x = F.leaky_relu(self.l1(x))
        x = F.leaky_relu(self.l2(x))
        x = F.sigmoid(self.l3(x))
        return x


In [5]:
# Loss and Accuracy Computation functions

def cumLaplaceDistribution(y_pred,mean,standard_deviation,all_qs):
    term1 = ((1-all_qs) * (y_pred - mean))/standard_deviation
    term1.clamp_(max = 0) # Prevents NaN - Only one of term 1 or 2 is used, whichever is -ve
    lesser_term = all_qs * torch.exp(term1)
    term2 = (-1.0 * all_qs * (y_pred - mean))/standard_deviation
    term2.clamp_(max = 0) # Again, Prevents NaN
    greater_term = 1 - ((1-all_qs) * torch.exp(term2))
    mean_tensor = torch.ones_like(mean)
    y_mask = torch.div(y_pred,mean_tensor)
    y_mask[y_pred >= mean] = 1.0
    y_mask[y_pred < mean] = 0.0
    return ((1 - y_mask) * lesser_term )+  (y_mask * greater_term)


def logLikelihoodLoss(y_true,y_pred,mean,standard_deviation,all_qs):
    new_pred = y_pred
    prob = cumLaplaceDistribution(0.0,mean = new_pred,
                                  standard_deviation = standard_deviation,all_qs = all_qs)
    prob.clamp_(min = 1e-7,max = 1 - 1e-7)
    if_one = y_true * torch.log(1 - prob)
    if_zero = (1 - y_true) * torch.log(prob)
    final_loss = - 1 * torch.mean(if_one + if_zero)
    return final_loss

def customLoss(y_true, y_pred, mean, standard_deviation, all_qs, penalty):
    ind_losses = []
    for i,j in enumerate(all_qs):
        single_quantile_loss = logLikelihoodLoss(y_true[:,0],y_pred[:,i] ,
                                                 mean, standard_deviation, j)
        ind_losses.append(single_quantile_loss)
    zero = torch.Tensor([0]).to(device)
    dummy1 = y_pred[:,1:] - y_pred[:,:-1]
    dummy2 = penalty * torch.mean(torch.max(zero,-1.0 * dummy1))
    total_loss  = torch.mean(torch.stack(ind_losses)) +dummy2
    return total_loss

def customTestPred(y_pred,mean,standard_deviation,all_qs,batch_size = 1):
    acc = []
    cdfs = []
    val = (y_pred - mean)/standard_deviation 
    
    for xx in range(batch_size):
        if(y_pred < mean[xx]):
            lesser_term = all_qs * torch.exp((1.0 - all_qs) * torch.tensor(val[xx], dtype=torch.double)) 
            # Typecast above needed for some versions of torch
            lesser_term  = 1 - lesser_term
            cdfs.append(lesser_term.item())
            if(lesser_term.item() >= 0.5):
                acc.append([1])
            else:
                acc.append([0])
        
        elif(y_pred >= mean[xx]):
            greater_term = 1.0 - ((1.0-all_qs) * torch.exp(-1.0 * all_qs * torch.tensor(val[xx], dtype=torch.double)))
            # Typecast above needed for some versions of torch
            greater_term = 1 - greater_term
            cdfs.append(greater_term.item())
            if(greater_term.item() >= 0.5):
                acc.append([1])
            else:
                acc.append([0])
    return torch.Tensor(acc).to(device).reshape(-1,1),torch.Tensor(cdfs).to(device).reshape(-1,1)

def acc_tests(test_preds,test_labels):
    test_preds = np.array(test_preds).reshape(-1,1)
    test_labels = np.array(test_labels).reshape(-1,1)
    cdfs_acc,_ = customTestPred(0,test_preds,standard_deviation = 1,all_qs = torch.Tensor([0.5]),
                                batch_size = test_preds.shape[0])

    count = 0
    for i,j in zip(cdfs_acc,test_labels):
        if(i.item() == j[0]):
            count += 1
    return count/test_labels.shape[0]


def bce_test(preds, labels):
    count = 0
    for i,j in zip(preds,labels):
        if i < 0.5:
            prediction = 0
        else:
            prediction = 1
        if(prediction == j):
            count += 1
    return count/len(labels)

In [6]:
# Training and Testing Methods

def train(model,optimizer,loader,epochs, verbose=False):
    train_preds_Q = []
    train_labels = []
    model.train()
    
    for i,j in enumerate(loader):
        inputs,labels = j[0],j[1]
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        op_qs = model(inputs)
        lossQ = customLoss(labels.reshape(-1,1),op_qs, mean_is,std_is,all_qs,penalty)
        lossQ.backward()
        optimizer.step()
        
        for lag in op_qs[:,4].detach().reshape(-1,1):
            train_preds_Q.append(lag.item())
        for lag in labels.reshape(-1,1):
            train_labels.append(lag.item())
            
    acc_is_Q = acc_tests(train_preds_Q,train_labels)
    
    if verbose:
        print("[%d/%d] Train Acc Q : %f "%(epochs,total_epochs,acc_is_Q))
    return acc_is_Q

def test(model,loader,epochs,verbose=False):
    model.eval()
    test_preds_Q = []
    test_preds_bce = []
    test_labels = []
    with torch.no_grad():
        for i,j in enumerate(loader):
            inputs,labels = j[0],j[1]
            inputs = inputs.to(device)
            labels = labels.to(device)
            op_qs = model(inputs)
            
            for lag in op_qs[:,4].detach().reshape(-1,1):
                test_preds_Q.append(lag.item())
            for lag in labels.reshape(-1,1):
                test_labels.append(lag.item())
                
    acc_is_Q = acc_tests(test_preds_Q,test_labels)
    
    if verbose:
        print("[%d/%d] Test Acc Q : %f  "%(epochs,total_epochs,acc_is_Q))
    return acc_is_Q

def train_bce(model,opt,loader,epochs,verbose=False):
    bce_loss = nn.BCELoss()
    model.train()
    preds = []
    true_labels = []
    for i,j in enumerate(loader):
        inputs,labels = j[0],j[1]
        inputs = inputs.to(device)
        labels = labels.to(device)
        opt.zero_grad()
        op = model(inputs)
        loss = bce_loss(op,labels.reshape(-1,1))
        loss.backward()
        opt.step()
        for lag in op.detach().reshape(-1,1):
            preds.append(lag.item())
        for lag in labels.reshape(-1,1):
            true_labels.append(lag.item())
    acc = bce_test(preds, true_labels)
    if verbose:
        print("[%d/%d] Test Acc Q : %f  "%(epochs,total_epochs,acc))
    return acc

def test_bce(model,loader,epochs,verbose=False):
    model.eval()
    preds = []
    true_labels = []
    for i,j in enumerate(loader):
        inputs,labels = j[0],j[1]
        inputs = inputs.to(device)
        labels = labels.to(device)
        op = model(inputs)
        for lag in op.detach().reshape(-1,1):
            preds.append(lag.item())
        for lag in labels.reshape(-1,1):
            true_labels.append(lag.item())
    acc = bce_test(preds, true_labels)
    if verbose:
        print("[%d/%d] Test Acc Q : %f  "%(epochs,total_epochs,acc))
    return acc



In [7]:
def quantileCDF(x, tau):
    if x>0:
        return 1 - tau*np.exp((tau-1)*x)
    else:
        return (1 - tau)*np.exp(tau*x)


In [9]:
batch_is = 64
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.deterministic=True
print("Torch Device:",device)
torch.set_default_dtype(torch.double)

Torch Device: cpu


In [10]:
# Adjust the dataset details here. Refer the dataset_params.txt file for the specifics of each dataset
dataset = '../Datasets/Classification/WBC.csv'
x_cols = list(range(1,10))
y_col = 10
separator = ","
remove_head = True
split_ratio = 0.2

In [11]:
# General Control Parameters for the Quantile loss. Need not be changed
lr_is = 1e-2
mean_is = 0
std_is = 1
penalty = 1
alpha = 0.0

# Tau tensor
all_qs = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
all_qs = torch.Tensor(all_qs).to(device)
all_qs = all_qs.double()

In [12]:
# Experiment Control Parameters
total_runs = 10    # Number of times to run the experiment
total_epochs = 20  # No of training epochs per run
verbosity = False  # Toggle verbose training

In [13]:
nr = [0,0.1,0.2,0.3,0.4] # Noise Ratios
total_epochs = 20
iterations = 10
median_average = []
bce_average = []
pred_range_average = []

for noise_ratio in nr:
    print("Noise Ratio",noise_ratio)
    bce_total = 0
    median_total = 0
    pred_delta_total = 0
    
    for iter in range(iterations):
        print("Iteration:",iter+1)
        X_train,X_val,y_train_legit,y_val, data_Y, data_X = create_xy(dataset, x_cols, y_col, ",", split_ratio)
        y_train = corrupt(y_train_legit, noise_ratio)
        X_train = torch.Tensor(X_train)
        y_train = torch.Tensor(y_train)
        X_val = torch.Tensor(X_val)
        y_val = torch.Tensor(y_val)
        train_dataset = data_utils.TensorDataset(X_train, y_train)
        test_dataset = data_utils.TensorDataset(X_val, y_val)
        train_loader = data_utils.DataLoader(train_dataset, batch_size =64, pin_memory=True,shuffle=True,num_workers = 1)
        test_loader = data_utils.DataLoader(test_dataset,batch_size =64,pin_memory=True,shuffle = False,num_workers = 1)
        
        indim = X_train.shape[1]
        model = Network(indim)
        model = model.to(device)

        bce_model = BCENetwork(indim)
        bce_model = bce_model.to(device)

        optimizer = torch.optim.Adam(model.parameters(), lr = lr_is)
        bce_opt = torch.optim.Adam(bce_model.parameters(), lr=lr_is)
        
        for i in range(total_epochs):
            acc_train_med = train(model,optimizer, train_loader,i,verbosity)
            acc_train_bce = train_bce(bce_model,bce_opt, train_loader,i,verbosity)
            
    
        X_cov = torch.Tensor(data_X)
        y_cov = torch.Tensor(data_Y)

        cov_dataset = data_utils.TensorDataset(X_cov, y_cov)
        cov_loader = data_utils.DataLoader(cov_dataset, batch_size =64, pin_memory=True,shuffle=True,num_workers = 1)
        preds_Q = []
        prob_Q = []
        preds_bce = []
        true_labels = []
        for i,j in enumerate(cov_loader):
            inputs,labels = j[0],j[1]
            inputs = inputs.to(device)
            labels = labels.to(device)
            op_qs = model(inputs)
            op_bce = bce_model(inputs)
            for lag in op_qs[:,4].detach().reshape(-1,1):
                preds_Q.append(lag.item())
                prob_Q.append(quantileCDF(lag.item(),0.5))
            for lag in op_bce.detach().reshape(-1,1):
                preds_bce.append(lag.item())
            for lag in labels.reshape(-1,1):
                true_labels.append(lag.item()) 
            
        bce_total += bce_test(preds_bce, true_labels)/iterations
        median_total += acc_tests(preds_Q, true_labels)/iterations

    median_average.append(median_total)
    bce_average.append(bce_total)
    print("----------")


Noise Ratio 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
----------
Noise Ratio 0.1
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
----------
Noise Ratio 0.2
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
----------
Noise Ratio 0.3
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
----------
Noise Ratio 0.4
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
----------


In [14]:
nr_header = "Noise Ratio | "
bce_res   = "BCE         | "
bqr_res   = "BQR         | "
endstring = " | "
for i in range(len(nr)):
    bce_res += "{:.3f} ".format(bce_average[i]) + endstring
    bqr_res += "{:.3f} ".format(median_average[i]) + endstring
    nr_header += "{:.3f} ".format(nr[i]) + endstring

print(nr_header)
print(bce_res)
print(bqr_res)

Noise Ratio | 0.000  | 0.100  | 0.200  | 0.300  | 0.400  | 
BCE         | 0.098  | 0.097  | 0.095  | 0.093  | 0.087  | 
BQR         | 0.098  | 0.097  | 0.095  | 0.094  | 0.095  | 
