In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
import time

In [2]:
# Generate Dataloaders
# Build a seeded, class-balanced train/test split of the image paths.
# Label convention used below: dog -> 0, cat -> 1.

N_TEST_PER_CLASS = 2500  # images of each class held out for testing

np.random.seed(321)
train_dir = 'train'
# os.listdir() returns entries in arbitrary, platform-dependent order; sorting
# makes the seeded index draw below pick the same files on every machine.
train_files = sorted(os.listdir(train_dir))
all_dogs = []
all_cats = []
for file_name in train_files:
    if 'dog' in file_name:
        all_dogs.append(os.path.join(train_dir, file_name))
    elif 'cat' in file_name:
        all_cats.append(os.path.join(train_dir, file_name))


def _draw_unique_indices(population_size, count):
    """Draw `count` distinct indices in [0, population_size) by rejection sampling.

    Uses the global NumPy RNG one `randint` at a time, so the sequence of
    accepted indices is fully determined by the seed set above.
    """
    if count > population_size:
        raise ValueError(
            "cannot hold out {} items from a class with only {} files".format(count, population_size)
        )
    chosen = []
    seen = set()  # O(1) duplicate check instead of scanning the list
    while len(chosen) < count:
        r = np.random.randint(0, population_size)
        if r not in seen:
            seen.add(r)
            chosen.append(r)
    return chosen


ind_dog = _draw_unique_indices(len(all_dogs), N_TEST_PER_CLASS)
ind_cat = _draw_unique_indices(len(all_cats), N_TEST_PER_CLASS)

names_dog_test = [all_dogs[i] for i in ind_dog]
names_cat_test = [all_cats[i] for i in ind_cat]

# Everything that was not drawn for the test split goes to training.
held_out_dogs = set(ind_dog)
held_out_cats = set(ind_cat)
names_dog_train = [path for i, path in enumerate(all_dogs) if i not in held_out_dogs]
names_cat_train = [path for i, path in enumerate(all_cats) if i not in held_out_cats]

all_train_ids = np.concatenate((names_dog_train,names_cat_train))
all_test_ids = np.concatenate((names_dog_test,names_cat_test))
# Label blocks are sized from the actual lists so they can never drift out of
# step with the id arrays (dogs first -> 0, cats second -> 1).
all_train_labels = np.concatenate((np.zeros((len(names_dog_train),1)),np.ones((len(names_cat_train),1))))
all_test_labels = np.concatenate((np.zeros((len(names_dog_test),1)),np.ones((len(names_cat_test),1))))
assert all_train_ids.shape[0] == all_train_labels.shape[0]
assert all_test_ids.shape[0] == all_test_labels.shape[0]


In [3]:
def my_transform(key):
    """Return the torchvision preprocessing pipeline registered under `key`.

    Both pipelines resize to 224x224 and convert to a tensor; the 'train'
    pipeline additionally applies a random horizontal flip between those
    steps. Any key other than 'train' or 'test' raises KeyError.
    """
    pipelines = {
        'train': transforms.Compose([
            transforms.Resize((224,224)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ]),
        'test': transforms.Compose([
            transforms.Resize((224,224)),
            transforms.ToTensor(),
        ]),
    }
    return pipelines[key]

class CatsandDogs(Dataset):
    """Map-style dataset pairing image file paths with integer class labels.

    Args:
        path_is: indexable collection of image file paths.
        targets: indexable collection of labels, one per path; each entry must
            be a scalar or a single-element array.
        transform: optional callable applied to the decoded PIL image
            (skipped when falsy).
    """

    def __init__(self,path_is,targets,transform):
        self.path_is = path_is
        self.targets = targets
        self.transform = transform

    def __len__(self):
        """Number of samples (one per path)."""
        return len(self.path_is)

    def __getitem__(self, idx):
        """Return `(image, target)` for sample `idx`; `target` is a Python int."""
        image_path = self.path_is[idx]
        # Decode inside a context manager so the file handle is released right
        # away, and force 3-channel RGB so grayscale/RGBA/CMYK files cannot
        # yield tensors with a different channel count than the rest of a batch.
        with Image.open(image_path) as handle:
            image = handle.convert("RGB")
        if self.transform:
            image = self.transform(image)
        # `np.int` was removed in NumPy 1.24. `.item()` unwraps the
        # single-element label explicitly before the int() conversion.
        target = int(np.asarray(self.targets[idx]).item())
        return image,target

# Wrap the two splits in datasets and batched loaders, then pick the device.
BATCH_SIZE = 128

train_tf = my_transform(key="train")
dats_train = CatsandDogs(path_is=all_train_ids, targets=all_train_labels, transform=train_tf)
test_tf = my_transform(key="test")
dats_test = CatsandDogs(path_is=all_test_ids, targets=all_test_labels, transform=test_tf)

# Only the training loader reshuffles each epoch; neither drops the last short batch.
train_dataloader = DataLoader(dataset=dats_train, batch_size=BATCH_SIZE, shuffle=True, drop_last=False)
test_dataloader = DataLoader(dataset=dats_test, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)

cuda_ok = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda_ok else "cpu")
print("Torch Device:", device)

Torch Device: cuda:0


In [4]:
def cumLaplaceDistribution(y_pred,mean,standard_deviation,all_qs):
    """Piecewise CDF-style value at `y_pred` for location `mean` and asymmetry `all_qs`.

    Where `y_pred < mean` the result is `q * exp((1 - q) * (y_pred - mean) / sd)`;
    where `y_pred >= mean` it is `1 - (1 - q) * exp(-q * (y_pred - mean) / sd)`.
    Both exponents are clamped to be non-positive before exponentiation, so the
    branch that is not selected can never overflow.
    """
    offset = y_pred - mean

    below_exponent = torch.clamp(((1-all_qs) * offset)/standard_deviation, max = 0)
    above_exponent = torch.clamp((-1.0 * all_qs * offset)/standard_deviation, max = 0)

    value_below = all_qs * torch.exp(below_exponent)
    value_above = 1 - ((1-all_qs) * torch.exp(above_exponent))

    # Pick the branch element-wise according to the side of the location.
    return torch.where(y_pred >= mean, value_above, value_below)


def logLikelihoodLoss(y_true,y_pred,mean,standard_deviation,all_qs):
    """Binary negative log-likelihood for one quantile column.

    `y_pred` is passed to `cumLaplaceDistribution` as the location and the
    distribution is evaluated at 0.0; the `mean` argument is accepted but not
    used. Label 1 is scored with `log(1 - p)` and label 0 with `log(p)`, where
    `p` is the value returned by `cumLaplaceDistribution`, clamped to
    [1e-7, 1 - 1e-7] so both logarithms stay finite.
    """
    p_at_zero = cumLaplaceDistribution(0.0,mean = y_pred,standard_deviation = standard_deviation,all_qs = all_qs)
    p_at_zero = p_at_zero.clamp(min = 1e-7,max = 1 - 1e-7)

    positive_part = y_true * torch.log(1 - p_at_zero)
    negative_part = (1 - y_true) * torch.log(p_at_zero)
    return - 1 * torch.mean(positive_part + negative_part)

def customLoss(y_true, y_pred, mean, standard_deviation, all_qs, penalty):
    """Average per-quantile likelihood loss plus a quantile-crossing penalty.

    Args:
        y_true: labels; column 0 is read (`y_true[:, 0]`).
        y_pred: model outputs, one column per entry of `all_qs`.
        mean, standard_deviation: forwarded unchanged to `logLikelihoodLoss`.
        all_qs: iterable of quantile levels, aligned with the columns of `y_pred`.
        penalty: weight of the crossing term.

    Returns:
        Scalar tensor: mean of the per-quantile losses plus
        `penalty * mean(max(0, y_pred[:, k] - y_pred[:, k + 1]))`.
    """
    ind_losses = []
    for col, q in enumerate(all_qs):
        solo_loss = logLikelihoodLoss(y_true[:,0],y_pred[:,col] ,mean, standard_deviation, q)
        ind_losses.append(solo_loss)
    total_loss = torch.mean(torch.stack(ind_losses))

    # Differences between neighbouring quantile columns; a negative entry means
    # a higher quantile was predicted below a lower one.
    adjacent_gap = y_pred[:,1:] - y_pred[:,:-1]
    if adjacent_gap.numel() > 0:
        # Build the zero on y_pred's own device/dtype rather than relying on a
        # notebook-level `device` global.
        zero = y_pred.new_zeros(1)
        total_loss = total_loss + penalty * torch.mean(torch.max(zero,-1.0 * adjacent_gap))
    # With a single quantile column there is nothing to cross; skipping the
    # term avoids the NaN that the mean of an empty tensor would inject.
    return total_loss

def customTestPred(y_pred,mean,standard_deviation,all_qs,batch_size = 1):
    """Turn predicted locations into hard 0/1 labels and class-1 probabilities.

    For each of the first `batch_size` entries of `mean`, with
    `val = (y_pred - mean) / standard_deviation` and `q = all_qs`:

    * if `y_pred < mean`:  `p = 1 - q * exp((1 - q) * val)`
    * otherwise:           `p = (1 - q) * exp(-q * val)`

    and the hard label is 1 when `p >= 0.5`, else 0. The computation is done
    for the whole batch at once.

    Args:
        y_pred: scalar evaluation point (the callers in this notebook pass 0).
        mean: array-like or tensor of predicted locations; flattened, so an
            (N, 1) column and a length-N vector are equivalent.
        standard_deviation: scalar scale.
        all_qs: a single quantile level (scalar or one-element tensor).
        batch_size: number of leading entries of `mean` to score.

    Returns:
        `(labels, probabilities)`: two float32 tensors of shape
        `(batch_size, 1)`, moved to the notebook-level `device`. A
        `batch_size` below 1 yields two empty `(0, 1)` tensors. An entry whose
        `mean` is NaN yields probability NaN and label 0, so the output always
        stays aligned with the input.

    Raises:
        IndexError: `mean` has fewer than `batch_size` entries.
        ValueError: `all_qs` does not hold exactly one value.
    """
    if batch_size < 1:
        # Nothing to score; return well-formed empty results.
        return torch.zeros((0, 1)).to(device), torch.zeros((0, 1)).to(device)

    # Convert once up front so no numpy/tensor arithmetic is mixed implicitly.
    mean_t = torch.as_tensor(mean).reshape(-1)
    if not mean_t.is_floating_point():
        mean_t = mean_t.to(torch.get_default_dtype())
    if mean_t.numel() < batch_size:
        raise IndexError(
            "batch_size={} exceeds the {} available predictions".format(batch_size, mean_t.numel())
        )
    mean_t = mean_t[:batch_size]

    q = torch.as_tensor(all_qs).reshape(-1).to(mean_t.device)
    if q.numel() != 1:
        raise ValueError("all_qs must contain exactly one quantile level")

    val = (y_pred - mean_t)/standard_deviation
    prob_if_below = 1 - q * torch.exp((1 - q) * val)
    prob_if_above = (1 - q) * torch.exp(-1 * q * val)
    # Element-wise branch selection; the branch that is not selected may hold
    # inf for extreme inputs, but it is never read.
    probs = torch.where(y_pred < mean_t, prob_if_below, prob_if_above)

    labels = (probs >= 0.5).to(torch.float32).reshape(-1,1)
    cdfs = probs.to(torch.float32).reshape(-1,1)
    return labels.to(device),cdfs.to(device)

def acc_Q(train_preds,train_labels):
    """Fraction of labels matched by the thresholded q=0.5 predictions.

    The raw predictions are converted to hard 0/1 labels with
    `customTestPred(0, preds, standard_deviation=1, all_qs=[0.5])` and compared
    element-wise against `train_labels`.

    Args:
        train_preds: flat sequence of raw model outputs.
        train_labels: flat sequence of 0/1 labels.

    Returns:
        Matches divided by the number of labels, as a Python float.

    Raises:
        ValueError: either input is empty (accuracy would be undefined).
    """
    train_preds = np.array(train_preds).reshape(-1,1)
    train_labels = np.array(train_labels).reshape(-1,1)
    if train_preds.shape[0] == 0 or train_labels.shape[0] == 0:
        raise ValueError("acc_Q needs at least one prediction and one label")

    cdfs_acc,_ = customTestPred(0,train_preds,standard_deviation = 1,all_qs = torch.Tensor([0.5]),
                                batch_size = train_preds.shape[0])

    # One device-to-host copy instead of an `.item()` round trip per sample.
    predicted = cdfs_acc.detach().cpu().numpy().reshape(-1,1)
    # Compare only the overlapping rows, as pairing the two sequences with zip() would.
    n_scored = min(predicted.shape[0], train_labels.shape[0])
    count = int(np.count_nonzero(predicted[:n_scored] == train_labels[:n_scored]))
    return count/train_labels.shape[0]

def acc_tests(test_preds,test_labels):
    """Accuracy of thresholded q=0.5 predictions on the evaluation split.

    The scoring rule is the same as `acc_Q`, so this delegates to it instead
    of carrying a second copy of the logic.

    Args:
        test_preds: flat sequence of raw model outputs.
        test_labels: flat sequence of 0/1 labels.

    Returns:
        Fraction of labels matched, as returned by `acc_Q`.

    Raises:
        ValueError: either input is empty (accuracy would be undefined).
    """
    if np.size(test_preds) == 0 or np.size(test_labels) == 0:
        raise ValueError("acc_tests needs at least one prediction and one label")
    return acc_Q(test_preds, test_labels)


In [5]:
def lr_schedule_combined_sgd(model, loader, batch_size):
    """Derive a learning rate from the largest hidden-activation norm.

    Every batch in `loader` is pushed, without gradients and in eval mode,
    through the backbone stages of `model` and through all but the last two
    modules of `model.fc`. `Kz` is the largest norm of the resulting activation
    tensor over all batches, and the returned rate is `batch_size / Kz`.

    Args:
        model: network exposing `conv1, bn1, relu, maxpool, layer1..layer4,
            avgpool` and an indexable `fc` head.
        loader: iterable of batches whose first element is the input tensor.
        batch_size: value `Kz` is divided by before inverting.

    Returns:
        The learning rate as a Python float.

    Raises:
        ValueError: no positive activation norm was observed (for example an
            empty loader), so the rate would be a division by zero.
    """
    Kz = 0.0
    # Remember the caller's mode so this probe does not silently leave the
    # model in eval mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for batch in loader:
                op = batch[0].to(device)
                for stage in (model.conv1, model.bn1, model.relu, model.maxpool,
                              model.layer1, model.layer2, model.layer3, model.layer4,
                              model.avgpool):
                    op = stage(op)
                # Flatten per sample without hard-coding the feature width.
                op = op.reshape(op.shape[0], -1)
                # Stop two modules short of the end of the head.
                for layer_idx in range(len(model.fc)-2):
                    op = model.fc[layer_idx](op)
                activ = float(np.linalg.norm(op.detach().cpu().numpy()))
                if activ > Kz:
                    Kz = activ
    finally:
        model.train(was_training)

    if Kz <= 0:
        raise ValueError("cannot derive a learning rate: no positive activation norm was observed")
    factor = 1
    K_ = (factor * Kz) / (batch_size)
    lr = 1 / K_
    return lr


In [6]:
batch_size = 128  # fallback batch size for loaders that do not expose one


def train_adaptive_lr(model,loader,optimizer):
    """Train for one epoch with a learning rate re-derived from the activations.

    Before the epoch, `lr_schedule_combined_sgd` computes a learning rate from
    the current model and data. That rate is written to every parameter group
    of `optimizer`, and the epoch itself is then run by `train_fixed`.

    Args:
        model: network to train.
        loader: training batches; its `batch_size` attribute is used for the
            schedule when present, otherwise the notebook-level `batch_size`.
        optimizer: optimizer whose learning rate is overwritten.

    Returns:
        Training accuracy for the epoch, as returned by `train_fixed`.
    """
    # Use the loader's real batch size rather than a hard-coded literal.
    loader_batch_size = getattr(loader, "batch_size", None) or batch_size
    lr_val = lr_schedule_combined_sgd(model, loader, batch_size=loader_batch_size)
    # Apply the rate to all groups so no group is left on a stale value.
    for group in optimizer.param_groups:
        group['lr'] = lr_val
    # The epoch is identical to the fixed-rate one, so reuse it.
    return train_fixed(model, loader, optimizer)


def train_fixed(model,loader,optimizer):
    """Train `model` for one epoch at the optimizer's current learning rate.

    Each batch is scored with `customLoss`, using the notebook-level `mean_is`,
    `std_is`, `all_qs` and `penalty`. Column 4 of the model output is collected
    as the per-sample prediction, and the epoch's accuracy on those predictions
    is computed with `acc_Q`, printed and returned.

    Args:
        model: network to train (switched to training mode).
        loader: iterable of batches; element 0 is the inputs, element 1 the labels.
        optimizer: optimizer stepped once per batch.

    Returns:
        Training accuracy for the epoch.
    """
    train_preds_Q = []
    train_labels = []
    model.train()
    for batch in loader:
        inputs = batch[0].to(device)
        labels = batch[1].to(device)

        optimizer.zero_grad()
        op_qs = model(inputs)
        lossQ = customLoss(labels.reshape(-1,1),op_qs, mean_is,std_is,all_qs,penalty)
        lossQ.backward()
        optimizer.step()

        # One bulk transfer per batch instead of an `.item()` call per sample.
        # Column 4 is the 0.5 entry of the nine-level `all_qs` used in this notebook.
        train_preds_Q.extend(op_qs[:,4].detach().reshape(-1).tolist())
        train_labels.extend(labels.reshape(-1).tolist())

    acc_is_Q = acc_Q(train_preds_Q,train_labels)
    print("Train Acc Q : %f "%(acc_is_Q))
    return acc_is_Q

In [7]:
def test(model,loader):
    """Evaluate `model` on `loader` and return its accuracy.

    Runs in eval mode without gradients, collects column 4 of the model output
    as the per-sample prediction, scores the predictions with `acc_tests`, then
    prints and returns the result.

    Args:
        model: network to evaluate (switched to eval mode).
        loader: iterable of batches; element 0 is the inputs, element 1 the labels.

    Returns:
        Accuracy over all samples in `loader`.
    """
    model.eval()
    test_preds_Q = []
    test_labels = []
    with torch.no_grad():
        for batch in loader:
            inputs = batch[0].to(device)
            labels = batch[1].to(device)
            op_qs = model(inputs)
            # One bulk transfer per batch instead of an `.item()` call per sample.
            # Column 4 is the 0.5 entry of the nine-level `all_qs` used in this notebook.
            test_preds_Q.extend(op_qs[:,4].detach().reshape(-1).tolist())
            test_labels.extend(labels.reshape(-1).tolist())
    acc_is_Q = acc_tests(test_preds_Q,test_labels)
    print("Test Acc Q : %f  "%(acc_is_Q))
    return acc_is_Q





In [8]:
# Build a randomly initialised ResNet-18 with a 9-output quantile head, plus
# the optimizer and loss hyper-parameters for the fixed-learning-rate run.
torch.manual_seed(756)
# `pretrained=False` is deprecated in recent torchvision releases; calling the
# builder with no arguments yields the same untrained network on old and new
# versions alike.
model_1 = torchvision.models.resnet18()
model_1.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
num_features = model_1.fc.in_features
# Replacement head: two hidden blocks followed by one output per quantile level.
model_1.fc = nn.Sequential(
    nn.Linear(num_features, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.2),
    
    nn.Linear(512, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256,9)
)
model_1 = model_1.to(device)

lr_is = 1e-2
optimizer = torch.optim.SGD(model_1.parameters(), lr = lr_is)
# Quantile levels, one per output column; 0.5 sits at index 4.
all_qs = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
all_qs = torch.Tensor(all_qs).to(device)
mean_is = 0
std_is = 1
penalty = 1
epsilon = 0.00

In [9]:
# Run limits and the mutable bookkeeping shared by the fixed-LR and
# adaptive-LR experiments below.
max_epochs, target_acc = 20, 0.90

# Best accuracy / epoch counters, all starting from zero.
fixed_acc = fixed_epochs = fixed_epoch_val = 0
adaptive_acc = adaptive_epochs = 0

# Per-epoch wall-clock training times (seconds), one list per experiment.
fixed_time, adaptive_time = [], []

In [10]:
print("Best Fixed LR Results")
# Reset this run's bookkeeping so re-executing the cell does not mix timings
# and best scores from an earlier execution.
fixed_acc = 0
fixed_epoch_val = 0
fixed_time = []
# Iterate the epoch number directly: this avoids shadowing the builtin `iter`
# and keeps the counter correct when the cell is run more than once.
for fixed_epochs in range(1, max_epochs + 1):
    print("Fixed Iteration:", fixed_epochs)
    start = time.perf_counter()  # monotonic clock, suited to measuring durations
    fixed_acc_train = train_fixed(model_1,train_dataloader,optimizer)
    end = time.perf_counter()
    fixed_time.append(end-start)
    fixed_acc_test = test(model_1,test_dataloader)
    # Record a new best only when both the test and the train accuracy of this
    # epoch exceed the best test accuracy so far.
    if fixed_acc_test > fixed_acc and fixed_acc_train>fixed_acc:
        fixed_acc = fixed_acc_test
        fixed_epoch_val = fixed_epochs

print("Acc:{:.2f} at epoch {} at {:.2f} min per epoch".format(fixed_acc,fixed_epoch_val,np.mean(fixed_time)/60))


Best Fixed LR Results
Fixed Iteration: 1
Train Acc Q : 0.567650 
Test Acc Q : 0.616600  
Fixed Iteration: 2
Train Acc Q : 0.624400 
Test Acc Q : 0.546400  
Fixed Iteration: 3
Train Acc Q : 0.643650 
Test Acc Q : 0.639400  
Fixed Iteration: 4
Train Acc Q : 0.667400 
Test Acc Q : 0.632400  
Fixed Iteration: 5
Train Acc Q : 0.692250 
Test Acc Q : 0.583200  
Fixed Iteration: 6
Train Acc Q : 0.718900 
Test Acc Q : 0.664200  
Fixed Iteration: 7
Train Acc Q : 0.739550 
Test Acc Q : 0.551200  
Fixed Iteration: 8
Train Acc Q : 0.759200 
Test Acc Q : 0.663400  
Fixed Iteration: 9
Train Acc Q : 0.770250 
Test Acc Q : 0.733000  
Fixed Iteration: 10
Train Acc Q : 0.784250 
Test Acc Q : 0.730600  
Fixed Iteration: 11
Train Acc Q : 0.793550 
Test Acc Q : 0.730000  
Fixed Iteration: 12
Train Acc Q : 0.813550 
Test Acc Q : 0.687200  
Fixed Iteration: 13
Train Acc Q : 0.819800 
Test Acc Q : 0.579200  
Fixed Iteration: 14
Train Acc Q : 0.831250 
Test Acc Q : 0.742800  
Fixed Iteration: 15
Train Acc Q : 0

In [11]:
# Rebuild the same network from the same seed so the adaptive-learning-rate
# run starts from the same initial weights as the fixed-rate run.
torch.cuda.empty_cache()
torch.manual_seed(756)
# `pretrained=False` is deprecated in recent torchvision releases; calling the
# builder with no arguments yields the same untrained network on old and new
# versions alike.
model_1 = torchvision.models.resnet18()
model_1.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
num_features = model_1.fc.in_features
# Replacement head: two hidden blocks followed by one output per quantile level.
model_1.fc = nn.Sequential(
    nn.Linear(num_features, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.2),
    
    nn.Linear(512, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256,9)
)
model_1 = model_1.to(device)

lr_is = 1e-2
optimizer = torch.optim.SGD(model_1.parameters(), lr = lr_is)
# Quantile levels, one per output column; 0.5 sits at index 4.
all_qs = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
all_qs = torch.Tensor(all_qs).to(device)
mean_is = 0
std_is = 1
penalty = 1
epsilon = 0.00

In [12]:
# Train with the adaptive learning rate until both train and test accuracy
# exceed the best accuracy of the fixed-rate run (`fixed_acc`), or until
# `max_epochs` is exhausted.
adaptive_time = []  # reset so re-running the cell does not mix in old timings
# Iterate the epoch number directly: this avoids shadowing the builtin `iter`
# and keeps the counter correct when the cell is run more than once.
for adaptive_epochs in range(1, max_epochs + 1):
    print("Adaptive Iteration:", adaptive_epochs)
    start = time.perf_counter()  # monotonic clock, suited to measuring durations
    adaptive_acc_train = train_adaptive_lr(model_1,train_dataloader,optimizer)
    end = time.perf_counter()
    print("Train Time: {:.3f}".format(end-start))
    adaptive_time.append(end-start)
    adaptive_acc_test = test(model_1,test_dataloader)
    if adaptive_acc_test > fixed_acc and adaptive_acc_train>fixed_acc:
        print()
        print("Acc:{:.2f} at epoch {} at {:.2f} min per epoch".format(adaptive_acc_test,adaptive_epochs,
                                                                      np.mean(adaptive_time)/60))
        break
else:
    # Report the negative outcome explicitly instead of ending without a summary.
    print("Adaptive LR did not exceed the fixed-LR accuracy of {:.2f} within {} epochs".format(
        fixed_acc, max_epochs))


print()


Adaptive Iteration: 1
Train Acc Q : 0.586850 
Train Time: 241.387
Test Acc Q : 0.509000  
Adaptive Iteration: 2
Train Acc Q : 0.700300 
Train Time: 289.132
Test Acc Q : 0.507600  
Adaptive Iteration: 3
Train Acc Q : 0.754850 
Train Time: 289.372
Test Acc Q : 0.639600  
Adaptive Iteration: 4
Train Acc Q : 0.798850 
Train Time: 288.841
Test Acc Q : 0.665200  
Adaptive Iteration: 5
Train Acc Q : 0.816650 
Train Time: 289.230
Test Acc Q : 0.661600  
Adaptive Iteration: 6
Train Acc Q : 0.815200 
Train Time: 288.760
Test Acc Q : 0.717400  
Adaptive Iteration: 7
Train Acc Q : 0.879100 
Train Time: 289.538
Test Acc Q : 0.769600  

Acc:0.77 at epoch 7 at 4.71 min per epoch

