In [None]:
# Import the necessary libraries and modules

from __future__ import print_function, division
import os, random, time, copy, scipy, pickle, sys, math
from skimage import io, transform
import numpy as np
import os.path as path
import scipy.io as sio
from scipy import misc
from scipy import ndimage, signal
import matplotlib.pyplot as plt
import PIL.Image
from io import BytesIO
from skimage import data, img_as_float

import pandas as pd
import seaborn as sn
import sklearn.metrics 
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler 
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision
from torchvision import datasets, models, transforms

In [None]:
import torch.nn.functional as nnf
# Seed both NumPy and PyTorch so that repeated runs draw the same random numbers.
np.random.seed(0)
torch.manual_seed(0)
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Experiment configuration: everything a reader might want to tune lives here.
curr_working_dir = os.getcwd()
project_name = 'demo_1'
imb_type = 'exp'  # sample the long-tailed training set with an exponentially decaying function
imb_factor = 0.01  # imbalance factor = 100 = 1/0.01

nClasses = 100  # number of classes in CIFAR100-LT with imbalance factor 100
encoder_num_layers = 13  # network architecture is VGG16
batch_size = 16  # batch size
isPretrained = True

# Release cached GPU memory left over from earlier runs in this kernel.
torch.cuda.empty_cache()

# Directory that holds this experiment's artefacts; exist_ok avoids the
# check-then-create race and makes the cell safe to re-run.
save_dir = path.join(curr_working_dir, 'exp', project_name)
os.makedirs(save_dir, exist_ok=True)

log_filename = os.path.join(save_dir, 'train.log')


In [None]:
# Root folder for the raw dataset files (created on first run).
path_to_DB = './datasets'
os.makedirs(path_to_DB, exist_ok=True)
# Download CIFAR-100 (not CIFAR-10): the loading cell below reads the
# 'meta', 'train' and 'test' pickles from '<root>/cifar-100-python', which is
# the folder the CIFAR100 archive extracts to.
_ = torchvision.datasets.CIFAR100(root=path_to_DB, train=True, download=True)

In [None]:
# Optional machine-specific dataset location. It replaces the portable
# `path_to_DB` set above only when that folder actually exists, so the
# notebook still runs on machines without this directory.
local_path_to_DB = 'C:\\Users\\HP\\CIFAR100_L_T\\datasets'
if os.path.isdir(local_path_to_DB):
    path_to_DB = local_path_to_DB
path_to_DB

In [None]:
def get_img_num_per_cls(cls_num, total_num, imb_type, imb_factor):
    """Compute how many images to keep per class for a long-tailed subset.

    Args:
        cls_num: number of classes (must be positive; zero raises
            ZeroDivisionError).
        total_num: number of images in the balanced source set.
        imb_type: 'exp' for an exponentially decaying profile, 'step' for a
            head half at full size followed by a tail at the reduced size;
            any other value yields a balanced profile.
        imb_factor: ratio between the smallest and the largest class size.

    Returns:
        A list with exactly `cls_num` integer image counts, ordered by class index.
    """
    # This function is excerpted from a publicly available code [commit 6feb304, MIT License]:
    # https://github.com/kaidic/LDAM-DRW/blob/master/imbalance_cifar.py
    img_max = total_num / cls_num
    if imb_type == 'exp':
        if cls_num == 1:
            # A single class has no decay to apply; the exponent below would divide by zero.
            return [int(img_max)]
        return [
            int(img_max * (imb_factor**(cls_idx / (cls_num - 1.0))))
            for cls_idx in range(cls_num)
        ]
    if imb_type == 'step':
        head_count = cls_num // 2
        # The tail receives the extra class when cls_num is odd, so the result
        # always has cls_num entries.
        tail_count = cls_num - head_count
        return [int(img_max)] * head_count + [int(img_max * imb_factor)] * tail_count
    return [int(img_max)] * cls_num


def gen_imbalanced_data(img_num_per_cls, imgList, labelList):
    """Subsample a dataset so each class keeps at most a prescribed number of images.

    Classes are taken in ascending label order and paired with the entries of
    `img_num_per_cls`; for each class the first occurrences (in dataset order,
    no shuffling) are kept.

    Args:
        img_num_per_cls: number of images to keep for each class.
        imgList: NumPy array of images indexed along its first axis.
        labelList: integer class label for every image in `imgList`.

    Returns:
        (new_data, new_targets): the selected images stacked along the first
        axis, and a list with one label per selected image.
    """
    # This function is excerpted from a publicly available code [commit 6feb304, MIT License]:
    # https://github.com/kaidic/LDAM-DRW/blob/master/imbalance_cifar.py
    new_data = []
    new_targets = []
    targets_np = np.array(labelList, dtype=np.int64)
    classes = np.unique(targets_np)
    # np.random.shuffle(classes)  # shuffling is disabled in the demo for a fair comparison
    for the_class, the_img_num in zip(classes, img_num_per_cls):
        idx = np.where(targets_np == the_class)[0]
        # np.random.shuffle(idx)  # shuffling is disabled in the demo for a fair comparison
        selec_idx = idx[:the_img_num]
        new_data.append(imgList[selec_idx, ...])
        # Use the number of images actually selected: a class may hold fewer
        # than the_img_num samples, and labels must stay aligned with the data.
        new_targets.extend([the_class, ] * len(selec_idx))
    new_data = np.vstack(new_data)
    return (new_data, new_targets)



class CIFAR100LT(Dataset):
    """In-memory image dataset that applies torchvision transforms on access.

    Args:
        set_name: split name; augmentation is only ever applied to 'train'.
        imageList: images indexable by integer; each image is transposed with
            (1, 2, 0) before being turned into a PIL image (the loading code in
            this notebook supplies arrays reshaped to (N, 3, 32, 32)).
        labelList: one class label per image.
        labelNames: human-readable class names (stored, not used internally).
        isAugment: when True and `set_name == 'train'`, random crop and
            horizontal flip are applied; otherwise images are only converted
            to tensors and normalised.
    """

    # Per-channel normalisation statistics shared by both pipelines.
    # NOTE(review): these look like the commonly used CIFAR-10 statistics rather
    # than CIFAR-100 ones -- confirm this is intended before changing them.
    _NORM_MEAN = (0.4914, 0.4822, 0.4465)
    _NORM_STD = (0.2023, 0.1994, 0.2010)

    def __init__(self, set_name='train', imageList=None, labelList=None, labelNames=None, isAugment=True):
        self.isAugment = isAugment
        self.set_name = set_name
        # None defaults avoid sharing one mutable list between instances.
        self.labelNames = [] if labelNames is None else labelNames

        pipeline = []
        # Augment only the training split, and only when the caller asked for it.
        if self.set_name == 'train' and self.isAugment:
            pipeline.append(transforms.RandomCrop(32, padding=4))
            pipeline.append(transforms.RandomHorizontalFlip())
        pipeline.append(transforms.ToTensor())
        pipeline.append(transforms.Normalize(self._NORM_MEAN, self._NORM_STD))
        self.transform = transforms.Compose(pipeline)

        self.imageList = [] if imageList is None else imageList
        self.labelList = [] if labelList is None else labelList
        self.current_set_len = len(self.labelList)

    def __len__(self):
        """Return the number of examples in this split."""
        return self.current_set_len

    def __getitem__(self, idx):
        """Return `(image, label)` for the example at `idx`.

        The image is the transformed tensor; the label is a float32 tensor of
        shape (1, 1) holding the class id.
        """
        curImage = self.imageList[idx]
        curLabel = np.asarray(self.labelList[idx])
        # Move the first axis last so PIL receives a height x width x channel array.
        curImage = PIL.Image.fromarray(curImage.transpose(1, 2, 0))
        curImage = self.transform(curImage)
        curLabel = torch.from_numpy(curLabel.astype(np.float32)).unsqueeze(0).unsqueeze(0)
        return curImage, curLabel

In [None]:
# Point path_to_DB at the extracted CIFAR-100 folder. The guard keeps the cell
# idempotent: re-running it no longer appends 'cifar-100-python' a second time.
if path.basename(path.normpath(path_to_DB)) != 'cifar-100-python':
    path_to_DB = path.join(path_to_DB, 'cifar-100-python')


def _load_cifar_pickle(split_name):
    """Unpickle one CIFAR-100 file ('meta', 'train' or 'test'); keys are bytes."""
    # NOTE: pickle.load can execute arbitrary code -- only load trusted dataset files.
    with open(os.path.join(path_to_DB, split_name), 'rb') as obj:
        return pickle.load(obj, encoding='bytes')


## Unpickling data files
# NOTE: from here on `datasets` is this dict and no longer the
# `torchvision.datasets` module imported at the top of the notebook.
datasets = {}
dataloaders = {}

# Human-readable fine-label names, decoded from bytes to str.
setname = 'meta'
labelnames = [name.decode("utf-8") for name in _load_cifar_pickle(setname)[b'fine_label_names']]

# Training split: reshape the flat rows to (N, 3, 32, 32), then subsample into a
# long-tailed set according to the configured imbalance profile.
setname = 'train'
DATA = _load_cifar_pickle(setname)
imgList = DATA[b'data'].reshape((DATA[b'data'].shape[0], 3, 32, 32))
labelList = DATA[b'fine_labels']
total_num = len(labelList)
img_num_per_cls = get_img_num_per_cls(nClasses, total_num, imb_type, imb_factor)
new_imgList, new_labelList = gen_imbalanced_data(img_num_per_cls, imgList, labelList)
datasets[setname] = CIFAR100LT(
    imageList=new_imgList, labelList=new_labelList, labelNames=labelnames,
    set_name=setname, isAugment=setname=='train')
print('#examples in {}-set:'.format(setname), datasets[setname].current_set_len)

# Test split: used in full, without subsampling.
# After this point `imgList` / `labelList` refer to the TEST split.
setname = 'test'
DATA = _load_cifar_pickle(setname)
imgList = DATA[b'data'].reshape((DATA[b'data'].shape[0], 3, 32, 32))
labelList = DATA[b'fine_labels']
total_num = len(labelList)
datasets[setname] = CIFAR100LT(
    imageList=imgList, labelList=labelList, labelNames=labelnames,
    set_name=setname, isAugment=setname=='train')
print('#examples in {}-set:'.format(setname), datasets[setname].current_set_len)

In [None]:
# Preview the requested per-class image counts (the long-tailed profile).
axes = plt.gca()
axes.plot(img_num_per_cls)
axes.set_xlabel('class ID sorted by cardinality')
axes.set_ylabel('#training examples')

In [None]:
# Build one DataLoader per split; only the training split is shuffled.
# (num_workers could be raised to load batches in parallel.)
dataloaders = {}
for set_name in ['train', 'test']:
    dataloaders[set_name] = DataLoader(
        datasets[set_name],
        batch_size=batch_size,
        shuffle=(set_name == 'train'),
    )

print('#train batch:', len(dataloaders['train']), '\t#test batch:', len(dataloaders['test']))

In [None]:
# Class ids 0..99, one entry per class.
b = list(range(100))

In [None]:
# Count the training images of every class in a single pass over the labels
# (instead of scanning all class ids for every label).
class_position = {class_id: position for position, class_id in enumerate(b)}
num_of_img = [0] * len(b)
for label in new_labelList:
    position = class_position.get(int(label))
    # Labels that are not listed in `b` are ignored.
    if position is not None:
        num_of_img[position] += 1

In [None]:
# Tail classes: positions whose training-image count is below 10.
tail_classes = [position for position in range(len(b)) if num_of_img[position] < 10]

In [None]:
# Preview how many training images each class actually received.
axes = plt.gca()
axes.plot(num_of_img)
axes.set_xlabel('class ID sorted by cardinality')
axes.set_ylabel(' No. of training examples')

In [None]:
# Derive the split sizes from the datasets themselves so they stay correct when
# the imbalance settings change (10847 / 10000 with the default configuration).
dataset_sizes = {set_name: len(datasets[set_name]) for set_name in ('train', 'test')}
# `labelList` holds the TEST labels at this point: the loading cell reassigns it
# after the training split has been subsampled into `new_labelList`.
traindata_labels = new_labelList
testdata_labels = labelList

In [None]:
# Draw 100 random 10-bit codewords and keep the distinct rows. `unique` must be
# called (with dim=0 to deduplicate whole rows); without the call `code_m` was a
# bound method instead of a tensor. The result may contain fewer than 100 rows.
code_m = torch.randint(0, 2, (100, 10)).unique(dim=0)
# Code matrix ("gold matrix"): a 100 x 10 table of 0/1 entries. Row i is the
# 10-bit codeword assigned to class i; the training loop looks rows up by class
# id to build targets and compares network outputs against every row to decode
# a prediction.
# The values are hard-coded here rather than computed from code_m.
# NOTE(review): the rows are not all distinct -- rows 40 and 64, 59 and 92,
# 69 and 77, and 72 and 91 (0-based) are identical, so the classes in each of
# those pairs share a codeword and cannot be told apart when decoding.
gold_matrix=[[1, 1, 0, 0, 0, 1, 1, 0, 0, 0],
        [0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
        [1, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        [1, 0, 1, 0, 1, 1, 0, 1, 1, 0],
        [1, 0, 0, 0, 1, 1, 0, 0, 1, 0],
        [0, 1, 0, 1, 1, 0, 0, 1, 1, 1],
        [0, 0, 1, 0, 0, 1, 1, 0, 0, 0],
        [0, 1, 1, 1, 1, 0, 0, 1, 0, 0],
        [1, 0, 0, 0, 1, 0, 1, 1, 0, 1],
        [1, 0, 0, 0, 0, 0, 1, 1, 1, 0],
        [1, 1, 0, 1, 0, 0, 0, 0, 1, 1],
        [1, 1, 1, 0, 1, 0, 1, 1, 1, 1],
        [1, 0, 0, 1, 1, 0, 0, 1, 1, 1],
        [1, 0, 0, 0, 0, 0, 0, 1, 1, 0],
        [0, 0, 0, 1, 1, 0, 0, 1, 0, 1],
        [1, 0, 0, 1, 1, 0, 1, 0, 0, 1],
        [1, 0, 0, 1, 0, 0, 0, 1, 1, 1],
        [0, 1, 1, 0, 0, 1, 1, 1, 1, 0],
        [1, 1, 0, 0, 1, 1, 1, 1, 1, 0],
        [0, 1, 0, 1, 0, 1, 1, 0, 1, 1],
        [1, 1, 1, 1, 0, 0, 1, 1, 0, 0],
        [0, 0, 1, 1, 0, 0, 0, 0, 1, 0],
        [0, 0, 1, 1, 1, 0, 1, 1, 0, 1],
        [0, 1, 1, 0, 0, 1, 1, 0, 1, 0],
        [0, 0, 1, 1, 1, 1, 1, 0, 1, 0],
        [0, 1, 0, 1, 0, 0, 0, 1, 1, 1],
        [1, 0, 1, 1, 0, 0, 1, 0, 1, 0],
        [1, 0, 1, 0, 1, 1, 1, 1, 1, 1],
        [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
        [0, 0, 0, 1, 0, 1, 1, 1, 1, 0],
        [1, 0, 1, 0, 1, 1, 0, 1, 1, 1],
        [1, 1, 1, 1, 1, 0, 1, 0, 1, 0],
        [0, 0, 1, 1, 0, 1, 1, 0, 1, 0],
        [0, 1, 0, 1, 1, 1, 1, 0, 1, 1],
        [1, 0, 0, 1, 1, 1, 1, 1, 0, 1],
        [0, 0, 1, 0, 1, 0, 0, 1, 0, 1],
        [1, 1, 1, 1, 1, 0, 0, 1, 0, 0],
        [1, 0, 1, 1, 0, 1, 1, 1, 1, 0],
        [1, 0, 1, 0, 0, 1, 0, 1, 1, 0],
        [0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
        [1, 0, 0, 1, 1, 1, 1, 1, 1, 1],
        [0, 1, 0, 0, 1, 0, 0, 1, 0, 1],
        [0, 0, 1, 0, 0, 1, 1, 0, 0, 1],
        [0, 1, 1, 0, 1, 1, 0, 1, 0, 0],
        [0, 1, 0, 0, 0, 0, 1, 1, 0, 1],
        [1, 1, 0, 1, 1, 1, 1, 1, 0, 1],
        [1, 0, 0, 0, 0, 1, 1, 1, 1, 1],
        [0, 1, 0, 0, 0, 1, 1, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 1],
        [0, 1, 0, 0, 1, 0, 1, 1, 1, 1],
        [1, 1, 0, 1, 0, 0, 1, 1, 1, 0],
        [1, 1, 0, 1, 0, 1, 0, 1, 0, 1],
        [1, 0, 1, 1, 0, 0, 1, 0, 1, 1],
        [0, 0, 1, 0, 1, 0, 1, 1, 1, 1],
        [1, 1, 1, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 1, 0, 1, 1, 1, 1, 1, 0],
        [1, 0, 1, 0, 0, 1, 0, 1, 0, 0],
        [1, 0, 1, 0, 1, 0, 0, 1, 1, 0],
        [1, 0, 0, 0, 1, 1, 1, 0, 0, 0],
        [1, 0, 1, 1, 1, 1, 1, 0, 0, 0],
        [0, 1, 1, 1, 0, 0, 1, 0, 0, 0],
        [1, 0, 1, 0, 1, 0, 0, 1, 1, 1],
        [0, 1, 0, 1, 0, 0, 0, 1, 0, 1],
        [0, 0, 0, 0, 1, 0, 0, 1, 0, 1],
        [1, 0, 0, 1, 1, 1, 1, 1, 1, 1],
        [1, 0, 1, 0, 0, 1, 1, 0, 0, 1],
        [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 0, 0, 1, 0, 0, 0],
        [0, 1, 1, 1, 1, 0, 1, 0, 1, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 0, 1, 1, 0, 1],
        [1, 0, 1, 1, 0, 0, 0, 0, 1, 1],
        [1, 0, 1, 0, 1, 0, 1, 1, 0, 0],
        [0, 1, 1, 0, 0, 0, 0, 1, 0, 0],
        [1, 1, 0, 1, 0, 1, 1, 0, 1, 0],
        [0, 1, 1, 0, 1, 1, 1, 0, 1, 1],
        [0, 1, 0, 0, 0, 0, 1, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 0, 1, 1, 1, 1, 0, 1, 0],
        [1, 1, 1, 1, 1, 0, 0, 0, 1, 0],
        [1, 0, 0, 0, 1, 0, 1, 0, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 1],
        [0, 0, 1, 0, 1, 1, 0, 1, 0, 0],
        [0, 0, 1, 1, 1, 0, 1, 0, 0, 0],
        [0, 0, 0, 1, 1, 1, 0, 0, 1, 1],
        [1, 0, 1, 1, 0, 0, 1, 1, 0, 0],
        [0, 0, 1, 1, 1, 1, 1, 0, 0, 1],
        [0, 1, 1, 1, 1, 0, 1, 1, 1, 0],
        [1, 0, 0, 1, 0, 0, 1, 1, 0, 1],
        [0, 1, 1, 1, 0, 0, 0, 1, 0, 1],
        [1, 0, 1, 0, 1, 0, 1, 1, 0, 0],
        [1, 0, 1, 1, 1, 1, 1, 0, 0, 0],
        [1, 0, 0, 0, 1, 0, 0, 0, 1, 0],
        [1, 1, 0, 1, 0, 1, 0, 0, 1, 0],
        [1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
        [0, 0, 1, 1, 1, 1, 0, 0, 1, 0],
        [0, 1, 0, 0, 1, 0, 0, 0, 1, 1],
        [0, 1, 0, 1, 0, 1, 0, 0, 1, 0],
        [0, 1, 1, 0, 0, 0, 1, 1, 0, 0]]

In [None]:
# Convert the code matrix to a tensor. as_tensor leaves an existing tensor
# untouched, so re-running this cell neither copies it again nor triggers the
# warning that torch.tensor() emits when it is handed a tensor.
gold_matrix = torch.as_tensor(gold_matrix)
from tqdm import tqdm
# Defining train model

# Per-class counters filled during the test phase of train_model (one slot per
# codeword / class): number of test examples seen and number classified correctly.
true_value = [0] * gold_matrix.shape[0]
pred_value = [0] * gold_matrix.shape[0]

def _decode_predictions(probabilities, code_matrix):
    """Return, for each row of `probabilities`, the index of the codeword (row of
    `code_matrix`) that has the largest dot product with it."""
    scores = probabilities.detach().cpu().float() @ code_matrix.float().t()
    return scores.argmax(dim=1)


def train_model(model, criterion, optimizer, scheduler, num_epochs=8, checkpoint_dir=None):
    """Train `model` to predict class codewords and evaluate it after every epoch.

    Every epoch runs a 'train' phase followed by a 'test' phase over the
    module-level `dataloaders`. The network output is passed through a softmax,
    the loss is `criterion(softmax_output, target_codeword)`, and a prediction
    is the class whose codeword in `gold_matrix` has the largest dot product
    with the softmax output. During test phases the module-level per-class
    counters `true_value` (examples seen) and `pred_value` (examples classified
    correctly) are incremented; they accumulate across epochs.

    Args:
        model: network that maps an image batch to one score per code bit.
        criterion: loss called as `criterion(probabilities, target_code)`.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training phase.
        num_epochs: number of epochs to run.
        checkpoint_dir: directory that receives the per-epoch model, optimizer
            and scheduler state dicts (created if missing). When None, the
            original hard-coded location is used.

    Returns:
        The trained model (the same object that was passed in).
    """
    since = time.time()

    # CPU copy for decoding and a float copy on the compute device for targets.
    code_matrix = torch.as_tensor(gold_matrix).cpu()
    code_matrix_on_device = code_matrix.float().to(device)
    softmax = nn.Softmax(dim=1)

    # Original hard-coded checkpoint prefix, with every backslash escaped (the
    # resulting string is unchanged). Used only when no checkpoint_dir is given.
    legacy_prefix = 'C:\\Users\\HP\\CIFAR100_L_T\\datasets\\cifar-100-python\\'
    if checkpoint_dir is not None:
        os.makedirs(checkpoint_dir, exist_ok=True)

    for epoch in tqdm(range(num_epochs)):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and an evaluation phase.
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                # The dataset yields each label as a (1, 1) float tensor;
                # flatten the batch to a 1-D vector of integer class ids.
                class_ids = labels.reshape(labels.size(0), -1)[:, 0].long().cpu()
                # Target codeword of every example, on the same device as the output.
                target_code = code_matrix_on_device[class_ids.to(device)]

                # Forward pass; gradients are tracked only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    # NOTE(review): the criterion receives softmax probabilities,
                    # as in the original code. With nn.CrossEntropyLoss this
                    # applies a softmax twice, and the multi-hot targets are not
                    # probability distributions -- confirm the intended loss.
                    out = softmax(outputs)
                    loss = criterion(out, target_code)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # Decode on the CPU: nearest codeword by maximum dot product.
                preds = _decode_predictions(out, code_matrix)
                correct = preds == class_ids

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += int(correct.sum())

                if phase == 'test':
                    for class_id, is_hit in zip(class_ids.tolist(), correct.tolist()):
                        true_value[class_id] += 1
                        if is_hit:
                            pred_value[class_id] += 1

            if phase == 'train':
                scheduler.step()

            # Use the real split size rather than a hard-coded constant.
            num_examples = max(len(dataloaders[phase].dataset), 1)
            epoch_loss = running_loss / num_examples
            epoch_acc = running_corrects / num_examples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

        # Save a checkpoint of all three state dicts at the end of every epoch.
        for stem, stateful in (('model', model), ('optimizer', optimizer), ('scheduler', scheduler)):
            file_name = stem + str(epoch) + '.pt'
            if checkpoint_dir is None:
                target_path = legacy_prefix + file_name
            else:
                target_path = os.path.join(checkpoint_dir, file_name)
            torch.save(stateful.state_dict(), target_path)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return model

In [None]:
## Load an ImageNet-pretrained VGG16 and freeze all of its pre-trained parameters
model=models.vgg16(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

## Feature extraction (not fine-tuning): the frozen backbone is kept and only a
## new linear head is trained. The head emits one score per bit of the class
## codewords, so its width is taken from the code matrix instead of hard-coded.
code_length = len(gold_matrix[0])
model.classifier=nn.Linear(25088,code_length)

model = model.to(device)

# NOTE(review): train_model feeds softmax outputs and multi-hot codeword targets
# to this criterion -- confirm CrossEntropyLoss is the intended objective.
criterion = nn.CrossEntropyLoss()
# Only the parameters of the new head are optimised.
optimizer = optim.SGD(model.classifier.parameters(), lr=0.01, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
model=train_model(model,criterion,optimizer,scheduler,num_epochs=8)