In [None]:
# Importing Necessary Libraries

from __future__ import print_function, division
import os, random, time, copy, scipy, pickle, sys, math
from skimage import io, transform
import numpy as np
import os.path as path
import scipy.io as sio
from scipy import misc
from scipy import ndimage, signal
import matplotlib.pyplot as plt
import PIL.Image
from io import BytesIO
from skimage import data, img_as_float

import pandas as pd
import seaborn as sn
import sklearn.metrics 
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler 
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision
from torchvision import datasets, models, transforms

In [None]:
# Seed the NumPy and PyTorch global RNGs so repeated runs draw the same numbers.
np.random.seed(0)
torch.manual_seed(0)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Mount Google Drive at /content/drive. `google.colab` is only importable inside a
# Colab runtime, so this cell is expected to fail anywhere else.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Make the datasets folder on the mounted Drive the working directory; paths that
# are later derived from os.getcwd() (e.g. save_dir) are resolved against it.
os.chdir('/content/drive/MyDrive/Data_2/CIFAR100_L_T/datasets')

In [None]:
# ---- Experiment configuration -------------------------------------------------
curr_working_dir = os.getcwd()
project_name = 'demo_1'
imb_type = 'exp'   # sample the long-tailed training set with an exponentially decaying function
imb_factor = 0.01  # images in rarest class / images in most frequent class (imbalance 100 = 1/0.01)

nClasses = 100           # number of classes in CIFAR100-LT
encoder_num_layers = 13  # network architecture is VGG16
batch_size = 64          # mini-batch size used by both dataloaders
isPretrained = True

# Release cached GPU memory left over from earlier runs in the same kernel.
torch.cuda.empty_cache()

# Everything produced by this experiment lives under <cwd>/exp/<project_name>.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
save_dir = path.join(curr_working_dir, 'exp', project_name)
os.makedirs(save_dir, exist_ok=True)

log_filename = os.path.join(save_dir, 'train.log')

In [None]:
# Folder on the mounted Drive that holds the dataset files; the 'cifar-100-python'
# sub-folder is joined onto this path in the data-loading cell.
path_to_DB = '/content/drive/MyDrive/Data_2/CIFAR100_L_T/datasets'

In [None]:
def get_img_num_per_cls(cls_num, total_num, imb_type, imb_factor):
    """Return how many images each class keeps in the imbalanced training set.

    Adapted from publicly available code [commit 6feb304, MIT License]:
    https://github.com/kaidic/LDAM-DRW/blob/master/imbalance_cifar.py

    Args:
        cls_num: number of classes.
        total_num: number of images in the balanced set; each class is assumed
            to own ``total_num / cls_num`` of them.
        imb_type: ``'exp'`` for an exponentially decaying profile, ``'step'`` for
            a two-level profile; any other value keeps the set balanced.
        imb_factor: ratio between the smallest and the largest class size.

    Returns:
        A list of ``cls_num`` ints, one image count per class index. An empty
        list is returned when ``cls_num`` is not positive.
    """
    if cls_num <= 0:
        return []

    img_max = total_num / cls_num

    if imb_type == 'exp':
        if cls_num == 1:
            # The exponent below divides by (cls_num - 1); a single class simply
            # keeps all of its images.
            return [int(img_max)]
        return [
            int(img_max * (imb_factor ** (cls_idx / (cls_num - 1.0))))
            for cls_idx in range(cls_num)
        ]

    if imb_type == 'step':
        # First half keeps every image, the rest is scaled down. For an odd class
        # count the extra class goes to the reduced group so that exactly
        # cls_num entries are returned.
        head = cls_num // 2
        tail = cls_num - head
        return [int(img_max)] * head + [int(img_max * imb_factor)] * tail

    return [int(img_max)] * cls_num


def gen_imbalanced_data(img_num_per_cls, imgList, labelList):
    """Sub-sample a labelled image array to the requested per-class counts.

    Adapted from publicly available code [commit 6feb304, MIT License]:
    https://github.com/kaidic/LDAM-DRW/blob/master/imbalance_cifar.py

    Classes are visited in ascending label order and, for each one, the first
    ``img_num_per_cls[i]`` samples (in their original order) are kept. No
    shuffling is done, so the selection is deterministic.

    Args:
        img_num_per_cls: number of images to keep for each class, paired with
            the sorted unique labels.
        imgList: array-like of images whose first axis indexes samples.
        labelList: sequence of integer labels, one per image.

    Returns:
        ``(new_data, new_targets)``: the selected images stacked along axis 0
        and the list of their labels, always of equal length.
    """
    images = np.asarray(imgList)
    targets_np = np.array(labelList, dtype=np.int64)
    classes = np.unique(targets_np)

    new_data = []
    new_targets = []
    for the_class, the_img_num in zip(classes, img_num_per_cls):
        idx = np.where(targets_np == the_class)[0]
        selec_idx = idx[:the_img_num]
        new_data.append(images[selec_idx, ...])
        # Use the number of samples actually selected: a class may hold fewer
        # images than requested, and labels must stay aligned with the data.
        new_targets.extend([the_class] * len(selec_idx))

    if not new_data:
        # np.vstack rejects an empty sequence; return an empty selection instead.
        return (images[:0], new_targets)
    return (np.vstack(new_data), new_targets)



class CIFAR100LT(Dataset):
    """Map-style dataset over in-memory images and labels with a per-split transform.

    The 'train' split uses a random resized crop to 224 plus a random horizontal
    flip; every other split is resized to 256 and centre-cropped to 224. Both
    pipelines end with tensor conversion and the same channel normalisation.
    ``isAugment`` is stored but does not influence which pipeline is chosen.
    """

    def __init__(self, set_name, imageList=[], labelList=[], labelNames=[], isAugment=True):
        self.set_name = set_name
        self.isAugment = isAugment
        self.labelNames = labelNames

        # Steps shared by both pipelines.
        tensor_and_normalize = [
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ]
        if self.set_name == 'train':
            spatial_ops = [
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
            ]
        else:
            spatial_ops = [
                transforms.Resize(256),
                transforms.CenterCrop(224),
            ]
        self.transform = transforms.Compose(spatial_ops + tensor_and_normalize)

        self.imageList = imageList
        self.labelList = labelList
        self.current_set_len = len(self.labelList)

    def __len__(self):
        """Number of samples, fixed when the dataset was constructed."""
        return self.current_set_len

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_tensor)`` for sample ``idx``.

        The stored image is transposed from channel-first to channel-last before
        being handed to PIL. The label is returned as a float32 tensor with two
        leading singleton axes added.
        """
        chw_image = self.imageList[idx]
        label_array = np.asarray(self.labelList[idx])

        pil_image = PIL.Image.fromarray(chw_image.transpose(1, 2, 0))
        image_tensor = self.transform(pil_image)

        label_tensor = torch.from_numpy(label_array.astype(np.float32))
        label_tensor = label_tensor.unsqueeze(0).unsqueeze(0)
        return image_tensor, label_tensor

In [None]:
# Unpickling Data Files: build the long-tailed train set and the full test set.

# Only descend into 'cifar-100-python' once, so re-running this cell does not
# keep appending the folder name to path_to_DB.
if path.basename(path.normpath(path_to_DB)) != 'cifar-100-python':
    path_to_DB = path.join(path_to_DB, 'cifar-100-python')

# NOTE: `datasets` rebinds the name of the `torchvision.datasets` module imported
# at the top of the notebook; the name is kept because later cells index this dict.
datasets = {}
dataloaders = {}

# NOTE(security): pickle.load can execute arbitrary code. Only point this cell at
# the official CIFAR-100 archive, never at files from an untrusted source.
setname = 'meta'
with open(os.path.join(path_to_DB, setname), 'rb') as obj:
    labelnames = pickle.load(obj, encoding='bytes')
    labelnames = labelnames[b'fine_label_names']
labelnames = [name.decode("utf-8") for name in labelnames]


setname = 'train'
with open(os.path.join(path_to_DB, setname), 'rb') as obj:
    DATA = pickle.load(obj, encoding='bytes')
# Flat rows -> (N, 3, 32, 32) channel-first images.
new_imgList = DATA[b'data'].reshape((DATA[b'data'].shape[0], 3, 32, 32))
new_labelList = DATA[b'fine_labels']
total_num = len(new_labelList)
img_num_per_cls = get_img_num_per_cls(nClasses, total_num, imb_type, imb_factor)
# Sub-sample the TRAIN arrays loaded just above. The previous code passed
# `imgList, labelList`, which are undefined on a fresh kernel and hold the test
# split when the cell is re-run.
new_imgList, new_labelList = gen_imbalanced_data(img_num_per_cls, new_imgList, new_labelList)
assert len(new_imgList) == len(new_labelList), "train images and labels are misaligned"
datasets[setname] = CIFAR100LT(
    imageList=new_imgList, labelList=new_labelList, labelNames=labelnames,
    set_name=setname, isAugment=setname=='train')
print('#examples in {}-set:'.format(setname), datasets[setname].current_set_len)


setname = 'test'
with open(os.path.join(path_to_DB, setname), 'rb') as obj:
    DATA = pickle.load(obj, encoding='bytes')
imgList = DATA[b'data'].reshape((DATA[b'data'].shape[0], 3, 32, 32))
labelList = DATA[b'fine_labels']
total_num = len(labelList)
# The test split is used as-is (no long-tail sub-sampling).
datasets[setname] = CIFAR100LT(
    imageList=imgList, labelList=labelList, labelNames=labelnames,
    set_name=setname, isAugment=setname=='train')


print('#examples in {}-set:'.format(setname), datasets[setname].current_set_len)

In [None]:
# One DataLoader per split; only the training split is reshuffled every epoch.
dataloaders = dict(
    (split, DataLoader(datasets[split], batch_size=batch_size, shuffle=(split == 'train')))
    for split in ['train', 'test']
)

print('#train batch:', len(dataloaders['train']), '\t#test batch:', len(dataloaders['test']))

In [None]:
# Count how many training images each class kept after long-tail sampling.
# `b` lists the class indices; `num_of_img[c]` is the number of samples of class c.
b = list(range(nClasses))
num_of_img = [0] * nClasses
for label in new_labelList:
    # Single pass over the labels instead of comparing each one with every class.
    if 0 <= label < nClasses:
        num_of_img[int(label)] += 1

In [None]:
# Number of samples per split, taken from the datasets themselves. The training
# split is the sub-sampled long-tailed set, so a hard-coded 50000 would make any
# per-sample average computed from this dict wrong.
dataset_sizes = {split: len(datasets[split]) for split in ('train', 'test')}

In [None]:
from tqdm import tqdm
# Per-class tallies updated by train_model during its 'test' phase:
#   true_value[c] - number of test samples seen whose label is class c
#   pred_value[c] - number of those samples that were predicted correctly
# train_model never resets them, so they accumulate over every epoch it runs.
true_value=[0]*100
pred_value=[0]*100

# defining train model

def train_model(model, criterion, optimizer, scheduler, num_epochs=75, checkpoint_dir=None):
    """Train and evaluate ``model`` for ``num_epochs`` epochs, then plot accuracy.

    Every epoch runs a 'train' pass followed by a 'test' pass over the
    module-level ``dataloaders`` dict on the module-level ``device``. During the
    'test' pass the module-level ``true_value`` / ``pred_value`` lists are
    updated in place with per-class sample and correct-prediction counts; they
    are not reset here, so they accumulate across epochs.

    Args:
        model: network to optimise; modified in place and returned.
        criterion: loss called as ``criterion(outputs, targets)`` with integer
            class targets of shape ``(batch,)``.
        optimizer: optimiser stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training pass.
        num_epochs: number of epochs to run.
        checkpoint_dir: folder for the model/optimizer/scheduler state dicts
            written every 50 epochs. Defaults to the module-level ``save_dir``.

    Returns:
        The trained ``model``.
    """
    since = time.time()

    if checkpoint_dir is None:
        # Fall back to the experiment folder defined in the configuration cell.
        checkpoint_dir = save_dir
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Per-epoch history, stored as plain Python floats so it can be plotted
    # directly even when training runs on the GPU.
    epoch_loss = {"train": [], "test": []}
    epoch_acc = {"train": [], "test": []}

    for epoch in tqdm(range(num_epochs)):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and an evaluation phase.
        for phase in ['train', 'test']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            num_samples = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                # The dataset yields one float label per sample with two extra
                # singleton axes; flatten to (batch,) integer class indices.
                targets = labels.to(device).reshape(-1).long()

                # Track gradients only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, targets)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # Statistics.
                batch_count = inputs.size(0)
                correct_mask = preds == targets
                running_loss += loss.item() * batch_count
                running_corrects += int(correct_mask.sum().item())
                num_samples += batch_count

                if not is_train:
                    # Per-class bookkeeping for the evaluation split.
                    for cls, hit in zip(targets.tolist(), correct_mask.tolist()):
                        true_value[cls] += 1
                        if hit:
                            pred_value[cls] += 1

            if is_train:
                # Step the LR schedule once per epoch. Stepping it after every
                # batch made the step-based decay fire every `step_size`
                # batches rather than every `step_size` epochs.
                scheduler.step()

            # Average over the samples actually seen in this phase (the
            # long-tailed train split is far smaller than the full CIFAR set).
            denom = max(num_samples, 1)
            phase_loss = running_loss / denom
            phase_acc = running_corrects / denom
            epoch_loss[phase].append(phase_loss)
            epoch_acc[phase].append(phase_acc)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, phase_loss, phase_acc))

        if epoch % 50 == 0:
            # Periodic checkpoint of everything needed to resume training.
            for tag, obj in (('model', model), ('optimizer', optimizer), ('scheduler', scheduler)):
                torch.save(obj.state_dict(),
                           os.path.join(checkpoint_dir, tag + str(epoch) + '.pt'))

        print()

    # One x value per recorded epoch, so x and y always have the same length.
    epochs_axis = list(range(len(epoch_acc['train'])))

    plt.plot(epochs_axis, epoch_acc['train'], label='train')
    plt.xlabel("Iterations")
    plt.ylabel("Train Accuracy")
    plt.legend(loc='best')
    plt.show()

    plt.plot(epochs_axis, epoch_acc['test'], label='test')
    plt.xlabel("Iterations")
    plt.ylabel("Test Accuracy")
    plt.legend(loc='best')
    plt.show()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return model

In [None]:
# Load VGG16 with its pretrained weights and switch off gradients for every
# parameter it currently has, so the loaded weights are not updated in training.
model = models.vgg16(pretrained=True).requires_grad_(False)

In [None]:
# Append a new MLP head (1000 -> 512 -> 256 -> 128 -> nClasses) to the classifier.
# The head is added as a single nn.Sequential; the guard skips the append when the
# classifier already ends in one, so re-running this cell cannot stack a second
# head whose 1000-wide input would no longer match the previous output width.
if not isinstance(model.classifier[-1], nn.Sequential):
    model.classifier.append(nn.Sequential(
        nn.Linear(in_features=1000, out_features=512, bias=True),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Linear(in_features=512, out_features=256, bias=True),
        nn.BatchNorm1d(256),
        nn.ReLU(inplace=True),
        nn.Linear(in_features=256, out_features=128, bias=True),
        nn.BatchNorm1d(128),
        nn.ReLU(inplace=True),
        # One output per class.
        nn.Linear(in_features=128, out_features=nClasses, bias=True),
    ))

In [None]:
# Put the network on the selected device before the optimiser is created.
model = model.to(device)

# Multi-class cross-entropy over the class logits.
criterion = nn.CrossEntropyLoss()

# Adam is handed the parameters of the classifier sub-module only.
optimizer = optim.Adam(params=model.classifier.parameters(), lr=0.005)

# StepLR multiplies the learning rate by gamma=0.1 after every 50 calls to
# scheduler.step().
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=50, gamma=0.1)

model = train_model(model, criterion, optimizer, scheduler, num_epochs=150)