In [1]:
import sys
sys.path.append("../..")
import numpy as np
from iirc.datasets_loader import get_lifelong_datasets
from iirc.definitions import PYTORCH, IIRC_SETUP
from iirc.utils.download_cifar import download_extract_cifar100
from __future__ import print_function
from __future__ import division

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt

from torchvision.datasets import CIFAR100
import torchvision.transforms as tt
import torch.nn.functional as F
from torchmetrics.classification import MultilabelJaccardIndex

from tqdm import tqdm

import time
import os
import copy
print("PyTorch Version: ", torch.__version__)
print("Torchvision Version: ", torchvision.__version__)

from IIRC_CIFAR_HIERARCHY import classHierarchy
# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"device: {device}")

PyTorch Version:  1.13.0
Torchvision Version:  0.14.0
device: cuda:0


In [2]:
download_extract_cifar100("../../data")

extracting CIFAR 100
dataset extracted




In [3]:
import torchvision.transforms as transforms

essential_transforms_fn = transforms.ToTensor()
augmentation_transforms_fn = transforms.Compose([
    transforms.RandomCrop(32,padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

In [4]:
dataset_splits, tasks, class_names_to_idx = \
    get_lifelong_datasets(dataset_name = "iirc_cifar100",
                          dataset_root = "../../data", # the imagenet folder (where the train and val folders reside, or the parent directory of cifar-100-python folder
                          setup = IIRC_SETUP,
                          framework = PYTORCH,
                          tasks_configuration_id = 0,
                          essential_transforms_fn = essential_transforms_fn,
                          augmentation_transforms_fn = augmentation_transforms_fn,
                          joint = True
                         )

Creating iirc_cifar100
Setup used: IIRC
Using PyTorch
Dataset created


In [5]:
# print(len(tasks))
n_classes_per_task = []
for task in tasks:
    n_classes_per_task.append(len(task))
n_classes_per_task = np.array(n_classes_per_task)

In [6]:
# lifelong_datasets['train'].choose_task(2)
# print(list(zip(*lifelong_datasets['train']))[1])
for i in dataset_splits:
    print(i)


train
intask_valid
posttask_valid
test


In [7]:
# initialize a pretrained model (imageNet)
model_name = "resnet" #choosing alexnet since it is "relatively" easy to train
# model_name = "squeezenet" # changed to squeezeNet since it gets same acc as alex but smaller
num_classes = 9 # in cifar100

batch_size = 4

num_epochs = 14

feature_extract = False #set to false so we can finetune entire model

ngpu = 1

In [8]:
def train_model(model, trainloader, testloader, criterion, optimizer, num_classes, num_epochs=5 ):
    since = time.time() # including this just because
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        
                
        running_loss = 0.0
        running_corrects = 0

        # iterate over data
        for inputs,label1,label2 in tqdm(trainloader):
            inputs = inputs.to(device)
            label1 = torch.from_numpy(np.array([class_names_to_idx[i] for i in label1]))
            label1 = F.one_hot(label1, num_classes=num_classes)
            label1 = label1.to(torch.float32)
            label1 = label1.to(device)
#             label2 = label2.to(device)


            #empty the gradients
            optimizer.zero_grad()

            outputs = model(inputs)
#             print(label1.dtype, outputs.dtype)
#             print(outputs,label1)
            loss = criterion(outputs, label1)
            loss.backward()
            optimizer.step()





            # statistics
            running_loss += loss.item() * inputs.size(0)
#             running_corrects += torch.sum(preds == labels.data)
                
        epoch_loss = running_loss / len(trainloader.dataset)
        print("len dataset = ",len(trainloader.dataset))
#             epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            
        print('{} Loss: {:.4f}'.format('train', epoch_loss))

        print()
        test_model(model, testloader, num_classes, mode=0)
        model = model.to(device)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    # load best model weights
    return model

In [9]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [21]:
class ResNet(nn.Module):
    def __init__(self, num_classes=10, num_layers=18):
        super(ResNet, self).__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        if num_layers not in [18, 34, 50, 101, 152]:
            raise ValueError("For ResNet, choose a number of layers out of 18, 34, 50, 101, and 152")
        elif num_layers == 18:
            self.model = models.resnet18()
        elif num_layers == 34:
            self.model = models.resnet34()
        elif num_layers == 50:
            self.model = models.resnet50()
        elif num_layers == 101:
            self.model = models.resnet101()
        elif num_layers == 152:
            self.model = models.resnet152()

        latent_dim = self.model.fc.in_features
        self.model.fc = nn.Identity()  # replace the builtin fully connected layer with an identity layer
        self.model.output_layer = nn.Linear(latent_dim, self.num_classes)

    def forward(self, input_):
        x = self.model(input_)
        output = self.model.output_layer(x)
        return output, x

In [22]:
def initialize_model(num_classes):
    model_ft = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, num_classes)
    
    return model_ft

In [23]:
def test_model(model,testloader,num_classes,mode=0):
    correct = 0
    total = len(testloader.dataset)
    running_corrects = 0 
    JS = MultilabelJaccardIndex(num_labels = int(num_classes), average='weighted')
    with torch.no_grad():
        print(f"Begining Testing on {num_classes} classes")
        for i,data in enumerate(tqdm(testloader)):
            images, label1,label2 = data
            images = images.to(device)
            # since subclass labels are introduced after their corresponding superclass labels,
            # in case we encounter a subclass label, we can assume it's superclass label has already been introduced
            # or that it's superclass does not exist
            if not any([class_names_to_idx[j] for j in label1] < seen_classes):
                continue
            og_label = (0,0)    
            if label1 in classHierarchy or label1 in classHierarchy.values(): #if subclass has superclass or is superclass
                if label1 in classHierarchy.values(): # if label is superclass label
                    label1 = torch.from_numpy(np.array([class_names_to_idx[i] for i in label1]))
                    
                    og_label = label1
                    label1 = F.one_hot(label1, num_classes=num_classes)
                    
                    label1 = label1.to(torch.int32)

                    label = label1
                else: # if label is subclass and has superclass
                    label2 = label1
                    label1 = classHierarchy[label1]
                    
                    label1 = torch.from_numpy(np.array([class_names_to_idx[i] for i in label1]))
                    
                    label1 = F.one_hot(label1, num_classes=num_classes)
                    label1 = label1.to(torch.int32)

                    label2 = torch.from_numpy(np.array([class_names_to_idx[i] for i in label2]))
                    og_label = label2
                    label2 = F.one_hot(label2, num_classes=num_classes)
                    label2 = label2.to(torch.int32)

                    label = label1 + label2
                    
            else: # subclass has no superclass
                label1 = torch.from_numpy(np.array([class_names_to_idx[i] for i in label1]))
                og_label = label1
                label1 = F.one_hot(label1, num_classes=num_classes)
                label1 = label1.to(torch.int32)

                label = label1

#             label = label.to(torch.int32)
            outputs = model(images).detach().cpu() # sigmoidless activation
            _, preds = torch.max(outputs, 1)
#             print(predicted)

            correct += JS(outputs,label)
            running_corrects += torch.sum(preds == og_label.data)
#             correct += (predicted == label).sum().item()
#             correct /= batch_size

#             print(preds,label)
        if mode == 0:
            print(f"In-task validation JS: {correct / total} ")
            print(f"In-task validation accuracy: {running_corrects / total} ")
            
        elif mode == 1:
            print(f"Post-task validation JS: {correct / total} ")
            print(f"Post-task validation accuracy: {running_corrects / total} ")
            
        elif mode == 2:
            print(f"Final Test JS: {correct /total} ")
            print(f"Final Test accuracy: {running_corrects / total} ")
            
            


In [24]:
# Setup 
# BCE loss for multi-label classification
# sigmoid activation after FC layer 
# everything above 0.5 is a predicted label

criterion = nn.BCEWithLogitsLoss() # as output is sigmoidless

# get dataset corresponding to each split
train_data = dataset_splits["train"]
intask_val_data = dataset_splits["intask_valid"]
posttask_val_data = dataset_splits["posttask_valid"]
test_data = dataset_splits["test"]

# pre-trained Model on imageNet 
# resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
seen_classes = 0
resnet = ResNet(num_classes=10, num_layers=18 )
# resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)


# initialize data to train on first task
for task in range(len(tasks)):
    train_data.choose_task(task)
    intask_val_data.choose_task(task)
    posttask_val_data.choose_task(task)
    
    trainloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=2)
    InTask_valloader = torch.utils.data.DataLoader(intask_val_data, batch_size=batch_size, shuffle=True, num_workers=2)
    PostTask_valloader = torch.utils.data.DataLoader(posttask_val_data, batch_size=batch_size, shuffle=True, num_workers=2)
    
    seen_classes += n_classes_per_task[task]
    
        
    new_fc = nn.Linear(resnet.num_ftrs, seen_classes)
    
    for cl in range(seen_classes-n_classes_per_task[task]):
        new_fc.weight[cl].data = resnet.fc.weight[cl].data
            
    resnet.fc = new_fc
    resnet = resnet.to(device)
    if (device.type == 'cuda') and (ngpu > 1):
        print(f"Training on multiple gps ({ngpu})")    
        resnet = nn.DataParallel(resnet,list(range(ngpu)))
    params_to_update = resnet.parameters()

    optimizer_ft = optim.SGD(params_to_update, lr=1.0, momentum=0.9)
    
    print(f"Begining Training on Task {task+1}")
    resnet = train_model(resnet, trainloader, InTask_valloader, criterion, optimizer_ft , seen_classes,num_epochs)
    test_model(resnet, InTask_valloader,seen_classes, mode=1)

# resnet = train_model(resnet, dataloader_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))

AttributeError: 'ResNet' object has no attribute 'num_ftrs'

In [20]:
PATH = "models/resnet50_JOINT.pth"
torch.save(resnet.state_dict(), PATH)