In [None]:
import os
import copy
import torch
import model
import datasets
import torchvision
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
os.environ['KMP_DUPLICATE_LIB_OK']='True'
from tqdm import tqdm

In [None]:
def valid_file(fp, classes, img_per_class, out_dir="birds_dataset"):
    '''
    Build two disjoint hold-out splits from the original training list.

    For every class label, 2 * img_per_class rows are drawn without
    replacement. The first img_per_class of them form validation set 1 and the
    remaining img_per_class form validation set 2. Each training list contains
    every row that is not in the matching validation set, so train_1 still
    contains the rows of valid_2 and vice versa.

    Writes four files into out_dir: processed_train_1.txt,
    processed_val_1.txt, processed_train_2.txt and processed_val_2.txt.

    fp --> file path for original train_list; each non-blank line is split on
           whitespace and its second token is compared against the class label
    classes --> number of classes; labels are assumed to be 0 .. classes - 1
    img_per_class --> how many validation images we want per class in each split
    out_dir --> directory the four output files are written to

    Raises ValueError if the list cannot be parsed into at least two columns,
    or (from np.random.choice) if a class has fewer than 2 * img_per_class rows.
    '''
    with open(fp) as f:
        # Skip blank lines so that a trailing newline does not produce a ragged row.
        rows = [line.split() for line in f if line.strip()]

    all_fps = np.array(rows)
    if all_fps.ndim != 2 or all_fps.shape[1] < 2:
        raise ValueError("expected '<path> <label>' on every line of " + str(fp))

    valid_1, train_1 = [], []
    valid_2, train_2 = [], []

    for clss in range(classes):
        filtered = all_fps[all_fps[:, 1] == str(clss)]

        # One draw without replacement guarantees the two validation sets are disjoint.
        choice = np.random.choice(len(filtered), 2 * img_per_class, replace=False)

        # Boolean masks keep the rows in file order, as the original per-row loop did.
        in_valid_1 = np.zeros(len(filtered), dtype=bool)
        in_valid_1[choice[:img_per_class]] = True
        in_valid_2 = np.zeros(len(filtered), dtype=bool)
        in_valid_2[choice[img_per_class:]] = True

        valid_1.extend(filtered[in_valid_1])
        train_1.extend(filtered[~in_valid_1])
        valid_2.extend(filtered[in_valid_2])
        train_2.extend(filtered[~in_valid_2])

    np.savetxt(os.path.join(out_dir, "processed_train_1.txt"), train_1, fmt="%s")
    np.savetxt(os.path.join(out_dir, "processed_val_1.txt"), valid_1, fmt="%s")
    np.savetxt(os.path.join(out_dir, "processed_train_2.txt"), train_2, fmt="%s")
    np.savetxt(os.path.join(out_dir, "processed_val_2.txt"), valid_2, fmt="%s")

if __name__ == "__main__":
    print('Create Validation dataset')
    # A single call writes both hold-out splits (processed_*_1.txt and
    # processed_*_2.txt). To check the two validation lists share no rows, run
    # inside birds_dataset/:
    #     grep -Fxf processed_val_1.txt processed_val_2.txt
    valid_file(
        './birds_dataset/train_list.txt',
        20,  # number of classes
        3,   # validation images per class
    )

In [None]:
class Trainer:
    '''
    Data loading, model construction, training, evaluation and plotting for the
    bird classification experiments.

    Two hold-out splits (train/val 1 and train/val 2) plus a test set are loaded
    by loaddata(). One of the init_* methods must be called to populate
    self.nn_model before train() or kfolds() is used.

    Models are placed on the GPU when one is available and on the CPU otherwise;
    batches are moved to whatever device the model lives on.
    '''

    # Directory that trained weights are written to and read back from.
    MODEL_DIR = './saved_models/'

    def __init__(self):
        # datasets
        self.train_dataset_1 = None
        self.valid_dataset_1 = None
        self.train_dataset_2 = None
        self.valid_dataset_2 = None
        # dataloaders for 2 holdouts
        self.train_dataloader_1 = None
        self.val_dataloader_1 = None
        self.train_dataloader_2 = None
        self.val_dataloader_2 = None
        # test sets
        self.test_dataset = None
        self.test_dataloader = None

        # number of output classes used for every model head
        self.classes = 20

        self.nn_model = None

    @staticmethod
    def _default_device():
        '''Return the current CUDA device when available, else the CPU.'''
        return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    @staticmethod
    def _device_of(net):
        '''Return the device of net's first parameter (CPU if it has none).'''
        first_param = next(net.parameters(), None)
        return first_param.device if first_param is not None else torch.device('cpu')

    def loaddata(self):
        '''
        Create the datasets and dataloaders for both hold-out splits and the test set.
        Reads the list files produced by valid_file from birds_dataset/.
        '''
        root = 'birds_dataset/'
        self.train_dataset_1 = datasets.bird_dataset(root, 'processed_train_1.txt')
        self.valid_dataset_1 = datasets.bird_dataset(root, 'processed_val_1.txt')

        self.train_dataset_2 = datasets.bird_dataset(root, 'processed_train_2.txt')
        self.valid_dataset_2 = datasets.bird_dataset(root, 'processed_val_2.txt')

        self.test_dataset = datasets.bird_dataset(root, 'test_list.txt')

        # Pinned memory only helps host-to-GPU copies, so enable it only with CUDA.
        pin = torch.cuda.is_available()

        def make_loader(dataset, shuffle):
            return DataLoader(dataset=dataset, batch_size=32, shuffle=shuffle,
                              num_workers=2, pin_memory=pin)

        # Only the training loaders need shuffling; evaluation metrics do not
        # depend on sample order.
        self.train_dataloader_1 = make_loader(self.train_dataset_1, True)
        self.val_dataloader_1 = make_loader(self.valid_dataset_1, False)
        self.train_dataloader_2 = make_loader(self.train_dataset_2, True)
        self.val_dataloader_2 = make_loader(self.valid_dataset_2, False)
        self.test_dataloader = make_loader(self.test_dataset, False)

    def init_vgg(self, freeze=False):
        '''
        Load a pretrained VGG16-BN and replace its classifier with a fresh
        three-layer head that outputs self.classes logits.
        freeze --> if True, the pretrained parameters are frozen; the new
                   classifier is created afterwards and stays trainable
        '''
        vgg = torchvision.models.vgg16_bn(pretrained=True)
        # freeze layers
        if freeze:
            for param in vgg.parameters():
                param.requires_grad = False
        # Modify last layer
        vgg.classifier = nn.Sequential(
            nn.Linear(in_features=25088, out_features=4096, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(in_features=4096, out_features=4096, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(in_features=4096, out_features=self.classes, bias=True))

        self.nn_model = vgg.to(self._default_device())

    def init_resnet(self, freeze=False):
        '''
        Load a pretrained ResNet-18 and replace its final fully connected layer
        with one that outputs self.classes logits.
        freeze --> if True, the pretrained parameters are frozen; the new fc
                   layer stays trainable
        '''
        resnet = torchvision.models.resnet18(pretrained=True)
        # freeze layers
        if freeze:
            for param in resnet.parameters():
                param.requires_grad = False

        # Modify last layer
        num_ftrs = resnet.fc.in_features
        resnet.fc = nn.Linear(num_ftrs, self.classes)
        # requires_grad_ is a method; assigning to it would only overwrite the attribute.
        resnet.fc.requires_grad_(True)

        self.nn_model = resnet.to(self._default_device())

    def _init_from_scratch(self, net):
        '''Move net to the default device, Xavier-initialise its conv layers and print it.'''
        self.nn_model = net.to(self._default_device())

        def weights_init(m):
            if isinstance(m, nn.Conv2d):
                torch.nn.init.xavier_uniform_(m.weight)
                # Conv layers created with bias=False have no bias tensor to reset.
                if m.bias is not None:
                    torch.nn.init.zeros_(m.bias)

        self.nn_model.apply(weights_init)
        print(self.nn_model)

    def init_model(self):
        '''Create the baseline network (model.baseline_Net) with freshly initialised conv weights.'''
        self._init_from_scratch(model.baseline_Net(classes=self.classes))

    def init_custom(self):
        '''Create the custom network (model.custom_Net) with freshly initialised conv weights.'''
        self._init_from_scratch(model.custom_Net(classes=self.classes))

    def train(self, train_dataloader, val_dataloader, epoch):
        '''
        Train self.nn_model and evaluate it on the validation loader after every epoch.

        Uses cross-entropy loss, Adam (lr=0.0001) and a StepLR schedule that
        multiplies the learning rate by 0.96 every 20 epochs.

        epoch --> number of epochs to run
        returns (train_loss, train_accuracy, val_loss, val_accuracy), each a
        list with one entry per epoch
        '''
        criterion = nn.CrossEntropyLoss().to(self._device_of(self.nn_model))
        optimizer = torch.optim.Adam(self.nn_model.parameters(), lr=0.0001)

        decayRate = 0.96
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=20, gamma=decayRate)

        train_loss, train_accuracy = [], []
        val_loss, val_accuracy = [], []

        # The loop variable is deliberately not called `epoch`, so the parameter is not shadowed.
        for _ in tqdm(range(epoch)):
            train_epoch_loss, train_epoch_accuracy = self.fit(self.nn_model, train_dataloader,
                                                              optimizer, criterion)
            val_epoch_loss, val_epoch_accuracy = self.validate(self.nn_model, val_dataloader, criterion)

            # record acc and losses
            train_loss.append(train_epoch_loss)
            train_accuracy.append(train_epoch_accuracy)
            val_loss.append(val_epoch_loss)
            val_accuracy.append(val_epoch_accuracy)
            tqdm.write('Train Loss: {}, Train Acc: {}'.format(
                train_epoch_loss, train_epoch_accuracy))
            tqdm.write('Val Loss: {}, Val Acc: {}'.format(
                val_epoch_loss, val_epoch_accuracy))

            scheduler.step()

        return train_loss, train_accuracy, val_loss, val_accuracy

    def test(self, model_Net):
        '''
        Load saved weights and report accuracy on the test set.

        model_Net --> "baseline" loads best.pth into model.baseline_Net; any
                      other value loads custom.pth into model.custom_Net
        Prints the test accuracy and also returns it.
        '''
        if model_Net == "baseline":
            testmodel = model.baseline_Net(classes=self.classes)
            weights_file = "best.pth"
        else:
            testmodel = model.custom_Net(classes=self.classes)
            weights_file = "custom.pth"

        device = self._default_device()
        testmodel.load_state_dict(torch.load(self.MODEL_DIR + weights_file, map_location=device))
        testmodel.to(device)
        # Without eval() dropout and batch-norm layers stay in training mode
        # and distort the reported accuracy.
        testmodel.eval()

        with torch.no_grad():
            _, accuracy = self._run_batches(testmodel, self.test_dataloader)

        print('Test Acc: ', accuracy)
        return accuracy

    def _run_batches(self, net, dataloader, criterion=None, optimizer=None):
        '''
        Run one pass over dataloader and return (mean_loss, accuracy).

        The loss is the mean of the per-batch criterion values (0.0 when no
        criterion is given). Accuracy is correct predictions divided by samples
        seen, so a shorter final batch is not over-weighted. When an optimizer
        is supplied, a backward pass and optimizer step follow every batch.
        '''
        device = self._device_of(net)
        loss_sum, batches_seen = 0.0, 0
        correct, samples_seen = 0, 0

        for inputs_batch, labels_batch in dataloader:
            inputs_batch = inputs_batch.to(device)
            labels_batch = labels_batch.to(device).long()

            if optimizer is not None:
                optimizer.zero_grad()

            outputs = net(inputs_batch)

            if criterion is not None:
                loss = criterion(outputs, labels_batch)
                loss_sum += loss.item()
                batches_seen += 1
                if optimizer is not None:
                    loss.backward()
                    optimizer.step()

            _, preds = torch.max(outputs.detach(), 1)
            correct += int((preds == labels_batch).sum())
            samples_seen += int(labels_batch.shape[0])

        mean_loss = loss_sum / batches_seen if batches_seen else 0.0
        accuracy = correct / samples_seen if samples_seen else 0.0
        return mean_loss, accuracy

    def fit(self, model, dataloader, optimizer, criterion):
        '''
        function fits the model for one epoch
        returns the training loss and train_accuracy
        '''
        model.train()
        return self._run_batches(model, dataloader, criterion, optimizer)

    def validate(self, model, dataloader, criterion):
        '''
        runs model on the validation set without tracking gradients
        returns the validation loss and accuracy
        '''
        model.eval()
        with torch.no_grad():
            return self._run_batches(model, dataloader, criterion)

    def kfolds(self, init_model, name, freeze=None, epoch=75):
        '''
        Train a freshly initialised model on each of the two hold-out splits and
        average the per-epoch metrics of the two runs.

        init_model --> one of the init_* bound methods; called before each fold
        name --> file stem for the saved weights (MODEL_DIR + name + ".pth")
        freeze --> forwarded to init_model when it is not None
        epoch --> number of epochs per fold

        Only the model trained on the second split is saved, because
        self.nn_model is replaced when the second fold is initialised.
        returns (train_loss, train_accuracy, val_loss, val_accuracy)
        '''
        folds = (
            (self.train_dataloader_1, self.val_dataloader_1),
            (self.train_dataloader_2, self.val_dataloader_2),
        )
        histories = []
        for fold_train_loader, fold_val_loader in folds:
            if freeze is not None:
                init_model(freeze)
            else:
                init_model()
            histories.append(self.train(fold_train_loader, fold_val_loader, epoch))

        # histories[k] is (train_loss, train_acc, val_loss, val_acc) for fold k;
        # average every metric epoch by epoch across the two folds.
        train_loss, train_accuracy, val_loss, val_accuracy = (
            [(first + second) / 2 for first, second in zip(fold_1_metric, fold_2_metric)]
            for fold_1_metric, fold_2_metric in zip(*histories)
        )

        os.makedirs(self.MODEL_DIR, exist_ok=True)
        torch.save(self.nn_model.state_dict(), self.MODEL_DIR + name + ".pth")
        return train_loss, train_accuracy, val_loss, val_accuracy

    def graph(self, train_loss, train_accuracy, val_loss, val_accuracy):
        '''Plot training vs. validation accuracy, then training vs. validation loss, per epoch.'''
        plt.plot(train_accuracy)
        plt.plot(val_accuracy)
        plt.title('Training vs. Validation Accuracy')
        plt.legend(['train', 'val'])
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.show()

        plt.plot(train_loss)
        plt.plot(val_loss)
        plt.title('Training vs. Validation Loss')
        plt.legend(['train', 'val'])
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.show()

In [None]:
if __name__ == "__main__":
    # Prefer the first GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if device.type == "cuda":
        # set_device only accepts CUDA devices; passing the CPU fallback would raise.
        torch.cuda.set_device(device)
    print(device)

    trainer = Trainer()
    trainer.loaddata()

    # show example
    print('Train', len(trainer.train_dataset_1), ' Valid:', len(
        trainer.valid_dataset_1), ' Test:', len(trainer.test_dataset))
    img, l = trainer.train_dataset_1[0]
    print(img.shape)
    print(l, type(l))
    # Move axis 0 to the end so imshow receives the array as (axis 1, axis 2, axis 0).
    plt.imshow(np.transpose(img, (1, 2, 0)), interpolation='nearest')
    plt.show()

    # Weights are saved here; Trainer.test reads the module-level PATH to load them.
    PATH = './saved_models/'
    os.makedirs(PATH, exist_ok=True)
    EPOCHS = 75

    # baseline net (disabled; enable to train and test the baseline instead)
    #trainer.init_model()
    #train_loss, train_accuracy, val_loss, val_accuracy = trainer.train(trainer.train_dataloader_1, trainer.val_dataloader_1, EPOCHS)
    #trainer.graph(train_loss, train_accuracy, val_loss, val_accuracy)
    #torch.save(trainer.nn_model.state_dict(), PATH + "best.pth")

    # test baseline
    #trainer.test("baseline")

    # custom net
    trainer.init_custom()
    train_loss, train_accuracy, val_loss, val_accuracy = trainer.train(
        trainer.train_dataloader_1, trainer.val_dataloader_1, EPOCHS)
    trainer.graph(train_loss, train_accuracy, val_loss, val_accuracy)
    torch.save(trainer.nn_model.state_dict(), PATH + "custom.pth")

    # test custom
    trainer.test("custom")

In [None]:
# lr = 0.0001
# epoch 50: 0.4065632832080201
# epoch 75: 0.4197994987468672

# lr = 0.001
# epoch 75: 0.39442355889724307
-----
## exp LR
# w/ scheduler
# epoch 75: 0.39262218045112784

# w/ scheduler, leaky relu 0.001
# epoch 75: 0.41220238095238093

# w/ scheduler, leaky relu 0.0001
# epoch 75: 0.39747807017543857

# w/ scheduler, leaky relu 0.01
# epoch 75: doesn't work, accuracy only 0.1; model not saved
------
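## stepLR
# (the three-number entries below are: learning rate, StepLR step_size, StepLR gamma)
# w/ scheduler, leaky relu 0.0001
# epoch 75: also bad, accuracy 0.18; model not saved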

# 0.001, 10, 0.01
# epoch 75: 0.19415726817042608 no save

# 0.001, 20, 0.1
# epoch 75: 0.32346491228070173

# 0.001, 20, 0.3
# epoch 75: 0.3620770676691729

# 0.001, 20, 0.6
# epoch 75: 0.38024749373433586

# 0.001, 30, 0.6
# epoch 75: 0.34234022556390975

# 0.001, 20, 0.8
# epoch 75: 0.39262218045112784

# 0.001, 25, 0.96
# epoch 75: 0.3925438596491228

# 0.0001, 20, 0.96
# epoch 75: 0.4230889724310777

## Weight Maps

In [None]:


def save_first_layer_filters(weights, out_path, grid=(8, 8)):
    '''
    Draw input-channel 0 of every filter in `weights` on a subplot grid and
    save the figure once to `out_path`.

    weights --> tensor that is iterated filter by filter; each filter is
                indexed as filter[0, :, :]
    out_path --> image file to write; its parent directory is created if missing
    grid --> (rows, cols) of the subplot grid; must hold at least len(weights) cells
    '''
    kernels = weights.detach().cpu()
    os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)
    plt.figure(figsize=(20, 17))
    for idx, kernel in enumerate(kernels):
        plt.subplot(grid[0], grid[1], idx + 1)
        plt.imshow(kernel[0, :, :])
        plt.axis('off')
    # Save once after every subplot is drawn instead of rewriting the file per filter.
    plt.savefig(out_path)
    plt.clf()


# Other networks plotted with the same routine (run one at a time):
# save_first_layer_filters(custom.b1[0].weight, 'images/custom_firstlayer.png')
# save_first_layer_filters(list(resnet.children())[0].weight, 'images/resnet_firstlayer.png')

# Build the VGG network in this cell so it also runs on a fresh kernel;
# init_vgg only needs Trainer.classes, so no data has to be loaded.
vgg_trainer = Trainer()
vgg_trainer.init_vgg()
vgg = vgg_trainer.nn_model

model_children = list(vgg.children())
w = model_children[0][0].weight
# (8, 8) grid: one subplot per filter, sized for 64 filters in the first conv layer.
save_first_layer_filters(w, 'images/vgg_firstlayer.png')



In [None]:
# Alternative setups, kept for reference (enable one block at a time):
#
# VGG: a fresh Trainer whose model comes from init_vgg().
# trainer = Trainer()
# trainer.loaddata()
# trainer.init_vgg()
# vgg = trainer.nn_model


# Custom net: rebuild the architecture and load the weights saved as custom.pth.
# PATH = './saved_models/'
# custom = model.custom_Net(classes=20).cuda()
# custom.load_state_dict(torch.load(PATH + "custom.pth"))

# Active setup: a fresh Trainer with the datasets loaded and the model created
# by init_resnet(). This rebinds the notebook-level `trainer`, so any model
# held by a previous `trainer` object is no longer reachable through that name.
trainer = Trainer()
trainer.loaddata()
trainer.init_resnet()
# Keep a direct handle on the network for the visualisation cells below.
resnet = trainer.nn_model


In [None]:



# Preview the first test sample. Axis 0 of the image is moved to the end
# before it is handed to imshow.
first_test_sample = trainer.test_dataset[0]
img, l = first_test_sample
plt.imshow(np.transpose(img, axes=(1, 2, 0)), interpolation='nearest')
plt.show()

# Visualize feature maps
# Captured layer outputs, keyed by the name passed to get_activation.
activation = {}


def get_activation(name):
    '''
    Build a forward hook that stores the hooked layer's output.

    The returned callable takes (module, inputs, output) positionally and saves
    output.detach() in the module-level `activation` dict under `name`,
    replacing any previous entry with that key.
    '''
    def _store_output(_module, _inputs, result):
        activation[name] = result.detach()

    return _store_output

In [None]:
# Bare expression: displays whatever the name `model` is currently bound to
# (it is imported as a module at the top of the notebook).
# NOTE(review): looks like leftover interactive inspection; consider removing.
model

# Feature Map

In [None]:
# ******************************************************************
# PLOTS WERE CREATED SEPARATELY TO PREVENT THE KERNEL FROM DYING!!!!!!!!!!!
#
# Each figure was produced by running this cell once per configuration. To
# reproduce another figure, set `net`, `hooked_layer`, `grid_side` and
# `out_path` below to one of these rows:
#
#   net      hooked_layer                    grid_side   out_path
#   ------   -----------------------------   ---------   --------------------------
#   custom   custom.b1                        8          images/custom_initial.png
#   custom   custom.b6                       16          images/custom_middle.png
#   custom   custom.b6                       16          images/custom_end.png
#   vgg      list(vgg.children())[0][0]       8          images/vgg_initial.png
#   vgg      list(vgg.children())[0][20]     16          images/vgg_middle.png
#   vgg      list(vgg.children())[0][40]     16          images/vgg_end.png
#   resnet   list(resnet.children())[0]       8          images/resnet_initial.png
#   resnet   list(resnet.children())[5][0]    8          images/resnet_middle.png
#   resnet   list(resnet.children())[7][0]   23          images/resnet_end.png   <-- active
#
# NOTE(review): the "custom end" row hooks b6, the same layer as "custom middle",
# exactly as in the original notebook -- confirm which layer was intended.
################################################################################

# Use a dedicated name for the network: rebinding `model` would shadow the
# imported `model` module that Trainer.init_model / Trainer.test rely on.
net = resnet
# Inference mode, so batch-norm uses its running statistics and this single
# image does not update them.
net.eval()
model_children = list(net.children())

hooked_layer = model_children[7][0]
grid_side = 23  # 23 x 23 = 529 subplot cells; must be >= number of channels plotted
out_path = 'images/resnet_end.png'

hook_handle = hooked_layer.register_forward_hook(get_activation('Conv2d'))
try:
    data, _ = trainer.test_dataset[0]
    # Add the batch axis without modifying the tensor returned by the dataset.
    data = data.unsqueeze(0)
    with torch.no_grad():
        output = net(data.to(next(net.parameters()).device))
finally:
    # Remove the hook so re-running the cell does not stack duplicate hooks.
    hook_handle.remove()

act = activation['Conv2d'].squeeze()
os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)
plt.figure(figsize=(20, 17))
for i, feature in enumerate(act):
    plt.subplot(grid_side, grid_side, i + 1)
    plt.imshow(feature.cpu())
    plt.axis("off")
# Write the file once, after all channels are drawn, instead of once per channel.
plt.savefig(out_path)
plt.show()
# The original `plt.clf` (no parentheses) never ran; close the figure to release its memory.
plt.close()

In [None]:
# Bare expression: displays the value left in the loop variable `feature` by
# the plotting loop in the feature-map cell above (the last channel plotted).
# NOTE(review): looks like leftover interactive inspection; consider removing.
feature