<a href="https://colab.research.google.com/github/cgold212/assignment3inverseMethod/blob/main/chenCopy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# imports
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as T
import os
from scipy.io import loadmat
import matplotlib.pyplot as plt
import numpy as np
from torchvision.utils import make_grid
import shutil
import random


# use the GPU when torch can see one, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# basic preprocessing pipeline: fixed 200x200 size, random mirror, tensor conversion
_basic_steps = [
    transforms.Resize((200,200)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ToTensor(),
]
trfm = transforms.Compose(_basic_steps)

# download both splits into the same root; no transform is attached to these objects
_cars_kwargs = dict(root='Stanford_cars_dataset', transform=None, download=True)
data = datasets.StanfordCars(**_cars_kwargs)
test_data = datasets.StanfordCars(split='test', **_cars_kwargs)

In [None]:
# load annotations and labels
mat_train = loadmat('./Stanford_cars_dataset/stanford_cars/devkit/cars_train_annos.mat')
mat_test = loadmat('./Stanford_cars_dataset/stanford_cars/cars_test_annos_withlabels.mat')
meta = loadmat('./Stanford_cars_dataset/stanford_cars/devkit/cars_meta.mat')

# get classes; '/' is dropped and ' ' becomes '_' because every class name
# is used as a directory name further down
labels = [name[0].replace('/', '').replace(' ', '_') for name in meta['class_names'][0]]


def _read_annotations(mat):
    """Return one `(file name, class id, class label)` tuple per annotation in `mat`.

    The class id stored in the annotation is 1-based, hence the `- 1` when
    it is used to index `labels`.
    """
    return [
        (example[-1][0], example[4][0][0], labels[example[-2][0][0] - 1])
        for example in mat['annotations'][0]
    ]


# get train / test labels
train = _read_annotations(mat_train)
test = _read_annotations(mat_test)

# create dirs for new dataset
# makedirs(..., exist_ok=True) keeps this cell re-runnable: plain os.mkdir
# raises FileExistsError as soon as the folders exist from an earlier run.
for split in ('train', 'valid', 'test'):
    os.makedirs('./ordered_stanford_dataset/' + split, exist_ok=True)

# get all filenames for train and test
imgs_train = os.listdir('./Stanford_cars_dataset/stanford_cars/cars_train')
imgs_test = os.listdir('./Stanford_cars_dataset/stanford_cars/cars_test')

# create dir for each label in train, valid and test
for i in labels:
    for split in ('train', 'valid', 'test'):
        os.makedirs('./ordered_stanford_dataset/' + split + '/' + i, exist_ok=True)



FileExistsError: ignored

In [None]:
def _move_labelled_images(filenames, annotations, src_dir, dst_dir):
    """Move every annotated file from `src_dir` into `dst_dir + <label> + '/'`.

    Args:
        filenames: file names found in `src_dir`.
        annotations: `(file name, class id, label)` tuples.
        src_dir: source directory path, ending with '/'.
        dst_dir: destination root path, ending with '/'.

    Files that have no annotation are left where they are.
    """
    # One dict lookup per file instead of rescanning the whole annotation
    # list for each image. setdefault keeps the first annotation seen for a
    # name, which is what a linear scan that stops at the first match picks.
    label_by_name = {}
    for annos in annotations:
        label_by_name.setdefault(str(annos[0]), annos[2])

    for image in filenames:
        img_label = label_by_name.get(image)
        if img_label is None:
            continue
        shutil.move(src_dir + image, dst_dir + img_label + '/' + image)


# move files from train to specific folder in the new dataset
_move_labelled_images(imgs_train, train,
                      './Stanford_cars_dataset/stanford_cars/cars_train/',
                      './ordered_stanford_dataset/train/')
# move files from test to specific folder in the new dataset
_move_labelled_images(imgs_test, test,
                      './Stanford_cars_dataset/stanford_cars/cars_test/',
                      './ordered_stanford_dataset/test/')

In [None]:
# fraction of every label folder that is moved into the validation set
val_part = 0.1


def _move_fraction_to_valid(split_name):
    """Move a random `val_part` share of each label folder of `split_name` to 'valid'."""
    source_root = './ordered_stanford_dataset/' + split_name + '/'
    target_root = './ordered_stanford_dataset/valid/'
    for class_dir in labels:
        file_names = os.listdir(source_root + class_dir)
        total = len(file_names)
        wanted = int(np.floor(total * val_part))
        if wanted <= 0:
            continue
        # pick random images for validation
        for position in random.sample(range(0, total), wanted):
            file_name = file_names[position]
            shutil.move(source_root + class_dir + '/' + file_name,
                        target_root + class_dir + '/' + file_name)


# get valid dataset from train
_move_fraction_to_valid('train')
# get valid dataset from test
# NOTE(review): if a test image can share a file name with a train image of
# the same label, this second pass targets a path that may already exist in
# 'valid' - confirm file names are unique across the two splits.
_move_fraction_to_valid('test')

In [None]:
# get ordered datasets
# stats = ((0, 0, 0), (1, 1, 1))

# training pipeline: resize, random 100x100 crop and random mirror as augmentation
trfm = transforms.Compose([
                          transforms.Resize((256, 256)),
                          T.RandomCrop(size=(100, 100)),
                          transforms.RandomHorizontalFlip(0.5),
                          #  transforms.ColorJitter(),
                          transforms.ToTensor(),
                          # transforms.Normalize(*stats, inplace = True)
                          ])
# evaluation pipeline: same resize but without the random crop
trfm_test = transforms.Compose([
                          transforms.Resize((256, 256)),
                          # T.RandomCrop(size=(128, 128)),
                          transforms.RandomHorizontalFlip(0.5),
                          #  transforms.ColorJitter(),
                          transforms.ToTensor(),
                          # transforms.Normalize(*stats, inplace = True)
                          ])

train_data = datasets.ImageFolder('./ordered_stanford_dataset/train',transform= trfm)
# Validation and test use trfm_test: it was defined for them but never wired
# in, so both splits were being evaluated on random 100x100 training crops.
val_data = datasets.ImageFolder('./ordered_stanford_dataset/valid',transform= trfm_test)
test_data = datasets.ImageFolder('./ordered_stanford_dataset/test',transform= trfm_test)

In [None]:
# report how many images each split holds
print(f'number of images in train - {len(train_data)}')
print(f'number of images in validation - {len(val_data)}')
print(f'number of images in test - {len(test_data)}')

In [None]:
# get data loaders
def get_data_loaders(train_data,val_data,test_data,batch_size):
    """Wrap the three datasets in shuffling DataLoaders sharing one batch size.

    Every loader uses `shuffle=True` and two worker processes.

    Returns:
        The tuple `(train_loader, val_loader, test_loader)`.
    """
    loader_options = dict(batch_size=batch_size, shuffle=True, num_workers=2)
    return tuple(
        DataLoader(split, **loader_options)
        for split in (train_data, val_data, test_data)
    )

In [None]:
# build the three loaders with a common batch size
batch_size = 32
train_loader, val_loader, test_loader = get_data_loaders(
    train_data=train_data,
    val_data=val_data,
    test_data=test_data,
    batch_size=batch_size,
)

In [None]:
# function to show batch images
def show(imgs,num2show):
    """Draw the first `num2show` images of the batch `imgs` as one grid.

    The grid has up to 8 images per row and the axes ticks are hidden.
    Nothing is returned.
    """
    _, axis = plt.subplots(figsize=(8, 4))
    axis.set_xticks([])
    axis.set_yticks([])

    selected = imgs.detach()[:num2show]
    grid = make_grid(selected, nrow=8)
    # move the first axis of the grid to the end before handing it to imshow
    axis.imshow(grid.permute(1, 2, 0))

In [None]:
# batch_train_images, train_labels = next(iter(train_loader))
# show(batch_train_images,5)

In [None]:
# batch_test_images, test_labels = next(iter(test_loader))
# show(batch_test_images,5)

In [None]:
# batch_val_images, val_labels = next(iter(val_loader))
# show(batch_val_images,5)

In [None]:
# train_data.classes[53]

In [None]:
# len(labels)

In [None]:
# val_labels

In [None]:
# train_loader.batch_size

In [None]:
from torchvision.models import resnet50


In [None]:
# train function
def train(model, train_loader, val_loader, num_epochs, criterion, optimizer, grad_clip = None, checkpoint_path = None):
    """Train `model` for `num_epochs` epochs, validating after each one.

    Args:
        model: network to optimise; it is moved to the module-level `device`.
        train_loader: loader yielding `(images, labels)` training batches.
        val_loader: loader yielding `(images, labels)` validation batches.
        num_epochs: number of passes over `train_loader`.
        criterion: loss called as `criterion(sigmoid(outputs), one_hot)`,
            where `one_hot` is a float one-hot encoding of the labels.
        optimizer: optimizer already bound to the model's parameters.
        grad_clip: if truthy, gradients are clipped element-wise to
            `[-grad_clip, grad_clip]` before every optimizer step.
        checkpoint_path: optional format string with one `{}` placeholder
            for the epoch number; the state dict is saved there on every
            5th epoch (epoch 0 excluded).

    Returns:
        `(train_losses, val_losses, val_accuracies, train_accuracies)`, each
        a list with one entry per epoch.
    """
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    train_losses_iter = []
    n_iters = 0
    model = model.to(device)
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        total_correct = 0
        total_instances = 0
        for images, labels in train_loader:
            n_iters += 1
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            # NOTE(review): the outputs are squashed with a sigmoid before the
            # loss. That matches a BCE-style criterion; with CrossEntropyLoss
            # (which applies its own softmax) it is probably unintended -
            # confirm which criterion this loop is meant for.
            outputs = torch.sigmoid(model(images))

            # The target is sized from the batch itself. A fixed (64, 196)
            # target only fits full batches of 64; any other batch skipped
            # the loss while optimizer.step() and loss.item() still ran on a
            # stale (or not yet defined) `loss`.
            one_hot = nn.functional.one_hot(labels, num_classes=outputs.shape[1]).to(outputs.dtype)
            loss = criterion(outputs, one_hot)
            loss.backward()

            # Gradient clipping
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            optimizer.step()
            batch_loss = loss.item()
            train_loss += batch_loss
            train_losses_iter.append(batch_loss/train_loader.batch_size)

            # get classifications of the batch
            classifications = torch.argmax(outputs, dim=1)
            total_correct += (classifications == labels).sum().item()
            total_instances += len(images)
            if n_iters % 50 == 0:
                print('Iter - %d Train loss - %f'%(n_iters , batch_loss/train_loader.batch_size))

        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = torch.sigmoid(model(images))
                one_hot = nn.functional.one_hot(labels, num_classes=outputs.shape[1]).to(outputs.dtype)
                val_loss += criterion(outputs, one_hot).item()
                predicted = torch.argmax(outputs, dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # guard the ratios so an empty loader does not raise ZeroDivisionError
        cur_train_acc = round(total_correct/total_instances, 3) if total_instances else 0.0
        cur_val_acc = correct/total if total else 0.0
        train_accuracies.append(cur_train_acc)
        # NOTE(review): the summed batch losses are divided by the batch size,
        # not by the number of batches - kept as is so reported values do not
        # change scale; confirm this is the intended normalisation.
        train_losses.append(train_loss/train_loader.batch_size)
        val_losses.append(val_loss/val_loader.batch_size)
        val_accuracies.append(cur_val_acc)
        if epoch % 10 == 0:
            plot_results_iter(train_losses_iter, val_losses, val_accuracies, train_accuracies)
        print('Epoch - %d | train acc - %f%% | val acc - %f%%'%(epoch ,100*cur_train_acc ,100*cur_val_acc))
        if (checkpoint_path is not None) and epoch > 0 and epoch % 5 == 0:
            # format with `epoch`; `epoch0` was an undefined name
            torch.save(model.state_dict(), checkpoint_path.format(epoch))
    return train_losses, val_losses, val_accuracies, train_accuracies

def plot_results(train_losses, val_losses, val_accuracies):
    """Plot the loss curves (left) and validation accuracy in percent (right)."""
    _, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 5))

    # left panel: both loss curves over the epochs
    for series, name in ((train_losses, 'train'), (val_losses, 'val')):
        loss_ax.plot(series, label=name)
    loss_ax.set(xlabel='Epoch', ylabel='Loss')
    loss_ax.legend()

    # right panel: accuracy scaled from fraction to percent
    accuracy_percent = np.array(val_accuracies) * 100
    acc_ax.plot(accuracy_percent)
    acc_ax.set(xlabel='Epoch', ylabel='Accuracy')

    plt.show()
def plot_results_iter(train_losses, val_losses, val_accuracies, train_accuracies):
    """Plot the training loss (left) and train/val accuracy in percent (right).

    `val_losses` is accepted but not drawn.
    """
    _, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 5))

    # left panel: training loss only
    loss_ax.plot(train_losses, label='train')
    loss_ax.set(xlabel='Iterations', ylabel='Loss')
    loss_ax.legend()

    # right panel: both accuracy curves, scaled from fraction to percent
    for series, name in ((train_accuracies, 'train'), (val_accuracies, 'val')):
        acc_ax.plot(np.array(series) * 100, label=name)
    acc_ax.set(xlabel='Epoch', ylabel='Accuracy')
    acc_ax.legend()

    plt.show()

In [None]:
# mount Google Drive at /content/drive; the checkpoint paths used by the
# training cells point below this mount point
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
from torch.utils.data import DataLoader, Dataset

class TripletDataset(Dataset):
    """Serve `(anchor, positive, negative)` sample triplets from a labelled dataset.

    `dataset` must expose `targets` (one label per item) and return
    `(sample, label)` when indexed. The anchor is item `idx`, the positive is
    a random item with the anchor's label (it may be the anchor itself) and
    the negative is a random item of a randomly chosen other label.
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.classes = list(set(dataset.targets))
        # Index the items of every class once, so __getitem__ does not have
        # to scan all targets twice for each sample it serves. Labels must be
        # hashable; the index reflects `targets` at construction time.
        self._indices_by_class = {}
        for index, target in enumerate(dataset.targets):
            self._indices_by_class.setdefault(target, []).append(index)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        anchor_sample, anchor_label = self.dataset[idx]
        # raises IndexError when the dataset holds a single class, because
        # there is then no other class to draw the negative from
        negative_class = random.choice([c for c in self.classes if c != anchor_label])
        positive_index = random.choice(self._indices_by_class[anchor_label])
        negative_index = random.choice(self._indices_by_class[negative_class])
        positive_sample, _ = self.dataset[positive_index]
        negative_sample, _ = self.dataset[negative_index]
        return anchor_sample, positive_sample, negative_sample


triplet_dataset = TripletDataset(train_data)
batch_size = 200
triplet_dataloader = DataLoader(triplet_dataset, batch_size=batch_size, shuffle=True)

# get_data_loaders wraps its arguments in DataLoaders itself, so it has to be
# given the triplet *dataset*. Passing the loader built above would produce a
# DataLoader over a DataLoader, which cannot be indexed when iterated.
triplet_dataloader1, val_loader, test_loader = get_data_loaders(triplet_dataset,val_data,test_data,batch_size)




In [None]:
import random

def train_siamese(model, dataloader, optimizer, criterion, num_epochs, checkpoint_path = None):
    """Train `model` on triplets and plot the per-iteration training loss.

    Args:
        model: network applied separately to the anchor, positive and
            negative batch; expected to already be on `device`.
        dataloader: loader yielding `(anchor, positive, negative)` batches.
        optimizer: optimizer already bound to the model's parameters.
        criterion: loss called as `criterion(anchor, positive, negative)` on
            the three model outputs.
        num_epochs: number of passes over `dataloader`.
        checkpoint_path: optional format string with one `{}` placeholder
            for the epoch number; the state dict is saved there at the start
            of every 5th epoch (epoch 0 excluded).

    Returns:
        `(model, train_losses)` where `train_losses` holds one float per
        iteration (batch loss divided by the loader's batch size).
    """
    model.train()
    train_losses = []
    for epoch in range(num_epochs):
        print('epoch num: ' ,epoch )
        if (checkpoint_path is not None) and epoch > 0 and epoch % 5 == 0:
            torch.save(model.state_dict(), checkpoint_path.format(epoch))
        # iterate the loader that was passed in, not the global one
        for anchor_image, positive_image, negative_image in dataloader:
            optimizer.zero_grad()

            anchor_image = anchor_image.to(device)
            positive_image = positive_image.to(device)
            negative_image = negative_image.to(device)

            # Compute the feature representations of the anchor, positive, and negative samples
            anchor_representation = model(anchor_image)
            positive_representation = model(positive_image)
            negative_representation = model(negative_image)

            loss = criterion(anchor_representation, positive_representation, negative_representation)
            loss.backward()
            optimizer.step()

            # store a plain float: keeping the loss tensor would keep its
            # autograd graph alive and cannot be plotted from the GPU
            batch_loss = loss.item()
            print(batch_loss)
            train_losses.append(batch_loss/dataloader.batch_size)

    # a single axes object; plt.subplots(1, 2) returns an array of axes,
    # on which .plot() does not exist
    fig, ax1 = plt.subplots(figsize=(10, 5))
    ax1.plot(train_losses, label='train')
    # one point per iteration is recorded, so label the axis accordingly
    ax1.set_xlabel('Iterations')
    ax1.set_ylabel('Loss')
    ax1.legend()
    plt.show()
    return model,train_losses

# get the model
import torchvision
# model = resnet50(pretrained=True)
model = torchvision.models.efficientnet_b0(pretrained=True)
model = model.to(device)

# model.load_state_dict(torch.load('/content/drive/MyDrive/Intro_to_Deep_Learning/checkpoints_resnet50_test2/ckpt-275.pk'))

# batch_size = 32
# train_loader, val_loader, test_loader = get_data_loaders(train_data,val_data,test_data,batch_size)

num_epochs = 50
optimizer = torch.optim.Adam(lr = 0.0001,params=model.parameters(), weight_decay = 1e-4)


# Define the triplet margin loss
triplet_loss = torch.nn.TripletMarginLoss(margin=1.0)
# pass the loader built directly on the triplet dataset; train_siamese now
# iterates whatever loader it is given
trained_model1, train_losses = train_siamese(model,
                               triplet_dataloader,
                               optimizer,
                               triplet_loss,
                               num_epochs,
                               checkpoint_path='/content/drive/MyDrive/Intro_to_Deep_Learning/checkpoints_eff_contrastive/ckpt-{}.pk')


In [None]:
# Continue from the network returned by the triplet training cell. That cell
# binds it to `trained_model1`; `trained_model` was read here before it was
# ever assigned.
trained_model = trained_model1
# Add a new linear layer as classifier
trained_model.add_module('classifier', nn.Linear(1280, 196))
# move to the device after attaching the new layer so it is moved as well
trained_model = trained_model.to(device)

batch_size = 64
train_loader, val_loader, test_loader = get_data_loaders(train_data,val_data,test_data,batch_size)


num_epochs = 200
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lr = 0.0001,params=trained_model.parameters(), weight_decay = 1e-4)

info = train(trained_model,
             train_loader,
             val_loader,
             num_epochs,
             criterion,
             optimizer,
             grad_clip = 0.5,
             checkpoint_path='/content/drive/MyDrive/Intro_to_Deep_Learning/checkpoints_resnet50_test2/ckpt-{}.pk')

In [None]:
# get the model
import torchvision
# model = resnet50(pretrained=True)
model = torchvision.models.efficientnet_b0(pretrained=True)

# register a 1280 -> 196 linear layer under the name 'classifier'
model.add_module('classifier', nn.Linear(1280, 196))

# model.fc = nn.Sequential(nn.Linear(in_features=2048, out_features=500, bias=True),
                        #  nn.ReLU(),
                        #  nn.Linear(in_features=500, out_features=196, bias=True))

# `device` is the CPU when CUDA is unavailable, so this move is then a no-op
model = model.to(device)
# model.load_state_dict(torch.load('/content/drive/MyDrive/Intro_to_Deep_Learning/checkpoints_resnet50_test2/ckpt-275.pk'))

batch_size = 64
train_loader, val_loader, test_loader = get_data_loaders(train_data,val_data,test_data,batch_size)

# short classification run with periodic checkpoints on Drive
num_epochs = 10
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0001, weight_decay=1e-4)
info = train(
    model,
    train_loader,
    val_loader,
    num_epochs,
    criterion,
    optimizer,
    grad_clip=0.5,
    checkpoint_path='/content/drive/MyDrive/Intro_to_Deep_Learning/checkpoints_resnet50_test2/ckpt-{}.pk',
)

In [None]:
# we can take only part of the dataset 

# samp = torch.utils.data.Subset(train_data,[0,1,2,3,4])

In [None]:
# rebuild the three loaders with a batch size of 64
batch_size = 64
train_loader, val_loader, test_loader = get_data_loaders(
    train_data=train_data,
    val_data=val_data,
    test_data=test_data,
    batch_size=batch_size,
)

In [None]:

# long run on the current model, without checkpointing
num_epochs = 100
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0001, weight_decay=1e-4)
info = train(
    model,
    train_loader,
    val_loader,
    num_epochs,
    criterion,
    optimizer,
    grad_clip=0.5,
)

Try to make the data more diverse with random crops

In [None]:
# short follow-up run on the same model; results are kept separately in info2
num_epochs = 10
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0001, weight_decay=1e-4)
info2 = train(
    model,
    train_loader,
    val_loader,
    num_epochs,
    criterion,
    optimizer,
    grad_clip=0.5,
)