In [40]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from classes.CNNModel import CNNModel
from classes.TrainDataset import TrainDataset
from classes.TestDataset import TestDataset
from utils.cos_sim import cos_sim_make_output

## Plan:
- Use a pre-trained model for feature extraction
- Make pre-trained/homemade model for feature processing
- Compute 20*20 asymmetric cosine similarity matrix
- Select top 2 images most similar to 'left' image

# 0. Pipeline

In [2]:
# Root folders for the CSV files, the training data and the test data.
# Each keeps its trailing slash because later cells append sub-folder names directly.
csv_folder = 'data/'
train_folder = 'data/train/'
test_folder = 'data/test/'

# All CSV files sit directly inside the CSV folder; build their paths in one pass.
train_csv, train_candidates_csv, test_candidates_csv = (
    csv_folder + csv_name
    for csv_name in ('train.csv', 'train_candidates.csv', 'test_candidates.csv')
)

In [15]:
# Select the compute device: CUDA when a GPU is visible, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Instantiate the project's CNN and place it on the selected device.
model = CNNModel().to(device)

# Training objects: cross-entropy loss, and Adam (learning rate 1e-3) over every
# parameter of the model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Test cell below: builds the preprocessing transform, the datasets, and the data loaders.

In [19]:
# Preprocessing applied to every sample before it reaches the model:
#   1. resize to a fixed (height, width) of (49, 40),
#   2. convert the image to a tensor,
#   3. normalize each of the 3 channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((49, 40)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Datasets: both read from the 'left' and 'all' sub-folders of their data folder
# and share the same preprocessing. The training set also takes the train CSV.
train_dataset = TrainDataset(train_csv, train_candidates_csv, train_folder+'left', train_folder+'all', transform=transform)
test_dataset = TestDataset(test_candidates_csv, test_folder+'left', test_folder+'all', transform=transform)

# Training samples are reshuffled on every pass over the loader.
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)

# Inference must not shuffle: a fixed order keeps runs reproducible and keeps the
# samples in dataset order, so predictions can be matched back to the rows of the
# candidates CSV.
# NOTE(review): assumes TestDataset iterates in CSV row order — confirm in classes/TestDataset.py.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

Make first predictions

In [41]:
# Run the project's cosine-similarity helper on the test loader, also passing the
# path to the test candidates CSV. The recorded output shows a progress line every
# 200 samples.
# NOTE(review): the `df_` prefix suggests a pandas DataFrame is returned — confirm in utils/cos_sim.py.
df_output = cos_sim_make_output(test_loader, test_candidates_csv)

Processed 200 samples in 4.18 seconds
Processed 400 samples in 8.35 seconds
Processed 600 samples in 12.73 seconds
Processed 800 samples in 16.62 seconds
Processed 1000 samples in 20.64 seconds
Processed 1200 samples in 25.05 seconds
Processed 1400 samples in 29.46 seconds
Processed 1600 samples in 34.09 seconds
Processed 1800 samples in 38.61 seconds
Processed 2000 samples in 42.50 seconds


# 1. Feature Extraction

#### CLIP ResNet 50x4

In [None]:
# Environment setup commands for CLIP, left commented out (presumably so that
# re-running the notebook does not reinstall packages). Uncomment and run once
# in a fresh environment if the imports below fail.
# NOTE(review): the conda line pins pytorch=1.7.1, while the following cell imports
# `ResNet50_Weights`, which presumably needs a newer torchvision — confirm the
# intended versions before uncommenting. The final `pip install torch` may also
# override the conda-pinned build.
# !conda install --yes -c pytorch pytorch=1.7.1 torchvision cudatoolkit=11.0
# !pip install ftfy regex tqdm
# !pip install git+https://github.com/openai/CLIP.git
# !pip install torch

In [96]:
from torchvision.models import ResNet50_Weights, resnet50

# Load a ResNet-50 initialised with the IMAGENET1K_V2 pretrained weights.
# This rebinds the notebook-level name `model`.
model = resnet50(
    weights=ResNet50_Weights.IMAGENET1K_V2,  # newer weight set, 80.858% accuracy
)

In [115]:
import torch
from clip import clip

# Choose the device name as a plain string: "cuda" when a GPU is available, else "cpu".
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Load the CLIP "RN50x4" checkpoint on that device. The call yields two objects,
# bound here to `model` and `transform`; both names are rebound from earlier cells.
model, transform = clip.load("RN50x4", device=device)

cpu
