In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import sklearn
import torch
import torch.nn as nn
%matplotlib inline

In [27]:
image_path = '../9520-final-project/SUFRData/image_files/uniform_bg/scaling'

# Torchvision models

In [3]:
import torchvision.models as models
from torchvision.models import VGG

In [4]:
vgg19 = models.vgg19(pretrained=True)

In [136]:
resnet101 = models.resnet101(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to /Users/ivanjutamulia/.cache/torch/checkpoints/resnet101-5d3b4d8f.pth
100%|██████████| 170M/170M [00:08<00:00, 21.3MB/s] 


## TODO: use more Brain-Score models

### Remove last layer from network architecture

In [6]:
vgg19.classifier = nn.Sequential(*list(vgg19.classifier.children())[:-1])

In [138]:
resnet101 = nn.Sequential(*list(resnet101.children())[:-1])

# Preprocess Images

In [8]:
from torchvision import transforms
from PIL import Image

def load_img(image_path, img_idx):
    return Image.open('{}/{}.jpg'.format(image_path, img_idx))

def process_img(img):    
    transform = transforms.Compose([
     transforms.Resize(256),                    
     transforms.CenterCrop(224),
     transforms.ToTensor(),
     transforms.Normalize(
         mean=[0.485, 0.456, 0.406],
         std=[0.229, 0.224, 0.225]                  
    )])
    img = transform(img)
    processed_img = torch.unsqueeze(img, 0)
    
    return processed_img

# Process Dataset

## TODO: make datasets with specific scale differences to see if there is difference with more scale variance

In [12]:
def parse_dataset(relative_path):
    f = open(relative_path, "r")
    dataset_idxs = []
    for line in f.readlines():
        img1_idx, img2_idx, label = line.strip().split(" ")
        dataset_idxs.append(((img1_idx, img2_idx), label))
    return dataset_idxs

def get_negative_pairs(face_idxs, c_offset_values):
    negative_pairs = []
    for c in c_offset_values:
        for i in face_idxs:
            for j in face_idxs:
                # ensures i != j and that there are no duplicate pairs
                if i > j: 
                    pair_idxs = (1+5*i+2,1+5*j+2+c)
                    negative_pairs.append(pair_idxs)
    return negative_pairs

def get_dataset_idxs(face_idxs, c_offset_values, num_negative_pairs=None):
    if num_negative_pairs is None:
        num_negative_pairs = face_idxs.shape[0] * len(c_offset_values)
    positive_pairs = np.array([(1+5*i+2,1+5*i+2+c) for c in c_offset_values for i in face_idxs])
    all_negative_pairs = np.array(get_negative_pairs(face_idxs, c_offset_values))
    negative_pairs = all_negative_pairs[list(np.random.choice(len(all_negative_pairs), num_negative_pairs, replace=False)), :]
    return positive_pairs, negative_pairs

#### Train and Test Dataset

In [56]:
train_face_idxs = np.random.choice(400, 350, replace=False)

mask = np.ones(400, dtype=bool)
mask[train_face_idxs] = False
test_face_idxs = np.arange(400)[mask]
c_offset_values = [-2,-1,1,2]

train_positive_pairs, train_negative_pairs = get_dataset_idxs(train_face_idxs, c_offset_values)
test_positive_pairs, test_negative_pairs = get_dataset_idxs(test_face_idxs, [-1,1])

#### Cosine Similarity for Evaluation

In [57]:
def cos_sim(a,b):
    dot = np.dot(a, b.T)
    norm_product = np.linalg.norm(a)*np.linalg.norm(b)
    return dot / norm_product

def compare_img_pairs(model, train_idxs, use_corr=False):
    similarities = []
    i = 0
    
    for img1_idx, img2_idx in train_idxs:
        img1 = process_img(load_img(image_path, img1_idx))
        img2 = process_img(load_img(image_path, img1_idx))
        vector1 = model(img1).detach().numpy()
        vector2 = model(img2).detach().numpy()
        
        if use_corr:
            similarities.append(np.corrcoef(vector1, vector2)[0][1])
        else:
            similarities.append(cos_sim(vector1, vector2)[0][0])
#         print(i)
        i += 1
        
    return similarities

In [58]:
positive_pair_similarities = compare_img_pairs(vgg19, test_positive_pairs, use_corr=False)
negative_pair_similarities = compare_img_pairs(vgg19, test_negative_pairs, use_corr=False)

In [59]:
np.array(positive_pair_similarities).mean()

0.4096328

In [60]:
np.array(negative_pair_similarities).mean()

0.41143674

# Precompute and Store Img. Feature Vectors

In [134]:
import json

vgg19_img_idx_to_vec = {}

for img_idx in range(1, 2001):
    img_vec = process_img(load_img(image_path, img_idx))
    img_feature_vec = vgg19.forward(img_vec).detach().squeeze().numpy()
    vgg19_img_idx_to_vec[img_idx] = img_feature_vec.tolist()
    if img_idx % 100 == 0:
        print("Processed {} images.".format(img_idx))

with open('./vgg19-feature-vecs.json', 'w') as f:
    json.dump(vgg19_img_idx_to_vec, f)

Processed 100 images.
Processed 200 images.
Processed 300 images.
Processed 400 images.
Processed 500 images.
Processed 600 images.
Processed 700 images.
Processed 800 images.
Processed 900 images.
Processed 1000 images.
Processed 1100 images.
Processed 1200 images.
Processed 1300 images.
Processed 1400 images.
Processed 1500 images.
Processed 1600 images.
Processed 1700 images.
Processed 1800 images.
Processed 1900 images.
Processed 2000 images.


In [142]:
def store_img_feature_vecs(model, modelname):
    model_img_idx_to_vec = {}

    for img_idx in range(1, 2001):
        img_vec = process_img(load_img(image_path, img_idx))
        img_feature_vec = model.forward(img_vec).detach().squeeze().numpy()
        model_img_idx_to_vec[img_idx] = img_feature_vec.tolist()
        if img_idx % 100 == 0:
            print("Processed {} images.".format(img_idx))

    with open('./{}-feature-vecs.json'.format(modelname), 'w') as f:
        json.dump(model_img_idx_to_vec, f)

In [143]:
store_img_feature_vecs(resnet101, 'resnet101')

Processed 100 images.
Processed 200 images.
Processed 300 images.
Processed 400 images.
Processed 500 images.
Processed 600 images.
Processed 700 images.
Processed 800 images.
Processed 900 images.
Processed 1000 images.
Processed 1100 images.
Processed 1200 images.
Processed 1300 images.
Processed 1400 images.
Processed 1500 images.
Processed 1600 images.
Processed 1700 images.
Processed 1800 images.
Processed 1900 images.
Processed 2000 images.


## Implementation 0: Similarity-based classifier

### AUC Score (baseline)

In [61]:
from sklearn.metrics import roc_auc_score

labels = np.concatenate((np.zeros(len(negative_pair_similarities)), np.ones(len(positive_pair_similarities))))
roc_auc_score(labels, negative_pair_similarities + positive_pair_similarities)

0.49179999999999996

## Implementation 1: MLP Same/Different Classifier 

In [62]:
vgg_img_idx_to_vec = {}

def get_vgg_vec(img_idx, vgg_img_idx_to_vec, model, image_path):
    if img_idx in vgg_img_idx_to_vec:
        return vgg_img_idx_to_vec[img_idx]
    else:
        img_vec = process_img(load_img(image_path, img_idx))
        img_feature_vec = model.forward(img_vec)
        vgg_img_idx_to_vec[img_idx] = img_feature_vec
        return img_feature_vec

In [63]:
def mlp_preprocessing(model, positive_pairs, negative_pairs):

    pairs = np.concatenate((positive_pairs, negative_pairs))

    x = []
    for img1_idx, img2_idx in pairs:
        img1_feature_vec = get_vgg_vec(img1_idx, vgg_img_idx_to_vec, model, image_path)
        img2_feature_vec = get_vgg_vec(img2_idx, vgg_img_idx_to_vec, model, image_path)
        x_i = torch.cat([img1_feature_vec, img2_feature_vec], dim=1).detach().squeeze().numpy()
        x.append(x_i)

    x = np.array(x)
    y = np.concatenate((np.zeros(len(negative_pairs)), np.ones(len(positive_pairs))))
    
    return x, y

In [47]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes=[512,64,10])
train_x, train_y = mlp_preprocessing(vgg19, train_positive_pairs[:10], train_negative_pairs[:10])
print(train_x.shape)
print(train_y.shape)

(20, 8192)
(20,)


In [50]:
model = mlp.fit(train_x,train_y)
model.best_loss_

0.0020002258602114507

In [51]:
test_x, test_y = mlp_preprocessing(vgg19, test_positive_pairs, test_negative_pairs)
model.score(test_x, test_y)

0.5

### Implementation 2: MLP Multiclass Classification and Correlation of feature vector for evaluation

In [93]:
from itertools import chain

train_faces = list(chain.from_iterable(train_positive_pairs)) + list(chain.from_iterable(train_negative_pairs))
train_faces_indices = np.unique(train_faces)
train_faces_labels = [(i-1) // 5 for i in train_faces_indices]
print(len(train_faces_indices))
print(len(train_faces_labels))

1750
1750


In [103]:
def mlp2_preprocessing(model, train_faces_indices):
    x = []
    for i in train_faces_indices:
        img_feature_vec = get_vgg_vec(i, vgg_img_idx_to_vec, model, image_path).detach().squeeze().numpy()
        x.append(img_feature_vec)

    x = np.array(x)
    
    return x

In [120]:
mlp2 = MLPClassifier(hidden_layer_sizes=[512,64,10])

train_faces = mlp2_preprocessing(vgg19, train_faces_indices[:50])

In [121]:
model2 = mlp2.fit(train_faces, train_faces_labels[:50])
model2.best_loss_

0.0010844428949991225

In [None]:
def mlp2_preprocessing_test_pairs(model, test_faces_indices, positive=True):
    x = []
    if positive:
        for i in test_faces_indices:
            img_feature_vec = get_vgg_vec(i, vgg_img_idx_to_vec, model, image_path).detach().squeeze().numpy()
            x.append(img_feature_vec)

        x = np.array(x)
    
    return x

In [127]:
test_faces_positive_pair = mlp2_preprocessing(vgg19, train_faces_indices[53:55])
test_faces_negative_pair = mlp2_preprocessing(vgg19, train_faces_indices[63:75])

In [123]:
from sklearn.neural_network._base import ACTIVATIONS

def deepest_layer(data, MLP, layer=0):
    L = ACTIVATIONS['relu'](np.matmul(data, MLP.coefs_[layer]) + MLP.intercepts_[layer])
    layer += 1
    if layer >= len(MLP.coefs_)-1:
        return L
    else:
        return deepest_layer(L, MLP, layer=layer)

In [129]:
L_pos = deepest_layer(test_faces_positive_pair, model2)
L_neg = deepest_layer(test_faces_negative_pair, model2)

In [130]:
cos_sim(L_pos[0], L_pos[1])

0.9871656403115372

In [131]:
cos_sim(L_neg[0], L_neg[1])

0.9351490858375113