# Baseline test for Image Specificity

## Colab Specific Setup

In [0]:
from google.colab import drive
drive.mount("/content/gdrive")

In [0]:
!git clone https://github.com/jasmainak/specificity.git

## Feature Extraction


In [1]:
import torch
import torchvision
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from torchvision.models import vgg16
from torch import nn as nn
from torchvision import transforms
from torchvision import utils
from torch.utils.data import Dataset, DataLoader
from imageio import imread
from scipy.io import loadmat
import os
from tqdm import tqdm_notebook as tqdm

In [2]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
data_path = "./data/"
drive_path = "/content/gdrive/My Drive/data/"

In [4]:
dataset = "pascal"

In [5]:
class SpecificityDataset(Dataset):
    """Dataset class for the Pascal Dataset"""
    
    def __init__(self, image_dir, dataset, data_mat, transform=None):
        self.transform = transform
        self.image_dir = image_dir
        if (not dataset) or (not data_mat):
            raise ValueError("Dataset or Data Matrix not found")
        self.dataset = dataset
        if dataset == "memorability":
            self.files = sorted(os.listdir(self.image_dir), key=lambda x:int(x.split("_")[1].split(".")[0]))
        else:
            self.files = sorted(os.listdir(self.image_dir))
        self.scores = data_mat['specificity'][0][dataset][0]['mean'][0][0]
        
    def __len__(self):
        return len(os.listdir(self.image_dir))
    
    def __getitem__(self, idx):        
        image_file = self.files[idx]
        image = np.asarray(imread(self.image_dir+"/"+image_file))        
        if self.transform:
            image = self.transform(image)            
        sample = {"image": image, "score": self.scores[idx], "file":image_file}
        return sample

In [6]:
transformed_dataset = SpecificityDataset(
    image_dir=data_path+"images/{}/".format(dataset),
    dataset = dataset,
    data_mat = loadmat(data_path+"/specificity_alldatasets.mat"),
    transform=transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(256),
        transforms.RandomCrop(224),        
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])
)

In [7]:
dataloader = DataLoader(transformed_dataset, batch_size=10,
                        shuffle=True)

In [0]:
vgg = vgg16(pretrained=True).features.to(device)

In [0]:
images = None
scores = []
files = []
onlyonce = False
with torch.no_grad():
    for batch in tqdm(dataloader):
        out = vgg(batch['image'].to(device)).cpu().numpy()
        second_dim = out.shape[1]*out.shape[2]*out.shape[3]
        out = out.reshape(out.shape[0],second_dim)
        if type(images) == type(None):
            images = out
        else:
            images = np.append(images, out, axis=0)
        scores.extend([float(bat) for bat in batch['score']])
        files.extend(batch['file'])

        batch = None
        out = None
        if onlyonce:
            break

In [0]:
results = {"images":images, "scores": scores, "filenames":files}

In [8]:
import pickle

In [0]:
with open(drive_path+"{}_features.pickle".format(dataset), "wb") as pkl:
    pickle.dump(results, pkl)

## Support-Vector Regression



In [9]:
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from random import randint
from scipy.stats import spearmanr
from matplotlib import pyplot as plt
%matplotlib inline

In [10]:
train_dataset = "pascal"

In [11]:
with open("{}_features.pickle".format(train_dataset), "rb") as pkl:
    data = pickle.load(pkl)

In [12]:
SPLIT_SEED = 42 # for test consistency

In [13]:
file_train, file_test, image_train, image_test, score_train, score_test = \
train_test_split(
    data['filenames'], data['images'], data['scores'],
    test_size = 0.2,
    random_state = SPLIT_SEED
)

In [141]:
def spearman_loss(y_pred, y_test):
    return spearmanr(y_pred, y_test).correlation

In [177]:
gammas = [1/(image_train[0].shape[0]*i) for i in range(1,11)]
param_grid = {'gamma' : gammas}
grid_search = GridSearchCV(
    SVR(gamma="scale"),
    param_grid=param_grid,
    scoring=make_scorer(spearman_loss, greater_is_better=True),
    iid=False,
    cv=5
)   

In [178]:
grid_search.fit(data["images"], data['scores'])

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='scale', kernel='rbf',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid=False, n_jobs=None,
             param_grid={'C': [1],
                         'gamma': [3.985969387755102e-05, 1.992984693877551e-05,
                                   1.328656462585034e-05, 9.964923469387754e-06,
                                   7.971938775510205e-06, 6.64328231292517e-06,
                                   5.6942419825072885e-06,
                                   4.982461734693877e-06, 4.428854875283447e-06,
                                   3.985969387755102e-06]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=make_scorer(spearman_loss), verbose=0)

In [179]:
pd.DataFrame(grid_search.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,6.931262,0.250646,1.688868,0.068412,1,3.98597e-05,"{'C': 1, 'gamma': 3.985969387755102e-05}",0.015543,0.012512,-0.025205,-0.0317,0.034519,0.001134,0.025391,10
1,6.79859,0.110816,1.672751,0.074527,1,1.99298e-05,"{'C': 1, 'gamma': 1.992984693877551e-05}",0.019491,-0.021944,-0.039299,0.008715,0.063939,0.00618,0.035719,9
2,6.100916,0.875012,1.552864,0.271491,1,1.32866e-05,"{'C': 1, 'gamma': 1.328656462585034e-05}",0.021994,-0.039043,-0.032374,0.026159,0.069872,0.009322,0.040467,8
3,6.214245,1.089273,1.661667,0.260623,1,9.96492e-06,"{'C': 1, 'gamma': 9.964923469387754e-06}",0.021505,-0.044476,-0.022273,0.023462,0.069128,0.009469,0.039541,7
4,7.067339,0.91665,1.702199,0.206415,1,7.97194e-06,"{'C': 1, 'gamma': 7.971938775510205e-06}",0.021902,-0.044034,-0.01731,0.0323,0.070317,0.012635,0.039769,6
5,5.398871,0.188105,1.300235,0.06851,1,6.64328e-06,"{'C': 1, 'gamma': 6.64328231292517e-06}",0.019847,-0.045352,-0.015032,0.036428,0.069861,0.013151,0.040049,5
6,6.681744,0.844912,1.790646,0.166912,1,5.69424e-06,"{'C': 1, 'gamma': 5.6942419825072885e-06}",0.021271,-0.047388,-0.011805,0.043059,0.070656,0.015158,0.041315,2
7,6.262911,1.000863,1.505757,0.258287,1,4.98246e-06,"{'C': 1, 'gamma': 4.982461734693877e-06}",0.020626,-0.048753,-0.008501,0.044578,0.072083,0.016007,0.041897,1
8,5.161083,0.082402,1.25157,0.041867,1,4.42885e-06,"{'C': 1, 'gamma': 4.428854875283447e-06}",0.019652,-0.048687,-0.007164,0.042708,0.068307,0.014963,0.040443,4
9,5.124976,0.11284,1.252513,0.026553,1,3.98597e-06,"{'C': 1, 'gamma': 3.985969387755102e-06}",0.017889,-0.049167,-0.005828,0.041551,0.071135,0.015116,0.041026,3


In [168]:
svr = SVR(gamma=1/image_train[0].shape[0], C=1)

In [169]:
svr.fit(image_train, score_train)

SVR(C=1, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
    gamma=3.985969387755102e-05, kernel='rbf', max_iter=-1, shrinking=True,
    tol=0.001, verbose=False)

In [170]:
score_pred = svr.predict(image_test)

In [171]:
a = spearmanr(score_pred, score_test).correlation

In [172]:
a

0.01141228530713268