# CS5785 final

### 1. Setting up initial development environment


In [None]:
#installing gensim and spacy if they are not already installed

#!pip install gensim
#!pip install spacy

In [None]:
#downloading a larger model for spacy (required for converting text into vectors)

#!python -m spacy download en_core_web_lg

In [None]:
#downloading required data for gensim

#!wget -c "https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz"

In [1]:
import os
import csv
import random
import gensim
import numpy as np
import string

num_train = 8000
num_dev = 2000
num_test = 2000
split_idx = list(range(num_train + num_dev))
random.shuffle(split_idx)

#loading word2vec model from gensim - NOT used in our models, used in baseline only
#word2vec = gensim.models.KeyedVectors.load_word2vec_format("GoogleNews-vectors-negative300.bin.gz", binary=True)
#print("Loaded gensim word vectors successfully!")

In [2]:
import spacy

#loading word2vec model from spacy - used in our models
nlp = spacy.load("en_core_web_lg")
print("Loaded spacy word vectors successfully!")


### 2. Parsing descriptions, tags and ResNet feature vectors


In [3]:
#list of stop words used for text preprocessing

stop_words = ['a','the','an','and',\
                           'as','at','for','from','in','into','of','on',\
                           'onto','to',\
                           'that',\
                           'have','had','has','having','be',\
                           'is','am','are','was','were','being','been',\
                           'your' 'those', 'this', 'these', \
                           'their', 'the', 'that', 'our', 'my',\
                           'its', 'his' 'her'\
                           'an', 'a', 'he',\
                           'him', 'his', 'her', 'she', 'it', 'they', 'them', 'its', 'their','theirs',\
                           'you','your','yours','me','my','mine','I','we','and/or']

In [68]:
#reading tags from file and converting tags to vectors

def parse_tags(data_dir, num_doc):
    docs = []
    for i in range(num_doc):
        path = os.path.join(data_dir, "%d.txt" % i)
        with open(path) as f:
            docs.append(f.read())
    return docs

def tags_to_vec(tag):
    temp = tag.split('\n')
    tags = []
    for item in temp:
        if len(item) > 0:
            item_tags = item.split(':')
            cat = item_tags[0]        ##category
            sub_cat = item_tags[1]    ##sub-category
            tags.append(sub_cat)      ##we only consider sub-category when converting tags to vectors
    
    tags_str = ' '.join(tags)
    processed_tags = nlp(tags_str)    ##using spacy to process text
            
    return processed_tags.vector

train_dev_tags = parse_tags("tags_train", num_doc=(num_train+num_dev))
test_tags = parse_tags("tags_test", num_doc=num_test)
print('Parsed file')

x_train_tags = np.array([tags_to_vec(train_dev_tags[i]) for i in split_idx[:num_train]])
x_dev_tags = np.array([tags_to_vec(train_dev_tags[i]) for i in split_idx[num_train:]])
x_test_tags = np.array([tags_to_vec(d) for d in test_tags])

#optionally, save matrices to .npy files and store them on local machine
#np.save('xtraintags', x_train_tags)
#np.save('xdevtags', x_dev_tags)
#np.save('xtesttags', x_test_tags)

print("Built all x_tags matrices!")   
print("x_train_tags shape:", x_train_tags.shape)
print("x_dev_tags shape:", x_dev_tags.shape)
print("x_test_tags shape:", x_test_tags.shape)

Parsed file
Built all x_tags matrices!
x_train_tags shape: (8000, 300)
x_dev_tags shape: (2000, 300)
x_test_tags shape: (2000, 300)


In [33]:
#reading descriptions and converting descriptions to vectors

def parse_descriptions(data_dir, num_doc):
    docs = []
    for i in range(num_doc):
        path = os.path.join(data_dir, "%d.txt" % i)
        with open(path) as f:
            docs.append(f.read())
    return docs

#using average of word vectors to represent paragraphs
#DEPRECATED in favor of the approach below (sent_to_vec)
def doc_to_vec(sentence, word2vec):
    word_vecs = []
    for w in sentence.split():
        w = w.lower()
        if w in word2vec.vocab and w not in stop_words:
            word_vecs.append(word2vec.get_vector(w))
    word_vecs = [word2vec.get_vector(w.lower()) for w in sentence.split() if (w in word2vec.vocab) and (w not in stop_words)]
    return np.stack(word_vecs).mean(0)

#converting the entire paragraph directly into vectors
def sent_to_vec(sentence, preprocess=False):
    sent = None
    #if specified, preprocess the text by removing punctuations, converting 
    #all the words to lower case, and removing stop words
    if preprocess:
        processed = []
        sentence = sentence.translate(str.maketrans('', '', string.punctuation))
        temp = sentence.split()
        for word in temp:
            if word not in stop_words:
                processed.append(word.lower())
        processed_str = ' '.join(processed)
        sent = nlp(processed_str)
    else:
        sent = nlp(sentence)
        
    return sent.vector

train_dev_desc = parse_descriptions("descriptions_train", num_doc=(num_train+num_dev))
test_desc = parse_descriptions("descriptions_test", num_doc=num_test)
print('Parsed file')

x_train = np.array([sent_to_vec(train_dev_desc[i], preprocess=True) for i in split_idx[:num_train]])
x_dev = np.array([sent_to_vec(train_dev_desc[i], preprocess=True) for i in split_idx[num_train:]])
x_test = np.array([sent_to_vec(d, preprocess=True) for d in test_desc])

#optionally, save matrices to .npy files and store them on local machine
#np.save('xtrain', x_train)
#np.save('xdev', x_dev)
#np.save('xtest', x_test)

print("Built all x mabtrices!")
print("x_train shape:", x_train.shape)
print("x_dev shape:", x_dev.shape)
print("x_test shape:", x_test.shape)


Built all x mabtrices!
x_train shape: (8000, 300)
x_dev shape: (2000, 300)
x_test shape: (2000, 300)


In [34]:
#reading ResNet image features from file

def parse_features(features_path):
    vec_map = {}
    with open(features_path) as f:
        for row in csv.reader(f):
            img_id = int(row[0].split("/")[1].split(".")[0])
            vec_map[img_id] = np.array([float(x) for x in row[1:]])
    return np.array([v for k, v in sorted(vec_map.items())])

#DEPRECATED - random projection to reduce dimensionality
#p = np.random.randn(1000, 100)
y_train_dev = parse_features("features_train/features_resnet1000_train.csv") #@ p
y_train = y_train_dev[split_idx[:num_train]]
y_dev = y_train_dev[split_idx[num_train:]]
y_test = parse_features("features_test/features_resnet1000_test.csv") #@ p

#optionally, save matrices to .npy files and store them on local machine
#np.save('ytrainfull', y_train)
#np.save('ydevfull', y_dev)
#np.save('ytestfull', y_test)

print("Built all y matrices!")
print("y_train shape:", y_train.shape)
print("y_dev shape:", y_dev.shape)
print("y_test shape:", y_test.shape)

Built all y matrices!
y_train shape: (8000, 1000)
y_dev shape: (2000, 1000)
y_test shape: (2000, 1000)



### 3. Model training


In [14]:
import warnings

#suppress warnings
#warnings.filterwarnings('ignore')

In [15]:
#using cosine similarity (cosine distance) in favor of euclidean distance
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
#Method 1 (baseline) & 2 - use description to predict dimensionality reduced ResNet image vectors 

from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

# train OLS model with regression
parameters = {"alpha": [0.01, 0.1, 1, 2, 5, 10, 100]}
reg = GridSearchCV(Ridge(), param_grid=parameters, cv=10, verbose=10)
reg.fit(x_train, y_train)
reg_best = reg.best_estimator_

print("Trained linear regression model!")
print("Summary of best model:")
print(reg_best)

In [29]:
#Method 3 - use description to predict tags

from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

parameters = {"alpha": [0.01, 0.1, 1, 2, 5, 10, 100]}
reg = GridSearchCV(Ridge(), param_grid=parameters, cv=10, verbose=10)
reg.fit(x_train, x_train_tags)
reg_best = reg.best_estimator_

print("Trained linear regression model!")
print("Summary of best model:")
print(reg_best)

Fitting 10 folds for each of 5 candidates, totalling 50 fits
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.661222493556135, total=   0.1s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6502817517849822, total=   0.1s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6501822813698078, total=   0.1s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6399666189487007, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.5s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6511612841864508, total=   0.1s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6399229655619706, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6668569908309135, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.8s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6475358698093154, total=   0.1s
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.650012015653537, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6648987059921628, total=   0.0s
[CV] alpha=0.5 .......................................................


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    1.0s remaining:    0.0s


[CV] .............. alpha=0.5, score=0.6614842456660708, total=   0.1s
[CV] alpha=0.5 .......................................................
[CV] .............. alpha=0.5, score=0.6514415786275852, total=   0.1s
[CV] alpha=0.5 .......................................................
[CV] .............. alpha=0.5, score=0.6514314230782787, total=   0.0s
[CV] alpha=0.5 .......................................................
[CV] .............. alpha=0.5, score=0.6416623959694858, total=   0.1s
[CV] alpha=0.5 .......................................................
[CV] ............... alpha=0.5, score=0.652210858590011, total=   0.0s
[CV] alpha=0.5 .......................................................
[CV] .............. alpha=0.5, score=0.6414632891253068, total=   0.0s
[CV] alpha=0.5 .......................................................
[CV] .............. alpha=0.5, score=0.6680617304787189, total=   0.1s
[CV] alpha=0.5 .......................................................
[CV] .

[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:    5.2s finished


In [36]:
#Method 4 - use ResNet image vectors to predict description

parameters = {"alpha": [0.1, 0.5, 1, 2, 5, 10, 100]}
reg = GridSearchCV(Ridge(), param_grid=parameters, cv=10, verbose=10)
reg.fit(y_train, x_train)
reg_best = reg.best_estimator_

print("Trained linear regression model!")
print("Summary of best model:")
print(reg_best)

Fitting 10 folds for each of 7 candidates, totalling 70 fits
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] .............. alpha=0.1, score=0.4924541234127216, total=   0.4s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.5s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.4788508120985232, total=   0.4s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.0s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.4758009604625774, total=   0.4s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    1.6s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.4908751780125679, total=   0.4s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    2.1s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.4811136533282584, total=   0.4s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    2.6s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.4734618772607761, total=   0.4s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    3.1s remaining:    0.0s


[CV] ............. alpha=0.1, score=0.48892149569425825, total=   0.4s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    3.6s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.4893221247180699, total=   0.4s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    4.1s remaining:    0.0s


[CV] ............. alpha=0.1, score=0.47347235940409493, total=   0.4s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    4.6s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.4700440073788237, total=   0.4s
[CV] alpha=0.5 .......................................................
[CV] ............. alpha=0.5, score=0.49764964012056134, total=   0.4s
[CV] alpha=0.5 .......................................................
[CV] .............. alpha=0.5, score=0.4845691920735378, total=   0.4s
[CV] alpha=0.5 .......................................................
[CV] ............. alpha=0.5, score=0.48138171367630084, total=   0.4s
[CV] alpha=0.5 .......................................................
[CV] .............. alpha=0.5, score=0.4963395681363638, total=   0.4s
[CV] alpha=0.5 .......................................................
[CV] .............. alpha=0.5, score=0.4865132226439228, total=   0.4s
[CV] alpha=0.5 .......................................................
[CV] .............. alpha=0.5, score=0.4789457518303309, total=   0.4s
[CV] alpha=0.5 .......................................................
[CV] .

[CV] ............... alpha=100, score=0.517613202325048, total=   0.4s
[CV] alpha=100 .......................................................
[CV] .............. alpha=100, score=0.5097316306332631, total=   0.5s
[CV] alpha=100 .......................................................
[CV] .............. alpha=100, score=0.5022875877360534, total=   0.4s


[Parallel(n_jobs=1)]: Done  70 out of  70 | elapsed:   38.9s finished


Trained linear regression model!
Summary of best model:
Ridge(alpha=100, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)


In [66]:
#Method 7 - use ResNet image vectors + tags to predict description with Ridge

combined_img_train = np.concatenate((y_train, x_train_tags), axis=1)
combined_img_dev = np.concatenate((y_dev, x_dev_tags), axis=1)
combined_img_test = np.concatenate((y_test, x_test_tags), axis=1)

parameters = {"alpha": [0.01, 0.1, 1, 10, 100, 500]}
reg = GridSearchCV(Ridge(), param_grid=parameters, cv=10, verbose=10)
reg.fit(combined_img_train, x_train)
reg_best = reg.best_estimator_

print("Trained linear regression model!")
print("Summary of best model:")
print(reg_best)

Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.01 ......................................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ............. alpha=0.01, score=0.5710760317893183, total=   0.5s
[CV] alpha=0.01 ......................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s remaining:    0.0s


[CV] ............. alpha=0.01, score=0.5569262044280748, total=   0.7s
[CV] alpha=0.01 ......................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.6s remaining:    0.0s


[CV] .............. alpha=0.01, score=0.565783384089551, total=   0.8s
[CV] alpha=0.01 ......................................................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    2.5s remaining:    0.0s


[CV] .............. alpha=0.01, score=0.571360445322149, total=   0.7s
[CV] alpha=0.01 ......................................................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    3.4s remaining:    0.0s


[CV] ............. alpha=0.01, score=0.5638462174814163, total=   0.6s
[CV] alpha=0.01 ......................................................


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    4.1s remaining:    0.0s


[CV] .............. alpha=0.01, score=0.557064574516982, total=   0.5s
[CV] alpha=0.01 ......................................................


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    4.8s remaining:    0.0s


[CV] ............. alpha=0.01, score=0.5755202149520334, total=   0.5s
[CV] alpha=0.01 ......................................................


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    5.5s remaining:    0.0s


[CV] ............... alpha=0.01, score=0.57262250770562, total=   0.6s
[CV] alpha=0.01 ......................................................


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    6.4s remaining:    0.0s


[CV] ............. alpha=0.01, score=0.5636907198125763, total=   0.6s
[CV] alpha=0.01 ......................................................


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    7.2s remaining:    0.0s


[CV] ............. alpha=0.01, score=0.5559812872026486, total=   0.7s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5724933058188185, total=   0.6s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5584957207865137, total=   0.6s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5672788885426046, total=   0.6s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5728499373018051, total=   0.5s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5653812701916199, total=   0.7s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5585647427396908, total=   0.8s
[CV] alpha=0.1 .......................................................
[CV] .

[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:   50.1s finished


Trained linear regression model!
Summary of best model:
Ridge(alpha=100, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)


In [None]:
## Methods 5 and 6

In [70]:
from sklearn.ensemble import ExtraTreesRegressor

In [72]:
et_model_flipped = ExtraTreesRegressor(bootstrap=True, n_jobs=-1, verbose=10)
et_model_flipped.fit(combined_img_train, x_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.


building tree 1 of 10
building tree 2 of 10
building tree 3 of 10
building tree 4 of 10
building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10


[Parallel(n_jobs=-1)]: Done   5 out of  10 | elapsed:   24.2s remaining:   24.2s


building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed:   24.7s remaining:   10.6s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:   32.3s finished


ExtraTreesRegressor(bootstrap=True, criterion='mse', max_depth=None,
          max_features='auto', max_leaf_nodes=None,
          min_impurity_decrease=0.0, min_impurity_split=None,
          min_samples_leaf=1, min_samples_split=2,
          min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
          oob_score=False, random_state=None, verbose=10, warm_start=False)

In [73]:
from sklearn.ensemble import RandomForestRegressor

In [74]:
rf_model_flipped = RandomForestRegressor(n_jobs=-1, verbose=10)
rf_model_flipped.fit(combined_img_train, x_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.


building tree 1 of 10building tree 2 of 10building tree 3 of 10building tree 4 of 10



building tree 5 of 10
building tree 6 of 10
building tree 7 of 10
building tree 8 of 10


[Parallel(n_jobs=-1)]: Done   5 out of  10 | elapsed:  2.7min remaining:  2.7min


building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed:  2.8min remaining:  1.2min
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  3.6min finished


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
           oob_score=False, random_state=None, verbose=10,
           warm_start=False)


### 4. Predict on validation set


In [11]:
#calculating euclidean distance
#DEPRECATED

def dist_matrix(x1, x2):
    return ((np.expand_dims(x1, 1) - np.expand_dims(x2, 0)) ** 2).sum(2) ** 0.5

In [129]:
#Method 2

y_dev_pred = reg.predict(x_dev)
dev_distances = cosine_similarity(y_dev_pred, y_dev)
dev_scores = []
dev_pos_list = []

for i in range(num_dev):
    pred_dist_idx = list(np.argsort(dev_distances[i]))
    pred_dist_idx.reverse()
    dev_pos = pred_dist_idx.index(i)
    dev_pos_list.append(dev_pos)
    if dev_pos < 20:
        dev_scores.append(1 / (dev_pos + 1))
    else:
        dev_scores.append(0.0)

print("Development MAP@20:", np.mean(dev_scores))
print("Mean index of true image", np.mean(dev_pos_list))
print("Median index of true image", np.median(dev_pos_list))

Development MAP@20: 0.16594818252353083
Mean index of true image 56.8725
Median index of true image 18.0


In [31]:
#Method 3

y_dev_pred = reg_best.predict(x_dev)
dev_distances = cosine_similarity(y_dev_pred, x_dev_tags)
dev_scores = []
dev_pos_list = []

for i in range(1979):
    pred_dist_idx = list(np.argsort(dev_distances[i]))
    pred_dist_idx.reverse()
    dev_pos = pred_dist_idx.index(i)
    dev_pos_list.append(dev_pos)
    if dev_pos < 20:
        dev_scores.append(1 / (dev_pos + 1))
    else:
        dev_scores.append(0.0)

print("Development MAP@20:", np.mean(dev_scores))
print("Mean index of true image", np.mean(dev_pos_list))
print("Median index of true image", np.median(dev_pos_list))

Development MAP@20: 0.195760789051526
Mean index of true image 34.95048004042446
Median index of true image 12.0


In [46]:
#Method 4

x_dev_pred = reg_best.predict(y_dev)                 ##predicting description vectors from image vectors
dev_distances = cosine_similarity(x_dev, x_dev_pred)
dev_scores = []
dev_pos_list = []

#for each true description vector, find the most similar 
#predicted description vectors and the corresponding images
for i in range(num_dev):
    desc_idx = list(np.argsort(dev_distances[i]))   
    desc_idx.reverse()
    dev_pos = desc_idx.index(i)
    dev_pos_list.append(dev_pos)
    if dev_pos < 20:
        dev_scores.append(1 / (dev_pos + 1))
    else:
        dev_scores.append(0.0)

print("Development MAP@20:", np.mean(dev_scores))
print("Mean index of true image", np.mean(dev_pos_list))
print("Median index of true image", np.median(dev_pos_list))

Development MAP@20: 0.3297697787506611
Mean index of true image 16.738
Median index of true image 5.0


In [81]:
#Method 7

x_dev_pred = reg_best.predict(combined_img_dev)
dev_distances = cosine_similarity(x_dev, x_dev_pred)
dev_scores = []
dev_pos_list = []

for i in range(num_dev):
    desc_idx = list(np.argsort(dev_distances[i]))
    desc_idx.reverse()
    dev_pos = desc_idx.index(i)
    dev_pos_list.append(dev_pos)
    if dev_pos < 20:
        dev_scores.append(1 / (dev_pos + 1))
    else:
        dev_scores.append(0.0)

print("Development MAP@20:", np.mean(dev_scores))
print("Mean index of true image", np.mean(dev_pos_list))
print("Median index of true image", np.median(dev_pos_list))

Development MAP@20: 0.4619524335764854
Mean index of true image 7.9425
Median index of true image 2.0


In [None]:
## other models used in experimentation

In [75]:
#Method 6 - et

x_dev_pred = et_model_flipped.predict(combined_img_dev)
dev_distances = cosine_similarity(x_dev, x_dev_pred)
dev_scores = []
dev_pos_list = []

for i in range(num_dev):
    desc_idx = list(np.argsort(dev_distances[i]))
    desc_idx.reverse()
    dev_pos = desc_idx.index(i)
    dev_pos_list.append(dev_pos)
    if dev_pos < 20:
        dev_scores.append(1 / (dev_pos + 1))
    else:
        dev_scores.append(0.0)

print("Development MAP@20:", np.mean(dev_scores))
print("Mean index of true image", np.mean(dev_pos_list))
print("Median index of true image", np.median(dev_pos_list))

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done   7 out of  10 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.1s finished


Development MAP@20: 0.21955806274478873
Mean index of true image 26.71
Median index of true image 10.0


In [76]:
#Method 5 - rf

x_dev_pred = rf_model_flipped.predict(combined_img_dev)
dev_distances = cosine_similarity(x_dev, x_dev_pred)
dev_scores = []
dev_pos_list = []

for i in range(num_dev):
    desc_idx = list(np.argsort(dev_distances[i]))
    desc_idx.reverse()
    dev_pos = desc_idx.index(i)
    dev_pos_list.append(dev_pos)
    if dev_pos < 20:
        dev_scores.append(1 / (dev_pos + 1))
    else:
        dev_scores.append(0.0)

print("Development MAP@20:", np.mean(dev_scores))
print("Mean index of true image", np.mean(dev_pos_list))
print("Median index of true image", np.median(dev_pos_list))

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done   7 out of  10 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.1s finished


Development MAP@20: 0.21215280937887362
Mean index of true image 32.725
Median index of true image 10.0



### 5. Predict on test set and generate output .csv file


In [63]:
#Method 7
x_train_all = np.concatenate([x_train, x_dev])                             ##description vectors
img_train_all = np.concatenate([combined_img_train, combined_img_dev])     ##combined ResNet + tag vectors
reg_best.fit(img_train_all, x_train_all)

x_test_pred = reg_best.predict(combined_img_test)
test_distances = cosine_similarity(x_test, x_test_pred)
pred_rows = []

for i in range(num_test):
    test_dist_idx = list(np.argsort(test_distances[i]))
    test_dist_idx.reverse()
    top_20 = test_dist_idx[:20]
    row = ["%d.jpg" % i for i in test_dist_idx[:20]]
    pred_rows.append(" ".join(row))

with open("test_submission.csv", "w") as f:
    f.write("Descritpion_ID,Top_20_Image_IDs\n")
    for i, row in enumerate(pred_rows):
        f.write("%d.txt,%s\n" % (i, row))

print("Output written!")

Output written!


### The end!