## Word2Vec unimodal classification demo

1. Vectorize words from captions

2. Calculate the average of the word vectors

3. Find the category word most similar to the average vector

4. Use one or two distinct classifiers to perform the classification

5. Compute the accuracy of classification with the selected classifiers (LogReg, LinearSVC)

In [1]:
import pandas as pd

In [2]:
import logging
# Emit "timestamp : level : message" records at INFO level and above; the
# INFO lines visible in later cell outputs are printed through this setup.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

In [3]:
# The file is in JSON-lines format: every line is parsed as one record.
df = pd.read_json("data/COCO/coco-easier.txt",
                  lines=True)
# Shuffle the rows with a fixed seed. The cross-validation cells further down
# pass cv=5 without their own shuffling, so the row order decides the folds;
# an unseeded shuffle would make every run report different numbers.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
# Preview only the first rows instead of rendering the whole frame.
df.head(10)

Unnamed: 0,all_categories,captions,category,file_name,img_id
0,"[bus, person, person, person, person, car, car...",[A city street show cars going up and down the...,traffic light,000000350023.jpg,350023
1,"[person, person, person, person, person, perso...",[Woman in wet suit considers waves near forest...,surfboard,000000059635.jpg,59635
2,"[person, giraffe, person, person]","[A giraffe stands alone outdoors at the zoo., ...",giraffe,000000471567.jpg,471567
3,"[bottle, refrigerator, oven, spoon, spoon, bot...","[An oven with a stove, pots and utensils, and ...",clock,000000384661.jpg,384661
4,"[car, car, car, car, car, car, traffic light, ...",[A post office box is on the side of a busy st...,traffic light,000000496722.jpg,496722
5,"[toilet, toilet, toilet, toilet, toilet, toile...",[A row of white toilets sitting on top of a di...,toilet,000000458054.jpg,458054
6,"[dog, frisbee]",[A dog that is jumping in the air looking at a...,dog,000000575357.jpg,575357
7,[toilet],[A table with a plate of food and a urinal bas...,toilet,000000325114.jpg,325114
8,"[bottle, bottle, bottle, bottle, bottle, bottl...",[A woman holding a small cake with lit candles...,clock,000000213547.jpg,213547
9,"[person, surfboard]","[A man in shorts surfing in the ocean, A man i...",surfboard,000000364126.jpg,364126


In [4]:
# Join every image's captions into one string so it can be tokenized for the
# word2vec lookup. The isinstance guard keeps the cell idempotent: running it
# a second time would otherwise call " ".join on the already-joined string
# and insert a space between every character.
df["captions"] = df["captions"].apply(
    lambda sents: sents if isinstance(sents, str) else " ".join(sents)
)
df["captions"][0]

'A city street show cars going up and down the hill. A hill of a street at a stoplight with traffic  A street that has four lanes of traffic. a long road with cars on both sides of it  A busy intersection in a city features several cars and buildings.'

In [5]:
from gensim.utils import simple_preprocess
# Tokenize each joined caption string with gensim's simple_preprocess and
# keep the resulting token list next to the text it came from.
df["words"] = df["captions"].apply(simple_preprocess)
df["words"][0]

2018-05-21 18:42:06,525 : INFO : 'pattern' package not found; tag filters are not available for English


['city',
 'street',
 'show',
 'cars',
 'going',
 'up',
 'and',
 'down',
 'the',
 'hill',
 'hill',
 'of',
 'street',
 'at',
 'stoplight',
 'with',
 'traffic',
 'street',
 'that',
 'has',
 'four',
 'lanes',
 'of',
 'traffic',
 'long',
 'road',
 'with',
 'cars',
 'on',
 'both',
 'sides',
 'of',
 'it',
 'busy',
 'intersection',
 'in',
 'city',
 'features',
 'several',
 'cars',
 'and',
 'buildings']

In [6]:
from gensim.models import KeyedVectors
# Load the pretrained GoogleNews vectors from the binary word2vec file.
# limit=200000 caps how many vectors are read (the log line below reports a
# (200000, 300) matrix).
word2vec = KeyedVectors.load_word2vec_format(
    './data/GoogleNews-vectors-negative300.bin',
    binary=True,
    limit=200000,
)

2018-05-21 18:42:06,705 : INFO : loading projection weights from ./data/GoogleNews-vectors-negative300.bin
2018-05-21 18:42:09,911 : INFO : loaded (200000, 300) matrix from ./data/GoogleNews-vectors-negative300.bin


In [7]:
def get_vectors(words):
    """Return the embedding of every token that the loaded model contains.

    Tokens that are not in ``word2vec`` are skipped, so the returned list can
    be shorter than ``words`` and is empty when no token is known.
    """
    return [word2vec[token] for token in words if token in word2vec]

In [8]:
# One list of word embeddings per row, built from that row's tokens.
df["vectors"] = df["words"].apply(get_vectors)

In [9]:
import numpy as np
# Element-wise mean of each row's word vectors (sum over the words divided
# by the number of words), giving one vector per row.
df["vector_avg"] = df["vectors"].apply(lambda vecs: np.mean(vecs, axis=0))
df.head()

Unnamed: 0,all_categories,captions,category,file_name,img_id,words,vectors,vector_avg
0,"[bus, person, person, person, person, car, car...",A city street show cars going up and down the ...,traffic light,000000350023.jpg,350023,"[city, street, show, cars, going, up, and, dow...","[[-0.010070801, 0.057373047, 0.18359375, -0.04...","[0.07995111, 0.07051823, 0.06434508, 0.0843439..."
1,"[person, person, person, person, person, perso...",Woman in wet suit considers waves near foreste...,surfboard,000000059635.jpg,59635,"[woman, in, wet, suit, considers, waves, near,...","[[0.24316406, -0.07714844, -0.103027344, -0.10...","[0.04933966, 0.012969971, -0.029572986, 0.0616..."
2,"[person, giraffe, person, person]",A giraffe stands alone outdoors at the zoo. A ...,giraffe,000000471567.jpg,471567,"[giraffe, stands, alone, outdoors, at, the, zo...","[[-0.055908203, 0.33007812, -0.39648438, 0.017...","[0.032204077, 0.088930055, -0.049434863, 0.037..."
3,"[bottle, refrigerator, oven, spoon, spoon, bot...","An oven with a stove, pots and utensils, and a...",clock,000000384661.jpg,384661,"[an, oven, with, stove, pots, and, utensils, a...","[[0.12597656, 0.19042969, 0.06982422, 0.072265...","[-0.02268982, 0.087446496, 0.03933277, 0.06695..."
4,"[car, car, car, car, car, car, traffic light, ...",A post office box is on the side of a busy str...,traffic light,000000496722.jpg,496722,"[post, office, box, is, on, the, side, of, bus...","[[-0.021484375, -0.0037994385, -0.114746094, -...","[0.04842315, 0.0264707, -0.008286966, 0.109912..."


In [10]:
# Turn the dataset's category labels into keys that exist in the model:
# the model separates the words of a phrase with an underscore, and
# "traffic_light" is swapped for the single word "traffic".
categories = [
    "traffic" if name == "traffic_light" else name
    for name in (label.replace(" ", "_") for label in df["category"].unique())
]
categories

['traffic',
 'surfboard',
 'giraffe',
 'clock',
 'toilet',
 'dog',
 'pizza',
 'cat',
 'tennis_racket',
 'skis']

In [11]:
# 1. Google's model does not contain the phrase "traffic light", so the
#    category list uses "traffic" in its place.
# 2. The model separates the words of a phrase with an underscore.
# Tested accuracy was 0.72
def get_prediction_by_most_similar_word(vector):
    """Predict a category via the vocabulary word nearest to ``vector``.

    Takes the first word returned by ``word2vec.similar_by_vector`` and asks
    the model which entry of ``categories`` is most similar to that word.
    The model-side keys "traffic" and "tennis_racket" are translated back to
    the dataset labels "traffic light" and "tennis racket".
    """
    nearest_word = word2vec.similar_by_vector(vector)[0][0]
    guess = word2vec.most_similar_to_given(nearest_word, categories)
    dataset_labels = {"traffic": "traffic light", "tennis_racket": "tennis racket"}
    return dataset_labels.get(guess, guess)

In [12]:
from scipy.spatial.distance import cosine
def get_prediction_by_cosine_sim(vector, candidates=None, embeddings=None):
    """Predict the category whose embedding has the smallest cosine distance to ``vector``.

    Parameters
    ----------
    vector : array-like
        Vector to classify (the notebook passes a row's averaged caption vector).
    candidates : sequence of str, optional
        Category keys to choose from. Defaults to the notebook-level ``categories``.
    embeddings : mapping, optional
        Object that returns a vector for ``embeddings[key]``. Defaults to the
        notebook-level ``word2vec`` model.

    Returns
    -------
    str
        The winning category, with the model-side keys "traffic" and
        "tennis_racket" translated back to the dataset labels
        "traffic light" and "tennis racket".

    Notes
    -----
    ``scipy.spatial.distance.cosine`` is a *distance* (1 - cosine similarity),
    so smaller is better and values can reach 2. The search therefore starts
    from infinity; starting from 1 would ignore every candidate at distance
    >= 1 and silently fall back to the first candidate. If no distance
    compares as smaller (for example, all distances are NaN), the first
    candidate is returned.
    """
    if candidates is None:
        candidates = categories
    if embeddings is None:
        embeddings = word2vec

    prediction = candidates[0]
    best_distance = float("inf")
    for cat in candidates:
        distance = cosine(vector, embeddings[cat])
        if distance < best_distance:
            prediction = cat
            best_distance = distance

    # Map the model-side keys back to the labels used in the dataset.
    if prediction == "traffic":
        prediction = "traffic light"
    if prediction == "tennis_racket":
        prediction = "tennis racket"
    return prediction

In [13]:
# Nearest-category guess for every row, based on its averaged caption vector.
df["similar"] = df["vector_avg"].apply(get_prediction_by_cosine_sim)
df.head()

Unnamed: 0,all_categories,captions,category,file_name,img_id,words,vectors,vector_avg,similar
0,"[bus, person, person, person, person, car, car...",A city street show cars going up and down the ...,traffic light,000000350023.jpg,350023,"[city, street, show, cars, going, up, and, dow...","[[-0.010070801, 0.057373047, 0.18359375, -0.04...","[0.07995111, 0.07051823, 0.06434508, 0.0843439...",traffic light
1,"[person, person, person, person, person, perso...",Woman in wet suit considers waves near foreste...,surfboard,000000059635.jpg,59635,"[woman, in, wet, suit, considers, waves, near,...","[[0.24316406, -0.07714844, -0.103027344, -0.10...","[0.04933966, 0.012969971, -0.029572986, 0.0616...",surfboard
2,"[person, giraffe, person, person]",A giraffe stands alone outdoors at the zoo. A ...,giraffe,000000471567.jpg,471567,"[giraffe, stands, alone, outdoors, at, the, zo...","[[-0.055908203, 0.33007812, -0.39648438, 0.017...","[0.032204077, 0.088930055, -0.049434863, 0.037...",giraffe
3,"[bottle, refrigerator, oven, spoon, spoon, bot...","An oven with a stove, pots and utensils, and a...",clock,000000384661.jpg,384661,"[an, oven, with, stove, pots, and, utensils, a...","[[0.12597656, 0.19042969, 0.06982422, 0.072265...","[-0.02268982, 0.087446496, 0.03933277, 0.06695...",toilet
4,"[car, car, car, car, car, car, traffic light, ...",A post office box is on the side of a busy str...,traffic light,000000496722.jpg,496722,"[post, office, box, is, on, the, side, of, bus...","[[-0.021484375, -0.0037994385, -0.114746094, -...","[0.04842315, 0.0264707, -0.008286966, 0.109912...",traffic light


## Classification and testing

#### Cosine similarity of category and vector_avg

In [14]:
from sklearn.metrics import accuracy_score

In [15]:
# Share of rows whose nearest-category guess equals the true category.
score = accuracy_score(y_true=df["category"], y_pred=df["similar"])
print("Accuracy: %0.2f" % score)

Accuracy: 0.91


#### Logistic regression

In [16]:
from sklearn.model_selection import cross_val_score

In [17]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with the library's default hyperparameters (no
# arguments are passed); evaluated by the cross-validation cell that follows.
classifier = LogisticRegression()

In [18]:
# 5-fold cross-validation of the current classifier (logistic regression) on
# the averaged caption vectors: one score per fold, reported as the mean and
# twice the standard deviation across folds.
scores = cross_val_score(
    estimator=classifier,
    X=pd.DataFrame(df["vector_avg"].tolist()),
    y=df["category"].values,
    cv=5,
)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.95 (+/- 0.01)


#### Linear SVC

In [19]:
from sklearn.svm import LinearSVC
# Linear support-vector classifier. The seed is fixed because the solver
# draws on a pseudo-random generator; without it the cross-validated scores
# and predictions below can change from run to run.
classifier = LinearSVC(random_state=0)

In [20]:
# 5-fold cross-validation of the current classifier (LinearSVC) on the
# averaged caption vectors: one score per fold, reported as the mean and
# twice the standard deviation across folds.
scores = cross_val_score(
    estimator=classifier,
    X=pd.DataFrame(df["vector_avg"].tolist()),
    y=df["category"].values,
    cv=5,
)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.96 (+/- 0.02)


In [21]:
from sklearn.model_selection import cross_val_predict

# Out-of-fold prediction for every row: each row is predicted by a model
# fitted on the other folds. `classifier` is whatever the most recent cell
# bound to that name (the LinearSVC instance when run top to bottom).
df["predictions"] = cross_val_predict(
    estimator=classifier,
    X=pd.DataFrame(df["vector_avg"].tolist()),
    y=df["category"].values,
    cv=5,
)
df.head()

Unnamed: 0,all_categories,captions,category,file_name,img_id,words,vectors,vector_avg,similar,predictions
0,"[bus, person, person, person, person, car, car...",A city street show cars going up and down the ...,traffic light,000000350023.jpg,350023,"[city, street, show, cars, going, up, and, dow...","[[-0.010070801, 0.057373047, 0.18359375, -0.04...","[0.07995111, 0.07051823, 0.06434508, 0.0843439...",traffic light,traffic light
1,"[person, person, person, person, person, perso...",Woman in wet suit considers waves near foreste...,surfboard,000000059635.jpg,59635,"[woman, in, wet, suit, considers, waves, near,...","[[0.24316406, -0.07714844, -0.103027344, -0.10...","[0.04933966, 0.012969971, -0.029572986, 0.0616...",surfboard,surfboard
2,"[person, giraffe, person, person]",A giraffe stands alone outdoors at the zoo. A ...,giraffe,000000471567.jpg,471567,"[giraffe, stands, alone, outdoors, at, the, zo...","[[-0.055908203, 0.33007812, -0.39648438, 0.017...","[0.032204077, 0.088930055, -0.049434863, 0.037...",giraffe,giraffe
3,"[bottle, refrigerator, oven, spoon, spoon, bot...","An oven with a stove, pots and utensils, and a...",clock,000000384661.jpg,384661,"[an, oven, with, stove, pots, and, utensils, a...","[[0.12597656, 0.19042969, 0.06982422, 0.072265...","[-0.02268982, 0.087446496, 0.03933277, 0.06695...",toilet,clock
4,"[car, car, car, car, car, car, traffic light, ...",A post office box is on the side of a busy str...,traffic light,000000496722.jpg,496722,"[post, office, box, is, on, the, side, of, bus...","[[-0.021484375, -0.0037994385, -0.114746094, -...","[0.04842315, 0.0264707, -0.008286966, 0.109912...",traffic light,traffic light


## Misclassified

In [22]:
# Rows whose cross-validated prediction differs from the true category.
# The shuffle is seeded so that, for the same set of misclassified rows,
# positional look-ups such as k1["captions"][2] hit the same row every run.
k1 = (
    df.loc[df["category"] != df["predictions"]]
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
k1

Unnamed: 0,all_categories,captions,category,file_name,img_id,words,vectors,vector_avg,similar,predictions
0,"[dog, dog, chair, person, bed, book, book, boo...",A man sitting at a computer desk in front of a...,dog,000000366884.jpg,366884,"[man, sitting, at, computer, desk, in, front, ...","[[0.32617188, 0.13085938, 0.03466797, -0.08300...","[0.06266062, 0.044794988, 0.0012399774, 0.0781...",toilet,clock
1,"[chair, person, wine glass, fork, knife, bowl,...",An individual is capture in the stillness of t...,pizza,000000425361.jpg,425361,"[an, individual, is, capture, in, the, stillne...","[[0.12597656, 0.19042969, 0.06982422, 0.072265...","[0.01859116, 0.051346943, -0.02416192, 0.06112...",pizza,clock
2,"[cat, cat, person, bottle, cat, fire hydrant, ...",A woman in yellow shirt and skirt with cats in...,cat,000000375493.jpg,375493,"[woman, in, yellow, shirt, and, skirt, with, c...","[[0.24316406, -0.07714844, -0.103027344, -0.10...","[0.07454224, 0.03916931, 0.02020874, 0.0478256...",dog,traffic light
3,"[car, car, car, person, car, toilet, car]",A young attractive woman sitting on a toilette...,toilet,000000365642.jpg,365642,"[young, attractive, woman, sitting, on, toilet...","[[0.09472656, 0.328125, -0.048583984, -0.00665...","[0.09142665, 0.007317794, -0.00028429533, 0.10...",toilet,traffic light
4,"[dog, boat, boat, person, person, person, pers...",Sail boats sail over a body of water while peo...,dog,000000078565.jpg,78565,"[sail, boats, sail, over, body, of, water, whi...","[[0.22363281, 0.43359375, -0.20117188, -0.0125...","[0.09650697, 0.10090093, -0.051098112, 0.06210...",surfboard,surfboard
5,"[bed, bed, person, toilet, backpack, bottle, b...",A young woman using a laptop computer while si...,toilet,000000393569.jpg,393569,"[young, woman, using, laptop, computer, while,...","[[0.09472656, 0.328125, -0.048583984, -0.00665...","[0.06295634, -0.0036450764, 0.04265364, 0.0957...",toilet,clock
6,"[clock, person, hot dog, donut, donut, donut, ...",This a case full of doughnuts and cinnamon bun...,clock,000000405195.jpg,405195,"[this, case, full, of, doughnuts, and, cinnamo...","[[0.109375, 0.140625, -0.03173828, 0.16601562,...","[-0.00797035, 0.025007578, -0.049233273, 0.186...",pizza,pizza
7,"[motorcycle, person, person, person, person, p...",A street scene with a person on a motorcycle. ...,dog,000000139099.jpg,139099,"[street, scene, with, person, on, motorcycle, ...","[[0.09814453, 0.075683594, 0.030883789, 0.1787...","[0.08060455, -0.013256497, -0.0058034263, 0.10...",dog,traffic light
8,"[cell phone, car, car, car, person, person, pe...",A man is taking a photo of a city with his pho...,traffic light,000000545100.jpg,545100,"[man, is, taking, photo, of, city, with, his, ...","[[0.32617188, 0.13085938, 0.03466797, -0.08300...","[0.053505316, 0.078639336, 0.008547899, -0.021...",dog,clock
9,"[umbrella, tie, person, person, person, person...",A newly married couple walking down a street. ...,traffic light,000000496854.jpg,496854,"[newly, married, couple, walking, down, street...","[[-0.044189453, -0.123046875, 0.12597656, -0.1...","[0.040103912, -0.025392914, -0.01187439, 0.012...",dog,clock


In [25]:
# Full caption text of the misclassified row labelled 2 (the index was reset
# after the shuffle, so the label is also the position).
k1.loc[2, "captions"]

'A woman in yellow shirt and skirt with cats in grass. a person standing next to a yellow fire hydrant A woman wearing a jean skirt standing next to a fire hydrant. a woman standing next to a yellow fire hydrant A woman wearing a yellow top and a jean skirt standing next to a yellow fire hydrant. '