In [38]:
import gensim
from gensim.models import Word2Vec

In [39]:
import pickle
import argparse
import nltk
import pandas as pd
from collections import Counter
from pycocotools.coco import COCO

## Load captions for the training dataset

In [40]:
# Location of the COCO 2014 training caption annotations.
ANNOTATION_FILE = "./data/annotations/captions_train2014.json"

# Build the COCO index, then turn its annotation mapping into a DataFrame.
# With the default orientation each annotation becomes a *column*; the
# following cell transposes the frame so annotations end up as rows.
coco = COCO(ANNOTATION_FILE)
captions = pd.DataFrame(coco.anns)

loading annotations into memory...
Done (t=0.60s)
creating index...
index created!


In [41]:
# Put one annotation per row (columns: image_id, id, caption).
# The transpose is guarded so that re-running this cell does not flip the
# frame back to its original orientation: once "caption" is a column the
# frame is already in the desired layout and is left untouched.
if "caption" not in captions.columns:
    captions = captions.T
captions.head()

Unnamed: 0,image_id,id,caption
48,318556,48,A very clean and well decorated empty bathroom
67,116100,67,A panoramic view of a kitchen and all of its a...
126,318556,126,A blue and white bathroom with butterfly theme...
148,116100,148,A panoramic photo of a kitchen and dining room
173,379340,173,A graffiti-ed stop sign across the street from...


In [42]:
# The image ids are taken from the CSV's header row, so only the header is
# read (nrows=0) rather than parsing the whole file. A separate name is used
# for the intermediate frame so `ids` is only ever bound to the list of ints.
id_header = pd.read_csv("TrainImageIds.csv", nrows=0)
ids = [int(column_name) for column_name in id_header.columns]

In [43]:
# Keep only the annotations whose image_id appears in the id list.
in_id_list = captions["image_id"].isin(ids)
filt_captions = captions.loc[in_id_list]
filt_captions.head()

Unnamed: 0,image_id,id,caption
67,116100,67,A panoramic view of a kitchen and all of its a...
148,116100,148,A panoramic photo of a kitchen and dining room
653,18691,653,A few people sit on a dim transportation system.
668,285579,668,"A person, protected from the rain by their umb..."
818,539984,818,A brown horse is grazing grass near a red house.


## Train word2vec embeddings on the caption tokens

In [44]:
# One token list per caption: lowercase the text, then split it with NLTK's
# word tokenizer. The result is the corpus handed to Word2Vec below.
sentences = [
    nltk.tokenize.word_tokenize(caption.lower())
    for caption in filt_captions["caption"]
]

In [45]:
# Train 256-dimensional embeddings on the tokenised captions.
# min_count=1 keeps every token, including words that occur only once.
# The seed is pinned and training is limited to a single worker thread so
# that repeated runs produce the same vectors (the lookups further down show
# different values for the same word across runs). gensim's documentation
# notes that full reproducibility across interpreter launches on Python 3
# additionally needs PYTHONHASHSEED to be set.
model = Word2Vec(sentences, size=256, min_count=1, seed=1, workers=1)

In [46]:
# Print the model's string summary (vocabulary size, vector size, alpha).
print(model)

Word2Vec(vocab=12567, size=256, alpha=0.025)


In [47]:
# Look the vector up through the model's KeyedVectors (`model.wv`), the same
# accessor the save cell uses; indexing the Word2Vec object directly is
# deprecated and emits a warning.
model.wv["orchestrating"]

  """Entry point for launching an IPython kernel.


array([ 5.44866640e-03,  7.78626278e-03,  1.69322584e-02,  5.08261612e-03,
       -4.14583803e-04, -6.74990891e-03,  8.31173919e-03,  5.98568656e-03,
       -4.33492288e-03, -4.20297758e-04, -4.04012203e-03,  8.32135603e-03,
       -1.64447178e-03, -8.09848367e-04,  3.32372612e-03,  8.75216909e-03,
       -5.97423036e-03, -5.04401838e-03,  1.14105467e-03, -5.67083573e-03,
       -1.47392938e-03, -3.28260288e-03, -1.39608672e-02, -6.47407817e-03,
        9.09896742e-04, -3.85798700e-03,  4.13584197e-03,  9.27154068e-03,
        4.05295752e-03,  2.44149669e-05,  5.80700627e-03,  2.80695432e-03,
       -1.43665932e-02,  6.88454229e-03, -6.38931617e-03,  1.02491165e-03,
        8.35133810e-03,  6.71062199e-03,  4.19907941e-04, -5.26678190e-03,
       -5.84268104e-03, -4.58228821e-03,  1.22952973e-02,  2.94932444e-03,
        4.17447789e-03,  4.09262581e-03, -1.14655653e-02,  8.16832544e-05,
       -1.39878772e-03, -9.12812073e-04,  5.61878970e-03,  3.56877781e-03,
       -3.78139084e-03, -

In [48]:
# Write only the word vectors (not the full trainable model) to disk in the
# binary word2vec format.
model.wv.save_word2vec_format('word2vec.model.bin', binary=True)

In [36]:
# Reload the saved vectors from the binary word2vec file.
# NOTE(review): this rebinds `model` from the trained Word2Vec object to a
# KeyedVectors object, so later cells see a different type depending on which
# cell ran last — consider a separate name.
model = gensim.models.KeyedVectors.load_word2vec_format('word2vec.model.bin', binary=True)

In [37]:
# Look up the vector for one word in whatever `model` is currently bound to
# (the reloaded vectors if the load cell above ran last).
model['orchestrating']

array([ 0.01205461, -0.01833498,  0.02455851,  0.00092925, -0.00562304,
        0.00043357,  0.01463171,  0.01850286, -0.01136436,  0.01135348,
       -0.01337052,  0.01292691,  0.00510718,  0.00223395, -0.01884581,
        0.00152092,  0.00195047, -0.01488339,  0.02079125,  0.01067534,
       -0.00300988, -0.00812828, -0.01823266,  0.01200487, -0.01770525,
       -0.01725552,  0.00642528,  0.01892599,  0.00740643, -0.01393753,
        0.01257138,  0.0103461 , -0.0078802 ,  0.01923333,  0.00306906,
       -0.00128885,  0.00341946,  0.01590269, -0.00492797,  0.00658   ,
       -0.01916674,  0.01166086,  0.00576859, -0.00199283,  0.00424741,
        0.0012248 , -0.00816283, -0.01444382, -0.00252305, -0.00398955,
        0.0006814 ,  0.0102491 , -0.00515129, -0.00935104,  0.01179864,
        0.00092461,  0.00629285, -0.00614708,  0.00092957,  0.01848733,
       -0.00229329, -0.01276232,  0.00312819, -0.01591261, -0.00581342,
        0.00297201,  0.00741572, -0.02746999, -0.00858064, -0.00

In [48]:
# Collect the vocabulary words, then print one word's vector.
# Both accesses go through `model.wv` for consistency; indexing a Word2Vec
# object directly is deprecated.
words = list(model.wv.vocab)
print(model.wv['orchestrating'])

[-0.0198124   0.00403214  0.01863487  0.00140835  0.00629108  0.00262589
  0.00305537  0.0103415  -0.02235805  0.00371054  0.00608911 -0.016513
  0.00095832  0.00326822 -0.00800401  0.01285153  0.01773841  0.00730178
  0.00588541  0.00167226 -0.02761898  0.01106803 -0.0027543  -0.00245525
  0.00580114  0.00851934 -0.01612504  0.01859374 -0.00065698 -0.0178295
  0.01423899  0.0093177  -0.01491377 -0.01423898 -0.00945396 -0.02396948
 -0.0009068  -0.00767918 -0.00038746 -0.01618955  0.00608693  0.01190796
  0.00244327  0.02831708  0.00450109 -0.0292737  -0.00566097 -0.01755272
 -0.01685097  0.0049314   0.00041349 -0.01053353  0.01785323  0.00438937
  0.01207001  0.01006929  0.03905445  0.03000938 -0.03997581 -0.0152995
 -0.0112565   0.0047344   0.00169621  0.02271652  0.00115679 -0.00202081
  0.00741793 -0.00726812 -0.00967692 -0.00258372  0.02003296  0.00398613
  0.02779187 -0.01531116 -0.00620083  0.01189591 -0.02555749 -0.01354365
 -0.01565268 -0.01157285  0.0032087  -0.01240461 -0.010

  


In [139]:
import pickle
import random

In [64]:
# Open the pickled vocabulary in binary read mode; the handle is consumed by
# the pickle.load call in the following cell.
file = open('./data/vocab.pkl', 'rb')

In [65]:
# Deserialize the vocabulary and close the file handle afterwards; using the
# already-open file object as a context manager closes it even if loading
# fails.
# SECURITY: pickle.load can execute arbitrary code — only load vocab.pkl if
# it comes from a trusted source.
with file:
    data = pickle.load(file)

In [190]:
# Open one training image for the cropping experiment below.
# NOTE(review): `Image` is not imported in any cell visible here — presumably
# `from PIL import Image`; confirm and add it to the import cell so the
# notebook survives Restart & Run All.
im=Image.open("./data/images/train/COCO_train2014_000000000077.jpg")

In [191]:
# Unpack the image's size pair into w and h (used as the crop bounds below).
w,h=im.size

In [192]:
# Display the unpacked size as a tuple.
w,h

(500, 375)

In [193]:
# Side lengths of the crop window: d1 is subtracted from w and d2 from h when
# the crop box is computed.
d1 = d2 = 256

In [197]:
# Pick a random top-left corner such that a d1 x d2 window stays inside the
# w x h image, then derive the opposite corner from it.
# randint is inclusive on both ends, so the largest offsets are w-d1 / h-d2.
max_left, max_top = w - d1, h - d2
left = random.randint(0, max_left)
top = random.randint(0, max_top)
right, bottom = left + d1, top + d2

In [198]:
# Cut the randomly positioned (left, top, right, bottom) box out of the image.
im1 = im.crop((left,top,right,bottom))

In [199]:
# Write the cropped image to the working directory for visual inspection.
im1.save("test.jpg")

In [137]:
# NOTE(review): identical to the crop cell above; its lower execution count
# (In [137]) suggests a stale leftover from an earlier session — consider
# deleting this cell.
im1 = im.crop((left,top,right,bottom))

In [138]:
# NOTE(review): identical to the save cell above (execution count In [138]);
# likely a stale leftover — consider deleting this cell.
im1.save("test.jpg")