In [1]:
#imports
from pathlib import Path
import json
from collections import defaultdict
import numpy as np
import time
import gensim
from gensim.models.keyedvectors import KeyedVectors
from sklearn.decomposition import TruncatedSVD
import matplotlib.pyplot as plt
%matplotlib inline
import re, string
import pickle

In [5]:
# Load the COCO caption metadata (file name: captions_train2014.json) as parsed JSON.
# `coco_data` is what gets handed to `coco_values`, which reads its
# 'annotations' and 'images' entries.
filename = "data/captions_train2014.json"
with Path(filename).open() as f:
    coco_data = json.load(f)

In [2]:
# Load the pickled ResNet-18 feature mapping. `coco_values.__init__` reads this
# global (it tests image IDs against its keys), so this cell has to run before
# `coco_values` is instantiated.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# feature files from a trusted source.
with Path('data/resnet18_features.pkl').open('rb') as f:
    resnet18_features = pickle.load(f)

In [20]:
class coco_values:
    """Lookup helper over parsed COCO caption metadata.

    Keeps the raw 'annotations' and 'images' lists, records which image and
    caption IDs have a precomputed feature vector, and answers
    image <-> caption <-> text/URL queries.

    Attributes
    ----------
    coco_annotations : list of dict
        `coco_data['annotations']`, unchanged.
    coco_images : list of dict
        `coco_data['images']`, unchanged.
    coco_img_ids : list
        IDs of the images that appear in `features`.
    coco_cap_ids : list
        IDs of the captions whose image appears in `features`.
    """

    def __init__(self, coco_data, features=None):
        """Store the metadata and build the lookup indexes.

        Parameters
        ----------
        coco_data : dict
            Parsed COCO captions JSON; its 'annotations' and 'images' entries
            are read.
        features : container of image IDs, optional
            Anything supporting `image_id in features` (e.g. the dict of
            ResNet features). When omitted, the notebook-level global
            `resnet18_features` is used, which preserves the original
            behaviour of this class.

        Raises
        ------
        NameError
            If `features` is omitted and `resnet18_features` has not been
            defined yet.
        """
        if features is None:
            # Backward-compatible fallback to the global the original code
            # relied on implicitly.
            features = resnet18_features

        #make variables for images and captions
        self.coco_annotations = coco_data['annotations']
        self.coco_images = coco_data['images']
        self.coco_img_ids = [img['id'] for img in self.coco_images
                             if img['id'] in features]
        self.coco_cap_ids = [ann['id'] for ann in self.coco_annotations
                             if ann['image_id'] in features]

        # Index the metadata once so each query below is a dictionary lookup
        # instead of a scan over every annotation/image.
        self._annotation_by_id = {}
        self._caption_ids_by_image = defaultdict(list)
        for ann in self.coco_annotations:
            # setdefault keeps the first annotation for an ID, matching the
            # original "first match wins" behaviour.
            self._annotation_by_id.setdefault(ann['id'], ann)
            self._caption_ids_by_image[ann['image_id']].append(ann['id'])

        self._images_by_id = defaultdict(list)
        for img in self.coco_images:
            self._images_by_id[img['id']].append(img)

    def _annotation(self, caption_id):
        """Return the first annotation with this caption ID, or raise IndexError."""
        try:
            return self._annotation_by_id[caption_id]
        except KeyError:
            # The original list-scan implementation raised IndexError for an
            # unknown ID; keep that exception type for existing callers.
            raise IndexError("no annotation with caption id %r" % (caption_id,)) from None

    def get_captions(self, image_id):
        """Return all caption IDs attached to `image_id` (empty list if none)."""
        # .get avoids inserting a key into the defaultdict; list() hands back a
        # copy so callers cannot mutate the index.
        return list(self._caption_ids_by_image.get(image_id, ()))

    def get_image_from_cap(self, caption_id):
        """Return the image ID the caption belongs to.

        Raises IndexError if no annotation has this caption ID.
        """
        return self._annotation(caption_id)['image_id']

    def get_caption_text(self, caption_id):
        """Return the caption text for `caption_id`.

        Raises IndexError if no annotation has this caption ID.
        """
        return self._annotation(caption_id)['caption']

    def get_url(self, image_id):
        """Return a list with the 'coco_url' of every image entry whose ID is `image_id`.

        Unlike the other getters this returns a list (empty for an unknown
        ID), which is the behaviour existing callers see.
        """
        return [img['coco_url'] for img in self._images_by_id.get(image_id, ())]

In [21]:
# Build the COCO lookup helper from the loaded metadata.
x = coco_values(coco_data)

In [17]:
# Caption IDs attached to image 318556.
x.get_captions(318556)

[48, 126, 219, 255, 3555]

In [18]:
# Image ID that caption 48 belongs to.
x.get_image_from_cap(48)

318556

In [19]:
# Text of caption 48.
x.get_caption_text(48)

'A very clean and well decorated empty bathroom'

In [16]:
# Load the GloVe vectors (200-d per the file name) from word2vec text format
# (binary=False) and report how long the load took; the recorded run below
# took roughly 98 seconds.
path = r"data/glove.6B.200d.txt.w2v"
t0 = time.time()
glove = KeyedVectors.load_word2vec_format(path, binary=False)
t1 = time.time()
print("elapsed %ss" % (t1 - t0))

elapsed 97.68171954154968s


In [32]:
def string2vectors(given_str,IDF=None):
    """Convert a string into a list of word-embedding vectors, one per word.

    Punctuation (every character in `string.punctuation`) is removed, the text
    is lower-cased and split on whitespace, and each resulting word is looked
    up in the embedding.

    Parameters
    ----------
    given_str : str
        Text to embed.
    IDF : mapping from word to vector, optional
        Despite the name, this is the word-embedding lookup: it must support
        `word in IDF` and `IDF[word]` (e.g. a gensim `KeyedVectors`). When
        omitted, the GloVe-200d file is loaded from disk on every call, which
        is slow; pass an already-loaded model to avoid that.

    Returns
    -------
    list of numpy.ndarray
        One vector per word, in order. Words missing from the embedding map to
        a zero vector whose length is `IDF.vector_size` when that attribute
        exists, otherwise 200. An empty or punctuation-only string gives `[]`.
    """
    if IDF is None:
        print("No IDF given, loading GloVe-200d")
        path = r"data/glove.6B.200d.txt.w2v"
        t0 = time.time()
        IDF = KeyedVectors.load_word2vec_format(path, binary=False)
        t1 = time.time()
        print("GloVe loaded in: %ss" % (t1-t0))

    punc_regex = re.compile('[{}]'.format(re.escape(string.punctuation)))
    str_list = punc_regex.sub('', given_str).lower().split()

    # Size the out-of-vocabulary placeholder from the embedding itself so
    # non-200d models work; plain dicts have no vector_size, so keep 200 as
    # the fallback.
    dim = getattr(IDF, 'vector_size', 200)

    # Fix: the original called the non-existent np.zeroes, so any
    # out-of-vocabulary word raised AttributeError.
    return [IDF[word] if word in IDF else np.zeros((dim,)) for word in str_list]

In [33]:
#string2vectors("My dog loves me",glove)

In [36]:
class ImageToWord:
    """Two-layer, bias-free encoder/decoder model.

    NOTE(review): `dense` and `glorot_normal` are not imported in the visible
    import cell; they must be in scope before this class is instantiated.
    """

    def __init__(self, context_words, d):
        """Create the encoder and decoder layers and store them on the model.

        Parameters
        ----------
        context_words : int
            The number of context words included in our vocabulary; used as
            the first size argument of the encoder and the second size
            argument of the decoder.

        d : int
            The dimensionality of our word embeddings; used as the other size
            argument of each layer.
        """
        # Both layers share the same construction options.
        layer_options = dict(weight_initializer=glorot_normal, bias=False)
        self.encode = dense(context_words, d, **layer_options)
        self.decode = dense(d, context_words, **layer_options)

    def __call__(self, x):
        """Run a forward pass, so a model `m` can be used as `m(x)`.

        Parameters
        ----------
        x : Union[numpy.ndarray, mygrad.Tensor], shape=(M, context_words)
            A batch of M rows from the context matrix, each tracking the
            number of co-occurrences with `context_words` words.

        Returns
        -------
        mygrad.Tensor, shape=(M, context_words)
            The result of passing the data through the encoder and then the
            decoder.
        """
        embedded = self.encode(x)
        return self.decode(embedded)

    @property
    def parameters(self):
        """All learnable parameters of the model, available as `model.parameters`.

        Returns
        -------
        Tuple[Tensor, ...]
            The encoder's parameters followed by the decoder's parameters.
        """
        encoder_params = self.encode.parameters
        decoder_params = self.decode.parameters
        return encoder_params + decoder_params

In [24]:
def resnetID2vectors(N,resnet_model=None):
    """Look up the feature vector for each ID in `N`.

    Parameters
    ----------
    N : iterable
        IDs to look up.
    resnet_model : mapping, optional
        Maps ID -> feature vector. When omitted, the mapping is unpickled from
        'data/resnet18_features.pkl' on every call.
        NOTE: pickle.load can execute arbitrary code from the file; only use a
        trusted feature file.

    Returns
    -------
    list
        The vectors of the IDs found in `resnet_model`, in input order. IDs
        that are missing are skipped silently, so the result can be shorter
        than `N`.
    """
    if resnet_model is None:
        print("no resnet model given, using ResNet18")
        with Path('data/resnet18_features.pkl').open('rb') as f:
            resnet_model = pickle.load(f)

    vectors = []
    for image_id in N:
        if image_id in resnet_model.keys():
            vectors.append(resnet_model[image_id])
    return vectors

In [27]:
#resnetID2vectors([318556])

no resnet model given, using ResNet18


[array([[6.63060725e-01, 7.25843489e-01, 2.50235319e+00, 3.05797410e+00,
         1.18076408e+00, 8.59722376e-01, 2.11275458e+00, 1.66923761e+00,
         9.30204213e-01, 2.95161271e+00, 4.11769390e-01, 2.46403694e+00,
         3.68513763e-01, 1.11124635e+00, 6.83404088e-01, 2.38099003e+00,
         3.51461768e-01, 4.53104496e-01, 6.18082881e-01, 1.47529796e-01,
         6.73661709e-01, 8.39756727e-01, 8.64176512e-01, 2.60511875e-01,
         7.59026051e-01, 2.43982124e+00, 1.52093339e+00, 6.98069096e-01,
         7.00831831e-01, 4.83107895e-01, 1.89520228e+00, 5.31131327e-01,
         5.05404234e-01, 7.72590935e-01, 1.47595334e+00, 1.46737254e+00,
         3.97098160e+00, 1.54140517e-01, 5.48306704e-01, 1.23401248e+00,
         2.30810314e-01, 1.65646225e-01, 7.39505067e-02, 6.08843267e-01,
         4.72796887e-01, 1.17470765e+00, 4.49406654e-02, 2.92875671e+00,
         2.91214973e-01, 1.36587119e+00, 9.66572523e-01, 8.86391938e-01,
         7.79555738e-01, 2.26721197e-01, 7.75084496