In [18]:
#imports
from pathlib import Path
import json
from collections import defaultdict
import numpy as np
import time
import gensim
from gensim.models.keyedvectors import KeyedVectors
from sklearn.decomposition import TruncatedSVD
import matplotlib.pyplot as plt
%matplotlib inline
import re, string

In [9]:
# Load the COCO caption metadata (a single JSON document) into `coco_data`.
filename = "data/captions_train2014.json"
# Decode explicitly as UTF-8: without `encoding=` the platform's locale
# encoding is used, which can mis-decode non-ASCII text on some systems.
with Path(filename).open(encoding="utf-8") as f:
    coco_data = json.load(f)

In [10]:
class coco_values:
    """Lookup helper over loaded COCO caption metadata.

    Wraps the ``'images'`` and ``'annotations'`` lists of ``coco_data`` and
    answers image-ID / caption-ID / caption-text queries. The annotation list
    is indexed once at construction so each query is a dictionary lookup
    instead of a scan over every annotation.
    """

    def __init__(self, coco_data):
        """Store the metadata lists and build the lookup indices.

        Parameters
        ----------
        coco_data : dict
            Must contain ``'annotations'`` (records with ``'id'`` and
            ``'image_id'``; ``'caption'`` is read by ``get_caption_text``)
            and ``'images'`` (records with ``'id'``).
        """
        self.coco_annotations = coco_data['annotations']
        self.coco_images = coco_data['images']
        self.coco_img_ids = [img['id'] for img in self.coco_images]
        self.coco_cap_ids = [ann['id'] for ann in self.coco_annotations]

        # image ID -> caption IDs, in annotation order
        self._cap_ids_by_image = {}
        # caption ID -> annotation record; setdefault keeps the FIRST record
        # when an ID repeats, matching the old "first match" behaviour
        self._annotation_by_cap = {}
        for ann in self.coco_annotations:
            self._cap_ids_by_image.setdefault(ann['image_id'], []).append(ann['id'])
            self._annotation_by_cap.setdefault(ann['id'], ann)

    def _annotation(self, caption_id):
        """Return the annotation record for ``caption_id``.

        Raises
        ------
        IndexError
            If no annotation has that ID. ``IndexError`` (rather than
            ``KeyError``) is kept so existing callers' error handling still works.
        """
        try:
            return self._annotation_by_cap[caption_id]
        except KeyError:
            raise IndexError("no caption with id {!r}".format(caption_id)) from None

    def get_captions(self, image_id):
        """Return the list of caption IDs associated with ``image_id``.

        Returns an empty list when the image has no captions. The list is a
        copy, so mutating it does not affect the internal index.
        """
        return list(self._cap_ids_by_image.get(image_id, ()))

    def get_image_from_cap(self, caption_id):
        """Return the image ID that ``caption_id`` belongs to.

        Raises ``IndexError`` if the caption ID is unknown.
        """
        return self._annotation(caption_id)['image_id']

    def get_caption_text(self, caption_id):
        """Return the caption text for ``caption_id``.

        Raises ``IndexError`` if the caption ID is unknown.
        """
        return self._annotation(caption_id)['caption']

In [11]:
# Build the lookup helper over the COCO metadata loaded above.
x = coco_values(coco_data)

In [12]:
# Caption IDs whose annotation carries image_id 318556.
x.get_captions(318556)

[48, 126, 219, 255, 3555]

In [13]:
# Image ID of the annotation whose caption ID is 48.
x.get_image_from_cap(48)

318556

In [14]:
# Caption text of the annotation whose caption ID is 48.
x.get_caption_text(48)

'A very clean and well decorated empty bathroom'

In [16]:
# Load the GloVe vectors (stored in word2vec text format) into `glove`
# and report how long the load took.
path = r"data/glove.6B.200d.txt.w2v"
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() can jump if the system clock is adjusted mid-load.
t0 = time.perf_counter()
glove = KeyedVectors.load_word2vec_format(path, binary=False)
t1 = time.perf_counter()
print("elapsed %ss" % (t1 - t0))

elapsed 97.68171954154968s


In [32]:
def string2vectors(given_str,IDF=None):
    """Convert a string into a list of word vectors, one per token.

    The string is stripped of ASCII punctuation, lower-cased and split on
    whitespace. Each token is looked up in ``IDF``; tokens that are not in
    the vocabulary map to an all-zero vector.

    Parameters
    ----------
    given_str : str
        Text to embed.
    IDF : mapping of word -> vector, optional
        Despite the name, this is the word-vector lookup (e.g. the loaded
        GloVe ``KeyedVectors``); it must support ``word in IDF`` and
        ``IDF[word]``. If None, the GloVe-200d file is loaded from disk on
        *every* call, which is slow -- pass an already-loaded model instead.

    Returns
    -------
    list of numpy.ndarray
        One vector per token, in order. Empty list for an empty/blank string.
    """
    if IDF is None:
        print("No IDF given, loading GloVe-200d")
        path = r"data/glove.6B.200d.txt.w2v"
        t0 = time.time()
        IDF = KeyedVectors.load_word2vec_format(path, binary=False)
        t1 = time.time()
        print("GloVe loaded in: %ss" % (t1-t0))

    punc_regex = re.compile('[{}]'.format(re.escape(string.punctuation)))
    tokens = punc_regex.sub('', given_str).lower().split()

    # Size of the zero vector used for unknown words: take it from the model
    # when it exposes `vector_size`, otherwise fall back to the 200 dimensions
    # this function was written for.
    dim = getattr(IDF, "vector_size", 200)

    # A fresh zero array per unknown word so results never alias each other.
    # (Was `np.zeroes`, which raised AttributeError on any unknown word.)
    return [IDF[word] if word in IDF else np.zeros((dim,)) for word in tokens]

In [33]:
#string2vectors("My dog loves me",glove)

In [35]:
class ImageToWord():
    """Implements a simple-cell RNN that produces both outputs and hidden descriptors.

    For each step t of a length-T input sequence:

        h_t = relu(fc_x2h(x_t) + fc_h2h(h_{t-1}))
        y_t = fc_h2y(h_t)
    """
    # NOTE(review): `dense`, `glorot_normal`, `relu` and `mg` are not imported in
    # the visible notebook cells (presumably they come from mynn / mygrad) --
    # confirm an import cell provides them, otherwise using this class raises
    # NameError.

    def __init__(self, dim_input, dim_recurrent, dim_output):
        """ Initializes all layers needed for RNN

        Parameters
        ----------
        dim_input: int
            Dimensionality of data passed to RNN (C)

        dim_recurrent: int
            Dimensionality of hidden state in RNN (D)

        dim_output: int
            Dimensionality of output of RNN (K)
        """
        # One dense layer per matrix multiplication in the simple-cell RNN
        # equations. The hidden-to-hidden layer has no bias; the
        # input-to-hidden layer is created with the default bias setting.
        self.fc_x2h = dense(dim_input, dim_recurrent, weight_initializer=glorot_normal)
        self.fc_h2h = dense(dim_recurrent, dim_recurrent, weight_initializer=glorot_normal, bias=False)
        self.fc_h2y = dense(dim_recurrent, dim_output, weight_initializer=glorot_normal)

    def __call__(self, x, h=None):
        """ Performs the full forward pass for the RNN.

        Hidden states h_t and output scores y_t are returned for the full
        sequence.

        Parameters
        ----------
        x: Union[numpy.ndarray, mygrad.Tensor], shape=(T, C)
            The input sequence; iterated over its first axis, one x_t per step.

        h: Optional[Union[numpy.ndarray, mygrad.Tensor]], shape=(1, D)
            An optional initial hidden state h_0.
            If None, an array of float32 zeros is used.

        Returns
        -------
        Tuple[y, h]
            y: mygrad.Tensor, shape=(T, K)
                The output scores for each RNN step
            h: mygrad.Tensor, shape=(T, D)
                The hidden states computed at each RNN step, excluding the initial state h_0
        """
        # Honour a caller-supplied h_0; previously the argument was silently
        # discarded and the recurrence always started from zeros.
        if h is None:
            h_t = np.zeros((1, self.fc_h2h.weight.shape[0]), dtype=np.float32)
        else:
            h_t = h

        # h_1 .. h_T only; the initial state is neither returned nor used for y.
        hidden_states = []
        for x_t in x:
            # `x_t[np.newaxis]` reshapes x_t: (C,) -> (1, C), so that
            # h_t is a shape-(1, D) hidden descriptor.
            h_t = relu(self.fc_x2h(x_t[np.newaxis]) + self.fc_h2h(h_t))
            hidden_states.append(h_t)

        # shape-(T, D) collection of the T hidden descriptors
        all_h = mg.concatenate(hidden_states, axis=0)

        # shape-(T, K) collection of scores, one per hidden descriptor;
        # `all_y[-1:]` is the shape-(1, K) score for the final step y_T.
        all_y = self.fc_h2y(all_h)
        return all_y,all_h

    @property
    def parameters(self):
        """ A convenience function for getting all the parameters of our model.

        This can be accessed as an attribute, via `model.parameters`

        Returns
        -------
        Tuple[Tensor, ...]
            A tuple containing all of the learnable parameters for our model
        """
        return self.fc_x2h.parameters + self.fc_h2h.parameters + self.fc_h2y.parameters