In [15]:
!git clone https://github.com/antipainK/AGH_Neural_Networks_and_Deep_Learning

Cloning into 'AGH_Neural_Networks_and_Deep_Learning'...
remote: Enumerating objects: 74, done.[K
remote: Counting objects: 100% (74/74), done.[K
remote: Compressing objects: 100% (48/48), done.[K
remote: Total 74 (delta 22), reused 68 (delta 22), pack-reused 0[K
Unpacking objects: 100% (74/74), done.


In [6]:
import numpy as np
import matplotlib.pyplot as plt
import time
#import mkl

#mkl.set_num_threads(2)
np.random.seed(1234)

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.rcParams["figure.figsize"] = [16, 9]

### Handy utility functions

In [7]:
def zeros(*dims):
    return np.zeros(shape=tuple(dims), dtype=np.float32)

def ones(*dims):
    return np.ones(shape=tuple(dims), dtype=np.float32)

def rand(*dims):
    return np.random.rand(*dims).astype(np.float32)

def randn(*dims):
    return np.random.randn(*dims).astype(np.float32)

def sigmoid(batch, stochastic=False):
    return  1.0 / (1.0 + np.exp(-batch))

def as_matrix(vector):
    return np.reshape(vector, (-1, 1))

# Word2Vec

In [16]:
import pickle
import gzip

with gzip.open("AGH_Neural_Networks_and_Deep_Learning/lab13/text8.dat.gz", "rb") as f:
    train_dict, train_set, train_tokens = pickle.load(f)

train_set = np.random.permutation(train_set)

In [10]:
from collections import namedtuple
Config = namedtuple("Config", ["dict_size", "vect_size", "neg_samples", "updates", "learning_rate", 
                               "learning_rate_decay", "decay_period", "log_period"])
conf = Config(
    dict_size=len(train_dict),
    vect_size=100,
    neg_samples=10,
    updates=5000000,
    learning_rate=0.1,
    learning_rate_decay=0.995,
    decay_period=10000,
    log_period=10000)

## Negative Sampling

In order to train distributed word representations, first:
 - calculate gradient of the cost function with respect to the `word_vector` and store in `word_grad`.
 - calculate gradient of the cost function with respect to the `context_vect` and store in `context_grad`.
 - calculate gradient of the cost function with respect to the sampled `negative_vects` and store in `neg_context_grad`.

In [29]:
def neg_sample(conf, train_set, train_tokens):
    Vp = randn(conf.dict_size, conf.vect_size)
    Vo = randn(conf.dict_size, conf.vect_size)

    J = 0.0
    learning_rate = conf.learning_rate
    for i in range(conf.updates):
        idx = i % len(train_set)

        word    = train_set[idx, 0]
        context = train_set[idx, 1]
        
        neg_context = np.random.randint(0, len(train_tokens), conf.neg_samples)
        neg_context = train_tokens[neg_context]

        word_vect = Vp[word, :]              # word vector
        context_vect = Vo[context, :];       # context wector
        negative_vects = Vo[neg_context, :]  # sampled negative vectors

        # Cost and gradient calculation starts here
        score_pos = word_vect @ context_vect.T
        score_neg = word_vect @ negative_vects.T

        J -= np.log(sigmoid(score_pos)) + np.sum(np.log(sigmoid(-score_neg)))
        if (i + 1) % conf.log_period == 0:
            print('Update {0}\tcost: {1:>2.2f}'.format(i + 1, J / conf.log_period))
            final_cost = J / conf.log_period
            J = 0.0

        pos_g = 1.0 - sigmoid(score_pos)
        neg_g = sigmoid(score_neg) # Modified

        # Note: use pos_g and neg_g will simplify gradient calculation.
        # Hint: Calculate derivatives for the cost function uwith respect to `word_vector`, 'context_vect` and `negative_vects`.

        word_grad = -pos_g * context_vect + np.sum(as_matrix(neg_g) * negative_vects, axis=0) # Modified
        context_grad = -pos_g * word_vect # Modified
        neg_context_grad = as_matrix(neg_g) @ as_matrix(word_vect).T # Modified

        Vp[word, :] -= learning_rate * word_grad
        Vo[context, :] -= learning_rate * context_grad
        Vo[neg_context, :] -= learning_rate * neg_context_grad

        if i % conf.decay_period == 0:
            learning_rate = learning_rate * conf.learning_rate_decay

    return Vp, Vo, J # Modified


In [30]:
Vp, Vo, J = neg_sample(conf, train_set, train_tokens)

Update 10000	cost: 36.18
Update 20000	cost: 28.30
Update 30000	cost: 23.38
Update 40000	cost: 19.58
Update 50000	cost: 17.13
Update 60000	cost: 15.47
Update 70000	cost: 13.91
Update 80000	cost: 12.76
Update 90000	cost: 12.23
Update 100000	cost: 11.55
Update 110000	cost: 10.92
Update 120000	cost: 10.45
Update 130000	cost: 10.14
Update 140000	cost: 9.68
Update 150000	cost: 9.29
Update 160000	cost: 9.03
Update 170000	cost: 8.80
Update 180000	cost: 8.48
Update 190000	cost: 8.33
Update 200000	cost: 8.23
Update 210000	cost: 8.06
Update 220000	cost: 7.92
Update 230000	cost: 7.71
Update 240000	cost: 7.62
Update 250000	cost: 7.39
Update 260000	cost: 7.38
Update 270000	cost: 7.25
Update 280000	cost: 7.12
Update 290000	cost: 6.97
Update 300000	cost: 6.89
Update 310000	cost: 6.92
Update 320000	cost: 6.76
Update 330000	cost: 6.67
Update 340000	cost: 6.59
Update 350000	cost: 6.55
Update 360000	cost: 6.41
Update 370000	cost: 6.44
Update 380000	cost: 6.35
Update 390000	cost: 6.35
Update 400000	cost: 6

### Word similarity

In [31]:
def lookup_word_idx(word, word_dict):
    try:
        return np.argwhere(np.array(word_dict) == word)[0][0]
    except:
        raise Exception("No such word in dict: {}".format(word))

def similar_words(embeddings, word, word_dict, hits):
    word_idx = lookup_word_idx(word, word_dict)
    similarity_scores = embeddings @ embeddings[word_idx]
    similar_word_idxs = np.argsort(-similarity_scores)    
    return [word_dict[i] for i in similar_word_idxs[:hits]]

In [32]:
print('\n\nTraining cost: {0:>2.2f}\n\n'.format(J))

Vp_norm = Vp / as_matrix(np.linalg.norm(Vp , axis=1))
for w in ['zero', 'computer', 'cars', 'home', 'album']:
    similar = similar_words(Vp_norm, w, train_dict, 5)
    print('Words similar to {}: {}'.format(w, ", ".join(similar)))



Training cost: 0.00


Words similar to zero: zero, four, five, three, eight
Words similar to computer: computer, software, technology, digital, design
Words similar to cars: cars, line, speed, range, light
Words similar to home: home, moved, park, city, street
Words similar to album: album, songs, band, music, released
