# Applied Machine Learning - Project 2
### Boaz Shvartzman, Ofir Ziv

In [1]:
%matplotlib notebook

In [2]:
import os
import re
import json
import datetime
import collections
import numpy as np
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
import multiprocessing

## Part 1

    1.

In [3]:
class DatasetSplitParser(object):
    
    def __init__(self, split_path):
        with open(split_path, 'r')as f:
            file_content = f.read()

        self._ds_by_index = {}
        
        for row in file_content.split('\n')[1:-1]:
            index, ds = row.split(',')
            self._ds_by_index[int(index)] = int(ds)
            
    def get_dataset_by_index(self):
        return self._ds_by_index

    2.

In [4]:
class DatasetsHolder(object):
    
    def __init__(self, sentences_path, datasplit_parser):
        with open(sentences_path, 'r')as f:
            file_content = f.read()
        
        file_content = file_content.decode("ascii", "ignore") # Remove non-ASCII
        file_content = re.sub(r'([^\s\w]|_)+', ' ', file_content) # Remove non-alphanumeric
        file_content = file_content.lower() # Lowercase
        
        self._trainset = {}
        self._testset = {}
        
        ds_by_index = datasplit_parser.get_dataset_by_index()

        for row in file_content.split('\n')[1:-1]:
            
            index, sentence = row.split('\t')
            index, sentence = int(index), re.sub(r'\b\w{1,2}\b', '', sentence).split()
            
            if len(sentence) < 2:
                continue
                
            if ds_by_index[index] == 1:
                self._trainset[index] = sentence
                
            elif ds_by_index[index] == 2:
                self._testset[index] = sentence

    def get_trainset(self):
        return self._trainset
    
    def get_testset(self):
        return self._testset

## Part 2

    1.

In [5]:
class Hyperparameters(object):
    
    def __init__(self, window_size=1, vector_size=30, negative_words=50, iterations=2000,
                 noise_distribution='unigram', noise_dist_params={'alpha': 1.}, random_seed=1234,
                testset_measure_iterations=100):

        self.window_size = window_size
        self.vector_size = vector_size
        self.negative_words = negative_words
        self.iterations = iterations
        self.noise_distribution = noise_distribution
        self.noise_dist_params = noise_dist_params
        self.random_seed = random_seed
        self.testset_measure_iterations = testset_measure_iterations
        np.random.seed(random_seed)
        
    def __str__(self):
        return '\n'.join(['Window size: {}'.format(self.window_size),
                          'Embedding vector size: {}'.format(self.vector_size),
                          'Negative words: {}'.format(self.negative_words),
                          'Iterations: {}'.format(self.iterations),
                          'Noise distribution: {}'.format(self.noise_distribution),
                          'Random seed: {}'.format(self.random_seed),
                          'Testset measure step size: {}'.format(self.testset_measure_iterations)])

    2. + 3.

In [6]:
class ModelParameters(object):
    
    def __init__(self, hyperparameters):
        self._hyperparameters = hyperparameters
        
    def init(self, trainset):
        self._words = np.unique(sum(trainset.values(), []))
        vector_size = self._hyperparameters.vector_size
        
        def sample():
            vectors = np.random.multivariate_normal(np.zeros(vector_size), np.identity(vector_size) * 1e-2, len(self._words))
            return vectors / np.sqrt(np.sum(np.power(vectors, 2), axis=1)).reshape(-1, 1)

        self.U, self.V = sample(), sample()
        self._word_2_index = dict(zip(self._words, range(len(self._words))))
        
        return self
        
    def word2index(self, words):
        
        if isinstance(words, collections.Iterable) and not type(words) in [str, unicode, np.unicode_]:
            result = []
            for word in words:
                if word in self._word_2_index:
                    result.append(self._word_2_index[word])
            return np.array(result)
        else:
            return self._word_2_index[words]

    def index2word(self, indices):
        return self._words[indices]

    4. + 5.

In [7]:
class unigram_sampler(object):
    
    PRE_RANDOM = 10000
    
    def _init_pre_randomed(self):
        self.pre_randomed = np.random.choice(self.words, self.PRE_RANDOM, p=self.frequencies)
        self.pre_random_index = 0

    def __init__(self, trainset, alpha):
        self.words, self.frequencies = np.unique(sum(trainset.values(), []), return_counts=True)
        self.frequencies = np.power(self.frequencies, alpha).astype(np.float64)
        self.frequencies /= self.frequencies.sum()
        self._init_pre_randomed()
        
    def __call__(self, k):

        if self.PRE_RANDOM - self.pre_random_index < k:
            self._init_pre_randomed()
            
        result = self.pre_randomed[self.pre_random_index: self.pre_random_index + k]
        self.pre_random_index += k
        return result
    

def get_words_sampler(dataset, hyperparams):
    
    if hyperparams.noise_distribution == 'unigram':
        return unigram_sampler(dataset, hyperparams.noise_dist_params['alpha'])

    else:
        raise NotImplemented

    6.

$$ log \left ( \frac{1}{1 + exp(-v_c^Tu_t)} \right ) + \frac{1}{K}\sum_{j=1}^K log \left (1 - \frac{1}{1 + exp(-v_j^Tu_t)} \right ) = log \left ( \sigma(v_c^Tu_t) \right ) + \frac{1}{K}\sum_{j=1}^K log \left (1 - \sigma(v_j^Tu_t) \right )$$

In [8]:
def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))

def log_prob_context_with_negatives(target_word, context_word, negative_words, model_params):
    U, V = model_params.U, model_params.V
    u_t, v_c = U[target_word].reshape(-1, 1), V[context_word].reshape(-1, 1)
    sig = np.log(sigmoid(v_c.T.dot(u_t)))
    neg = np.sum(np.log(1 - sigmoid(V[negative_words].dot(u_t)))) / len(negative_words)
    
    return (sig + neg).ravel()[0]

## Part 3

    1.

$$ \frac{\partial f}{du_t} = \left (1 - \sigma(v_c^Tu_t) \right )v_c - \frac{1}{K}\sum_{j=1}^K \sigma(v_j^Tu_t)v_j$$

$$ \frac{\partial f}{dv_c} = \left (1 - \sigma(v_c^Tu_t) \right )u_t $$

$$ \frac{\partial f}{dv_j} = - \frac{1}{K}\sigma(v_j^Tu_t) u_t $$

    2.

In [9]:
def log_prob_context_with_negatives_gradient(model_params, target_word, context_word, negative_words):
    U, V = model_params.U, model_params.V
    u_t, v_c = U[target_word].reshape(-1, 1), V[context_word].reshape(-1, 1)
    neg = 1 - sigmoid(v_c.T.dot(u_t))
    
    dFdu = neg * v_c - np.sum(sigmoid(V[negative_words].dot(u_t)) * V[negative_words], axis=0).reshape(-1, 1) / len(negative_words)
    dFdv_c = neg * u_t

    dFdv_j = -(sigmoid(V[[negative_words]].dot(u_t))).dot(u_t.T) / len(negative_words)
    
    return dFdu, dFdv_c, dFdv_j

    3.

In [10]:
class TargetContextSampler(object):
    
    PRE_RANDOM = 10000
    
    def _init_pre_randoms(self):
        self.pre_randomed_sentences = np.random.choice(range(len(self.values)), self.PRE_RANDOM, p=self.probs)
        self.pre_randomed_words = np.random.rand(self.PRE_RANDOM)
        self.pre_random_index = 0
    
    def __init__(self, dataset):
        self.values = dataset.values()
        self.probs = np.array([len(v) for v in self.values]).astype(float)
        self.probs /= self.probs.sum()
        self._init_pre_randoms()
        
    def __call__(self, window_size):
        
        if self.pre_random_index == self.PRE_RANDOM:
            self._init_pre_randoms()
            
        w = self.values[self.pre_randomed_sentences[self.pre_random_index]]
        target = int(len(w) * self.pre_randomed_words[self.pre_random_index])
        self.pre_random_index += 1

        pairs = []

        for i in range(1, window_size + 1):
            if target + i < len(w):
                pairs.append((w[target], w[target + i]))
            if target - i >= 0:
                pairs.append((w[target], w[target - i]))

        return pairs

    4.

In [11]:
def get_target_context_minibatch(sampler, minibatch_size, window_size):
    return [
        sampler(window_size) for i in range(minibatch_size)
    ]

    5.

In [12]:
def get_parameters_update(minibatch_samples, hyperparameters, model_params):

    u_gradients = {}
    c_gradients = {}

    for samples in minibatch_samples:
        target_index = model_params.word2index(samples[0][0])

        for target, context in samples:
            context_index = model_params.word2index(context)
            negative_indeices = model_params.word2index(sample_k_words(hyperparameters.negative_words))
            
            g_t, g_c, g_j = log_prob_context_with_negatives_gradient(model_params, target_index, context_index, 
                                                                     negative_indeices)
            u_gradients[target_index] = u_gradients.get(target_index, 0) + g_t
            c_gradients[context_index] = c_gradients.get(context_index, 0) + g_c
            
            for i, index in enumerate(negative_indeices):
                c_gradients[index] = c_gradients.get(index, 0) + g_j[i].reshape(-1, 1)
                
    return u_gradients, c_gradients

    6.

In [13]:
class SGDParameters(object):
    
    def __init__(self, learning_rate=1e-3, minibatch_size=50, anealing_factor=300):
        self.learning_rate = learning_rate
        self.minibatch_size = minibatch_size
        self.anealing_factor = anealing_factor
        
    def __str__(self):
        return '\n'.join(['Initial learning rate: {}'.format(self.learning_rate),
                          'Minibatch size: {}'.format(self.minibatch_size),
                          'Anealing step size: {}'.format(self.anealing_factor)])

    7.

In [14]:
# LearnParamsUsingSGD V.1
def LearnParamsUsingSGD(trainset, hyperparameters, sgd_parameters, model_parameters):
    
    U, V = model_parameters.U, model_parameters.V
    learning_rate = sgd_parameters.learning_rate
    errors = []
    sample_target_context = TargetContextSampler(trainset)
    
    for i in range(1, hyperparameters.iterations):
        minibatch = get_target_context_minibatch(sample_target_context, sgd_parameters.minibatch_size, 
                                                 hyperparameters.window_size)
        
        u_gradients, c_gradients = get_parameters_update(minibatch, hyperparameters, model_parameters, U, V)

        if i % sgd_parameters.anealing_factor == 0:
            learning_rate /= 2.0
        
        for index, gradient in u_gradients.items():
            U[index] += (learning_rate * gradient).reshape(-1)

        for index, gradient in c_gradients.items():
            V[index] += (learning_rate * gradient).reshape(-1)
            
        U /= np.sqrt(np.sum(np.power(U, 2), axis=1)).reshape(-1, 1)
        V /= np.sqrt(np.sum(np.power(V, 2), axis=1)).reshape(-1, 1)
            
    return U, V, errors

## Part 4
    1.

In [15]:
def log_prob_full(dataset, hyperparameters, model_params):
    
    def get_next_pair():
        for _, w in dataset.iteritems():
            for target in range(len(w)):
                for i in range(1, hyperparameters.window_size + 1):
                    if target + i < len(w):
                        yield w[target], w[target + i]
                    if target - i >= 0:
                        yield w[target], w[target - i]
    
    total_log_prob = 0
    num_of_pairs = 0
    
    for target_word, context_word in get_next_pair(): 
        try:
            target_index, context_index = model_params.word2index(target_word), model_params.word2index(context_word)
        except:
            continue
            
        negative_words = model_params.word2index(sample_k_words(hyperparameters.negative_words))   
        total_log_prob += log_prob_context_with_negatives(target_index, context_index, 
                                                          negative_words, model_params)
        num_of_pairs += 1
        
    return total_log_prob, num_of_pairs

    2.

In [16]:
def log_prob_minibatch(minibatch_samples, hyperparameters, model_params):
    
    log_prob = 0
    num_of_pairs = 0
    
    for samples in minibatch_samples:
        target_index = model_params.word2index(samples[0][0])
        
        for target, context in samples:
            context_index = model_params.word2index(context)
            negative_indeices = model_params.word2index(sample_k_words(hyperparameters.negative_words))
            log_prob += log_prob_context_with_negatives(target_index, context_index, 
                                                        negative_indeices, model_params)
            num_of_pairs += 1
        
    return log_prob, num_of_pairs

    3. + 4.

In [17]:
def init_live_view():
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.ion()
    fig.show()
    fig.canvas.draw()
    return fig, ax

def update_live_view(fig, ax, errors, anealing_factor, smooth_factor=30):
    ax.clear()
    ax.plot(range(len(errors)), errors, color='orange', alpha=0.5)
    ret = np.cumsum(errors, dtype=float)
    ret[smooth_factor:] = ret[smooth_factor:] - ret[:-smooth_factor]
    rolling = ret[smooth_factor - 1:] / smooth_factor
    ax.plot(range(smooth_factor, len(rolling) + smooth_factor), rolling, color='green')
    for line in range(0, len(errors), anealing_factor)[1:]:
        plt.axvline(x=line, color='red')
    fig.canvas.draw()

#LearnParamsUsingSGD V2
def LearnParamsUsingSGD(trainset, hyperparameters, sgd_parameters, model_parameters, testset=None, liveview=False):

    U, V = model_parameters.U, model_parameters.V
    learning_rate = sgd_parameters.learning_rate
    scores_sequence = []
    sample_target_context = TargetContextSampler(trainset)
    trainset_scores, testset_scores = {}, {}
    
    if liveview:
        fig, ax = init_live_view()
    
    for i in range(1, hyperparameters.iterations):

        minibatch = get_target_context_minibatch(sample_target_context, sgd_parameters.minibatch_size, 
                                                 hyperparameters.window_size)
        u_gradients, c_gradients = get_parameters_update(minibatch, hyperparameters, model_parameters)

        if i % sgd_parameters.anealing_factor == 0:
            learning_rate /= 2.0

        for index, gradient in u_gradients.items():
            U[index] += (learning_rate * gradient).reshape(-1)
            U[index] /= np.sqrt(np.sum(np.power(U[index], 2)))

        for index, gradient in c_gradients.items():
            V[index] += (learning_rate * gradient).reshape(-1)
            V[index] /= np.sqrt(np.sum(np.power(V[index], 2)))

        score_minibatch = get_target_context_minibatch(sample_target_context, sgd_parameters.minibatch_size, 
                                                       hyperparameters.window_size)
        score, samples = log_prob_minibatch(score_minibatch, hyperparameters, model_parameters)

        trainset_scores[i] = score / samples
        scores_sequence.append(score / samples)
        
        if liveview and (i % 25 == 0):
            update_live_view(fig, ax, scores_sequence, sgd_parameters.anealing_factor)
                    
        if testset is not None and i % hyperparameters.testset_measure_iterations == 0:
            score, samples = log_prob_full(testset, hyperparameters, model_parameters)
            testset_scores[i] = score / samples
            
        try:
            open('stopFile')
            return trainset_scores, testset_scores
        except IOError:
            continue
            
    
    return trainset_scores, testset_scores

## Part 5
    1.

In [18]:
def predict_context_words(input_word, hyperparams, model_params):
    target_index = model_params.word2index(input_word)
    negative_indices = model_params.word2index(sample_k_words(hyperparameters.negative_words))
    
    top_10 = np.full(10, -np.inf)
    top_10_words = np.full(10, None)
    
    for context_word in model_params._words:
        
        if context_word == input_word:
            continue
            
        try:
            context_index = model_params.word2index(context_word)
        except KeyError:
            continue
        
        log_prob = log_prob_context_with_negatives(target_index, context_index, 
                                                   negative_indices, model_params)

        smallest = np.argmin(top_10)
        if top_10[smallest] < log_prob:
            top_10[smallest] = log_prob
            top_10_words[smallest] = context_word
            
    return top_10_words[np.argsort(-top_10)]

    2.

In [19]:
def predict_input_words(context_words, hyperparams, model_params):
    
    context_indices = model_params.word2index(context_words)
    negative_indices = model_params.word2index(sample_k_words(hyperparameters.negative_words))
    
    top_10 = np.full(10, -np.inf)
    top_10_words = np.full(10, None)
    
    for target_word in model_params._words:
        
        try:
            target_index = model_params.word2index(target_word)
        except KeyError:
            continue
            
        log_prob = 0
        
        for context_index in context_indices:
            log_prob += log_prob_context_with_negatives(target_index, context_index, 
                                                           negative_indices, model_params)

        smallest = np.argmin(top_10)
        if top_10[smallest] < log_prob:
            top_10[smallest] = log_prob
            top_10_words[smallest] = target_word
            
    return top_10_words[np.argsort(-top_10)]

    3.

In [20]:
def visualize_words(words, model_params, use_context=False, ax=None):
    indices = model_params.word2index(words)
    if use_context:
        vectors = model_params.V[indices, :2]
    else:
        vectors = model_params.U[indices, :2]
    if ax is None:
        plt.figure()
        plt.scatter(vectors[:, 0], vectors[:, 1])
        for word, vector in zip(words, vectors):
            plt.text(vector[0], vector[1], word)
    else:
        ax.scatter(vectors[:, 0], vectors[:, 1])
        for word, vector in zip(words, vectors):
            ax.text(vector[0], vector[1], word)

    4.

In [21]:
def analogy_solver(word1, word2, word3, model_parameters, use_context=False):
    index1 = model_parameters.word2index(word1)
    index2 = model_parameters.word2index(word2)
    index3 = model_parameters.word2index(word3)
    
    if use_context:
        comb = model_parameters.V[index1] - model_parameters.V[index2] + model_parameters.V[index3]
        projections = model_parameters.V.dot(comb)
    else:
        comb = model_parameters.U[index1] - model_parameters.U[index2] + model_parameters.U[index3]
        projections = model_parameters.U.dot(comb)
    
    top_10 = np.argsort(-projections)[:10]
    return model_parameters.index2word(top_10), projections[top_10]

## Part 6

    1.

In [22]:
split_parser = DatasetSplitParser('datasetSplit.txt')
dataset_holder = DatasetsHolder('datasetSentences.txt', split_parser)

    2.

In [23]:
with open('part6_config.json', 'r') as config_file:
    we_config = json.load(config_file)

hyperparameters = Hyperparameters(**we_config['hyperparameters'])
sgd_parameters = SGDParameters(**we_config['SGD_parameters'])
model_parameters = ModelParameters(hyperparameters).init(dataset_holder.get_trainset())

sample_k_words = get_words_sampler(dataset_holder.get_trainset(), hyperparameters)

    3.

In [24]:
time_before = datetime.datetime.now()
trainset_scores, testset_scores = LearnParamsUsingSGD(dataset_holder.get_trainset(), hyperparameters, 
                                                      sgd_parameters, model_parameters, 
                                                      dataset_holder.get_testset(), liveview=True)
time_after = datetime.datetime.now()

<IPython.core.display.Javascript object>

    4.

In [25]:
log_likelihood, samples = log_prob_full(dataset_holder.get_trainset(), hyperparameters, model_parameters)
final_trainset_score = log_likelihood / samples
log_likelihood, samples = log_prob_full(dataset_holder.get_testset(), hyperparameters, model_parameters)
final_testset_score = log_likelihood / samples

output_file = open('we_run_sum.txt', 'w')

output_file.write('Hyperparameters\n-------------\n' + str(hyperparameters) + '\n\n')
output_file.write('SGD parameters\n------------------\n' + str(sgd_parameters) + '\n\n')
output_file.write('Mean Log likelihood\n------------------\nTrainset: {}\nTestset: {}\n\n'.format(final_trainset_score,
                                                                                              final_testset_score))
output_file.write('\nTraining time\n--------------\n{}'.format(time_after - time_before))
output_file.close()

## Part 7

In [37]:
display(HTML('<span style="font-size: 24pt; background-color: #00ff00">Deliverable 1</span>'))

print hyperparameters
print sgd_parameters

train_x, train_y = zip(*sorted(trainset_scores.items(), key=lambda x: x[0]))
test_x, test_y = zip(*sorted(testset_scores.items(), key=lambda x: x[0]))

plt.figure()
plt.plot(train_x, train_y, color='green', label='Train log-likelihood')
plt.plot(test_x, test_y, color='red', label='Test log-likelihood')
plt.legend();

Window size: 5
Embedding vector size: 2
Negative words: 10
Iterations: 20000
Noise distribution: unigram
Random seed: 1234
Testset measure step size: 1000
Initial learning rate: 0.3
Minibatch size: 50
Anealing step size: 5000


<IPython.core.display.Javascript object>

In [27]:
with open('part7_config.json', 'r') as config_file:
    we_config = json.load(config_file)

hyperparameters = Hyperparameters(**we_config['hyperparameters'])
sgd_parameters = SGDParameters(**we_config['SGD_parameters'])
model_parameters = ModelParameters(hyperparameters).init(dataset_holder.get_trainset())

In [28]:
def worker(d):
    
    hyperparameters.vector_size = d
    model_parameters = ModelParameters(hyperparameters).init(dataset_holder.get_trainset())
    time_before = datetime.datetime.now()
    trainset_scores, testset_scores = LearnParamsUsingSGD(dataset_holder.get_trainset(), hyperparameters, 
                                                          sgd_parameters, model_parameters)
    time_after = datetime.datetime.now()
    
    log_likelihood, samples = log_prob_full(dataset_holder.get_trainset(), hyperparameters, model_parameters)
    final_trainset_score = log_likelihood / samples
    log_likelihood, samples = log_prob_full(dataset_holder.get_testset(), hyperparameters, model_parameters)
    final_testset_score = log_likelihood / samples

    return time_after - time_before, final_trainset_score, final_testset_score

pool = multiprocessing.Pool(5)

dims = np.linspace(10, 300, 5, dtype=int)
dims_results = pool.map(worker, dims)

In [33]:
display(HTML('<span style="font-size: 24pt; background-color: #00ff00">Deliverable 2</span>'))
params_string = str(hyperparameters).split('\n')
print '\n'.join([params_string[0]] + params_string[2:])
print sgd_parameters

f, axarr = plt.subplots(1, 2, figsize=(9.7, 3))

axarr[0].plot(dims.astype(str), [dims_results[i][0].total_seconds() for i in range(5)], marker='o')
axarr[0].set_title('Training time (seconds)')
axarr[0].set_xlabel('d')
axarr[0].set_xticks(dims.astype(str))

axarr[1].plot(dims.astype(str), [dims_results[i][1] for i in range(5)], marker='o', color='green', label='Train set')
axarr[1].plot(dims.astype(str), [dims_results[i][2] for i in range(5)], marker='o', color='red', label='Test set')
axarr[1].set_title('Mean Log Likelihood')
axarr[1].set_xlabel('d')
axarr[1].set_xticks(dims.astype(str))
axarr[1].legend()
plt.tight_layout(w_pad=4.0);

Window size: 5
Negative words: 10
Iterations: 20000
Noise distribution: unigram
Random seed: 1234
Testset measure step size: 1000
Initial learning rate: 0.3
Minibatch size: 50
Anealing step size: 5000


<IPython.core.display.Javascript object>

In [30]:
def worker(neg_words):
    
    hyperparameters.negative_words = neg_words
    model_parameters = ModelParameters(hyperparameters).init(dataset_holder.get_trainset())
    time_before = datetime.datetime.now()
    trainset_scores, testset_scores = LearnParamsUsingSGD(dataset_holder.get_trainset(), hyperparameters, 
                                                          sgd_parameters, model_parameters)
    time_after = datetime.datetime.now()
    
    log_likelihood, samples = log_prob_full(dataset_holder.get_trainset(), hyperparameters, model_parameters)
    final_trainset_score = log_likelihood / samples
    log_likelihood, samples = log_prob_full(dataset_holder.get_testset(), hyperparameters, model_parameters)
    final_testset_score = log_likelihood / samples

    return time_after - time_before, final_trainset_score, final_testset_score

neg_words = np.linspace(2, 100, 5, dtype=int)
nw_results = pool.map(worker, neg_words)
pool.close()
pool.join()

In [32]:
display(HTML('<span style="font-size: 24pt; background-color: #00ff00">Deliverable 3</span>'))
print hyperparameters
params_string = str(sgd_parameters).split('\n')
print '\n'.join([params_string[0]] + params_string[2:])

f, axarr = plt.subplots(1, 2, figsize=(9.7, 3))

axarr[0].plot(neg_words.astype(str), [nw_results[i][0].total_seconds() for i in range(5)], marker='o')
axarr[0].set_title('Training time (seconds)')
axarr[0].set_xlabel('Negative words')
axarr[0].set_xticks(neg_words.astype(str))

axarr[1].plot(neg_words.astype(str), [nw_results[i][1] for i in range(5)], marker='o', color='green', label='Train set')
axarr[1].plot(neg_words.astype(str), [nw_results[i][2] for i in range(5)], marker='o', color='red', label='Test set')
axarr[1].set_title('Mean Log Likelihood')
axarr[1].set_xlabel('MB size')
axarr[1].set_xticks(neg_words.astype(str))
axarr[1].legend()
plt.tight_layout(w_pad=4.0);

Window size: 5
Embedding vector size: 150
Negative words: 10
Iterations: 20000
Noise distribution: unigram
Random seed: 1234
Testset measure step size: 1000
Initial learning rate: 0.3
Anealing step size: 5000


<IPython.core.display.Javascript object>

In [34]:
display(HTML('<span style="font-size: 24pt; background-color: #00ff00">Deliverable 4</span>'))

    1.

In [35]:
words = ['good', 'bad', 'lame', 'cool', 'exciting']

for word in words:
    print 'Target: ' + word
    print 'Results: ' + ', '.join(predict_context_words(word, hyperparameters, model_parameters)) + '\n'

Target: good
Results: cogent, lull, scathing, gradually, tat, drudgery, sewing, indeed, uproariously, thrill

Target: bad
Results: stoked, sensibility, kilt, person, cliches, resist, hanks, blemishes, childlike, formulaic

Target: lame
Results: accompanying, remaining, pushiness, angry, unleashed, lifeless, humor, fatherhood, louis, crappy

Target: cool
Results: demonstrates, leavened, epiphany, astute, satisfies, these, boarders, refused, underway, gauge

Target: exciting
Results: helmer, sundance, pregnancy, every, living, poetry, grandstanding, expectation, hickenlooper, italian



    2.

In [38]:
hyperparameters.vector_size = 2
_2d_model_parameters = ModelParameters(hyperparameters).init(dataset_holder.get_trainset())
LearnParamsUsingSGD(dataset_holder.get_trainset(), hyperparameters, sgd_parameters, _2d_model_parameters)

In [39]:
print hyperparameters
print sgd_parameters

print '\nInput embeddings'
visualize_words(words, _2d_model_parameters)
visualize_words(['baby', 'face', 'walking', 'beautiful'], _2d_model_parameters, ax=plt.gca())

Window size: 5
Embedding vector size: 2
Negative words: 10
Iterations: 20000
Noise distribution: unigram
Random seed: 1234
Testset measure step size: 1000
Initial learning rate: 0.3
Minibatch size: 50
Anealing step size: 5000

Input embeddings


<IPython.core.display.Javascript object>

In [40]:
print 'Context embeddings'
visualize_words(words, _2d_model_parameters)
visualize_words(['baby', 'face', 'walking', 'beautiful'], _2d_model_parameters, use_context=True, ax=plt.gca())

Context embeddings


<IPython.core.display.Javascript object>

In [41]:
display(HTML('<span style="font-size: 24pt; background-color: #00ff00">Deliverable 5</span>'))

print hyperparameters
print str(sgd_parameters) + '\n\n'

sentences = ['the movie was surprisingly ___', '___ was really disappointing', 'knowing that she ___ was the best part']

for s in sentences:
    print 'Target: ' + s
    predictions = predict_input_words(s.replace('___', '').split(), hyperparameters, model_parameters)
    print 'Results: ' + ', '.join(predictions) + '\n'

Window size: 5
Embedding vector size: 2
Negative words: 10
Iterations: 20000
Noise distribution: unigram
Random seed: 1234
Testset measure step size: 1000
Initial learning rate: 0.3
Minibatch size: 50
Anealing step size: 5000


Target: the movie was surprisingly ___
Results: triumph, kidman, kittenish, sadism, balancing, celibacy, fils, underscoring, ong, joff

Target: ___ was really disappointing
Results: mcbeal, steamy, inspiring, doofus, tonight, methodical, supply, undertone, standout, hampered

Target: knowing that she ___ was the best part
Results: joint, introverted, doctor, witnessed, interviewees, differences, yuen, juice, folks, thriller



In [42]:
display(HTML('<span style="font-size: 24pt; background-color: #00ff00">Deliverable 6</span>'))

print hyperparameters
print sgd_parameters

triplets = [['man', 'woman', 'men'], ['good', 'great', 'bad'], ['warm', 'cold', 'summer']]

print '\n\nInput embeddings:'

for triplet in triplets:
    print 'Target: {} is to {} as {} is to:'.format(*triplet)
    results = ', '.join(analogy_solver(triplet[0], triplet[1], triplet[2], model_parameters)[0])
    print 'Results: ' + results + '\n'
    
print '\n\nContext embeddings:'

for triplet in triplets:
    print 'Target: {} is to {} as {} is to:'.format(*triplet)
    results = ', '.join(analogy_solver(triplet[0], triplet[1], triplet[2], model_parameters, use_context=True)[0])
    print 'Results: ' + results + '\n'

Window size: 5
Embedding vector size: 2
Negative words: 10
Iterations: 20000
Noise distribution: unigram
Random seed: 1234
Testset measure step size: 1000
Initial learning rate: 0.3
Minibatch size: 50
Anealing step size: 5000


Input embeddings:
Target: man is to woman as men is to:
Results: man, men, horizons, pour, forgive, measure, preceded, patronising, combination, manifestation

Target: good is to great as bad is to:
Results: bad, good, shifting, aplomb, fanatics, zinger, impression, april, sob, insult

Target: warm is to cold as summer is to:
Results: warm, summer, tracks, labor, 15th, fondly, apart, newness, scripts, overrun



Context embeddings:
Target: man is to woman as men is to:
Results: man, men, disapproval, sneeze, horribly, luis, rank, properties, establishment, highways

Target: good is to great as bad is to:
Results: good, bad, bike, swooning, change, administration, surf, liberation, underscoring, sermon

Target: warm is to cold as summer is to:
Results: warm, summ