In [1]:
from utils.utils import dump
import matplotlib
import os
matplotlib.use('agg')
import matplotlib.pyplot as plt
import time
import numpy as np
import random
from utils.gradcheck import gradcheck_naive
from utils.utils import normalizeRows, softmax
from utils.sanity_checks import *
from utils.treebank import StanfordSentiment
import pickle
import glob
import os.path as op

In [2]:
def sigmoid(x):
    """Compute the logistic sigmoid 1 / (1 + exp(-x)) element-wise.

    The value is evaluated through exp(-|x|), which never exceeds 1, so
    large-magnitude inputs cannot overflow np.exp (the direct formula
    emits a RuntimeWarning for strongly negative x).

    Arguments:
    x -- a scalar or a numpy array

    Return:
    s -- sigmoid(x); a numpy scalar for scalar input, otherwise an array
         with the same shape as x
    """
    ### YOUR CODE HERE
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always within [0, 1]
    # x >= 0: 1 / (1 + e^-x);  x < 0: the algebraically equal e^x / (1 + e^x).
    s = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    if s.ndim == 0:
        s = s[()]  # hand scalars back as scalars rather than 0-d arrays
    ### END YOUR CODE
    return s

In [3]:
def naiveSoftmaxLossAndGradient(
        centerWordVec,
        outsideWordIdx,
        outsideVectors,
        dataset
):
    """Naive softmax loss and gradients for one (center, outside) word pair.

    The scores are shifted by their maximum before exponentiation so that
    large dot products cannot overflow np.exp and turn the loss into nan.

    Arguments:
    centerWordVec -- 1-D vector of the center word
    outsideWordIdx -- row index (into outsideVectors) of the true outside word
    outsideVectors -- 2-D array with one outside vector per row
    dataset -- unused here; kept so all loss functions share one signature

    Return:
    loss -- -log softmax probability of the true outside word
    gradCenterVec -- gradient w.r.t. centerWordVec (same length as it)
    gradOutsideVecs -- gradient w.r.t. outsideVectors (same shape as it)
    """
    ### YOUR CODE HERE
    scores = np.dot(outsideVectors, centerWordVec)
    shifted = scores - np.max(scores)  # softmax is invariant to this shift
    expScores = np.exp(shifted)
    normalizer = np.sum(expScores)

    # -log(exp(s_o) / sum_w exp(s_w)) written as a log-sum-exp.
    loss = np.log(normalizer) - shifted[outsideWordIdx]

    # delta = predicted distribution minus the one-hot target.
    delta = expScores / normalizer
    delta[outsideWordIdx] -= 1

    gradCenterVec = np.dot(delta, outsideVectors)
    gradOutsideVecs = np.outer(delta, centerWordVec)
    ### END YOUR CODE
    return loss, gradCenterVec, gradOutsideVecs

In [4]:
def getNegativeSamples(outsideWordIdx, dataset, K):
    """ Draw K token indexes from the dataset, rejecting outsideWordIdx.

    Indexes come from dataset.sampleTokenIdx(); any draw equal to
    outsideWordIdx is discarded and redrawn. Duplicates among the
    returned indexes are allowed.
    """
    accepted = []
    while len(accepted) < K:
        candidate = dataset.sampleTokenIdx()
        if candidate != outsideWordIdx:
            accepted.append(candidate)
    return accepted

In [5]:
def negSamplingLossAndGradient(
        centerWordVec,
        outsideWordIdx,
        outsideVectors,
        dataset,
        K=10
):
    """ Negative sampling loss and gradients for one (center, outside) pair

    Arguments:
    centerWordVec -- 1-D vector of the center word
    outsideWordIdx -- row index (into outsideVectors) of the true outside word
    outsideVectors -- 2-D array with one outside vector per row
    dataset -- object handed to getNegativeSamples to draw negative indexes
    K -- the number of negative samples to take

    Return:
    loss -- negative sampling loss
    gradCenterVec -- gradient w.r.t. centerWordVec
    gradOutsideVecs -- gradient w.r.t. outsideVectors (same shape as it);
                       only rows of sampled words are non-zero
    """

    # The true outside word comes first, followed by the K negatives.
    sampleIdx = [outsideWordIdx] + getNegativeSamples(outsideWordIdx, dataset, K)

    # +1 for the true word, -1 for every negative sample.
    signs = np.array([1] + [-1] * K)
    sampledVecs = outsideVectors[sampleIdx, :]

    probs = sigmoid(sampledVecs.dot(centerWordVec) * signs)
    loss = -np.sum(np.log(probs))

    # d(-log sigmoid(sign * u.v)) / d(u.v) for each sampled word.
    coeff = signs * (probs - 1)
    gradCenterVec = coeff.reshape((1, K + 1)).dot(sampledVecs).flatten()

    # A word may be sampled more than once, so its row is accumulated
    # rather than assigned.
    perSampleGrad = np.outer(coeff, centerWordVec)
    gradOutsideVecs = np.zeros(outsideVectors.shape)
    for row, wordIdx in enumerate(sampleIdx):
        gradOutsideVecs[wordIdx] += perSampleGrad[row, :]

    return loss, gradCenterVec, gradOutsideVecs

In [6]:
def skipgram(currentCenterWord, windowSize, outsideWords, word2Ind,
             centerWordVectors, outsideVectors, dataset,
             word2vecLossAndGradient=negSamplingLossAndGradient):
    """ Skip-gram loss and gradients for one center word and its context

    Arguments:
    currentCenterWord -- the center word (a key of word2Ind)
    windowSize -- not used by this implementation
    outsideWords -- iterable of context words (keys of word2Ind)
    word2Ind -- mapping from word to row index
    centerWordVectors -- 2-D array of center vectors, one per row
    outsideVectors -- 2-D array of outside vectors, one per row
    dataset -- passed through to word2vecLossAndGradient
    word2vecLossAndGradient -- per-pair loss function returning
                               (loss, gradCenterVec, gradOutsideVecs)

    Return:
    loss -- sum of the per-pair losses
    gradCenterVecs -- gradient w.r.t. centerWordVectors; only the row of
                      the center word is non-zero
    gradOutsideVectors -- gradient w.r.t. outsideVectors
    """
    loss = 0.0
    gradCenterVecs = np.zeros(centerWordVectors.shape)
    gradOutsideVectors = np.zeros(outsideVectors.shape)
    ### YOUR CODE HERE
    centerIdx = word2Ind[currentCenterWord]
    centerWordVec = centerWordVectors[centerIdx]
    for outsideWord in outsideWords:
        pairLoss, pairGradCenter, pairGradOutside = word2vecLossAndGradient(
            centerWordVec, word2Ind[outsideWord], outsideVectors, dataset)
        loss = loss + pairLoss
        gradCenterVecs[centerIdx] = gradCenterVecs[centerIdx] + pairGradCenter
        gradOutsideVectors = gradOutsideVectors + pairGradOutside
    ### END YOUR CODE

    return loss, gradCenterVecs, gradOutsideVectors

In [7]:
def word2vec_sgd_wrapper(word2vecModel, word2Ind, wordVectors, dataset,
                         windowSize,
                         word2vecLossAndGradient=negSamplingLossAndGradient):
    """ Average loss and gradient of a word2vec model over a random batch

    wordVectors stacks the center vectors (first half of the rows) on top
    of the outside vectors (second half). For each of 50 batch entries a
    window size is drawn uniformly from 1..windowSize, a random context is
    requested from the dataset, and word2vecModel is evaluated on it.

    Return:
    loss -- batch-averaged loss
    grad -- batch-averaged gradient, same shape and layout as wordVectors
    """
    batchsize = 50
    half = int(wordVectors.shape[0] / 2)
    centerWordVectors = wordVectors[:half, :]
    outsideVectors = wordVectors[half:, :]

    loss = 0.0
    grad = np.zeros(wordVectors.shape)
    for _ in range(batchsize):
        sampledWindow = random.randint(1, windowSize)
        centerWord, context = dataset.getRandomContext(sampledWindow)

        batchLoss, gradCenter, gradOutside = word2vecModel(
            centerWord, sampledWindow, context, word2Ind, centerWordVectors,
            outsideVectors, dataset, word2vecLossAndGradient
        )
        loss += batchLoss / batchsize
        grad[:half, :] += gradCenter / batchsize
        grad[half:, :] += gradOutside / batchsize

    return loss, grad

In [8]:
def test_word2vec():
    """ Check skip-gram with the naive softmax loss on the dummy dataset.

    Runs a numerical gradient check through word2vec_sgd_wrapper, then
    prints the loss and gradients of a single skipgram call next to the
    expected values.

    NOTE(review): dummy, inputs and outputs are not defined in this
    notebook; presumably they come from `from utils.sanity_checks import *`
    -- confirm.
    """
    dataset, dummy_vectors, dummy_tokens = dummy()

    print("==== Gradient check for skip-gram with naiveSoftmaxLossAndGradient ====")
    # The heading and the label both name naiveSoftmaxLossAndGradient, so
    # that is the loss that has to be handed to the wrapper here.
    gradcheck_naive(lambda vec: word2vec_sgd_wrapper(
        skipgram, dummy_tokens, vec, dataset, 5, naiveSoftmaxLossAndGradient),
                    dummy_vectors, "naiveSoftmaxLossAndGradient Gradient")

    print("\n\n\t\t\tSkip-Gram with naiveSoftmaxLossAndGradient\t\t\t")

    print("\nYour Result:")
    # Rows 0-4 of dummy_vectors are used as center vectors, the remaining
    # rows as outside vectors.
    loss, dj_dv, dj_du = skipgram(inputs['test_word2vec']['currentCenterWord'], inputs['test_word2vec']['windowSize'],
                                  inputs['test_word2vec']['outsideWords'],
                                  dummy_tokens, dummy_vectors[:5, :], dummy_vectors[5:, :], dataset,
                                  naiveSoftmaxLossAndGradient)
    print(
        "Loss: {}\nGradient wrt Center Vectors (dJ/dV):\n {}\nGradient wrt Outside Vectors (dJ/dU):\n {}\n".format(
            loss, dj_dv, dj_du))

    print("Expected Result: Value should approximate these:")
    print(
        "Loss: {}\nGradient wrt Center Vectors (dJ/dV):\n {}\nGradient wrt Outside Vectors (dJ/dU):\n {}\n".format(
            outputs['test_word2vec']['loss'],
            outputs['test_word2vec']['dj_dv'],
            outputs['test_word2vec']['dj_du']))

In [9]:
def test_naiveSoftmaxLossAndGradient():
    """ Print naiveSoftmaxLossAndGradient results next to the expected ones.

    NOTE(review): dummy, inputs and outputs are not defined in this
    notebook; presumably they come from `from utils.sanity_checks import *`
    -- confirm.
    """
    print("\n\n\t\t\tNaiveSoftmaxLossAndGradient\t\t\t")

    dataset, dummy_vectors, dummy_tokens = dummy()

    print("\nYour Result:")
    case = inputs['test_naivesoftmax']
    loss, dj_dv, dj_du = naiveSoftmaxLossAndGradient(
        case['centerWordVec'], case['outsideWordIdx'], case['outsideVectors'], dataset)

    actualReport = "Loss: {}\nGradient wrt Center Vector (dJ/dV):\n {}\nGradient wrt Outside Vectors (dJ/dU):\n {}\n"
    print(actualReport.format(loss, dj_dv, dj_du))

    print("Expected Result: Value should approximate these:")
    expected = outputs['test_naivesoftmax']
    expectedReport = "Loss: {}\nGradient wrt Center Vectors(dJ/dV):\n {}\nGradient wrt Outside Vectors (dJ/dU):\n {}\n"
    print(expectedReport.format(expected['loss'], expected['dj_dvc'], expected['dj_du']))

In [10]:
def test_sigmoid():
    """ Print sigmoid's output for the reference input next to the expected one.

    NOTE(review): inputs and outputs are not defined in this notebook;
    presumably they come from `from utils.sanity_checks import *` -- confirm.
    """
    print("\n\n\t\t\ttest sigmoid\t\t\t")

    actual = sigmoid(inputs['test_sigmoid']['x'])

    print("\nYour Result:")
    print(actual)
    print("Expected Result: Value should approximate these:")
    print(outputs['test_sigmoid']['s'])


In [11]:
# Number of SGD iterations between parameter checkpoints; sgd() only
# writes checkpoints when it is called with useSaved=True.
SAVE_PARAMS_EVERY = 5000
def load_saved_params():
    """
    Load the most recent checkpoint from the current working directory.

    Checkpoints are files named "saved_params_<iteration>.npy"; the one
    with the highest iteration number wins, and its companion
    "saved_state_<iteration>.pickle" is loaded alongside it.

    Return:
    (iteration, params, state) for the newest checkpoint, or
    (0, None, None) when no checkpoint file is found.
    """
    latest = 0
    for path in glob.glob("saved_params_*.npy"):
        stem = op.splitext(op.basename(path))[0]
        latest = max(latest, int(stem.split("_")[2]))

    if latest <= 0:
        return latest, None, None

    params = np.load("saved_params_%d.npy" % latest)
    # NOTE: pickle.load can execute arbitrary code; only point this at
    # checkpoint files you created yourself.
    with open("saved_state_%d.pickle" % latest, "rb") as handle:
        state = pickle.load(handle)
    return latest, params, state
def save_params(iter, params):
    """
    Write a checkpoint for iteration `iter` into the working directory.

    params goes to "saved_params_<iter>.npy" and the current state of
    Python's `random` module to "saved_state_<iter>.pickle".
    """
    np.save("saved_params_%d.npy" % iter, params)
    rngState = random.getstate()
    with open("saved_state_%d.pickle" % iter, "wb") as handle:
        pickle.dump(rngState, handle)

In [12]:
def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False,
        PRINT_EVERY=10):
    """ Stochastic Gradient Descent

    Arguments:
    f -- the function to optimize, it should take a single
         argument and yield two outputs, a loss and the gradient
         with respect to the arguments
    x0 -- the initial point to start SGD from
    step -- the step size for SGD
    iterations -- total iterations to run SGD for
    postprocessing -- postprocessing function for the parameters
                      if necessary. In the case of word2vec we will need to
                      normalize the word vectors to have unit length.
    useSaved -- when true, resume from the newest checkpoint found by
                load_saved_params() and write a checkpoint every
                SAVE_PARAMS_EVERY iterations
    PRINT_EVERY -- specifies how many iterations to output loss

    Return:
    x -- the parameter value after SGD finishes
    """

    # Anneal learning rate every several iterations
    ANNEAL_EVERY = 20000

    if useSaved:
        start_iter, oldx, state = load_saved_params()
        if start_iter > 0:
            x0 = oldx
            # Integer division: the loop below halves the step once per
            # completed ANNEAL_EVERY iterations, so a resumed run must apply
            # whole halvings only. True division would apply fractional ones.
            step *= 0.5 ** (start_iter // ANNEAL_EVERY)

        if state:
            random.setstate(state)
    else:
        start_iter = 0

    x = x0

    if not postprocessing:
        postprocessing = lambda params: params

    # Exponentially smoothed loss; only updated on iterations that print.
    exploss = None

    for it in range(start_iter + 1, iterations + 1):
        ### YOUR CODE HERE
        loss, x_grad = f(x)
        x = x - step * x_grad
        ### END YOUR CODE

        x = postprocessing(x)
        if it % PRINT_EVERY == 0:
            # `is None` rather than truthiness: a smoothed loss of exactly
            # 0.0 is a legitimate value and must not restart the average.
            if exploss is None:
                exploss = loss
            else:
                exploss = .95 * exploss + .05 * loss
            print("iter %d: %f" % (it, exploss))

        if useSaved and it % SAVE_PARAMS_EVERY == 0:
            save_params(it, x)

        if it % ANNEAL_EVERY == 0:
            step *= 0.5

    return x

In [13]:
def test_sgd():
    """ Sanity-check sgd on f(x) = sum(x^2) from three scalar start points.

    Each run uses step 0.01 for 1000 iterations and must end within 1e-6
    of the minimum at 0.
    """
    def quad(x):
        # loss and its gradient
        return np.sum(x ** 2), x * 2

    print("Running sanity checks...")
    for number, start in enumerate((0.5, 0.0, -1.5), start=1):
        result = sgd(quad, start, 0.01, 1000, PRINT_EVERY=100)
        print("test %d result:" % number, result)
        assert abs(result) <= 1e-6

    divider = "-" * 40
    print(divider)
    print("ALL TESTS PASSED")
    print(divider)

In [None]:
# End-to-end run: build the dataset, train the word vectors with SGD,
# export a sample of them and plot a 2-D projection.
random.seed(314)
dataset = StanfordSentiment()
tokens = dataset.tokens()
nWords = len(tokens)

# We are going to train 10-dimensional vectors for this assignment
dimVectors = 10

# Context size
C = 5

# Reset the random seed to make sure that everyone gets the same results
random.seed(31415)
np.random.seed(9265)

startTime = time.time()
# First nWords rows (center vectors): small random values;
# last nWords rows (outside vectors): zeros.
wordVectors = np.concatenate(
    ((np.random.rand(nWords, dimVectors) - 0.5) /
     dimVectors, np.zeros((nWords, dimVectors))),
    axis=0)
wordVectors = sgd(
    lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C,
                                     negSamplingLossAndGradient),
    wordVectors, 0.3, 40000, None, False, PRINT_EVERY=10)
# Note that normalization is not called here. This is not a bug,
# normalizing during training loses the notion of length.

print("sanity check: cost at convergence should be around or below 10")
print("training took %d seconds" % (time.time() - startTime))

# concatenate the input and output word vectors
wordVectors = np.concatenate(
    (wordVectors[:nWords, :], wordVectors[nWords:, :]),
    axis=0)

visualizeWords = [
    "great", "cool", "brilliant", "wonderful", "well", "amazing",
    "worth", "sweet", "enjoyable", "boring", "bad", "dumb",
    "annoying", "female", "male", "queen", "king", "man", "woman", "rain", "snow",
    "hail", "coffee", "tea"]

# dimensionality reduction
visualizeIdx = [tokens[word] for word in visualizeWords]
visualizeVecs = wordVectors[visualizeIdx, :]

# save word vectors for evaluation
sampleVectors = {word: list(vec) for word, vec in zip(visualizeWords, visualizeVecs)}
# __file__ is not defined inside a notebook kernel (it raises NameError),
# so fall back to the current working directory when it is missing.
if "__file__" in globals():
    sampleVectorsDir = os.path.dirname(os.path.realpath(__file__))
else:
    sampleVectorsDir = os.getcwd()
sampleVectorsPath = os.path.join(sampleVectorsDir, "sampleVectors.json")
dump(sampleVectors, sampleVectorsPath)

# Center the selected vectors and project them onto the first two
# singular directions of their covariance matrix.
temp = (visualizeVecs - np.mean(visualizeVecs, axis=0))
covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp)
U, S, V = np.linalg.svd(covariance)
coord = temp.dot(U[:, 0:2])

for i in range(len(visualizeWords)):
    plt.text(coord[i, 0], coord[i, 1], visualizeWords[i],
             bbox=dict(facecolor='green', alpha=0.1))

plt.xlim((np.min(coord[:, 0]), np.max(coord[:, 0])))
plt.ylim((np.min(coord[:, 1]), np.max(coord[:, 1])))

plt.savefig('word_vectors.png')

In [14]:
# Seed Python's RNG before the dataset is built
# (presumably so that dataset construction is reproducible -- confirm).
random.seed(314)
dataset = StanfordSentiment()
# tokens is indexed by word to obtain a row index (see visualizeIdx later).
tokens = dataset.tokens()
nWords = len(tokens)

# We are going to train 10-dimensional vectors for this assignment
dimVectors = 10

# Context size
C = 5

# Reset the random seed to make sure that everyone gets the same results
random.seed(31415)
np.random.seed(9265)

startTime = time.time()
# Initial parameters, 2 * nWords rows: the first nWords rows are uniform
# random values in [-0.5, 0.5) divided by dimVectors, the last nWords rows
# are zeros. word2vec_sgd_wrapper reads the first half as center vectors
# and the second half as outside vectors.
wordVectors = np.concatenate(
    ((np.random.rand(nWords, dimVectors) - 0.5) /
     dimVectors, np.zeros((nWords, dimVectors))),
    axis=0)

iter 10: 19.061546
iter 20: 19.114918
iter 30: 19.287617
iter 40: 19.352559
iter 50: 19.559119
iter 60: 19.717223
iter 70: 19.829304
iter 80: 19.829040
iter 90: 19.912663
iter 100: 19.847231
iter 110: 19.945194
iter 120: 20.068754
iter 130: 20.132756
iter 140: 20.246932
iter 150: 20.248652
iter 160: 20.181659
iter 170: 20.415366
iter 180: 20.523050
iter 190: 20.663462
iter 200: 20.735856
iter 210: 20.850365
iter 220: 20.768547
iter 230: 20.881432
iter 240: 20.874317
iter 250: 20.913310
iter 260: 21.095210
iter 270: 21.252756
iter 280: 21.158454
iter 290: 21.122248
iter 300: 21.194580
iter 310: 21.110808
iter 320: 21.122705
iter 330: 21.156881
iter 340: 21.135975
iter 350: 21.253348
iter 360: 21.273378
iter 370: 21.261887
iter 380: 21.159479
iter 390: 21.123175
iter 400: 21.256452
iter 410: 21.222920
iter 420: 21.305461
iter 430: 21.338125
iter 440: 21.254769
iter 450: 21.099340
iter 460: 21.149976
iter 470: 21.098907
iter 480: 21.073269
iter 490: 21.109938
iter 500: 20.961747
iter 510:

iter 3060: 21.367370
iter 3070: 21.410138
iter 3080: 21.381809
iter 3090: 21.402270
iter 3100: 21.509908
iter 3110: 21.423881
iter 3120: 21.341095
iter 3130: 21.282033
iter 3140: 21.283461
iter 3150: 21.134272
iter 3160: 21.216532
iter 3170: 21.134797
iter 3180: 21.047679
iter 3190: 21.208647
iter 3200: 21.045992
iter 3210: 21.067789
iter 3220: 21.164562
iter 3230: 21.170017
iter 3240: 21.130713
iter 3250: 21.082517
iter 3260: 20.985269
iter 3270: 21.004326
iter 3280: 20.989311
iter 3290: 21.077936
iter 3300: 20.966762
iter 3310: 20.849939
iter 3320: 20.830807
iter 3330: 20.717941
iter 3340: 20.628101
iter 3350: 20.526807
iter 3360: 20.609988
iter 3370: 20.484251
iter 3380: 20.414710
iter 3390: 20.372061
iter 3400: 20.331172
iter 3410: 20.328658
iter 3420: 20.271300
iter 3430: 20.216186
iter 3440: 20.133355
iter 3450: 20.056320
iter 3460: 20.009491
iter 3470: 20.062855
iter 3480: 19.846632
iter 3490: 19.770918
iter 3500: 19.747809
iter 3510: 19.652277
iter 3520: 19.677894
iter 3530: 19

iter 6200: 16.177082
iter 6210: 16.122854
iter 6220: 16.147584
iter 6230: 16.302615
iter 6240: 16.400436
iter 6250: 16.431727
iter 6260: 16.331609
iter 6270: 16.285793
iter 6280: 16.097959
iter 6290: 15.969284
iter 6300: 15.991517
iter 6310: 16.116898
iter 6320: 16.072159
iter 6330: 16.170192
iter 6340: 16.262213
iter 6350: 16.134034
iter 6360: 15.961197
iter 6370: 15.896144
iter 6380: 15.888060
iter 6390: 15.845998
iter 6400: 15.988454
iter 6410: 16.020196
iter 6420: 16.035146
iter 6430: 15.973863
iter 6440: 15.864937
iter 6450: 15.849172
iter 6460: 15.812106
iter 6470: 15.873414
iter 6480: 15.912097
iter 6490: 15.853445
iter 6500: 15.828080
iter 6510: 15.820924
iter 6520: 15.853582
iter 6530: 15.939312
iter 6540: 15.963572
iter 6550: 15.821039
iter 6560: 15.782995
iter 6570: 15.856739
iter 6580: 15.768044
iter 6590: 15.764944
iter 6600: 15.670133
iter 6610: 15.658949
iter 6620: 15.611600
iter 6630: 15.622309
iter 6640: 15.503629
iter 6650: 15.466772
iter 6660: 15.520101
iter 6670: 15

iter 9400: 13.242761
iter 9410: 13.299682
iter 9420: 13.211603
iter 9430: 13.168619
iter 9440: 13.147386
iter 9450: 13.085666
iter 9460: 13.150294
iter 9470: 13.162495
iter 9480: 13.082781
iter 9490: 13.023656
iter 9500: 12.972455
iter 9510: 12.980876
iter 9520: 12.889604
iter 9530: 12.844160
iter 9540: 12.852666
iter 9550: 12.892543
iter 9560: 12.900415
iter 9570: 12.848655
iter 9580: 12.916600
iter 9590: 12.870902
iter 9600: 12.869807
iter 9610: 12.812268
iter 9620: 12.830853
iter 9630: 12.829949
iter 9640: 12.877448
iter 9650: 12.857354
iter 9660: 13.016466
iter 9670: 12.994752
iter 9680: 12.975739
iter 9690: 12.950049
iter 9700: 12.905586
iter 9710: 12.988577
iter 9720: 13.031600
iter 9730: 13.035974
iter 9740: 13.082543
iter 9750: 13.091789
iter 9760: 13.171394
iter 9770: 13.158308
iter 9780: 13.159873
iter 9790: 13.139368
iter 9800: 13.131251
iter 9810: 13.071897
iter 9820: 13.001469
iter 9830: 13.071962
iter 9840: 13.102107
iter 9850: 13.070144
iter 9860: 12.990161
iter 9870: 12

iter 12490: 11.846160
iter 12500: 11.827634
iter 12510: 11.793803
iter 12520: 11.823780
iter 12530: 11.768964
iter 12540: 11.736616
iter 12550: 11.764985
iter 12560: 11.738872
iter 12570: 11.703946
iter 12580: 11.764580
iter 12590: 11.774533
iter 12600: 11.709750
iter 12610: 11.682087
iter 12620: 11.627359
iter 12630: 11.656120
iter 12640: 11.603745
iter 12650: 11.695614
iter 12660: 11.710757
iter 12670: 11.785674
iter 12680: 11.731543
iter 12690: 11.747801
iter 12700: 11.743457
iter 12710: 11.835456
iter 12720: 11.939211
iter 12730: 11.866854
iter 12740: 11.803601
iter 12750: 11.823803
iter 12760: 11.831188
iter 12770: 11.803267
iter 12780: 11.764805
iter 12790: 11.869343
iter 12800: 11.895515
iter 12810: 11.831000
iter 12820: 11.871203
iter 12830: 11.879410
iter 12840: 11.932970
iter 12850: 11.876074
iter 12860: 11.882162
iter 12870: 11.958360
iter 12880: 11.915339
iter 12890: 11.878013
iter 12900: 11.844269
iter 12910: 11.846751
iter 12920: 11.771793
iter 12930: 11.836419
iter 12940

iter 15550: 10.910265
iter 15560: 10.983545
iter 15570: 10.903860
iter 15580: 10.897855
iter 15590: 10.960676
iter 15600: 10.935934
iter 15610: 10.963800
iter 15620: 10.992418
iter 15630: 10.938083
iter 15640: 10.921876
iter 15650: 10.812939
iter 15660: 10.826702
iter 15670: 10.850062
iter 15680: 10.840566
iter 15690: 10.870342
iter 15700: 10.858605
iter 15710: 10.849078
iter 15720: 10.939809
iter 15730: 10.966027
iter 15740: 10.984399
iter 15750: 10.960806
iter 15760: 10.961808
iter 15770: 10.935389
iter 15780: 10.894650
iter 15790: 10.898428
iter 15800: 10.874112
iter 15810: 10.792540
iter 15820: 10.731643
iter 15830: 10.739324
iter 15840: 10.673469
iter 15850: 10.703380
iter 15860: 10.705626
iter 15870: 10.645290
iter 15880: 10.684257
iter 15890: 10.673429
iter 15900: 10.712384
iter 15910: 10.622618
iter 15920: 10.718452
iter 15930: 10.718642
iter 15940: 10.739642
iter 15950: 10.654559
iter 15960: 10.602338
iter 15970: 10.589666
iter 15980: 10.595774
iter 15990: 10.584704
[[ 0.59212

iter 18610: 10.498471
iter 18620: 10.428483
iter 18630: 10.381970
iter 18640: 10.356594
iter 18650: 10.331526
iter 18660: 10.381004
iter 18670: 10.382330
iter 18680: 10.444123
iter 18690: 10.382676
iter 18700: 10.367067
iter 18710: 10.320423
iter 18720: 10.360283
iter 18730: 10.291840
iter 18740: 10.350049
iter 18750: 10.316944
iter 18760: 10.265100
iter 18770: 10.325933
iter 18780: 10.287478
iter 18790: 10.227547
iter 18800: 10.193541
iter 18810: 10.254885
iter 18820: 10.231311
iter 18830: 10.236028
iter 18840: 10.255225
iter 18850: 10.215109
iter 18860: 10.210930
iter 18870: 10.201163
iter 18880: 10.302639
iter 18890: 10.277608
iter 18900: 10.255025
iter 18910: 10.198850
iter 18920: 10.195758
iter 18930: 10.224464
iter 18940: 10.236278
iter 18950: 10.209815
iter 18960: 10.227065
iter 18970: 10.218330
iter 18980: 10.194948
iter 18990: 10.213101
[[ 0.56590189  0.07645212  0.26160112 ... -0.27665691  0.57804808
   0.23507988]
 [ 0.55880663  0.14784434  0.25434298 ... -0.29711228  0.7032

iter 21680: 9.945684
iter 21690: 9.910217
iter 21700: 9.894320
iter 21710: 9.887801
iter 21720: 9.927348
iter 21730: 9.886197
iter 21740: 9.952748
iter 21750: 9.894303
iter 21760: 9.915757
iter 21770: 9.897814
iter 21780: 9.841096
iter 21790: 9.807236
iter 21800: 9.798193
iter 21810: 9.804387
iter 21820: 9.793436
iter 21830: 9.852554
iter 21840: 9.877388
iter 21850: 9.862343
iter 21860: 9.901064
iter 21870: 9.845413
iter 21880: 9.821364
iter 21890: 9.758349
iter 21900: 9.686556
iter 21910: 9.715170
iter 21920: 9.790435
iter 21930: 9.796246
iter 21940: 9.750692
iter 21950: 9.759031
iter 21960: 9.843027
iter 21970: 9.802410
iter 21980: 9.798341
iter 21990: 9.769322
[[ 0.56579213  0.06921038  0.26557809 ... -0.26675856  0.56329361
   0.2386592 ]
 [ 0.56541159  0.14956474  0.25753752 ... -0.30014692  0.71124047
   0.21846888]
 [ 0.61332488  0.07456604  0.22785229 ... -0.23657339  0.58009415
   0.20965054]
 ...
 [-0.20807307 -0.05393354 -0.07789413 ...  0.10681562 -0.24246751
  -0.07406313]

iter 24820: 9.768023
iter 24830: 9.824628
iter 24840: 9.860501
iter 24850: 9.902316
iter 24860: 9.883282
iter 24870: 9.855072
iter 24880: 9.885527
iter 24890: 9.872484
iter 24900: 9.803201
iter 24910: 9.790083
iter 24920: 9.859416
iter 24930: 9.865080
iter 24940: 9.809012
iter 24950: 9.767612
iter 24960: 9.777243
iter 24970: 9.804194
iter 24980: 9.843197
iter 24990: 9.851791
[[ 0.5691498   0.06529082  0.2670918  ... -0.26126559  0.55768686
   0.24204292]
 [ 0.5588936   0.14763459  0.2554197  ... -0.29664561  0.70360201
   0.21659987]
 [ 0.60822447  0.06847227  0.22975669 ... -0.22436913  0.56244005
   0.21014785]
 ...
 [-0.22735964 -0.05909589 -0.08466694 ...  0.11611698 -0.26496465
  -0.08184207]
 [-0.2534343  -0.07423024 -0.09836459 ...  0.13432117 -0.29847129
  -0.09081062]
 [-0.31954601 -0.08533129 -0.12592492 ...  0.16574496 -0.37635678
  -0.11745906]]
iter 25000: 9.896548
iter 25010: 9.940449
iter 25020: 10.018084
iter 25030: 10.009993
iter 25040: 10.009807
iter 25050: 9.986247
i

iter 28000: 9.784685
iter 28010: 9.772095
iter 28020: 9.794935
iter 28030: 9.875667
iter 28040: 9.782918
iter 28050: 9.767359
iter 28060: 9.732926
iter 28070: 9.739790
iter 28080: 9.715624
iter 28090: 9.623164
iter 28100: 9.637900
iter 28110: 9.619319
iter 28120: 9.663998
iter 28130: 9.662950
iter 28140: 9.644830
iter 28150: 9.600046
iter 28160: 9.656194
iter 28170: 9.627715
iter 28180: 9.732849
iter 28190: 9.756233
iter 28200: 9.791321
iter 28210: 9.822498
iter 28220: 9.721829
iter 28230: 9.769154
iter 28240: 9.819115
iter 28250: 9.783840
iter 28260: 9.800020
iter 28270: 9.889811
iter 28280: 9.943421
iter 28290: 9.938865
iter 28300: 9.966225
iter 28310: 9.970908
iter 28320: 10.017697
iter 28330: 10.017172
iter 28340: 9.983947
iter 28350: 9.953482
iter 28360: 10.021356
iter 28370: 9.979310
iter 28380: 9.925367
iter 28390: 9.900319
iter 28400: 9.878425
iter 28410: 9.791782
iter 28420: 9.784111
iter 28430: 9.769669
iter 28440: 9.791412
iter 28450: 9.778501
iter 28460: 9.766012
iter 28470

iter 31200: 9.712169
iter 31210: 9.672233
iter 31220: 9.604361
iter 31230: 9.545813
iter 31240: 9.613582
iter 31250: 9.623392
iter 31260: 9.650818
iter 31270: 9.672344
iter 31280: 9.698848
iter 31290: 9.654129
iter 31300: 9.644513
iter 31310: 9.594963
iter 31320: 9.596025
iter 31330: 9.584926
iter 31340: 9.617407
iter 31350: 9.557358
iter 31360: 9.549172
iter 31370: 9.609123
iter 31380: 9.583316
iter 31390: 9.612159
iter 31400: 9.576606
iter 31410: 9.514621
iter 31420: 9.502060
iter 31430: 9.507680
iter 31440: 9.501095
iter 31450: 9.467095
iter 31460: 9.546767
iter 31470: 9.482896
iter 31480: 9.465170
iter 31490: 9.444516
iter 31500: 9.506010
iter 31510: 9.551142
iter 31520: 9.591836
iter 31530: 9.505640
iter 31540: 9.535180
iter 31550: 9.482541
iter 31560: 9.479580
iter 31570: 9.486627
iter 31580: 9.560189
iter 31590: 9.509335
iter 31600: 9.591306
iter 31610: 9.597255
iter 31620: 9.578286
iter 31630: 9.586702
iter 31640: 9.555876
iter 31650: 9.503932
iter 31660: 9.578945
iter 31670: 9

iter 34400: 9.827288
iter 34410: 9.832878
iter 34420: 9.838322
iter 34430: 9.805798
iter 34440: 9.849621
iter 34450: 9.902636
iter 34460: 9.846691
iter 34470: 9.825699
iter 34480: 9.795604
iter 34490: 9.781015
iter 34500: 9.785536
iter 34510: 9.771259
iter 34520: 9.795453
iter 34530: 9.789280
iter 34540: 9.836934
iter 34550: 9.869771
iter 34560: 9.782541
iter 34570: 9.811029
iter 34580: 9.872583
iter 34590: 9.907793
iter 34600: 9.925262
iter 34610: 9.914598
iter 34620: 9.899426
iter 34630: 9.870474
iter 34640: 9.850892
iter 34650: 9.811864
iter 34660: 9.792425
iter 34670: 9.801764
iter 34680: 9.770707
iter 34690: 9.719432
iter 34700: 9.703887
iter 34710: 9.623155
iter 34720: 9.531198
iter 34730: 9.518160
iter 34740: 9.513533
iter 34750: 9.547568
iter 34760: 9.552108
iter 34770: 9.601743
iter 34780: 9.612699
iter 34790: 9.627165
iter 34800: 9.620828
iter 34810: 9.620904
iter 34820: 9.652426
iter 34830: 9.623676
iter 34840: 9.615261
iter 34850: 9.639375
iter 34860: 9.565237
iter 34870: 9

iter 37600: 9.533822
iter 37610: 9.492988
iter 37620: 9.498976
iter 37630: 9.580059
iter 37640: 9.571127
iter 37650: 9.599381
iter 37660: 9.642366
iter 37670: 9.612346
iter 37680: 9.680257
iter 37690: 9.772122
iter 37700: 9.705962
iter 37710: 9.707451
iter 37720: 9.711175
iter 37730: 9.690517
iter 37740: 9.665310
iter 37750: 9.736719
iter 37760: 9.735231
iter 37770: 9.699614
iter 37780: 9.657109
iter 37790: 9.611508
iter 37800: 9.648461
iter 37810: 9.612340
iter 37820: 9.646897
iter 37830: 9.637285
iter 37840: 9.670454
iter 37850: 9.681937
iter 37860: 9.703563
iter 37870: 9.690834
iter 37880: 9.648606
iter 37890: 9.599916
iter 37900: 9.610638
iter 37910: 9.630899
iter 37920: 9.593479
iter 37930: 9.665779
iter 37940: 9.589750
iter 37950: 9.590446
iter 37960: 9.639159
iter 37970: 9.654268
iter 37980: 9.556479
iter 37990: 9.531627
[[ 0.58250369  0.04325792  0.27965484 ... -0.23968684  0.52530575
   0.26110041]
 [ 0.53313634  0.13997212  0.24808792 ... -0.28290999  0.67274319
   0.20742883

In [None]:
# Train with the negative sampling loss. No postprocessing is applied
# and no checkpoints are read or written.
wordVectors = sgd(
    lambda vec: word2vec_sgd_wrapper(
        skipgram, tokens, vec, dataset, C, negSamplingLossAndGradient),
    wordVectors,
    step=0.3,
    iterations=40000,
    postprocessing=None,
    useSaved=False,
    PRINT_EVERY=10)

In [16]:
print("sanity check: cost at convergence should be around or below 10")
print("training took %d seconds" % (time.time() - startTime))

# Re-join the first nWords rows and the remaining rows along axis 0;
# this yields a copy with the same row order as before.
wordVectors = np.concatenate(
    [wordVectors[:nWords, :], wordVectors[nWords:, :]], axis=0)

# Words whose vectors are exported and plotted.
visualizeWords = [
    "great", "cool", "brilliant", "wonderful", "well", "amazing",
    "worth", "sweet", "enjoyable",
    "boring", "bad", "dumb", "annoying",
    "female", "male", "queen", "king", "man", "woman",
    "rain", "snow", "hail",
    "coffee", "tea",
]

# Row index of every selected word, then the matching rows.
visualizeIdx = [tokens[name] for name in visualizeWords]
visualizeVecs = wordVectors[visualizeIdx, :]

# Word -> plain list of vector components, ready for serialization.
sampleVectors = dict(zip(visualizeWords, map(list, visualizeVecs)))


sanity check: cost at convergence should be around or below 10
training took 14919 seconds


NameError: name '__file__' is not defined

In [17]:
# Display the sampleVectors dictionary (word -> list of vector components).
sampleVectors

{'great': [0.5682557965917351,
  0.17693766992299975,
  0.18993643512543745,
  -0.2780914082047978,
  -0.8753156815199823,
  -0.14616440151329257,
  0.23939092249734448,
  -0.3107029172057923,
  0.6199832095634382,
  0.16617984462002233],
 'cool': [0.5641256072125872,
  0.13722982658305447,
  0.2082364803517174,
  -0.29296957234563653,
  -0.8704480862547579,
  -0.18822962799771,
  0.24239616047158674,
  -0.29410091959922563,
  0.6979644655991712,
  0.2147529764765612],
 'brilliant': [0.5918855018013452,
  0.10397786927770156,
  0.18125409832798153,
  -0.3249417806914038,
  -0.8565376508947542,
  -0.1796672139151317,
  0.2603509816471901,
  -0.32893244657244936,
  0.6604228461904655,
  0.2415822562502478],
 'wonderful': [0.5336813277875313,
  0.15884523607312168,
  0.2651843805368251,
  -0.34389277627100595,
  -0.8491692008120083,
  -0.18613217422541947,
  0.2672408530040501,
  -0.3180910305972477,
  0.6581041757437853,
  0.20044434983351095],
 'well': [0.5893339401809647,
  0.173118132

In [18]:
# Write the sampled vectors to a JSON file in the working directory.
sampleVectorsPath = "./sampleVectors.json"
dump(sampleVectors, sampleVectorsPath)

# Center the selected vectors and project them onto the first two
# singular directions of their covariance matrix.
temp = visualizeVecs - visualizeVecs.mean(axis=0)
covariance = (1.0 / len(visualizeIdx)) * np.dot(temp.T, temp)
U, S, V = np.linalg.svd(covariance)
coord = np.dot(temp, U[:, :2])

# Label each projected point with its word.
for i, label in enumerate(visualizeWords):
    plt.text(coord[i, 0], coord[i, 1], label,
             bbox={'facecolor': 'green', 'alpha': 0.1})

# Fit the axes to the extent of the projected points.
plt.xlim((coord[:, 0].min(), coord[:, 0].max()))
plt.ylim((coord[:, 1].min(), coord[:, 1].max()))

plt.savefig('word_vectors.png')