### Capturing word correlation in input data

In [1]:
import numpy as np
onehots = {}
onehots['cat'] = np.array([1,0,0,0])
onehots['the'] = np.array([0,1,0,0])
onehots['dog'] = np.array([0,0,1,0])
onehots['sat'] = np.array([0,0,0,1])

sentence = ['the','cat','sat']
x = onehots[sentence[0]]+onehots[sentence[1]]+onehots[sentence[2]]

print("Sentence Encoding:" + str(x))

Sentence Encoding:[1 1 0 1]


### Predicitng movie reviews

In [2]:
import sys


In [3]:
# reading the reviews and labels into a list
f = open('reviews.txt')
raw_reviews = f.readlines()
f.close()

f = open('labels.txt')
raw_labels = f.readlines()
f.close()

In [4]:
# mapping each review as tokens
tokens = list(map(lambda x:set(x.split(" ")),raw_reviews))

In [5]:
"""
creating a set of vocab taking each word from tokens and if len > 0
we add it to the list of words
"""
vocab = set()
for sent in tokens:
    for word in sent:
        if(len(word)>0):
            vocab.add(word)
vocab = list(vocab)

In [6]:
#enumerating each word to find assign index for each of the words

word2index = {}
for i,word in enumerate(vocab):
    word2index[word]=i

In [7]:
# Converting each sentence to index and then creating set of each token to avoid duplicates

input_dataset = list()
for sent in tokens:
    sent_indices = list()
    for word in sent:
        try:
            sent_indices.append(word2index[word])
        except:
            ""
    input_dataset.append(list(set(sent_indices)))

In [8]:
# Converting labels to 0 and 1s
target_dataset = list()
for labels in raw_labels:
    if labels == 'positive\n':
        target_dataset.append(1)
    else:
        target_dataset.append(0)

In [9]:
import numpy as np
np.random.seed(1)

In [10]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [11]:
alpha, iteration = (0.01,2)
hidden_size = 100

In [12]:
weights_0_1 = 0.2*np.random.random((len(vocab),hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,1)) - 0.1

#### Training with 2 layer network

In [13]:
correct,total = (0,0)
for iter in range(iteration):
    for i in range(len(input_dataset)-1000):
        x,y = (input_dataset[i],target_dataset[i])
        layer_1 = sigmoid(np.sum(weights_0_1[x],axis=0))
        layer_2 = sigmoid(np.dot(layer_1,weights_1_2))
        
        layer_2_delta = layer_2 - y
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)
        
        weights_0_1[x] -= layer_1_delta * alpha
        weights_1_2 -= np.outer(layer_1,layer_2_delta)*alpha
        
        if(np.abs(layer_2_delta)<0.5):
            correct += 1
        total += 1
        if(i % 10 == 9):
            progress = str(i/float(len(input_dataset)))
            sys.stdout.write('\rIter:'+str(iter)\
                +' Progress:'+progress[2:4]\
                             +'.'+progress[4:6]\
                +'% Training Accuracy:'\
                + str(correct/float(total)) + '%')
    print()
correct,total = (0,0)
for i in range(len(input_dataset)-1000,len(input_dataset)):
    x = input_dataset[i]
    y = target_dataset[i]
    layer_1 = sigmoid(np.sum(weights_0_1[x],axis=0)) 
    layer_2 = sigmoid(np.dot(layer_1,weights_1_2))
    correct += 1
    total += 1
print("Test Accuracy:" + str(correct / float(total)))

Iter:0 Progress:00.03% Training Accuracy:0.3%Iter:0 Progress:00.07% Training Accuracy:0.2%Iter:0 Progress:00.11% Training Accuracy:0.13333333333333333%Iter:0 Progress:00.15% Training Accuracy:0.1%Iter:0 Progress:00.19% Training Accuracy:0.14%Iter:0 Progress:00.23% Training Accuracy:0.15%Iter:0 Progress:00.27% Training Accuracy:0.15714285714285714%Iter:0 Progress:00.31% Training Accuracy:0.2125%Iter:0 Progress:00.35% Training Accuracy:0.23333333333333334%Iter:0 Progress:00.39% Training Accuracy:0.28%Iter:0 Progress:00.43% Training Accuracy:0.2727272727272727%Iter:0 Progress:00.47% Training Accuracy:0.275%Iter:0 Progress:00.51% Training Accuracy:0.2846153846153846%Iter:0 Progress:00.55% Training Accuracy:0.2857142857142857%Iter:0 Progress:00.59% Training Accuracy:0.3%Iter:0 Progress:00.63% Training Accuracy:0.30625%Iter:0 Progress:00.67% Training Accuracy:0.3%Iter:0 Progress:00.71% Training Accuracy:0.3%Iter:0 Progress:00.75% Training Accuracy:0.30526315789473685%Iter

Iter:0 Progress:05.99% Training Accuracy:0.55%Iter:0 Progress:06.03% Training Accuracy:0.5529801324503312%Iter:0 Progress:06.07% Training Accuracy:0.5552631578947368%Iter:0 Progress:06.11% Training Accuracy:0.5581699346405229%Iter:0 Progress:06.15% Training Accuracy:0.5597402597402598%Iter:0 Progress:06.19% Training Accuracy:0.5606451612903226%Iter:0 Progress:06.23% Training Accuracy:0.5615384615384615%Iter:0 Progress:06.27% Training Accuracy:0.5624203821656051%Iter:0 Progress:06.31% Training Accuracy:0.5645569620253165%Iter:0 Progress:06.35% Training Accuracy:0.5666666666666667%Iter:0 Progress:06.39% Training Accuracy:0.5675%Iter:0 Progress:06.43% Training Accuracy:0.5677018633540373%Iter:0 Progress:06.47% Training Accuracy:0.5685185185185185%Iter:0 Progress:06.51% Training Accuracy:0.5699386503067485%Iter:0 Progress:06.55% Training Accuracy:0.5707317073170731%Iter:0 Progress:06.59% Training Accuracy:0.5703030303030303%Iter:0 Progress:06.63% Training Accuracy:0.569879

Iter:0 Progress:12.39% Training Accuracy:0.6816129032258065%Iter:0 Progress:12.43% Training Accuracy:0.682315112540193%Iter:0 Progress:12.47% Training Accuracy:0.6830128205128205%Iter:0 Progress:12.51% Training Accuracy:0.6840255591054313%Iter:0 Progress:12.55% Training Accuracy:0.685031847133758%Iter:0 Progress:12.59% Training Accuracy:0.6857142857142857%Iter:0 Progress:12.63% Training Accuracy:0.6863924050632911%Iter:0 Progress:12.67% Training Accuracy:0.6861198738170347%Iter:0 Progress:12.71% Training Accuracy:0.6867924528301886%Iter:0 Progress:12.75% Training Accuracy:0.687460815047022%Iter:0 Progress:12.79% Training Accuracy:0.6875%Iter:0 Progress:12.83% Training Accuracy:0.6884735202492211%Iter:0 Progress:12.87% Training Accuracy:0.6888198757763975%Iter:0 Progress:12.91% Training Accuracy:0.6894736842105263%Iter:0 Progress:12.95% Training Accuracy:0.6898148148148148%Iter:0 Progress:12.99% Training Accuracy:0.6898461538461539%Iter:0 Progress:13.03% Training Accura

Iter:0 Progress:18.75% Training Accuracy:0.7226012793176972%Iter:0 Progress:18.79% Training Accuracy:0.7221276595744681%Iter:0 Progress:18.83% Training Accuracy:0.721656050955414%Iter:0 Progress:18.87% Training Accuracy:0.7222457627118644%Iter:0 Progress:18.91% Training Accuracy:0.7226215644820296%Iter:0 Progress:18.95% Training Accuracy:0.7232067510548523%Iter:0 Progress:18.99% Training Accuracy:0.7237894736842105%Iter:0 Progress:19.03% Training Accuracy:0.7243697478991596%Iter:0 Progress:19.07% Training Accuracy:0.7247379454926625%Iter:0 Progress:19.11% Training Accuracy:0.7253138075313807%Iter:0 Progress:19.15% Training Accuracy:0.725678496868476%Iter:0 Progress:19.19% Training Accuracy:0.725625%Iter:0 Progress:19.23% Training Accuracy:0.7257796257796257%Iter:0 Progress:19.27% Training Accuracy:0.7255186721991701%Iter:0 Progress:19.31% Training Accuracy:0.7260869565217392%Iter:0 Progress:19.35% Training Accuracy:0.7262396694214877%Iter:0 Progress:19.39% Training Acc

Iter:0 Progress:24.75% Training Accuracy:0.7481421647819063%Iter:0 Progress:24.79% Training Accuracy:0.7483870967741936%Iter:0 Progress:24.83% Training Accuracy:0.748792270531401%Iter:0 Progress:24.87% Training Accuracy:0.7490353697749196%Iter:0 Progress:24.91% Training Accuracy:0.7491171749598716%Iter:0 Progress:24.95% Training Accuracy:0.7495192307692308%Iter:0 Progress:24.99% Training Accuracy:0.74976%Iter:0 Progress:25.03% Training Accuracy:0.7496805111821087%Iter:0 Progress:25.07% Training Accuracy:0.7500797448165869%Iter:0 Progress:25.11% Training Accuracy:0.7501592356687898%Iter:0 Progress:25.15% Training Accuracy:0.7503974562798092%Iter:0 Progress:25.19% Training Accuracy:0.7506349206349207%Iter:0 Progress:25.23% Training Accuracy:0.7507131537242472%Iter:0 Progress:25.27% Training Accuracy:0.7507911392405063%Iter:0 Progress:25.31% Training Accuracy:0.7510268562401263%Iter:0 Progress:25.35% Training Accuracy:0.7512618296529968%Iter:0 Progress:25.39% Training Acc

Iter:0 Progress:30.71% Training Accuracy:0.76875%Iter:0 Progress:30.75% Training Accuracy:0.7686605981794539%Iter:0 Progress:30.79% Training Accuracy:0.768961038961039%Iter:0 Progress:30.83% Training Accuracy:0.7690012970168613%Iter:0 Progress:30.87% Training Accuracy:0.769300518134715%Iter:0 Progress:30.91% Training Accuracy:0.7692108667529107%Iter:0 Progress:30.95% Training Accuracy:0.7693798449612403%Iter:0 Progress:30.99% Training Accuracy:0.7696774193548387%Iter:0 Progress:31.03% Training Accuracy:0.7699742268041238%Iter:0 Progress:31.07% Training Accuracy:0.77001287001287%Iter:0 Progress:31.11% Training Accuracy:0.7703084832904884%Iter:0 Progress:31.15% Training Accuracy:0.7706033376123235%Iter:0 Progress:31.19% Training Accuracy:0.7707692307692308%Iter:0 Progress:31.23% Training Accuracy:0.7710627400768246%Iter:0 Progress:31.27% Training Accuracy:0.7713554987212277%Iter:0 Progress:31.31% Training Accuracy:0.7715197956577267%Iter:0 Progress:31.35% Training Accura

Iter:0 Progress:36.51% Training Accuracy:0.7831325301204819%Iter:0 Progress:36.55% Training Accuracy:0.7832603938730853%Iter:0 Progress:36.59% Training Accuracy:0.7833879781420765%Iter:0 Progress:36.63% Training Accuracy:0.7836244541484716%Iter:0 Progress:36.67% Training Accuracy:0.7838604143947655%Iter:0 Progress:36.71% Training Accuracy:0.7840958605664488%Iter:0 Progress:36.75% Training Accuracy:0.7842219804134929%Iter:0 Progress:36.79% Training Accuracy:0.7843478260869565%Iter:0 Progress:36.83% Training Accuracy:0.7845819761129207%Iter:0 Progress:36.87% Training Accuracy:0.78470715835141%Iter:0 Progress:36.91% Training Accuracy:0.7848320693391115%Iter:0 Progress:36.95% Training Accuracy:0.78495670995671%Iter:0 Progress:36.99% Training Accuracy:0.784972972972973%Iter:0 Progress:37.03% Training Accuracy:0.7849892008639309%Iter:0 Progress:37.07% Training Accuracy:0.7850053937432578%Iter:0 Progress:37.11% Training Accuracy:0.7850215517241379%Iter:0 Progress:37.15% Train

Iter:0 Progress:42.51% Training Accuracy:0.7946378174976482%Iter:0 Progress:42.55% Training Accuracy:0.7947368421052632%Iter:0 Progress:42.59% Training Accuracy:0.7947417840375587%Iter:0 Progress:42.63% Training Accuracy:0.7946529080675422%Iter:0 Progress:42.67% Training Accuracy:0.7946579194001875%Iter:0 Progress:42.71% Training Accuracy:0.7946629213483146%Iter:0 Progress:42.75% Training Accuracy:0.7948550046772684%Iter:0 Progress:42.79% Training Accuracy:0.7949532710280374%Iter:0 Progress:42.83% Training Accuracy:0.7951447245564892%Iter:0 Progress:42.87% Training Accuracy:0.7953358208955223%Iter:0 Progress:42.91% Training Accuracy:0.7954333643988817%Iter:0 Progress:42.95% Training Accuracy:0.7955307262569833%Iter:0 Progress:42.99% Training Accuracy:0.7955348837209303%Iter:0 Progress:43.03% Training Accuracy:0.7955390334572491%Iter:0 Progress:43.07% Training Accuracy:0.7955431754874652%Iter:0 Progress:43.11% Training Accuracy:0.7953617810760668%Iter:0 Progress:43.15% 

Iter:0 Progress:47.95% Training Accuracy:0.8009174311926606%Iter:0 Progress:47.99% Training Accuracy:0.8010833333333334%Iter:0 Progress:48.03% Training Accuracy:0.8012489592006661%Iter:0 Progress:48.07% Training Accuracy:0.801414309484193%Iter:0 Progress:48.11% Training Accuracy:0.8015793848711554%Iter:0 Progress:48.15% Training Accuracy:0.801578073089701%Iter:0 Progress:48.19% Training Accuracy:0.8017427385892116%Iter:0 Progress:48.23% Training Accuracy:0.8018242122719734%Iter:0 Progress:48.27% Training Accuracy:0.8019884009942005%Iter:0 Progress:48.31% Training Accuracy:0.802069536423841%Iter:0 Progress:48.35% Training Accuracy:0.8020678246484698%Iter:0 Progress:48.39% Training Accuracy:0.8021487603305785%Iter:0 Progress:48.43% Training Accuracy:0.8021469859620148%Iter:0 Progress:48.47% Training Accuracy:0.8023102310231023%Iter:0 Progress:48.51% Training Accuracy:0.8023083264633141%Iter:0 Progress:48.55% Training Accuracy:0.8022240527182867%Iter:0 Progress:48.59% Tra

Iter:0 Progress:54.23% Training Accuracy:0.8087020648967551%Iter:0 Progress:54.27% Training Accuracy:0.808695652173913%Iter:0 Progress:54.31% Training Accuracy:0.8087628865979382%Iter:0 Progress:54.35% Training Accuracy:0.808756438557763%Iter:0 Progress:54.39% Training Accuracy:0.8088970588235294%Iter:0 Progress:54.43% Training Accuracy:0.8088905216752388%Iter:0 Progress:54.47% Training Accuracy:0.8088839941262849%Iter:0 Progress:54.51% Training Accuracy:0.8089508437270726%Iter:0 Progress:54.55% Training Accuracy:0.8089442815249267%Iter:0 Progress:54.59% Training Accuracy:0.809084249084249%Iter:0 Progress:54.63% Training Accuracy:0.8092240117130307%Iter:0 Progress:54.67% Training Accuracy:0.8092172640819313%Iter:0 Progress:54.71% Training Accuracy:0.8090643274853802%Iter:0 Progress:54.75% Training Accuracy:0.8089846603360117%Iter:0 Progress:54.79% Training Accuracy:0.809051094890511%Iter:0 Progress:54.83% Training Accuracy:0.8088986141502553%Iter:0 Progress:54.87% Trai

Iter:0 Progress:60.23% Training Accuracy:0.8123505976095617%Iter:0 Progress:60.27% Training Accuracy:0.8124751161247512%Iter:0 Progress:60.31% Training Accuracy:0.8124668435013263%Iter:0 Progress:60.35% Training Accuracy:0.8124585818422797%Iter:0 Progress:60.39% Training Accuracy:0.8124503311258278%Iter:0 Progress:60.43% Training Accuracy:0.8123759099933818%Iter:0 Progress:60.47% Training Accuracy:0.8123677248677249%Iter:0 Progress:60.51% Training Accuracy:0.8122934567085262%Iter:0 Progress:60.55% Training Accuracy:0.8123513870541612%Iter:0 Progress:60.59% Training Accuracy:0.8122772277227723%Iter:0 Progress:60.63% Training Accuracy:0.8123350923482849%Iter:0 Progress:60.67% Training Accuracy:0.8121951219512196%Iter:0 Progress:60.71% Training Accuracy:0.8122529644268774%Iter:0 Progress:60.75% Training Accuracy:0.8123107307439105%Iter:0 Progress:60.79% Training Accuracy:0.8123026315789473%Iter:0 Progress:60.83% Training Accuracy:0.8124260355029586%Iter:0 Progress:60.87% 

Iter:0 Progress:66.23% Training Accuracy:0.8140096618357487%Iter:0 Progress:66.27% Training Accuracy:0.8141219070609536%Iter:0 Progress:66.31% Training Accuracy:0.8142340168878166%Iter:0 Progress:66.35% Training Accuracy:0.8142857142857143%Iter:0 Progress:66.39% Training Accuracy:0.8143975903614458%Iter:0 Progress:66.43% Training Accuracy:0.8145093317278748%Iter:0 Progress:66.47% Training Accuracy:0.8145006016847172%Iter:0 Progress:66.51% Training Accuracy:0.8146121467227901%Iter:0 Progress:66.55% Training Accuracy:0.8146033653846154%Iter:0 Progress:66.59% Training Accuracy:0.8145945945945946%Iter:0 Progress:66.63% Training Accuracy:0.8146458583433374%Iter:0 Progress:66.67% Training Accuracy:0.8145170965806838%Iter:0 Progress:66.71% Training Accuracy:0.8144484412470024%Iter:0 Progress:66.75% Training Accuracy:0.8145596165368484%Iter:0 Progress:66.79% Training Accuracy:0.8146107784431138%Iter:0 Progress:66.83% Training Accuracy:0.8146618791143028%Iter:0 Progress:66.87% 

Iter:0 Progress:72.31% Training Accuracy:0.8180309734513275%Iter:0 Progress:72.35% Training Accuracy:0.8180210060807076%Iter:0 Progress:72.39% Training Accuracy:0.8180662983425414%Iter:0 Progress:72.43% Training Accuracy:0.8181667586968525%Iter:0 Progress:72.47% Training Accuracy:0.8182671081677704%Iter:0 Progress:72.51% Training Accuracy:0.8182570325427468%Iter:0 Progress:72.55% Training Accuracy:0.8183572216097024%Iter:0 Progress:72.59% Training Accuracy:0.8184022038567493%Iter:0 Progress:72.63% Training Accuracy:0.8183920704845815%Iter:0 Progress:72.67% Training Accuracy:0.8184369840396257%Iter:0 Progress:72.71% Training Accuracy:0.8185368536853685%Iter:0 Progress:72.75% Training Accuracy:0.8186366135239143%Iter:0 Progress:72.79% Training Accuracy:0.8186813186813187%Iter:0 Progress:72.83% Training Accuracy:0.8187259747391543%Iter:0 Progress:72.87% Training Accuracy:0.8188254665203073%Iter:0 Progress:72.91% Training Accuracy:0.8189248491497532%Iter:0 Progress:72.95% 

Iter:0 Progress:78.35% Training Accuracy:0.8227156712608473%Iter:0 Progress:78.39% Training Accuracy:0.8226530612244898%Iter:0 Progress:78.43% Training Accuracy:0.8226925038245793%Iter:0 Progress:78.47% Training Accuracy:0.8226809378185524%Iter:0 Progress:78.51% Training Accuracy:0.8226184411614875%Iter:0 Progress:78.55% Training Accuracy:0.8225560081466395%Iter:0 Progress:78.59% Training Accuracy:0.8225445292620865%Iter:0 Progress:78.63% Training Accuracy:0.8225839267548322%Iter:0 Progress:78.67% Training Accuracy:0.8226232841891205%Iter:0 Progress:78.71% Training Accuracy:0.8226626016260162%Iter:0 Progress:78.75% Training Accuracy:0.8226003047232098%Iter:0 Progress:78.79% Training Accuracy:0.8225888324873096%Iter:0 Progress:78.83% Training Accuracy:0.8226788432267884%Iter:0 Progress:78.87% Training Accuracy:0.8227180527383368%Iter:0 Progress:78.91% Training Accuracy:0.8228079067410036%Iter:0 Progress:78.95% Training Accuracy:0.8228470111448835%Iter:0 Progress:78.99% 

Iter:0 Progress:84.35% Training Accuracy:0.8260787102892366%Iter:0 Progress:84.39% Training Accuracy:0.8261611374407583%Iter:0 Progress:84.43% Training Accuracy:0.8261013737565135%Iter:0 Progress:84.47% Training Accuracy:0.8260890151515151%Iter:0 Progress:84.51% Training Accuracy:0.826171320397539%Iter:0 Progress:84.55% Training Accuracy:0.8261116367076632%Iter:0 Progress:84.59% Training Accuracy:0.8261465721040189%Iter:0 Progress:84.63% Training Accuracy:0.8261342155009452%Iter:0 Progress:84.67% Training Accuracy:0.8261691072272083%Iter:0 Progress:84.71% Training Accuracy:0.8261567516525024%Iter:0 Progress:84.75% Training Accuracy:0.8261444077394997%Iter:0 Progress:84.79% Training Accuracy:0.8261320754716981%Iter:0 Progress:84.83% Training Accuracy:0.8262140499764262%Iter:0 Progress:84.87% Training Accuracy:0.8262959472196042%Iter:0 Progress:84.91% Training Accuracy:0.8262835609985869%Iter:0 Progress:84.95% Training Accuracy:0.8263182674199623%Iter:0 Progress:84.99% T

Iter:0 Progress:90.39% Training Accuracy:0.8283628318584071%Iter:0 Progress:90.43% Training Accuracy:0.82843874391862%Iter:0 Progress:90.47% Training Accuracy:0.8284261715296198%Iter:0 Progress:90.51% Training Accuracy:0.8284577993813522%Iter:0 Progress:90.55% Training Accuracy:0.8284893992932862%Iter:0 Progress:90.59% Training Accuracy:0.828476821192053%Iter:0 Progress:90.63% Training Accuracy:0.8285525154457193%Iter:0 Progress:90.67% Training Accuracy:0.8285399205999118%Iter:0 Progress:90.71% Training Accuracy:0.8285714285714286%Iter:0 Progress:90.75% Training Accuracy:0.8286469810489202%Iter:0 Progress:90.79% Training Accuracy:0.8287224669603525%Iter:0 Progress:90.83% Training Accuracy:0.8287978863936591%Iter:0 Progress:90.87% Training Accuracy:0.8287852112676056%Iter:0 Progress:90.91% Training Accuracy:0.8288605367355917%Iter:0 Progress:90.95% Training Accuracy:0.828891820580475%Iter:0 Progress:90.99% Training Accuracy:0.828967032967033%Iter:0 Progress:91.03% Train

Iter:1 Progress:00.03% Training Accuracy:0.8318617242815494%Iter:1 Progress:00.07% Training Accuracy:0.8319317235636969%Iter:1 Progress:00.11% Training Accuracy:0.831960049937578%Iter:1 Progress:00.15% Training Accuracy:0.8317803660565724%Iter:1 Progress:00.19% Training Accuracy:0.8317671517671518%Iter:1 Progress:00.23% Training Accuracy:0.8317123857024107%Iter:1 Progress:00.27% Training Accuracy:0.831657665143332%Iter:1 Progress:00.31% Training Accuracy:0.8317275747508306%Iter:1 Progress:00.35% Training Accuracy:0.8317974263179743%Iter:1 Progress:00.39% Training Accuracy:0.8318257261410789%Iter:1 Progress:00.43% Training Accuracy:0.8318954790543343%Iter:1 Progress:00.47% Training Accuracy:0.8318407960199005%Iter:1 Progress:00.51% Training Accuracy:0.8318690426854538%Iter:1 Progress:00.55% Training Accuracy:0.8318558409279204%Iter:1 Progress:00.59% Training Accuracy:0.8319254658385093%Iter:1 Progress:00.63% Training Accuracy:0.831953642384106%Iter:1 Progress:00.67% Tra

Iter:1 Progress:06.23% Training Accuracy:0.8350156494522691%Iter:1 Progress:06.27% Training Accuracy:0.835041063746578%Iter:1 Progress:06.31% Training Accuracy:0.8350664581704457%Iter:1 Progress:06.35% Training Accuracy:0.8350918327471668%Iter:1 Progress:06.39% Training Accuracy:0.8351171875%Iter:1 Progress:06.43% Training Accuracy:0.8351425224521671%Iter:1 Progress:06.47% Training Accuracy:0.835167837626854%Iter:1 Progress:06.51% Training Accuracy:0.8351931330472103%Iter:1 Progress:06.55% Training Accuracy:0.8352184087363494%Iter:1 Progress:06.59% Training Accuracy:0.8352046783625731%Iter:1 Progress:06.63% Training Accuracy:0.835190958690569%Iter:1 Progress:06.67% Training Accuracy:0.835255161667316%Iter:1 Progress:06.71% Training Accuracy:0.835202492211838%Iter:1 Progress:06.75% Training Accuracy:0.835266640716232%Iter:1 Progress:06.79% Training Accuracy:0.8352529182879378%Iter:1 Progress:06.83% Training Accuracy:0.8352781019058731%Iter:1 Progress:06.87% Training Acc

Iter:1 Progress:12.59% Training Accuracy:0.8388581952117864%Iter:1 Progress:12.63% Training Accuracy:0.8389175257731959%Iter:1 Progress:12.67% Training Accuracy:0.83894000736106%Iter:1 Progress:12.71% Training Accuracy:0.8389992641648271%Iter:1 Progress:12.75% Training Accuracy:0.8390216991541007%Iter:1 Progress:12.79% Training Accuracy:0.8390808823529412%Iter:1 Progress:12.83% Training Accuracy:0.8391400220507167%Iter:1 Progress:12.87% Training Accuracy:0.8391256429096253%Iter:1 Progress:12.91% Training Accuracy:0.839147998531032%Iter:1 Progress:12.95% Training Accuracy:0.8391703377386197%Iter:1 Progress:12.99% Training Accuracy:0.8391926605504587%Iter:1 Progress:13.03% Training Accuracy:0.8392149669845929%Iter:1 Progress:13.07% Training Accuracy:0.8391272460579391%Iter:1 Progress:13.11% Training Accuracy:0.8390029325513196%Iter:1 Progress:13.15% Training Accuracy:0.8389519970685233%Iter:1 Progress:13.19% Training Accuracy:0.8389377289377289%Iter:1 Progress:13.23% Tra

Iter:1 Progress:18.87% Training Accuracy:0.8414693593314764%Iter:1 Progress:18.91% Training Accuracy:0.8415245388096066%Iter:1 Progress:18.95% Training Accuracy:0.8415448851774531%Iter:1 Progress:18.99% Training Accuracy:0.8416%Iter:1 Progress:19.03% Training Accuracy:0.8416550764951322%Iter:1 Progress:19.07% Training Accuracy:0.8416753562738964%Iter:1 Progress:19.11% Training Accuracy:0.8416956219596943%Iter:1 Progress:19.15% Training Accuracy:0.8417158735672108%Iter:1 Progress:19.19% Training Accuracy:0.8417361111111111%Iter:1 Progress:19.23% Training Accuracy:0.8416869142658799%Iter:1 Progress:19.27% Training Accuracy:0.8416724496877168%Iter:1 Progress:19.31% Training Accuracy:0.8416926812348249%Iter:1 Progress:19.35% Training Accuracy:0.841747572815534%Iter:1 Progress:19.39% Training Accuracy:0.8418024263431543%Iter:1 Progress:19.43% Training Accuracy:0.8418225918225918%Iter:1 Progress:19.47% Training Accuracy:0.8418773813647384%Iter:1 Progress:19.51% Training Accu

Iter:1 Progress:25.23% Training Accuracy:0.8435499835037942%Iter:1 Progress:25.27% Training Accuracy:0.8435686015831134%Iter:1 Progress:25.31% Training Accuracy:0.8435872073854269%Iter:1 Progress:25.35% Training Accuracy:0.8436058009228741%Iter:1 Progress:25.39% Training Accuracy:0.8436243822075783%Iter:1 Progress:25.43% Training Accuracy:0.8436429512516469%Iter:1 Progress:25.47% Training Accuracy:0.8436944352979915%Iter:1 Progress:25.51% Training Accuracy:0.8437129690585912%Iter:1 Progress:25.55% Training Accuracy:0.8436656794998355%Iter:1 Progress:25.59% Training Accuracy:0.8435855263157894%Iter:1 Progress:25.63% Training Accuracy:0.8436040776060506%Iter:1 Progress:25.67% Training Accuracy:0.8436554898093359%Iter:1 Progress:25.71% Training Accuracy:0.8437068682221492%Iter:1 Progress:25.75% Training Accuracy:0.8437253613666229%Iter:1 Progress:25.79% Training Accuracy:0.8437766830870279%Iter:1 Progress:25.83% Training Accuracy:0.8437951411687459%Iter:1 Progress:25.87% 

Iter:1 Progress:31.39% Training Accuracy:0.8465934065934065%Iter:1 Progress:31.43% Training Accuracy:0.8465787821720026%Iter:1 Progress:31.47% Training Accuracy:0.8465327894571697%Iter:1 Progress:31.51% Training Accuracy:0.846486825595985%Iter:1 Progress:31.55% Training Accuracy:0.8465036061461273%Iter:1 Progress:31.59% Training Accuracy:0.846551724137931%Iter:1 Progress:31.63% Training Accuracy:0.8465684738326543%Iter:1 Progress:31.67% Training Accuracy:0.8465852130325815%Iter:1 Progress:31.71% Training Accuracy:0.8466332602568117%Iter:1 Progress:31.75% Training Accuracy:0.8466812773951159%Iter:1 Progress:31.79% Training Accuracy:0.8467292644757434%Iter:1 Progress:31.83% Training Accuracy:0.8467459324155194%Iter:1 Progress:31.87% Training Accuracy:0.8467625899280575%Iter:1 Progress:31.91% Training Accuracy:0.8467792370231395%Iter:1 Progress:31.95% Training Accuracy:0.8468271334792122%Iter:1 Progress:31.99% Training Accuracy:0.846875%Iter:1 Progress:32.03% Training Acc

Iter:1 Progress:37.87% Training Accuracy:0.8493277561995817%Iter:1 Progress:37.91% Training Accuracy:0.8493727598566309%Iter:1 Progress:37.95% Training Accuracy:0.8493878769782024%Iter:1 Progress:37.99% Training Accuracy:0.8494029850746269%Iter:1 Progress:38.03% Training Accuracy:0.8494180841539839%Iter:1 Progress:38.07% Training Accuracy:0.8494033412887828%Iter:1 Progress:38.11% Training Accuracy:0.849418431255592%Iter:1 Progress:38.15% Training Accuracy:0.8494335122242099%Iter:1 Progress:38.19% Training Accuracy:0.8493293591654247%Iter:1 Progress:38.23% Training Accuracy:0.849284862932062%Iter:1 Progress:38.27% Training Accuracy:0.8493297587131368%Iter:1 Progress:38.31% Training Accuracy:0.8493746277546158%Iter:1 Progress:38.35% Training Accuracy:0.8494194700803811%Iter:1 Progress:38.39% Training Accuracy:0.8494345238095238%Iter:1 Progress:38.43% Training Accuracy:0.8494793216304671%Iter:1 Progress:38.47% Training Accuracy:0.8495240928019037%Iter:1 Progress:38.51% Tr

Iter:1 Progress:44.15% Training Accuracy:0.8519406392694064%Iter:1 Progress:44.19% Training Accuracy:0.8519828815977175%Iter:1 Progress:44.23% Training Accuracy:0.851968054763263%Iter:1 Progress:44.27% Training Accuracy:0.8520102651839179%Iter:1 Progress:44.31% Training Accuracy:0.852023945267959%Iter:1 Progress:44.35% Training Accuracy:0.8520661157024794%Iter:1 Progress:44.39% Training Accuracy:0.852051282051282%Iter:1 Progress:44.43% Training Accuracy:0.8520649387638849%Iter:1 Progress:44.47% Training Accuracy:0.8520785876993167%Iter:1 Progress:44.51% Training Accuracy:0.8520637631653857%Iter:1 Progress:44.55% Training Accuracy:0.8520774046670461%Iter:1 Progress:44.59% Training Accuracy:0.8521194879089616%Iter:1 Progress:44.63% Training Accuracy:0.8521331058020478%Iter:1 Progress:44.67% Training Accuracy:0.8521751492749502%Iter:1 Progress:44.71% Training Accuracy:0.8521887436043206%Iter:1 Progress:44.75% Training Accuracy:0.8522023302074453%Iter:1 Progress:44.79% Tra

Iter:1 Progress:50.67% Training Accuracy:0.8544314153258795%Iter:1 Progress:50.71% Training Accuracy:0.854443838604144%Iter:1 Progress:50.75% Training Accuracy:0.8544289997274461%Iter:1 Progress:50.79% Training Accuracy:0.8544686648501363%Iter:1 Progress:50.83% Training Accuracy:0.8544810678289294%Iter:1 Progress:50.87% Training Accuracy:0.8544934640522875%Iter:1 Progress:50.91% Training Accuracy:0.8545330792267901%Iter:1 Progress:50.95% Training Accuracy:0.8545726728361459%Iter:1 Progress:50.99% Training Accuracy:0.8545850340136054%Iter:1 Progress:51.03% Training Accuracy:0.8546245919477693%Iter:1 Progress:51.07% Training Accuracy:0.8546641283655154%Iter:1 Progress:51.11% Training Accuracy:0.8547036432843937%Iter:1 Progress:51.15% Training Accuracy:0.8547159554226692%Iter:1 Progress:51.19% Training Accuracy:0.8547010869565217%Iter:1 Progress:51.23% Training Accuracy:0.8547405596305352%Iter:1 Progress:51.27% Training Accuracy:0.8547256925583921%Iter:1 Progress:51.31% T

Iter:1 Progress:56.63% Training Accuracy:0.8562893081761006%Iter:1 Progress:56.67% Training Accuracy:0.856300759758973%Iter:1 Progress:56.71% Training Accuracy:0.8563122053431116%Iter:1 Progress:56.75% Training Accuracy:0.8563236449332285%Iter:1 Progress:56.79% Training Accuracy:0.8563612565445026%Iter:1 Progress:56.83% Training Accuracy:0.8563726773096049%Iter:1 Progress:56.87% Training Accuracy:0.8564102564102564%Iter:1 Progress:56.91% Training Accuracy:0.8564216583834685%Iter:1 Progress:56.95% Training Accuracy:0.8564330543933054%Iter:1 Progress:56.99% Training Accuracy:0.8564705882352941%Iter:1 Progress:57.03% Training Accuracy:0.8564558285415578%Iter:1 Progress:57.07% Training Accuracy:0.8564933368173504%Iter:1 Progress:57.11% Training Accuracy:0.8565308254963427%Iter:1 Progress:57.15% Training Accuracy:0.8565682945938887%Iter:1 Progress:57.19% Training Accuracy:0.8565796344647519%Iter:1 Progress:57.23% Training Accuracy:0.8566170712607675%Iter:1 Progress:57.27% T

Iter:1 Progress:62.91% Training Accuracy:0.8577397432670526%Iter:1 Progress:62.95% Training Accuracy:0.8577000503271264%Iter:1 Progress:62.99% Training Accuracy:0.857685534591195%Iter:1 Progress:63.03% Training Accuracy:0.8576961770623742%Iter:1 Progress:63.07% Training Accuracy:0.8577068141815439%Iter:1 Progress:63.11% Training Accuracy:0.8576923076923076%Iter:1 Progress:63.15% Training Accuracy:0.8577029404372958%Iter:1 Progress:63.19% Training Accuracy:0.8577386934673367%Iter:1 Progress:63.23% Training Accuracy:0.8577493092187892%Iter:1 Progress:63.27% Training Accuracy:0.8577850326469111%Iter:1 Progress:63.31% Training Accuracy:0.8577956314335927%Iter:1 Progress:63.35% Training Accuracy:0.8577560240963855%Iter:1 Progress:63.39% Training Accuracy:0.857741530740276%Iter:1 Progress:63.43% Training Accuracy:0.8577521324636227%Iter:1 Progress:63.47% Training Accuracy:0.8577627288688237%Iter:1 Progress:63.51% Training Accuracy:0.8577983951855567%Iter:1 Progress:63.55% Tr

Iter:1 Progress:69.15% Training Accuracy:0.8590699927343183%Iter:1 Progress:69.19% Training Accuracy:0.8591041162227603%Iter:1 Progress:69.23% Training Accuracy:0.8591382231905108%Iter:1 Progress:69.27% Training Accuracy:0.8591723136495644%Iter:1 Progress:69.31% Training Accuracy:0.85915799661263%Iter:1 Progress:69.35% Training Accuracy:0.8591920657958394%Iter:1 Progress:69.39% Training Accuracy:0.8592261185006046%Iter:1 Progress:69.43% Training Accuracy:0.8591876208897485%Iter:1 Progress:69.47% Training Accuracy:0.8591491418902586%Iter:1 Progress:69.51% Training Accuracy:0.8591831802803287%Iter:1 Progress:69.55% Training Accuracy:0.8591930417975356%Iter:1 Progress:69.59% Training Accuracy:0.8592270531400966%Iter:1 Progress:69.63% Training Accuracy:0.859236899299686%Iter:1 Progress:69.67% Training Accuracy:0.8592467407049734%Iter:1 Progress:69.71% Training Accuracy:0.8592807144581222%Iter:1 Progress:69.75% Training Accuracy:0.8593146718146718%Iter:1 Progress:69.79% Tra

Iter:1 Progress:75.43% Training Accuracy:0.8610359309379375%Iter:1 Progress:75.47% Training Accuracy:0.8610683461628178%Iter:1 Progress:75.51% Training Accuracy:0.8610307835820895%Iter:1 Progress:75.55% Training Accuracy:0.8610631848915831%Iter:1 Progress:75.59% Training Accuracy:0.8610722610722611%Iter:1 Progress:75.63% Training Accuracy:0.8610347238405966%Iter:1 Progress:75.67% Training Accuracy:0.861067101584343%Iter:1 Progress:75.71% Training Accuracy:0.8610994642441183%Iter:1 Progress:75.75% Training Accuracy:0.8611318118304611%Iter:1 Progress:75.79% Training Accuracy:0.8611641443538999%Iter:1 Progress:75.83% Training Accuracy:0.8611499068901304%Iter:1 Progress:75.87% Training Accuracy:0.8611589481033279%Iter:1 Progress:75.91% Training Accuracy:0.8611447184737087%Iter:1 Progress:75.95% Training Accuracy:0.8611537566876017%Iter:1 Progress:75.99% Training Accuracy:0.8611627906976744%Iter:1 Progress:76.03% Training Accuracy:0.8611718205068589%Iter:1 Progress:76.07% T

Iter:1 Progress:81.35% Training Accuracy:0.8626522327469553%Iter:1 Progress:81.39% Training Accuracy:0.8626606538895152%Iter:1 Progress:81.43% Training Accuracy:0.8626916140667268%Iter:1 Progress:81.47% Training Accuracy:0.8627000225377507%Iter:1 Progress:81.51% Training Accuracy:0.8627309598918431%Iter:1 Progress:81.55% Training Accuracy:0.8627393557107457%Iter:1 Progress:81.59% Training Accuracy:0.8626801801801802%Iter:1 Progress:81.63% Training Accuracy:0.862666066201306%Iter:1 Progress:81.67% Training Accuracy:0.8626969833408374%Iter:1 Progress:81.71% Training Accuracy:0.862660364618501%Iter:1 Progress:81.75% Training Accuracy:0.8626687668766877%Iter:1 Progress:81.79% Training Accuracy:0.8626771653543307%Iter:1 Progress:81.83% Training Accuracy:0.8627080521817364%Iter:1 Progress:81.87% Training Accuracy:0.8627164380481224%Iter:1 Progress:81.91% Training Accuracy:0.8627473021582733%Iter:1 Progress:81.95% Training Accuracy:0.8627331984715666%Iter:1 Progress:81.99% Tr

Iter:1 Progress:87.39% Training Accuracy:0.863969465648855%Iter:1 Progress:87.43% Training Accuracy:0.8639555167902312%Iter:1 Progress:87.47% Training Accuracy:0.8639633747547416%Iter:1 Progress:87.51% Training Accuracy:0.8639930252833479%Iter:1 Progress:87.55% Training Accuracy:0.8640008716495968%Iter:1 Progress:87.59% Training Accuracy:0.86400871459695%Iter:1 Progress:87.63% Training Accuracy:0.863994772380745%Iter:1 Progress:87.67% Training Accuracy:0.8639808362369338%Iter:1 Progress:87.71% Training Accuracy:0.8640104506858263%Iter:1 Progress:87.75% Training Accuracy:0.864018284719199%Iter:1 Progress:87.79% Training Accuracy:0.8640478781284004%Iter:1 Progress:87.83% Training Accuracy:0.8640774586597041%Iter:1 Progress:87.87% Training Accuracy:0.8640852730041332%Iter:1 Progress:87.91% Training Accuracy:0.8640930839495433%Iter:1 Progress:87.95% Training Accuracy:0.864122635355512%Iter:1 Progress:87.99% Training Accuracy:0.8641521739130434%Iter:1 Progress:88.03% Traini

Iter:1 Progress:93.35% Training Accuracy:0.8648077735530207%Iter:1 Progress:93.39% Training Accuracy:0.8648363252375924%Iter:1 Progress:93.43% Training Accuracy:0.8648648648648649%Iter:1 Progress:93.47% Training Accuracy:0.8648722820350433%Iter:1 Progress:93.51% Training Accuracy:0.8648796960742929%Iter:1 Progress:93.55% Training Accuracy:0.8649082084828023%Iter:1 Progress:93.59% Training Accuracy:0.8649367088607595%Iter:1 Progress:93.63% Training Accuracy:0.8649441046192786%Iter:1 Progress:93.67% Training Accuracy:0.8649725854070013%Iter:1 Progress:93.71% Training Accuracy:0.8649799704828168%Iter:1 Progress:93.75% Training Accuracy:0.8650084317032041%Iter:1 Progress:93.79% Training Accuracy:0.8650368809272919%Iter:1 Progress:93.83% Training Accuracy:0.8650442477876106%Iter:1 Progress:93.87% Training Accuracy:0.8650305456077523%Iter:1 Progress:93.91% Training Accuracy:0.8650589721988206%Iter:1 Progress:93.95% Training Accuracy:0.8650663297536323%Iter:1 Progress:93.99% 

#### Comparing word embeddings

In [14]:
from collections import Counter
import math 

def similar(target='beautiful'):
    target_index = word2index[target]
    scores = Counter()
    for word,index in word2index.items():
        raw_difference = weights_0_1[index] - (weights_0_1[target_index])
        squared_difference = raw_difference * raw_difference
        scores[word] = -math.sqrt(sum(squared_difference))

    return scores.most_common(10)

In [15]:
print(similar('beautiful'))

[('beautiful', -0.0), ('entertaining', -0.7465565551463458), ('recommended', -0.779555108217311), ('freedom', -0.7808616525739756), ('plenty', -0.7811375592487844), ('haunting', -0.7839631582799089), ('deserves', -0.7874797074290336), ('outstanding', -0.7880263885923592), ('atmosphere', -0.7992486752172738), ('great', -0.8003973017055172)]


In [16]:
print(similar('terrible'))

[('terrible', -0.0), ('horrible', -0.7487723560249665), ('boring', -0.7624635945633019), ('disappointment', -0.7691802437004109), ('avoid', -0.783921611381947), ('fails', -0.789242933233881), ('annoying', -0.7980921858729391), ('poor', -0.8022217860556791), ('badly', -0.8234072099106997), ('laughable', -0.8248411889308389)]


#### Filling in the blank

In [17]:
import sys,random,math
from collections import Counter
import numpy as np

In [18]:
np.random.seed(1)
random.seed(1)
f = open('reviews.txt')
raw_reviews = f.readlines()
f.close()


tokens = list(map(lambda x:(x.split(" ")),raw_reviews))
wordcnt = Counter()
for sent in tokens:
    for word in sent:
        wordcnt[word] -= 1
vocab = list(set(map(lambda x:x[0],wordcnt.most_common())))

word2index = {}

for i,word in enumerate(vocab):
    word2index[word]=i
    
concatenated = list()
input_dataset = list()
for sent in tokens:
    sent_indices = list()
    for word in sent:
        try:
            sent_indices.append(word2index[word])
            concatenated.append(word2index[word])
        except:
            ""
        input_dataset.append(sent_indices)
concatenated = np.array(concatenated)
random.shuffle(input_dataset)


In [None]:
alpha, iterations = (0.05, 2)
hidden_size,window,negative = (50,2,5)

weights_0_1 = (np.random.rand(len(vocab),hidden_size) - 0.5) * 0.2
weights_1_2 = np.random.rand(len(vocab),hidden_size)*0

layer_2_target = np.zeros(negative+1)
layer_2_target[0] = 1

def similar(target='beautiful'):
  target_index = word2index[target]

  scores = Counter()
  for word,index in word2index.items():
    raw_difference = weights_0_1[index] - (weights_0_1[target_index])
    squared_difference = raw_difference * raw_difference
    scores[word] = -math.sqrt(sum(squared_difference))
  return scores.most_common(10)

def sigmoid(x):
    return 1/(1 + np.exp(-x))

for rev_i,review in enumerate(input_dataset * iterations):
  for target_i in range(len(review)):

    
    target_samples = [review[target_i]]+list(concatenated\
    [(np.random.rand(negative)*len(concatenated)).astype('int').tolist()])

    left_context = review[max(0,target_i-window):target_i]
    right_context = review[target_i+1:min(len(review),target_i+window)]

    layer_1 = np.mean(weights_0_1[left_context+right_context],axis=0)
    layer_2 = sigmoid(layer_1.dot(weights_1_2[target_samples].T))
    layer_2_delta = layer_2 - layer_2_target
    layer_1_delta = layer_2_delta.dot(weights_1_2[target_samples])

    weights_0_1[left_context+right_context] -= layer_1_delta * alpha
    weights_1_2[target_samples] -= np.outer(layer_2_delta,layer_1)*alpha

  if(rev_i % 250 == 0):
    sys.stdout.write('\rProgress:'+str(rev_i/float(len(input_dataset)
        *iterations)) + "   " + str(similar('terrible')))
  sys.stdout.write('\rProgress:'+str(rev_i/float(len(input_dataset)
        *iterations)))
print(similar('terrible'))

Progress:0.010621949620595341  [('terrible', -0.0), ('horrible', -2.717205586638701), ('brilliant', -3.8221067017718173), ('bad', -3.919026886107076), ('good', -4.289102034256767), ('great', -4.365336364583526), ('horrendous', -4.365421339303056), ('wonderful', -4.381293924826662), ('stupid', -4.481175930290705), ('slipshod', -4.515244992376265)])]498)]]3)]41)]])])]])]738)]]