# Download the IMDB Dataset

In [1]:
# Download reviews.txt and labels.txt from here: https://github.com/udacity/deep-learning/tree/master/sentiment-network

def pretty_print_review_and_label(i):
   print(labels[i] + "\t:\t" + reviews[i][:80] + "...")

g = open('reviews.txt','r') # What we know!
reviews = list(map(lambda x:x[:-1],g.readlines()))
g.close()

g = open('labels.txt','r') # What we WANT to know!
labels = list(map(lambda x:x[:-1].upper(),g.readlines()))
g.close()

# Capturing Word Correlation in Input Data

In [2]:
import numpy as np

onehots = {}
onehots['cat'] = np.array([1,0,0,0]) # one-hot encoding
onehots['the'] = np.array([0,1,0,0])
onehots['dog'] = np.array([0,0,1,0])
onehots['sat'] = np.array([0,0,0,1])

sentence = ['the','cat','sat']
x = onehots[sentence[0]] + \
    onehots[sentence[1]] + \
    onehots[sentence[2]]

print("Sent Encoding:" + str(x))

Sent Encoding:[1 1 0 1]


# Predicting Movie Reviews

In [51]:
import sys

f = open('reviews.txt') # load files
raw_reviews = f.readlines() # one review = one review list item
f.close()

f = open('labels.txt')
raw_labels = f.readlines()
f.close()

docs = list(map(lambda x: set(x.split(" ")), raw_reviews)) # build a set out of the reviews (sentences are sets here)

vocab = set() # build vocabulary
for doc in docs: # sent -> docs as one review is a document consisting of a few sentences
    for word in doc:
        if(len(word) > 0):
            vocab.add(word)
vocab = list(vocab)

word2index = {}
for i,word in enumerate(vocab):
    word2index[word] = i

input_dataset = list() # each list item is a review, the vector representing the review has the word indices
for doc in docs:
    doc_indices = list()
    for word in doc:
        try:
            doc_indices.append(word2index[word])
        except:
            ""
    input_dataset.append(list(set(doc_indices)))

target_dataset = list()
for label in raw_labels:
    if label == 'positive\n':
        target_dataset.append(1) # 1 for pos, 0 for neg, index here == index input_dataset
    else:
        target_dataset.append(0)

In [19]:
raw_reviews = ['bromwell high is a cartoon comedy .']
print(list(map(lambda x: set(x.split(' ')), raw_reviews)))

[{'cartoon', '.', 'comedy', 'a', 'high', 'bromwell', 'is'}]


In [32]:
# details on this

import sys

#f = open('reviews.txt') # load files
#raw_reviews = f.readlines() # one review = one review list item, this is a list
#f.close()

raw_reviews = ['bromwell high is a cartoon comedy .  programs about school life  such as  teachers  . my   hetic teachers  pomp  the pettiness of the whole situation  all remind me of the schools i knew and their students . when i saw the episode in which a student repeatedly tried to burn down the school  i immediately recalled .. at . . high . a classic line . student welcome to bromwell high . i expect that many adults of my age think that bromwell high is far fetched . what a pity that it isn  t ']

raw_labels = ['positive\n']

docs = list(map(lambda x: set(x.split(" ")), raw_reviews)) # build a set out of the reviews (sentences are sets here)

print(docs)

vocab = set() # build vocabulary
for doc in docs: # sent -> docs as one review is a document consisting of a few sentences
    print('doc:', doc)
    for word in list(doc):
        print('word:', word)
        if(len(word) > 0):
            vocab.add(word)
vocab = list(vocab)

print('vocab:', vocab)

word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i
    
print('word indices: ', word2index)

input_dataset = list() # each list item is a review, the vector representing the review has the word indices
for doc in docs:
    doc_indices = list()
    for word in doc:
        try:
            doc_indices.append(word2index[word])
        except:
            ""
    input_dataset.append(list(set(doc_indices)))
    
print('doc indices: ', doc_indices)
print('input ds: ', input_dataset)

target_dataset = list()
for label in raw_labels:
    if label == 'positive\n':
        target_dataset.append(1) # 1 for pos, 0 for neg, index here == index input_dataset
    else:
        target_dataset.append(0)
        
print('target_dataset: ', target_dataset)

[{'', 'life', 'far', '.', 'at', 'i', 'tried', 'high', 'bromwell', 'programs', 'students', 'is', 'immediately', 'my', 'when', 'all', 'line', 't', 'such', 'pomp', 'isn', 'welcome', 'age', 'fetched', 'a', 'burn', 'down', 'pettiness', 'classic', 'student', 'and', 'many', 'of', 'which', 'episode', 'it', 'cartoon', 'remind', 'me', 'saw', 'schools', 'to', 'comedy', 'school', 'expect', 'that', 'recalled', 'adults', 'hetic', 'as', 'repeatedly', '..', 'teachers', 'whole', 'knew', 'in', 'about', 'their', 'the', 'what', 'situation', 'think', 'pity'}]
doc: {'', 'life', 'far', '.', 'at', 'i', 'tried', 'high', 'bromwell', 'programs', 'students', 'is', 'immediately', 'my', 'when', 'all', 'line', 't', 'such', 'pomp', 'isn', 'welcome', 'age', 'fetched', 'a', 'burn', 'down', 'pettiness', 'classic', 'student', 'and', 'many', 'of', 'which', 'episode', 'it', 'cartoon', 'remind', 'me', 'saw', 'schools', 'to', 'comedy', 'school', 'expect', 'that', 'recalled', 'adults', 'hetic', 'as', 'repeatedly', '..', 'teac

In [4]:
# NN - for details on weight updates, see https://github.com/iamtrask/Grokking-Deep-Learning/issues/50
import numpy as np

np.random.seed(1)

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def sigmoidd(x):
    #s = sigmoid(x)
    #return s * (1 - s)
    return 0.25 # test deriv approx

def relu(x):
    return (x>0) * x

def relud(x):
    return x>0

alpha, iterations = (0.01, 50)
hidden_size = 100

weights_0_1 = 0.2*np.random.random((len(vocab), hidden_size)) - 0.1 # vocab to hidden
weights_1_2 = 0.2*np.random.random((hidden_size, 1)) - 0.1 # hidden to 1

correct, total = (0, 0)

for iter in range(iterations):
    
    # train on first 24,000
    for i in range(len(input_dataset) - 1000):
        
        #dropout_mask = np.random.randint(2, size = layer_1.shape)
        #dropout_mask = np.random.randint(2, size = layer_2.shape)

        x, y = (input_dataset[i], target_dataset[i]) # i-th element of dataset
        
        layer_1 = sigmoid(np.sum(weights_0_1[x], axis=0)) # embed + sigmoid: one-hot encoding: summation faster than product
        #layer_1 = relu(np.sum(weights_0_1[x], axis=0))
        
        #layer_1 *= dropout_mask * 2
        
        layer_2 = sigmoid(np.dot(layer_1, weights_1_2)) # linear + softmax
        #layer_2 = relu(np.dot(layer_1, weights_1_2))
        
        #layer_2 *= dropout_mask * 2

        layer_2_delta = layer_2 - y # compare pred with truth = error
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) # backprop
        
        #layer_1_delta *= dropout_mask
        #layer_2_delta *= dropout_mask

        #weights_0_1[x] -= layer_1_delta * alpha # weight updates
        #weights_1_2 -= np.outer(layer_1, layer_2_delta) * alpha
        
        dw12 = alpha * np.outer(layer_2_delta, sigmoidd(np.dot(layer_1, weights_1_2)))
        weights_1_2 -= dw12
        weights_0_1[x] -= np.dot(dw12, weights_1_2.T) * sigmoidd(np.sum(weights_0_1[x], axis=0)) # weight updates
        # there would be * x but this is implied in the weights_0_1[x] (x is one-hot so only those indices with x!=0 get updates)
        
        if(np.abs(layer_2_delta) < 0.5): # is it a close fit?
            correct += 1 # increment correct guesses
            
        total += 1 # total guesses
        
        if(i % 20 == 1):
            progress = str(i/float(len(input_dataset)))
            sys.stdout.write('\rIter:'+str(iter)\
                             +' Progress:'+progress[2:4]\
                             +'.'+progress[4:6]\
                             +'% Training Accuracy:'\
                             + str(correct/float(total)) + '%')
    
    print()
    
correct, total = (0, 0)

for i in range(len(input_dataset)-1000, len(input_dataset)): # use the last 1000 as test

    x = input_dataset[i]
    y = target_dataset[i]

    layer_1 = sigmoid(np.sum(weights_0_1[x], axis=0))
    layer_2 = sigmoid(np.dot(layer_1, weights_1_2))
    #layer_1 = relu(np.sum(weights_0_1[x], axis=0))
    #layer_2 = relu(np.dot(layer_1, weights_1_2))
    
    if(np.abs(layer_2 - y) < 0.5):
        correct += 1
    total += 1
print("Test Accuracy:" + str(correct / float(total)))

Iter:0 Progress:95.92% Training Accuracy:0.5710532899674756%%
Iter:1 Progress:95.92% Training Accuracy:0.6496186069776166%
Iter:2 Progress:95.92% Training Accuracy:0.68688005334667%5%
Iter:3 Progress:95.92% Training Accuracy:0.7099143589423017%
Iter:4 Progress:95.92% Training Accuracy:0.7259422246670334%
Iter:5 Progress:95.92% Training Accuracy:0.7378700115292189%
Iter:6 Progress:95.92% Training Accuracy:0.7473062590039409%
Iter:7 Progress:95.92% Training Accuracy:0.7548624350199498%
Iter:8 Progress:95.92% Training Accuracy:0.7610310118435796%
Iter:9 Progress:95.92% Training Accuracy:0.766365810769141%%
Iter:10 Progress:95.92% Training Accuracy:0.7709275632429484%
Iter:11 Progress:95.92% Training Accuracy:0.7749338500322936%
Iter:12 Progress:95.92% Training Accuracy:0.778532094800341%%
Iter:13 Progress:95.92% Training Accuracy:0.781747236459096%%
Iter:14 Progress:95.92% Training Accuracy:0.7846586773783134%
Iter:15 Progress:95.92% Training Accuracy:0.7873233641160263%
Iter:16 Progress:

In [34]:
# NN (original version)
import numpy as np

np.random.seed(1)

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def sigmoidd(x):
    s = sigmoid(x)
    return s * (1 - s)

def relu(x):
    return (x>0) * x

def relud(x):
    return x>0

alpha, iterations = (0.01, 2)
hidden_size = 100

weights_0_1 = 0.2*np.random.random((len(vocab), hidden_size)) - 0.1 # vocab to hidden
weights_1_2 = 0.2*np.random.random((hidden_size, 1)) - 0.1 # hidden to 1

correct, total = (0, 0)

for iter in range(iterations):
    
    # train on first 24,000
    for i in range(len(input_dataset) - 1000):

        x, y = (input_dataset[i], target_dataset[i]) # i-th element of dataset
        
        layer_1 = sigmoid(np.sum(weights_0_1[x], axis=0)) # embed + sigmoid: one-hot encoding: summation faster than product
        layer_2 = sigmoid(np.dot(layer_1, weights_1_2)) # linear + softmax

        layer_2_delta = layer_2 - y # compare pred with truth = error
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) # backprop
        
        # W_1_2-=alpha * L2delta * sigmoidd(L1 * W_1_2) * L1 -> alpha * L2delta * L1
        # W_0_1-=alpha * L1delta * sigmoidd(L1 * W_1_2) * sigmoidd(W_0_1 * x) * x -> alpha * L1delta # x dropped since W_0_1[x]
        weights_0_1[x] -= layer_1_delta * alpha # weight updates: sig' = approx 1/4 (Taylor exp. 1/2+x/4+-...)
        weights_1_2 -= np.outer(layer_1, layer_2_delta) * alpha
        
        if(np.abs(layer_2_delta) < 0.5): # is it a close fit?
            correct += 1 # increment correct guesses
            
        total += 1 # total guesses
        
        if(i % 20 == 1):
            progress = str(i/float(len(input_dataset)))
            sys.stdout.write('\rIter:'+str(iter)\
                             +' Progress:'+progress[2:4]\
                             +'.'+progress[4:6]\
                             +'% Training Accuracy:'\
                             + str(correct/float(total)) + '%')
    
    print()
    
correct, total = (0, 0)

for i in range(len(input_dataset)-1000, len(input_dataset)): # use the last 1000 as test

    x = input_dataset[i]
    y = target_dataset[i]

    layer_1 = sigmoid(np.sum(weights_0_1[x], axis=0))
    layer_2 = sigmoid(np.dot(layer_1, weights_1_2))
    
    if(np.abs(layer_2 - y) < 0.5):
        correct += 1
    total += 1
print("Test Accuracy:" + str(correct / float(total)))

Iter:0 Progress:95.92% Training Accuracy:0.8334584271536986%%
Iter:1 Progress:95.92% Training Accuracy:0.8669709474386228%
Test Accuracy:0.851


In [5]:
tokens[0]

{'',
 '\n',
 '.',
 'a',
 'about',
 'adults',
 'age',
 'all',
 'and',
 'as',
 'at',
 'believe',
 'bromwell',
 'burn',
 'can',
 'cartoon',
 'classic',
 'closer',
 'comedy',
 'down',
 'episode',
 'expect',
 'far',
 'fetched',
 'financially',
 'here',
 'high',
 'i',
 'immediately',
 'in',
 'insightful',
 'inspector',
 'is',
 'isn',
 'it',
 'knew',
 'lead',
 'life',
 'line',
 'm',
 'many',
 'me',
 'much',
 'my',
 'of',
 'one',
 'other',
 'pathetic',
 'pettiness',
 'pity',
 'pomp',
 'profession',
 'programs',
 'ran',
 'reality',
 'recalled',
 'remind',
 'repeatedly',
 'right',
 's',
 'sack',
 'same',
 'satire',
 'saw',
 'school',
 'schools',
 'scramble',
 'see',
 'situation',
 'some',
 'student',
 'students',
 'such',
 'survive',
 't',
 'teachers',
 'teaching',
 'than',
 'that',
 'the',
 'their',
 'think',
 'through',
 'time',
 'to',
 'tried',
 'welcome',
 'what',
 'when',
 'which',
 'who',
 'whole',
 'years',
 'your'}

# Comparing Word Embeddings

In [35]:
from collections import Counter
import math 

def similar(weights_0_1, target='beautiful'):
    target_index = word2index[target]
    scores = Counter()
    for word,index in word2index.items():
        raw_difference = weights_0_1[index] - (weights_0_1[target_index]) # just the differences in weights
        squared_difference = raw_difference * raw_difference
        scores[word] = -math.sqrt(sum(squared_difference)) # Euclidean

    return scores.most_common(10)

In [36]:
print(similar(weights_0_1, 'beautiful'))

[('beautiful', -0.0), ('tragic', -0.7306265654065514), ('enjoyed', -0.7632868322705841), ('driven', -0.7686305811580804), ('fun', -0.7714824014847391), ('sweet', -0.7837695989832669), ('heart', -0.7844109735762739), ('realistic', -0.7901695264557109), ('surprisingly', -0.7926595452094244), ('captures', -0.792843332954173)]


In [37]:
print(similar(weights_0_1, 'terrible'))

[('terrible', -0.0), ('annoying', -0.7566043081757956), ('boring', -0.7707002608577496), ('dull', -0.7836198932116197), ('lame', -0.7974698427524632), ('horrible', -0.8078527412982647), ('poor', -0.8098066367432781), ('disappointing', -0.8165757704189643), ('disappointment', -0.8484309632881819), ('fails', -0.8515951157533852)]


# Filling in the Blank

In [43]:
# remove focus term from training phrase and negative sampling
# test case
import sys,random,math
from collections import Counter # elements are stored as dictionary keys and their counts are stored as dictionary values
import numpy as np

np.random.seed(1)
random.seed(1)

raw_reviews = ['bromwell high is a cartoon comedy .  programs about school life  such as  teachers  . my   hetic teachers  pomp  the pettiness of the whole situation  all remind me of the schools i knew and their students . when i saw the episode in which a student repeatedly tried to burn down the school  i immediately recalled .. at . . high . a classic line . student welcome to bromwell high . i expect that many adults of my age think that bromwell high is far fetched . what a pity that it isn  t ']

docs = list(map(lambda x:(x.split(" ")), raw_reviews))

wordcnt = Counter()
for doc in docs:
    for word in doc:
        wordcnt[word] -= 1
vocab = list(set(map(lambda x:x[0], wordcnt.most_common()))) # vocab now sorted by frequency

print('vocabulary: ', vocab)

word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i
    
print('wordidx: ', word2index)

concatenated = list()
input_dataset = list()
for doc in docs:
    doc_indices = list()
    for word in doc:
        try:
            doc_indices.append(word2index[word])
            concatenated.append(word2index[word])
        except:
            ""
    input_dataset.append(doc_indices)
    
    print('doc indices:', doc_indices)
    
concatenated = np.array(concatenated)
random.shuffle(input_dataset)

print('concatenated:', concatenated)
print('input_dataset:', input_dataset)

vocabulary:  ['', 'life', 'far', 'at', '.', 'tried', 'i', 'programs', 'bromwell', 'high', 'students', 'is', 'immediately', 'my', 'when', 'all', 'line', 't', 'such', 'pomp', 'isn', 'welcome', 'age', 'fetched', 'a', 'burn', 'down', 'pettiness', 'classic', 'student', 'and', 'many', 'of', 'which', 'episode', 'it', 'cartoon', 'remind', 'me', 'saw', 'schools', 'expect', 'comedy', 'school', 'to', 'that', 'recalled', 'adults', 'hetic', 'as', 'repeatedly', '..', 'teachers', 'whole', 'knew', 'in', 'about', 'their', 'the', 'what', 'situation', 'think', 'pity']
wordidx:  {'': 0, 'life': 1, 'far': 2, 'at': 3, '.': 4, 'tried': 5, 'i': 6, 'programs': 7, 'bromwell': 8, 'high': 9, 'students': 10, 'is': 11, 'immediately': 12, 'my': 13, 'when': 14, 'all': 15, 'line': 16, 't': 17, 'such': 18, 'pomp': 19, 'isn': 20, 'welcome': 21, 'age': 22, 'fetched': 23, 'a': 24, 'burn': 25, 'down': 26, 'pettiness': 27, 'classic': 28, 'student': 29, 'and': 30, 'many': 31, 'of': 32, 'which': 33, 'episode': 34, 'it': 35, '

In [52]:
# remove focus term from training phrase and negative sampling
import sys,random,math
from collections import Counter # elements are stored as dictionary keys and their counts are stored as dictionary values
import numpy as np

np.random.seed(1)
random.seed(1)

f = open('reviews.txt')
raw_reviews = f.readlines()
f.close()

docs = list(map(lambda x:(x.split(" ")), raw_reviews))

wordcnt = Counter()
for doc in docs:
    for word in doc:
        wordcnt[word] -= 1
vocab = list(set(map(lambda x:x[0], wordcnt.most_common())))

word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i

concatenated = list()
input_dataset = list()
for doc in docs:
    doc_indices = list()
    for word in doc:
        try:
            doc_indices.append(word2index[word])
            concatenated.append(word2index[word])
        except:
            ""
    input_dataset.append(doc_indices)
    
concatenated = np.array(concatenated)
random.shuffle(input_dataset)

In [45]:
(np.random.rand(negative)*len(concatenated)).astype('int').tolist()

[3110699, 5373129, 853, 2255194, 1094698]

In [54]:
# NN with word identity and negative sampling

alpha, iterations = (0.05, 2)
hidden_size, window, negative = (40, 4, 5) # negative = size of the subset

weights_0_1 = (np.random.rand(len(vocab), hidden_size) - 0.5) * 0.2
weights_1_2 = np.random.rand(len(vocab), hidden_size)*0

layer_2_target = np.zeros(negative + 1)
layer_2_target[0] = 1

def similar(target='beautiful'):
    target_index = word2index[target]
    scores = Counter()
    for word,index in word2index.items():
        raw_difference = weights_0_1[index] - (weights_0_1[target_index])
        squared_difference = raw_difference * raw_difference
        scores[word] = -math.sqrt(sum(squared_difference))
    return scores.most_common(10)

def sigmoid(x):
    return 1/(1 + np.exp(-x))

for rev_i, review in enumerate(input_dataset * iterations):
    for target_i in range(len(review)):

        # since it's really expensive to predict every vocabulary
        # we're only going to predict a random subset
        target_samples = [review[target_i]] + list(concatenated\
                                                 [(np.random.rand(negative)*len(concatenated)).astype('int').tolist()])
        # this is the target word (for each word in the review) plus a random sample of 5 words
        # something like [1, 37015, 1, 11752, 51361, 61047]
        
        left_context = review[max(0, target_i - window):target_i]
        right_context = review[target_i+1:min(len(review), target_i + window)]

        #layer_1 = sigmoid(np.sum(weights_0_1[x], axis=0))
        #layer_2 = sigmoid(np.dot(layer_1, weights_1_2))
        layer_1 = np.mean(weights_0_1[left_context + right_context], axis=0) # mean over window of W01
        layer_2 = sigmoid(layer_1.dot(weights_1_2[target_samples].T))
        layer_2_delta = layer_2 - layer_2_target # weight updates like above: sigmoidd assumed to be constant
        layer_1_delta = layer_2_delta.dot(weights_1_2[target_samples])

        weights_0_1[left_context + right_context] -= layer_1_delta * alpha
        weights_1_2[target_samples] -= np.outer(layer_2_delta,layer_1) * alpha

    if(rev_i % 250 == 0):
        sys.stdout.write('\rProgress:'+str(rev_i/float(len(input_dataset)
                                                       *iterations)) + "   " + str(similar('terrible')))
    sys.stdout.write('\rProgress:'+str(rev_i/float(len(input_dataset)
                                                   *iterations)))

print(similar('terrible'))

Progress:0.99998 [('terrible', -0.0), ('horrible', -3.865448020467825), ('fantastic', -4.663143498112564), ('dreadful', -4.715999496121085), ('dire', -4.7468630196928165), ('abysmal', -4.9823417743379235), ('marvelous', -5.024885049664849), ('atrocious', -5.178928121556328), ('phenomenal', -5.188060693819728), ('horrendous', -5.2591910675891755)]]255495)]4)][('terrible', -0.0), ('horrible', -3.765849644584859), ('dreadful', -4.528184528887639), ('dire', -4.880631539903571), ('fantastic', -4.9519905798824775), ('phenomenal', -5.074161569728423), ('abysmal', -5.086930645748105), ('marvelous', -5.122752442871552), ('horrid', -5.286144172273731), ('horrendous', -5.313135003074647)]


# King - Man + Woman ~= Queen

In [55]:
def analogy(positive=['terrible','good'],negative=['bad']):
    
    norms = np.sum(weights_0_1 * weights_0_1,axis=1)
    norms.resize(norms.shape[0],1)
    
    normed_weights = weights_0_1 * norms
    
    query_vect = np.zeros(len(weights_0_1[0]))
    for word in positive:
        query_vect += normed_weights[word2index[word]] # add this vector
    for word in negative:
        query_vect -= normed_weights[word2index[word]] # and subtract this (the negative) vector
    
    scores = Counter()
    for word,index in word2index.items():
        raw_difference = weights_0_1[index] - query_vect
        squared_difference = raw_difference * raw_difference
        scores[word] = -math.sqrt(sum(squared_difference))
        
    return scores.most_common(10)[1:]

In [56]:
analogy(['terrible','good'],['bad'])

[('perfect', -514.357054637724),
 ('superb', -514.6100564491167),
 ('decent', -514.8468298800373),
 ('stunning', -515.0387448848408),
 ('wonderful', -515.2863004152655),
 ('nice', -515.4087747709082),
 ('terrific', -515.6317972525602),
 ('excellent', -515.7388992694316),
 ('pleasant', -515.7514889576819)]

In [57]:
analogy(['elizabeth','he'],['she'])

[('j', -578.7565848638679),
 ('u', -579.1389356816334),
 ('dr', -579.2460586812555),
 ('jr', -579.249838158948),
 ('ms', -579.5895941600976),
 ('p', -579.9270204252564),
 ('smith', -579.9936149731445),
 ('v', -580.2005067038277),
 ('mr', -580.240517452727)]

In [58]:
analogy(['powerful','weak'],['bad'])

[('tale', -559.2467961945546),
 ('complex', -559.3350441577724),
 ('intense', -559.3929135314795),
 ('fascinating', -559.5849700874645),
 ('compelling', -559.755543066805),
 ('wit', -559.8002931248143),
 ('powerful', -559.8481473513584),
 ('tension', -559.8865856219109),
 ('poignant', -559.9413573579982)]