* Load PMI matrix
* Load dictionary
* Calculate scores
* Create plots

In [1]:
import numpy as np
def load_pmi(path):
    pmi = np.load(path)
    print('Loaded {}'.format(path))
    return pmi

import pickle
def load_dictionary(path):
    fname = open(path, 'rb')
    data = pickle.load(fname)
    return data

def calc_score_v2(pmi, d, sentence_a, sentence_b):
    s = 0
    pairs = 0
    for word_1 in sentence_a.split():
        for word_2 in sentence_b.split():
            x = d.token2id.get(word_1,-1)
            y = d.token2id.get(word_2,-1)
            if x == -1 or y == -1:
                continue
            s += pmi[x][y]
            pairs += 1
    with np.errstate(divide='ignore', invalid='ignore'):
        c = np.true_divide(s, pairs)
    return c

In [2]:
def interpolate(lex, removed_indices):
    for index in removed_indices:
        if index < len(lex):
            lex = np.insert(lex, index, lex[index - 1])
    return lex

In [3]:
def get_all_scores(pmi, d, all_sentences, k):
    successive = []
    for i in range(len(all_sentences) - k):
        successive.append(calc_score_v2(pmi, d, all_sentences[i], all_sentences[i+k]))
    return successive

In [4]:
def get_all_sentences(data):
    return [line for line in data.split('\n') if len(line)>0]

In [5]:
def read_txt(fname):
    with open(fname, 'r') as f:
        all = f.read()
    return all

In [6]:
import os
import pandas as pd

In [7]:
from numpy import dot
from numpy.linalg import norm
def cos_sim(a, b):
    with np.errstate(divide='ignore', invalid='ignore'):
        c = np.true_divide(dot(a, b), norm(a)*norm(b))
    return c
#     return dot(a, b)/(norm(a)*norm(b))

In [8]:
def iterator(path):
    for fx in os.listdir(path):
        if fx.endswith('.pkl'):
            d = load_dictionary(path+fx)
            
        if fx.endswith('_norm.npy'):
            pmi = load_pmi(path+fx)
            
        if fx.endswith('lexical.txt'):
            data = read_txt(path+fx)
            sentences = get_all_sentences(data)
            
    return pmi, d, sentences

In [9]:
import time

In [20]:
def generator(path, k):
    pmi, d, sentences = iterator(path)
    parent_dir = os.path.basename(os.path.dirname(path))
#     name = parent_dir + '_lexical_wt_pmi_norm.npy'
    
#     lexical = get_all_scores(pmi, d, sentences, k)
#     np.save(path + name, lexical)
    
#     name = parent_dir + '_lexical_wt_pmi_norm'
#     df = pd.DataFrame(lexical)
#     df.to_csv(path + name+'.csv')
#     print("Created {}.csv".format(name))
    
    print('Begin generating SSM (Lexical Weights)')
    start = time.time()
    ssm = generate_ssm(pmi, d, sentences)
    end = time.time()
    
    print('SSM (Lexical Weights) generated in {}'.format(end-start))
    np.save(path + parent_dir + '_pmi_norm_ssm_lex_wt.npy', ssm)
    

In [11]:
def process_v2(fname):
    all_data = get_data(fname)
    all_data = unidecode.unidecode(all_data)
    sentences = make_sentences(all_data)
    clean_sentences = []
    removed_sentences = []
    for i, sentence in enumerate(sentences):
        t = remove_punc_clean(sentence)
        if len(t) > 0:
            clean_sentences.append(t)
        else:
            removed_sentences.append(i)

    # write_to_file_lexical(clean_sentences, fname)
    print('Done processing', fname)
    return removed_sentences

In [12]:
from itertools import combinations
def generate_ssm(pmi, d, sentences):
    slen = len(sentences)
    ssm = np.ones((slen, slen))
    for i, j in combinations(range(slen), 2):
        score = calc_score_v2(pmi, d, sentences[i], sentences[j])
        ssm[i, j] = score
        ssm[j, i] = score
    return ssm

In [13]:
def similarity_bwn_word_vectors(pmi, d, sentence_a, sentence_b):
    s = 0
    pairs = 0
    for word_1 in sentence_a.split():
        for word_2 in sentence_b.split():
            x = d.token2id.get(word_1,-1)
            y = d.token2id.get(word_2,-1)
            if x == -1 or y == -1:
                continue
            s += cos_sim(pmi[x], pmi[y])
            pairs += 1
    with np.errstate(divide='ignore', invalid='ignore'):
        c = np.true_divide(s, pairs)
    return c

In [14]:
def generate_lexical_vectors(pmi, d, sentences, k):
    lex_vectors = []
    for i in range(len(sentences) - k):
        lex_vectors.append(similarity_bwn_word_vectors(pmi, d, sentences[i], sentences[i+k]))
    return np.asarray(lex_vectors)

In [15]:
def generate_ssm_lex_vectors(pmi, d, sentences):
    slen = len(sentences)
    ssm = np.ones((slen, slen))
    for i, j in combinations(range(slen), 2):
        score = similarity_bwn_word_vectors(pmi, d, sentences[i], sentences[j])
#         print('{} {} {:.2f}'.format(i,j, score))
        ssm[i, j] = score
        ssm[j, i] = score
    return ssm

In [17]:
def save_mat(path, out_path = path):
    for fx in os.listdir(path):
        if fx.endswith('.npy'):
            name = fx[:-4]
            print("Loaded "+fx)
            embed = np.load(path+fx)
            # print(embed)

            # sns.histplot(embed)
            book, method = get_embed_method_and_name(name)

            title = book.title() + ' ' + label(method)
            savemat(out_path+title+'.mat', mdict={'values': embed})

NameError: name 'path' is not defined

### This cell contains all the code to generate SSMs using lexical wt. normalized

In [17]:
path = '../final/lexical results/a christmas carol/'
k = 1
generator(path, k)

path = '../final/lexical results/metamorphosis/'
k = 1
generator(path, k)

path = '../final/lexical results/heart of darkness/'
k = 1
generator(path, k)

path = '../final/lexical results/the prophet/'
k = 1
generator(path, k)

Loaded ../final/lexical results/a christmas carol/PMI_norm.npy
Begin generating SSM (Lexical Weights)
SSM (Lexical Weights) generated in 177.1595721244812
Loaded ../final/lexical results/metamorphosis/metamorphosisPMI_norm.npy
Begin generating SSM (Lexical Weights)
SSM (Lexical Weights) generated in 69.24088335037231
Loaded ../final/lexical results/heart of darkness/heart of darkness PMI_norm.npy
Begin generating SSM (Lexical Weights)
SSM (Lexical Weights) generated in 261.0327205657959
Loaded ../final/lexical results/the prophet/the prophetPMI_norm.npy
Begin generating SSM (Lexical Weights)
SSM (Lexical Weights) generated in 20.85270357131958


In [19]:
path = '../final/lexical results/a christmas carol/'
k = 1
# generator(path, k)
parent_dir = os.path.basename(os.path.dirname(path))
pmi, d, sentences = iterator(path)
ssm = generate_ssm(pmi, d, sentences)
np.save(path+'_ssm_lex_wt.npy', ssm)

Loaded ../final/lexical results/a christmas carol/a christmas carol_norm.npy
Created a christmas carol_lexical_wt_pmi_norm.csv
Begin generating SSM (Lexical Weights)
SSM (Lexical Weights) generated in 152.45993733406067


In [34]:
# Lexical vectors
path = '../final/lexical results/a christmas carol/'
k = 1

pmi, d, sentences = iterator(path)
lex = generate_lexical_vectors(pmi, d, sentences, k)
np.save(path + '_pmi_norm_lex_vect.npy', lex)

Loaded ../final/lexical results/a christmas carol/a christmas carol_norm.npy


In [24]:
path = '../final/lexical results/metamorphosis/'
k = 1
generator(path, k)


# pmi, d, sentences = iterator(path)
# ssm = generate_ssm(pmi, d, sentences)
# np.save(path+'new_ssmv2.npy', ssm)

Loaded ../final/lexical results/metamorphosis/metamorphosis_pmi_norm.npy
Created metamorphosis_lexical_wt_pmi_norm.csv
Begin generating SSM (Lexical Weights)
SSM (Lexical Weights) generated in 73.9301586151123


In [33]:
# Lexical vectors
path = '../final/lexical results/metamorphosis/'
k = 1

pmi, d, sentences = iterator(path)
lex = generate_lexical_vectors(pmi, d, sentences, k)
np.save(path + '_pmi_norm_lex_vect.npy', lex)

Loaded ../final/lexical results/metamorphosis/metamorphosis_pmi_norm.npy


In [25]:
path = '../final/lexical results/heart of darkness/'
k = 1
generator(path, k)

# pmi, d, sentences = iterator(path)
# ssm = generate_ssm(pmi, d, sentences)
# np.save(path+'new_ssm_v2.npy', ssm)

Loaded ../final/lexical results/heart of darkness/heart of darkness_norm.npy
Created heart of darkness_lexical_wt_pmi_norm.csv
Begin generating SSM (Lexical Weights)
SSM (Lexical Weights) generated in 279.6873469352722


In [32]:
path = '../final/lexical results/heart of darkness/'
k = 1

pmi, d, sentences = iterator(path)
lex = generate_lexical_vectors(pmi, d, sentences, k)
np.save(path + '_pmi_norm_lex_vect.npy', lex)

Loaded ../final/lexical results/heart of darkness/heart of darkness_norm.npy


In [29]:
path = '../final/lexical results/the prophet/'
k = 1
generator(path, k)

# pmi, d, sentences = iterator(path)
# ssm = generate_ssm(pmi, d, sentences)
# np.save(path+'new_ssm_v2.npy', ssm)

Loaded ../final/lexical results/the prophet/the prophet_norm.npy
Created the prophet_lexical_wt_pmi_norm.csv
Begin generating SSM (Lexical Weights)
SSM (Lexical Weights) generated in 22.1460120677948


In [31]:
# Lexical vectors
path = '../final/lexical results/the prophet/'
k = 1

pmi, d, sentences = iterator(path)
lex = generate_lexical_vectors(pmi, d, sentences, k)
np.save(path + '_pmi_norm_lex_vect.npy', lex)

Loaded ../final/lexical results/the prophet/the prophet_norm.npy


In [19]:
path = '../final/lexical results/a christmas carol/'
parent_dir = os.path.basename(os.path.dirname(path))
pmi, d, sentences = iterator(path)
ssm = generate_ssm_lex_vectors(pmi, d, sentences)
np.save(path+parent_dir + '_pmi_norm_ssm_lex_vect.npy', ssm)

path = '../final/lexical results/metamorphosis/'
parent_dir = os.path.basename(os.path.dirname(path))
pmi, d, sentences = iterator(path)
ssm = generate_ssm_lex_vectors(pmi, d, sentences)
np.save(path+parent_dir + '_pmi_norm_ssm_lex_vect.npy', ssm)


path = '../final/lexical results/heart of darkness/'
parent_dir = os.path.basename(os.path.dirname(path))
pmi, d, sentences = iterator(path)
ssm = generate_ssm_lex_vectors(pmi, d, sentences)
np.save(path+parent_dir + '_pmi_norm_ssm_lex_vect.npy', ssm)


path = '../final/lexical results/the prophet/'
parent_dir = os.path.basename(os.path.dirname(path))
pmi, d, sentences = iterator(path)
ssm = generate_ssm_lex_vectors(pmi, d, sentences)
np.save(path+parent_dir + '_pmi_norm_ssm_lex_vect.npy', ssm)

Loaded ../final/lexical results/a christmas carol/PMI_norm.npy
Loaded ../final/lexical results/metamorphosis/metamorphosisPMI_norm.npy
Loaded ../final/lexical results/heart of darkness/heart of darkness PMI_norm.npy
Loaded ../final/lexical results/the prophet/the prophetPMI_norm.npy


In [26]:
path = '../final/lexical results/a tale of 2 cities/'
k = 1
generator(path, k)

Created a tale of 2 cities_lexical.npy


In [27]:
path = '../final/lexical results/great gatsby/'
k = 1
generator(path, k)

Created great gatsby_lexical.npy


In [29]:
path = '../final/lexical results/mysterious affair/'
k = 1
generator(path, k)

Created mysterious affair_lexical.npy


In [30]:
path = '../final/lexical results/pride and prejudice/'
k = 1
generator(path, k)

Created pride and prejudice_lexical.npy


In [None]:
path = '../final/lexical results/a christmas carol/'
for fx in os.listdir(path):
    if fx.endswith('.txt'):
        data = read_txt(path+fx)
        indices = process_v2()
        sentences = get_all_sentences(data)
            

In [54]:
path = '../final/lexical results/a christmas carol/'
pmi_carol, d_carol, sentences_carol = iterator(path)
# mean_vectors = lexical_mean_vectors(pmi_carol, d_carol, sentences_carol)

# np.save(path+'lexical_mean_vectors.npy', mean_vectors)
ssm = generate_ssm(pmi_carol, d_carol, sentences_carol)
np.save(path+'new_ssm_v2.npy', ssm)


In [15]:
max_vectors = lexical_max_vectors(pmi_carol, d_carol, sentences_carol)
np.save(path+'lexical_max_vectors.npy', mean_vectors)

In [63]:
path = '../final/lexical results/a christmas carol/'

pmi, d, sentences = iterator(path)
ssm = generate_ssm_lex_vectors(pmi, d, sentences)
np.save(path+'new_ssm_lex_vect.npy', ssm)

In [64]:
path = '../final/lexical results/metamorphosis/'

pmi, d, sentences = iterator(path)
ssm = generate_ssm_lex_vectors(pmi, d, sentences)
np.save(path+'new_ssm_lex_vect.npy', ssm)

In [78]:
path = '../final/lexical results/heart of darkness/'

pmi, d, sentences = iterator(path)
ssm = generate_ssm_lex_vectors(pmi, d, sentences)
np.save(path+'new_ssm_lex_vect2.npy', ssm)

0 1 0.87
0 2 0.87
0 3 0.91
0 4 0.90
0 5 0.91
0 6 0.94
0 7 0.90
0 8 0.85
0 9 0.93
0 10 0.91
0 11 0.89
0 12 0.90
0 13 0.89
0 14 0.92
0 15 0.92
0 16 0.91
0 17 0.91
0 18 0.93
0 19 0.90
0 20 0.93
0 21 0.91
0 22 0.90
0 23 0.89
0 24 0.91
0 25 0.89
0 26 0.89
0 27 0.88
0 28 0.88
0 29 0.89
0 30 0.89
0 31 0.91
0 32 0.89
0 33 0.87
0 34 0.91
0 35 0.90
0 36 0.89
0 37 0.91
0 38 0.88
0 39 0.90
0 40 0.86
0 41 0.91
0 42 0.85
0 43 0.88
0 44 0.87
0 45 0.87
0 46 0.90
0 47 0.83
0 48 0.90
0 49 0.91
0 50 0.91
0 51 0.89
0 52 0.88
0 53 0.80
0 54 0.90
0 55 0.86
0 56 0.87
0 57 0.87
0 58 0.92
0 59 0.89
0 60 0.87
0 61 0.90
0 62 0.89
0 63 0.90
0 64 0.84
0 65 0.91
0 66 0.89
0 67 0.84
0 68 0.91
0 69 0.90
0 70 0.90
0 71 0.90
0 72 0.92
0 73 0.85
0 74 0.92
0 75 0.93
0 76 0.88
0 77 0.92
0 78 0.91
0 79 0.93
0 80 0.91
0 81 0.87
0 82 0.90
0 83 0.90
0 84 0.93
0 85 0.92
0 86 0.90
0 87 0.87
0 88 0.88
0 89 0.88
0 90 0.92
0 91 0.87
0 92 0.90
0 93 0.89
0 94 0.85
0 95 0.90
0 96 0.89
0 97 0.89
0 98 0.85
0 99 0.93
0 100 0.89
0 101 0.

0 768 0.88
0 769 0.92
0 770 0.87
0 771 0.84
0 772 0.89
0 773 0.91
0 774 0.93
0 775 0.88
0 776 0.91
0 777 0.90
0 778 0.89
0 779 0.85
0 780 0.90
0 781 0.91
0 782 0.77
0 783 0.91
0 784 0.85
0 785 0.91
0 786 0.93
0 787 0.91
0 788 0.89
0 789 0.92
0 790 0.91
0 791 0.92
0 792 0.87
0 793 0.92
0 794 0.89
0 795 0.90
0 796 0.93
0 797 0.87
0 798 0.91
0 799 0.87
0 800 0.90
0 801 0.93
0 802 0.91
0 803 0.90
0 804 0.88
0 805 0.86
0 806 0.87
0 807 0.90
0 808 0.88
0 809 0.89
0 810 0.90
0 811 0.88
0 812 0.91
0 813 0.92
0 814 0.89
0 815 0.91
0 816 0.83
0 817 0.79
0 818 0.88
0 819 0.84
0 820 0.90
0 821 0.87
0 822 0.91
0 823 0.90
0 824 0.92
0 825 0.90
0 826 0.89
0 827 0.89
0 828 0.94
0 829 0.92
0 830 0.90
0 831 0.87
0 832 0.89
0 833 0.92
0 834 0.90
0 835 0.93
0 836 0.88
0 837 0.89
0 838 0.95
0 839 0.86
0 840 0.92
0 841 0.86
0 842 0.84
0 843 0.91
0 844 0.93
0 845 0.89
0 846 0.89
0 847 0.91
0 848 0.89
0 849 0.89
0 850 0.92
0 851 0.89
0 852 0.91
0 853 0.89
0 854 0.90
0 855 0.87
0 856 0.90
0 857 0.89
0 858 0.90

0 1477 0.88
0 1478 0.90
0 1479 0.91
0 1480 0.92
0 1481 0.91
0 1482 0.91
0 1483 0.94
0 1484 0.87
0 1485 0.89
0 1486 0.91
0 1487 0.86
0 1488 0.90
0 1489 0.95
0 1490 0.88
0 1491 0.86
0 1492 0.91
0 1493 0.91
0 1494 0.89
0 1495 0.87
0 1496 0.89
0 1497 0.91
0 1498 0.91
0 1499 0.89
0 1500 0.91
0 1501 0.88
0 1502 0.89
0 1503 0.90
0 1504 0.86
0 1505 0.91
0 1506 0.90
0 1507 0.89
0 1508 0.94
0 1509 0.89
0 1510 0.87
0 1511 0.88
0 1512 0.92
0 1513 0.89
0 1514 0.91
0 1515 0.88
0 1516 0.91
0 1517 0.83
0 1518 0.87
0 1519 0.94
0 1520 0.89
0 1521 0.86
0 1522 0.85
0 1523 0.80
0 1524 0.90
0 1525 0.89
0 1526 0.87
0 1527 0.90
0 1528 0.89
0 1529 0.95
0 1530 0.92
0 1531 0.89
0 1532 0.92
0 1533 0.92
0 1534 0.89
0 1535 0.91
0 1536 0.94
0 1537 0.92
0 1538 0.92
0 1539 0.84
0 1540 0.91
0 1541 0.90
0 1542 0.84
0 1543 0.85
0 1544 0.77
0 1545 0.90
0 1546 0.91
0 1547 0.88
0 1548 0.87
0 1549 0.88
0 1550 0.92
0 1551 0.90
0 1552 0.90
0 1553 0.84
0 1554 0.91
0 1555 0.92
0 1556 0.89
0 1557 0.92
0 1558 0.88
0 1559 0.86
0 15

0 2189 0.91
0 2190 0.92
0 2191 0.92
0 2192 0.91
0 2193 0.84
0 2194 0.94
0 2195 0.88
0 2196 0.86
0 2197 0.93
0 2198 0.94
0 2199 0.91
0 2200 0.93
0 2201 0.94
0 2202 0.88
0 2203 0.94
0 2204 0.90
0 2205 0.90
0 2206 0.90
0 2207 0.92
0 2208 0.92
0 2209 0.87
0 2210 0.88
0 2211 0.89
0 2212 0.94
0 2213 0.91
0 2214 0.88
0 2215 0.90
0 2216 0.95
0 2217 0.90
0 2218 0.89
0 2219 0.88
0 2220 0.88
0 2221 0.75
0 2222 0.89
0 2223 0.91
0 2224 0.89
0 2225 0.89
0 2226 0.89
0 2227 0.91
0 2228 0.86
0 2229 0.93
0 2230 0.93
0 2231 0.90
0 2232 0.94
0 2233 0.91
0 2234 0.91
0 2235 0.95
0 2236 0.90
0 2237 0.91
0 2238 0.90
0 2239 0.88
0 2240 0.93
0 2241 0.91
0 2242 0.89
0 2243 0.92
0 2244 0.93
0 2245 0.91
0 2246 0.89
0 2247 0.88
0 2248 0.88
0 2249 0.94
0 2250 0.88
0 2251 0.83
0 2252 0.92
0 2253 0.93
0 2254 0.89
0 2255 0.89
0 2256 0.92
0 2257 0.88
0 2258 0.85
0 2259 0.94
0 2260 0.95
0 2261 0.91
0 2262 0.90
0 2263 0.91
0 2264 0.91
0 2265 0.90
0 2266 0.90
0 2267 0.93
0 2268 0.93
0 2269 0.89
0 2270 0.92
0 2271 0.85
0 22

1 567 0.88
1 568 0.90
1 569 0.87
1 570 0.84
1 571 0.88
1 572 0.90
1 573 0.87
1 574 0.90
1 575 0.87
1 576 0.82
1 577 0.88
1 578 0.87
1 579 0.90
1 580 0.89
1 581 0.89
1 582 0.91
1 583 0.88
1 584 0.90
1 585 0.90
1 586 0.92
1 587 0.89
1 588 0.89
1 589 0.89
1 590 0.90
1 591 0.90
1 592 0.88
1 593 0.88
1 594 0.91
1 595 0.88
1 596 0.90
1 597 0.88
1 598 0.88
1 599 0.88
1 600 0.87
1 601 0.90
1 602 0.89
1 603 0.89
1 604 0.89
1 605 0.89
1 606 0.88
1 607 0.88
1 608 0.89
1 609 0.85
1 610 0.87
1 611 0.91
1 612 0.87
1 613 0.90
1 614 0.90
1 615 0.86
1 616 0.92
1 617 0.86
1 618 0.88
1 619 0.92
1 620 0.90
1 621 0.88
1 622 0.87
1 623 0.87
1 624 0.90
1 625 0.91
1 626 0.89
1 627 0.83
1 628 0.86
1 629 0.88
1 630 0.89
1 631 0.87
1 632 0.87
1 633 0.88
1 634 0.89
1 635 0.89
1 636 0.80
1 637 0.88
1 638 0.85
1 639 0.89
1 640 0.91
1 641 0.88
1 642 0.88
1 643 0.91
1 644 0.90
1 645 0.90
1 646 0.89
1 647 0.88
1 648 0.87
1 649 0.89
1 650 0.87
1 651 0.87
1 652 0.90
1 653 0.87
1 654 0.89
1 655 0.88
1 656 0.89
1 657 0.87

1 1314 0.88
1 1315 0.87
1 1316 0.88
1 1317 0.87
1 1318 0.89
1 1319 0.88
1 1320 0.91
1 1321 0.88
1 1322 0.89
1 1323 0.86
1 1324 0.87
1 1325 0.88
1 1326 0.88
1 1327 0.89
1 1328 0.89
1 1329 0.89
1 1330 0.87
1 1331 0.87
1 1332 0.87
1 1333 0.89
1 1334 0.89
1 1335 0.90
1 1336 0.88
1 1337 0.89
1 1338 0.88
1 1339 0.87
1 1340 0.89
1 1341 0.88
1 1342 0.88
1 1343 0.90
1 1344 0.92
1 1345 0.88
1 1346 0.89
1 1347 0.88
1 1348 0.90
1 1349 0.89
1 1350 0.89
1 1351 0.89
1 1352 0.87
1 1353 0.87
1 1354 0.90
1 1355 0.89
1 1356 0.89
1 1357 0.88
1 1358 0.90
1 1359 0.89
1 1360 0.87
1 1361 0.89
1 1362 0.87
1 1363 0.85
1 1364 0.90
1 1365 0.90
1 1366 0.88
1 1367 0.87
1 1368 0.89
1 1369 0.89
1 1370 0.90
1 1371 0.90
1 1372 0.89
1 1373 0.88
1 1374 0.89
1 1375 0.88
1 1376 0.89
1 1377 0.89
1 1378 0.88
1 1379 0.91
1 1380 0.88
1 1381 0.87
1 1382 0.88
1 1383 0.87
1 1384 0.90
1 1385 0.90
1 1386 0.91
1 1387 0.89
1 1388 0.89
1 1389 0.92
1 1390 0.88
1 1391 0.90
1 1392 0.88
1 1393 0.88
1 1394 0.86
1 1395 0.89
1 1396 0.84
1 13

1 2017 0.89
1 2018 0.90
1 2019 0.89
1 2020 0.90
1 2021 0.89
1 2022 0.88
1 2023 0.89
1 2024 0.87
1 2025 0.90
1 2026 0.89
1 2027 0.88
1 2028 0.89
1 2029 0.88
1 2030 0.86
1 2031 0.85
1 2032 0.85
1 2033 0.90
1 2034 0.89
1 2035 0.89
1 2036 0.88
1 2037 0.89
1 2038 0.88
1 2039 0.88
1 2040 0.86
1 2041 0.86
1 2042 0.89
1 2043 0.88
1 2044 0.86
1 2045 0.88
1 2046 0.91
1 2047 0.91
1 2048 0.88
1 2049 0.88
1 2050 0.89
1 2051 0.89
1 2052 0.88
1 2053 0.86
1 2054 0.88
1 2055 0.88
1 2056 0.88
1 2057 0.88
1 2058 0.87
1 2059 0.88
1 2060 0.87
1 2061 0.90
1 2062 0.86
1 2063 0.89
1 2064 0.88
1 2065 0.90
1 2066 0.91
1 2067 0.91
1 2068 0.90
1 2069 0.89
1 2070 0.91
1 2071 0.90
1 2072 0.88
1 2073 0.90
1 2074 0.89
1 2075 0.88
1 2076 0.90
1 2077 0.89
1 2078 0.88
1 2079 0.89
1 2080 0.86
1 2081 0.87
1 2082 0.86
1 2083 0.87
1 2084 0.89
1 2085 0.89
1 2086 0.88
1 2087 0.89
1 2088 0.87
1 2089 0.88
1 2090 0.87
1 2091 0.90
1 2092 0.89
1 2093 0.90
1 2094 0.89
1 2095 0.90
1 2096 0.90
1 2097 0.90
1 2098 0.88
1 2099 0.89
1 21

2 371 0.89
2 372 0.88
2 373 0.86
2 374 0.88
2 375 0.87
2 376 0.88
2 377 0.87
2 378 0.88
2 379 0.86
2 380 0.88
2 381 0.87
2 382 0.91
2 383 0.89
2 384 0.88
2 385 0.89
2 386 0.87
2 387 0.80
2 388 0.88
2 389 0.87
2 390 0.87
2 391 0.87
2 392 0.86
2 393 0.87
2 394 0.84
2 395 0.90
2 396 0.88
2 397 0.89
2 398 0.88
2 399 0.87
2 400 0.88
2 401 0.87
2 402 0.85
2 403 0.86
2 404 0.88
2 405 0.87
2 406 0.90
2 407 0.87
2 408 0.88
2 409 0.88
2 410 0.90
2 411 0.87
2 412 0.87
2 413 0.86
2 414 0.86
2 415 0.87
2 416 0.88
2 417 0.87
2 418 0.86
2 419 0.91
2 420 0.87
2 421 0.89
2 422 0.88
2 423 0.87
2 424 0.87
2 425 0.87
2 426 0.88
2 427 0.88
2 428 0.87
2 429 0.88
2 430 0.87
2 431 0.87
2 432 0.85
2 433 0.89
2 434 0.88
2 435 0.90
2 436 0.87
2 437 0.87
2 438 0.85
2 439 0.88
2 440 0.89
2 441 0.88
2 442 0.87
2 443 0.88
2 444 0.88
2 445 0.90
2 446 0.88
2 447 0.88
2 448 0.87
2 449 0.86
2 450 0.89
2 451 0.88
2 452 0.86
2 453 0.89
2 454 0.89
2 455 0.90
2 456 0.87
2 457 0.84
2 458 0.87
2 459 0.88
2 460 0.89
2 461 0.87

2 1133 0.90
2 1134 0.88
2 1135 0.87
2 1136 0.87
2 1137 0.87
2 1138 0.88
2 1139 0.86
2 1140 0.88
2 1141 0.86
2 1142 0.87
2 1143 0.90
2 1144 0.88
2 1145 0.88
2 1146 0.92
2 1147 0.89
2 1148 0.88
2 1149 0.87
2 1150 0.86
2 1151 0.87
2 1152 0.86
2 1153 0.88
2 1154 0.87
2 1155 0.87
2 1156 0.87
2 1157 0.87
2 1158 0.87
2 1159 0.91
2 1160 0.89
2 1161 0.90
2 1162 0.87
2 1163 0.88
2 1164 0.86
2 1165 0.87
2 1166 0.88
2 1167 0.85
2 1168 0.89
2 1169 0.87
2 1170 0.85
2 1171 0.87
2 1172 0.86
2 1173 0.88
2 1174 0.86
2 1175 0.87
2 1176 0.87
2 1177 0.85
2 1178 0.86
2 1179 0.87
2 1180 0.81
2 1181 0.86
2 1182 0.87
2 1183 0.89
2 1184 0.89
2 1185 0.88
2 1186 0.88
2 1187 0.88
2 1188 0.86
2 1189 0.89
2 1190 0.86
2 1191 0.90
2 1192 0.88
2 1193 0.89
2 1194 0.88
2 1195 0.86
2 1196 0.87
2 1197 0.89
2 1198 0.87
2 1199 0.87
2 1200 0.86
2 1201 0.88
2 1202 0.89
2 1203 0.87
2 1204 0.90
2 1205 0.88
2 1206 0.84
2 1207 0.88
2 1208 0.91
2 1209 0.88
2 1210 0.86
2 1211 0.88
2 1212 0.89
2 1213 0.87
2 1214 0.88
2 1215 0.87
2 12

2 1838 0.87
2 1839 0.87
2 1840 0.87
2 1841 0.89
2 1842 0.89
2 1843 0.90
2 1844 0.89
2 1845 0.89
2 1846 0.87
2 1847 0.91
2 1848 0.91
2 1849 0.90
2 1850 0.90
2 1851 0.90
2 1852 0.85
2 1853 0.90
2 1854 0.87
2 1855 0.84
2 1856 0.87
2 1857 0.84
2 1858 0.81
2 1859 0.88
2 1860 0.87
2 1861 0.88
2 1862 0.80
2 1863 0.88
2 1864 0.86
2 1865 0.90
2 1866 0.87
2 1867 0.88
2 1868 0.88
2 1869 0.88
2 1870 0.88
2 1871 0.88
2 1872 0.90
2 1873 0.88
2 1874 0.88
2 1875 0.88
2 1876 0.90
2 1877 0.88
2 1878 0.87
2 1879 0.89
2 1880 0.88
2 1881 0.86
2 1882 0.88
2 1883 0.88
2 1884 0.89
2 1885 0.88
2 1886 0.91
2 1887 0.89
2 1888 0.89
2 1889 0.87
2 1890 0.86
2 1891 0.87
2 1892 0.89
2 1893 0.86
2 1894 0.87
2 1895 0.86
2 1896 0.88
2 1897 0.89
2 1898 0.88
2 1899 0.87
2 1900 0.88
2 1901 0.87
2 1902 0.88
2 1903 0.88
2 1904 0.90
2 1905 0.88
2 1906 0.87
2 1907 0.89
2 1908 0.88
2 1909 0.88
2 1910 0.91
2 1911 0.89
2 1912 0.88
2 1913 0.89
2 1914 0.90
2 1915 0.91
2 1916 0.90
2 1917 0.88
2 1918 0.83
2 1919 0.89
2 1920 0.87
2 19

3 168 0.88
3 169 0.90
3 170 0.84
3 171 0.89
3 172 0.88
3 173 0.83
3 174 0.83
3 175 0.92
3 176 0.85
3 177 0.93
3 178 0.88
3 179 0.90
3 180 0.90
3 181 0.90
3 182 0.90
3 183 0.90
3 184 0.92
3 185 0.90
3 186 0.90
3 187 0.80
3 188 0.92
3 189 0.90
3 190 0.89
3 191 0.90
3 192 0.88
3 193 0.82
3 194 0.91
3 195 0.88
3 196 0.90
3 197 0.89
3 198 0.91
3 199 0.83
3 200 0.87
3 201 0.88
3 202 0.88
3 203 0.93
3 204 0.88
3 205 0.92
3 206 0.86
3 207 0.91
3 208 0.91
3 209 0.89
3 210 0.88
3 211 0.90
3 212 0.90
3 213 0.90
3 214 0.89
3 215 0.90
3 216 0.89
3 217 0.88
3 218 0.88
3 219 0.90
3 220 0.84
3 221 0.91
3 222 0.89
3 223 0.92
3 224 0.76
3 225 0.94
3 226 0.94
3 227 0.88
3 228 0.91
3 229 0.90
3 230 0.89
3 231 0.92
3 232 0.90
3 233 0.92
3 234 0.90
3 235 0.87
3 236 0.92
3 237 0.92
3 238 0.89
3 239 0.82
3 240 0.90
3 241 0.92
3 242 0.94
3 243 0.91
3 244 0.90
3 245 0.94
3 246 0.90
3 247 0.94
3 248 0.86
3 249 0.91
3 250 0.87
3 251 0.91
3 252 0.89
3 253 0.90
3 254 0.90
3 255 0.87
3 256 0.86
3 257 0.85
3 258 0.89

3 933 0.85
3 934 0.87
3 935 0.89
3 936 0.84
3 937 0.91
3 938 0.88
3 939 0.93
3 940 0.79
3 941 0.89
3 942 0.92
3 943 0.91
3 944 0.93
3 945 0.93
3 946 0.94
3 947 0.92
3 948 0.89
3 949 0.89
3 950 0.85
3 951 0.90
3 952 0.91
3 953 0.90
3 954 0.88
3 955 0.87
3 956 0.91
3 957 0.85
3 958 0.94
3 959 0.90
3 960 0.89
3 961 0.86
3 962 0.87
3 963 0.87
3 964 0.92
3 965 0.92
3 966 0.90
3 967 0.90
3 968 0.89
3 969 0.86
3 970 0.89
3 971 0.92
3 972 0.87
3 973 0.86
3 974 0.88
3 975 0.90
3 976 0.92
3 977 0.90
3 978 0.89
3 979 0.94
3 980 0.90
3 981 0.90
3 982 0.90
3 983 0.89
3 984 0.89
3 985 0.91
3 986 0.91
3 987 0.90
3 988 0.87
3 989 0.91
3 990 0.87
3 991 0.88
3 992 0.92
3 993 0.90
3 994 0.89
3 995 0.81
3 996 0.88
3 997 0.93
3 998 0.93
3 999 0.90
3 1000 0.88
3 1001 0.88
3 1002 0.87
3 1003 0.90
3 1004 0.89
3 1005 0.90
3 1006 0.90
3 1007 0.93
3 1008 0.90
3 1009 0.88
3 1010 0.90
3 1011 0.91
3 1012 0.88
3 1013 0.89
3 1014 0.90
3 1015 0.91
3 1016 0.89
3 1017 0.89
3 1018 0.90
3 1019 0.87
3 1020 0.89
3 1021 0.86

3 1648 0.88
3 1649 0.88
3 1650 0.86
3 1651 0.93
3 1652 0.90
3 1653 0.89
3 1654 0.87
3 1655 0.91
3 1656 0.93
3 1657 0.91
3 1658 0.90
3 1659 0.89
3 1660 0.88
3 1661 0.90
3 1662 0.92
3 1663 0.91
3 1664 0.91
3 1665 0.90
3 1666 0.92
3 1667 0.88
3 1668 0.90
3 1669 0.89
3 1670 0.89
3 1671 0.89
3 1672 0.94
3 1673 0.86
3 1674 0.81
3 1675 0.91
3 1676 0.87
3 1677 0.94
3 1678 0.90
3 1679 0.87
3 1680 0.89
3 1681 0.87
3 1682 0.88
3 1683 0.91
3 1684 0.84
3 1685 0.93
3 1686 0.88
3 1687 0.92
3 1688 0.89
3 1689 0.92
3 1690 0.90
3 1691 0.92
3 1692 0.87
3 1693 0.92
3 1694 0.91
3 1695 0.89
3 1696 0.88
3 1697 0.89
3 1698 0.93
3 1699 0.91
3 1700 0.89
3 1701 0.90
3 1702 0.90
3 1703 0.90
3 1704 0.87
3 1705 0.89
3 1706 0.88
3 1707 0.89
3 1708 0.88
3 1709 0.87
3 1710 0.90
3 1711 0.91
3 1712 0.88
3 1713 0.89
3 1714 0.90
3 1715 0.88
3 1716 0.88
3 1717 0.90
3 1718 0.87
3 1719 0.81
3 1720 0.88
3 1721 0.90
3 1722 0.88
3 1723 0.88
3 1724 0.93
3 1725 0.90
3 1726 0.89
3 1727 0.89
3 1728 0.91
3 1729 0.87
3 1730 0.90
3 17

3 2344 0.85
3 2345 0.86
3 2346 0.92
3 2347 0.89
3 2348 0.87
3 2349 0.89
3 2350 0.93
3 2351 0.87
3 2352 0.90
3 2353 0.90
3 2354 0.85
3 2355 0.90
3 2356 0.90
3 2357 0.89
3 2358 0.89
3 2359 0.88
3 2360 0.93
3 2361 0.93
3 2362 0.91
3 2363 0.92
3 2364 0.93
3 2365 0.90
3 2366 0.90
3 2367 0.91
3 2368 0.91
3 2369 0.92
3 2370 0.86
3 2371 0.86
3 2372 0.89
3 2373 0.92
3 2374 0.91
3 2375 0.88
3 2376 0.80
3 2377 0.91
3 2378 0.89
3 2379 0.87
3 2380 0.87
3 2381 0.89
4 5 0.90
4 6 0.92
4 7 0.89
4 8 0.86
4 9 0.91
4 10 0.91
4 11 0.90
4 12 0.89
4 13 0.89
4 14 0.91
4 15 0.91
4 16 0.90
4 17 0.91
4 18 0.91
4 19 0.89
4 20 0.92
4 21 0.90
4 22 0.90
4 23 0.88
4 24 0.91
4 25 0.89
4 26 0.88
4 27 0.88
4 28 0.87
4 29 0.89
4 30 0.89
4 31 0.90
4 32 0.88
4 33 0.89
4 34 0.89
4 35 0.89
4 36 0.89
4 37 0.90
4 38 0.89
4 39 0.90
4 40 0.87
4 41 0.90
4 42 0.86
4 43 0.89
4 44 0.88
4 45 0.87
4 46 0.91
4 47 0.85
4 48 0.90
4 49 0.91
4 50 0.90
4 51 0.88
4 52 0.88
4 53 0.82
4 54 0.90
4 55 0.86
4 56 0.87
4 57 0.87
4 58 0.92
4 59 0.88

4 743 0.89
4 744 0.91
4 745 0.87
4 746 0.90
4 747 0.84
4 748 0.89
4 749 0.92
4 750 0.88
4 751 0.91
4 752 0.90
4 753 0.91
4 754 0.86
4 755 0.89
4 756 0.90
4 757 0.89
4 758 0.90
4 759 0.87
4 760 0.91
4 761 0.90
4 762 0.90
4 763 0.87
4 764 0.80
4 765 0.86
4 766 0.86
4 767 0.90
4 768 0.88
4 769 0.91
4 770 0.88
4 771 0.85
4 772 0.89
4 773 0.91
4 774 0.92
4 775 0.88
4 776 0.91
4 777 0.91
4 778 0.89
4 779 0.85
4 780 0.90
4 781 0.90
4 782 0.80
4 783 0.92
4 784 0.86
4 785 0.91
4 786 0.92
4 787 0.91
4 788 0.89
4 789 0.91
4 790 0.91
4 791 0.91
4 792 0.87
4 793 0.91
4 794 0.89
4 795 0.89
4 796 0.92
4 797 0.88
4 798 0.91
4 799 0.88
4 800 0.90
4 801 0.92
4 802 0.91
4 803 0.89
4 804 0.89
4 805 0.87
4 806 0.88
4 807 0.89
4 808 0.89
4 809 0.89
4 810 0.90
4 811 0.88
4 812 0.90
4 813 0.90
4 814 0.89
4 815 0.91
4 816 0.85
4 817 0.81
4 818 0.88
4 819 0.85
4 820 0.91
4 821 0.88
4 822 0.90
4 823 0.89
4 824 0.91
4 825 0.90
4 826 0.88
4 827 0.89
4 828 0.91
4 829 0.91
4 830 0.90
4 831 0.88
4 832 0.88
4 833 0.91

4 1468 0.90
4 1469 0.90
4 1470 0.93
4 1471 0.91
4 1472 0.90
4 1473 0.90
4 1474 0.88
4 1475 0.88
4 1476 0.91
4 1477 0.88
4 1478 0.90
4 1479 0.91
4 1480 0.92
4 1481 0.91
4 1482 0.90
4 1483 0.92
4 1484 0.87
4 1485 0.89
4 1486 0.91
4 1487 0.88
4 1488 0.90
4 1489 0.94
4 1490 0.89
4 1491 0.88
4 1492 0.90
4 1493 0.91
4 1494 0.89
4 1495 0.88
4 1496 0.89
4 1497 0.90
4 1498 0.91
4 1499 0.89
4 1500 0.91
4 1501 0.88
4 1502 0.90
4 1503 0.91
4 1504 0.87
4 1505 0.91
4 1506 0.90
4 1507 0.88
4 1508 0.93
4 1509 0.89
4 1510 0.87
4 1511 0.88
4 1512 0.91
4 1513 0.90
4 1514 0.90
4 1515 0.89
4 1516 0.90
4 1517 0.84
4 1518 0.88
4 1519 0.92
4 1520 0.89
4 1521 0.86
4 1522 0.86
4 1523 0.82
4 1524 0.89
4 1525 0.89
4 1526 0.87
4 1527 0.90
4 1528 0.89
4 1529 0.93
4 1530 0.91
4 1531 0.89
4 1532 0.92
4 1533 0.92
4 1534 0.89
4 1535 0.91
4 1536 0.91
4 1537 0.92
4 1538 0.91
4 1539 0.86
4 1540 0.91
4 1541 0.90
4 1542 0.85
4 1543 0.86
4 1544 0.79
4 1545 0.90
4 1546 0.91
4 1547 0.88
4 1548 0.88
4 1549 0.89
4 1550 0.92
4 15

4 2173 0.90
4 2174 0.91
4 2175 0.90
4 2176 0.90
4 2177 0.91
4 2178 0.92
4 2179 0.91
4 2180 0.91
4 2181 0.92
4 2182 0.91
4 2183 0.93
4 2184 0.89
4 2185 0.90
4 2186 0.90
4 2187 0.91
4 2188 0.88
4 2189 0.89
4 2190 0.91
4 2191 0.90
4 2192 0.90
4 2193 0.86
4 2194 0.92
4 2195 0.88
4 2196 0.87
4 2197 0.92
4 2198 0.93
4 2199 0.91
4 2200 0.92
4 2201 0.91
4 2202 0.89
4 2203 0.91
4 2204 0.89
4 2205 0.89
4 2206 0.90
4 2207 0.92
4 2208 0.91
4 2209 0.87
4 2210 0.88
4 2211 0.89
4 2212 0.93
4 2213 0.91
4 2214 0.88
4 2215 0.90
4 2216 0.93
4 2217 0.89
4 2218 0.89
4 2219 0.89
4 2220 0.89
4 2221 0.78
4 2222 0.88
4 2223 0.91
4 2224 0.89
4 2225 0.89
4 2226 0.89
4 2227 0.90
4 2228 0.87
4 2229 0.92
4 2230 0.92
4 2231 0.90
4 2232 0.91
4 2233 0.91
4 2234 0.91
4 2235 0.94
4 2236 0.90
4 2237 0.91
4 2238 0.89
4 2239 0.89
4 2240 0.93
4 2241 0.91
4 2242 0.89
4 2243 0.91
4 2244 0.91
4 2245 0.90
4 2246 0.89
4 2247 0.89
4 2248 0.88
4 2249 0.93
4 2250 0.88
4 2251 0.85
4 2252 0.91
4 2253 0.91
4 2254 0.89
4 2255 0.88
4 22

5 565 0.91
5 566 0.89
5 567 0.90
5 568 0.93
5 569 0.90
5 570 0.86
5 571 0.92
5 572 0.92
5 573 0.90
5 574 0.92
5 575 0.89
5 576 0.83
5 577 0.90
5 578 0.88
5 579 0.92
5 580 0.92
5 581 0.92
5 582 0.93
5 583 0.91
5 584 0.92
5 585 0.92
5 586 0.95
5 587 0.91
5 588 0.91
5 589 0.92
5 590 0.93
5 591 0.92
5 592 0.91
5 593 0.89
5 594 0.93
5 595 0.90
5 596 0.92
5 597 0.90
5 598 0.90
5 599 0.91
5 600 0.90
5 601 0.92
5 602 0.91
5 603 0.91
5 604 0.91
5 605 0.92
5 606 0.92
5 607 0.91
5 608 0.91
5 609 0.87
5 610 0.89
5 611 0.94
5 612 0.90
5 613 0.92
5 614 0.92
5 615 0.89
5 616 0.95
5 617 0.88
5 618 0.91
5 619 0.94
5 620 0.92
5 621 0.90
5 622 0.90
5 623 0.90
5 624 0.92
5 625 0.94
5 626 0.92
5 627 0.85
5 628 0.89
5 629 0.91
5 630 0.93
5 631 0.89
5 632 0.90
5 633 0.90
5 634 0.91
5 635 0.92
5 636 0.81
5 637 0.91
5 638 0.87
5 639 0.91
5 640 0.94
5 641 0.90
5 642 0.90
5 643 0.93
5 644 0.93
5 645 0.93
5 646 0.92
5 647 0.90
5 648 0.90
5 649 0.91
5 650 0.89
5 651 0.89
5 652 0.93
5 653 0.89
5 654 0.91
5 655 0.90

5 1315 0.90
5 1316 0.90
5 1317 0.90
5 1318 0.92
5 1319 0.90
5 1320 0.94
5 1321 0.90
5 1322 0.92
5 1323 0.89
5 1324 0.90
5 1325 0.91
5 1326 0.91
5 1327 0.92
5 1328 0.91
5 1329 0.92
5 1330 0.89
5 1331 0.89
5 1332 0.89
5 1333 0.92
5 1334 0.91
5 1335 0.93
5 1336 0.91
5 1337 0.92
5 1338 0.91
5 1339 0.89
5 1340 0.91
5 1341 0.90
5 1342 0.91
5 1343 0.92
5 1344 0.94
5 1345 0.90
5 1346 0.91
5 1347 0.90
5 1348 0.93
5 1349 0.91
5 1350 0.92
5 1351 0.92
5 1352 0.89
5 1353 0.89
5 1354 0.92
5 1355 0.92
5 1356 0.92
5 1357 0.91
5 1358 0.93
5 1359 0.91
5 1360 0.90
5 1361 0.92
5 1362 0.90
5 1363 0.87
5 1364 0.93
5 1365 0.93
5 1366 0.91
5 1367 0.89
5 1368 0.91
5 1369 0.92
5 1370 0.93
5 1371 0.93
5 1372 0.92
5 1373 0.90
5 1374 0.92
5 1375 0.90
5 1376 0.91
5 1377 0.91
5 1378 0.90
5 1379 0.93
5 1380 0.89
5 1381 0.89
5 1382 0.90
5 1383 0.90
5 1384 0.93
5 1385 0.93
5 1386 0.94
5 1387 0.91
5 1388 0.92
5 1389 0.95
5 1390 0.90
5 1391 0.92
5 1392 0.91
5 1393 0.91
5 1394 0.89
5 1395 0.92
5 1396 0.86
5 1397 0.91
5 13

KeyboardInterrupt: 

In [66]:
path = '../final/lexical results/the prophet/'

pmi, d, sentences = iterator(path)
ssm = generate_ssm_lex_vectors(pmi, d, sentences)
np.save(path+'new_ssm_lex_vect.npy', ssm)

In [28]:
import gc
gc.collect()

115