# <center>Implementing LDA in Python for ELOHP Interviews</center>

Modified from a tutorial by:

<center>Dr. W.J.B. Mattingly</center>

<center>Smithsonian Data Science Lab and United States Holocaust Memorial Museum</center>

<center>February 2021</center>

## Importing the Required Libraries

In [27]:
#https://www.machinelearningplus.com/nlp/topic-modeling-gensim-python/#1introduction
import numpy as np
import json
import glob
import os

#Gensim
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel
from gensim.models import TfidfModel

#spacy
import spacy
from nltk.corpus import stopwords


#vis
import pyLDAvis
import pyLDAvis.gensim_models

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

## Preparing the Data

In [28]:
def load_data(file):
    """Return the whole content of the UTF-8 text file ``file`` as one string."""
    # The content is returned verbatim; it is not parsed as JSON.
    with open(file, mode="r", encoding="utf-8") as handle:
        return handle.read()

def write_data(file, data):
    """Write ``data`` to ``file`` as JSON with a 4-space indent.

    The file is opened in write mode as UTF-8, so existing content is replaced.
    """
    with open(file, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=4)


In [29]:
# Load every interview transcript from the "elohp-interview-text" folder,
# skipping hidden files (names starting with "."), in sorted filename order.
text_list = sorted(
    name for name in os.listdir("elohp-interview-text") if not name.startswith('.')
)
data = [load_data("elohp-interview-text/" + name) for name in text_list]

# Second name for the same list of full-length interviews, so `data` can be
# rebound later (e.g. to chunked text) without losing the originals.
full_data_hold = data

In [30]:
# Show characters 800-1499 of the first interview as a sanity check.
print(data[0][800:1500])
# Mean interview length, measured in characters.
total_chars = sum(len(doc) for doc in data)
avg_length = total_chars / float(len(data))
print("AVerage length of interview: " + str(avg_length))

d and transcribed contribute.  
Agapito:  I do.   
Long:  Thank you. Let's just begin with the basic question. Can you please 
tell us when and where you were born and something about your 
early background?  
Agapito:  I was born in Torrance, California, which is near Los Angeles, and 
in 1950. I came from Italian, English, Irish, Welsh, Scottish family, 
Italian on one side and the other side a mixture. And we're a really 
close family. Both sides of my family lived in the same city and we 
were always together. We did a lot of— Every Sunday we would 
go to the Italian aunt's house and have— I'd watch the aunts 
making pasta in the kitchen and just have such great memories of 
all my famil
AVerage length of interview: 67570.43661971831


Splitting interviews into smaller chunks of text to optimize topic modeling

In [99]:
# Divide each interview string into K roughly equal chunks
# using len() + a slicing loop (see splitchunks below).

# Start again from the full-length interview texts.
data=full_data_hold
# NOTE(review): chunk_dict is not referenced again in the visible cells — confirm before removing.
chunk_dict={}
def splitchunks(full_string, num_chunks=10):
    """Split ``full_string`` into ``num_chunks`` consecutive character chunks.

    Every chunk is ``len(full_string) // num_chunks`` characters long. When the
    length is not evenly divisible, the leftover tail is appended to the last
    chunk, separated by a single space.

    Args:
        full_string: The text to split.
        num_chunks: How many chunks to produce (default 10).

    Returns:
        A list of exactly ``num_chunks`` strings. If ``full_string`` has fewer
        characters than ``num_chunks`` it is returned as a single chunk, and an
        empty string yields an empty list.

    Raises:
        ValueError: If ``num_chunks`` is smaller than 1.
    """
    if num_chunks < 1:
        raise ValueError("num_chunks must be a positive integer")

    # compute chunk length
    chnk_len = len(full_string) // num_chunks
    if chnk_len == 0:
        # Too short to give every chunk at least one character; a zero chunk
        # length would also be an invalid step for slicing by range().
        return [full_string] if full_string else []

    # Slice exactly num_chunks pieces so the count never depends on how large
    # the remainder is relative to the chunk length.
    res = [
        full_string[i * chnk_len:(i + 1) * chnk_len]
        for i in range(num_chunks)
    ]

    # merge the remainder (ie. if it's not evenly divisible combine the last
    # bit of text with the last major section)
    remainder = full_string[num_chunks * chnk_len:]
    if remainder:
        res[-1] = res[-1] + " " + remainder
    return res

# Chunk every full-length interview and collect all chunks in one flat list.
list_chunks = []
for text in full_data_hold:
    text_chunks = splitchunks(text)
    # extend() grows the list in place; `list_chunks + text_chunks` would copy
    # the whole accumulated list on every iteration.
    list_chunks.extend(text_chunks)

# Mean chunk length, measured in characters.
avg_length = sum(map(len, list_chunks))/float(len(list_chunks))
print(avg_length)

10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
6757.130985915493


Install necessary model

In [32]:
#!python3 -m spacy download en_core_web_sm

In [33]:
# Choose the documents used by the following cells: either the full-length
# interviews (full_data_hold) or the split, chunked data (list_chunks).
#data=full_data_hold
data=list_chunks

The next cell takes a few minutes to run.

In [34]:
def lemmatization(texts, allowed_postags=("NOUN", "ADJ", "VERB", "ADV")):
    """Lemmatize ``texts`` and keep only tokens with selected part-of-speech tags.

    Args:
        texts: Iterable of document strings.
        allowed_postags: Part-of-speech tags (compared against ``token.pos_``)
            whose tokens are kept. Any iterable of strings is accepted.

    Returns:
        A list with one string per input document: the lemmas of the kept
        tokens joined by single spaces.
    """
    # The model is loaded with the "parser" and "ner" components disabled.
    nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"])
    # A set gives constant-time tag lookups and copies the argument, so the
    # default is never shared mutable state.
    keep_tags = set(allowed_postags)
    texts_out = []
    # nlp.pipe processes the documents as a stream instead of one call each.
    for doc in nlp.pipe(texts):
        lemmas = [token.lemma_ for token in doc if token.pos_ in keep_tags]
        texts_out.append(" ".join(lemmas))
    return texts_out

lemmatized_texts = lemmatization(data)

In [35]:
# Number of lemmatized documents (lemmatization returns one string per input text).
len(lemmatized_texts)

710

In [36]:
# Preview the first 200 characters of the first lemmatized document.
print (lemmatized_texts[0][0:200])

long interview part recording make available oral history interview take place record studio interviewer library aggie let know agree record project give permission preserve make available record tran


In [37]:
def gen_words(texts):
    """Tokenize every document in ``texts`` into a list of words.

    Each document is passed to ``gensim.utils.simple_preprocess`` with
    ``deacc=True``; the result is one token list per input document, in order.
    """
    return [gensim.utils.simple_preprocess(doc, deacc=True) for doc in texts]

data_words = gen_words(lemmatized_texts)

In [38]:
# Preview the first 200 tokens of the first tokenized document.
print (data_words[0][0:200])

['long', 'interview', 'part', 'recording', 'make', 'available', 'oral', 'history', 'interview', 'take', 'place', 'record', 'studio', 'interviewer', 'library', 'aggie', 'let', 'know', 'agree', 'record', 'project', 'give', 'permission', 'preserve', 'make', 'available', 'record', 'transcribed', 'contribute', 'long', 'thank', 'let', 'just', 'begin', 'basic', 'question', 'please', 'tell', 'bear', 'early', 'background', 'bear', 'come', 'italian', 'english', 'irish', 'scottish', 'family', 'italian', 'side', 'other', 'side', 'mixture', 'really', 'close', 'family', 'side', 'family', 'live', 'same', 'city', 'always', 'together', 'lot', 'go', 'italian', 'aunt', 'house', 'watch', 'aunt', 'make', 'pasta', 'kitchen', 'just', 'such', 'great', 'memory', 'family', 'grandmother', 'mother', 'side', 'take', 'care', 'brother', 'lot', 'anyway', 'family', 'really', 'close', 'really', 'fun', 'grow', 'brother', 'brother', 'year', 'young', 'almost', 'year', 'young', 'name', 'really', 'close', 'really', 'close',

In [39]:
#BIGRAMS AND TRIGRAMS
# Train the phrase models: `bigram_phrases` on the tokenized documents, and
# `trigram_phrases` on those documents after the bigram model has been applied.
# Merged tokens are joined with "_" (e.g. 'record_studio' in the sample output).
bigram_phrases = gensim.models.Phrases(data_words, min_count=5, threshold=150)
trigram_phrases = gensim.models.Phrases(bigram_phrases[data_words], threshold=150)

# Wrap each trained model in a Phraser; the helpers below apply these objects.
bigram = gensim.models.phrases.Phraser(bigram_phrases)
trigram = gensim.models.phrases.Phraser(trigram_phrases)

def make_bigrams(texts):
    """Return every document in ``texts`` passed through the ``bigram`` phraser."""
    return([bigram[doc] for doc in texts])

def make_trigrams(texts):
    """Return every document passed through ``bigram`` and then ``trigram``."""
    return ([trigram[bigram[doc]] for doc in texts])

data_bigrams = make_bigrams(data_words)
# NOTE(review): make_trigrams() applies `bigram` itself, so passing it
# data_bigrams runs the bigram phraser a second time on already-merged
# documents — confirm this is intended (data_words would avoid the extra pass).
data_bigrams_trigrams = make_trigrams(data_bigrams)

In [40]:
# Preview the first 100 tokens of the first document after phrase merging.
print (data_bigrams_trigrams[0][:100])

['long', 'interview', 'part', 'recording', 'make', 'available_oral_history', 'interview', 'take', 'place', 'record_studio', 'interviewer', 'library', 'aggie', 'let', 'know', 'agree_record_project', 'give_permission_preserve', 'make', 'available_record', 'transcribed', 'contribute', 'long', 'thank', 'let', 'just', 'begin_basic_question_please', 'tell', 'bear', 'early', 'background', 'bear', 'come', 'italian', 'english', 'irish', 'scottish', 'family', 'italian', 'side', 'other', 'side', 'mixture', 'really', 'close', 'family', 'side', 'family', 'live', 'same', 'city', 'always', 'together', 'lot', 'go', 'italian', 'aunt', 'house', 'watch', 'aunt', 'make', 'pasta', 'kitchen', 'just', 'such', 'great', 'memory', 'family', 'grandmother', 'mother', 'side', 'take', 'care', 'brother', 'lot', 'anyway', 'family', 'really', 'close', 'really', 'fun', 'grow', 'brother', 'brother', 'year', 'young', 'almost', 'year', 'young', 'name', 'really', 'close', 'really', 'close', 'raiskin', 'cousin', 'group', 'c

In [41]:
# Map every token to an integer id and turn each document into a
# bag-of-words list of (token_id, count) pairs.
texts = data_bigrams_trigrams
id2word = corpora.Dictionary(texts)
corpus = list(map(id2word.doc2bow, texts))

# Sanity check: first 20 pairs of the first document, and the token with id 1.
print(corpus[0][:20])
print(id2word[1])

[(0, 1), (1, 1), (2, 1), (3, 2), (4, 1), (5, 1), (6, 2), (7, 1), (8, 1), (9, 2), (10, 4), (11, 4), (12, 1), (13, 1), (14, 2), (15, 1), (16, 1), (17, 1), (18, 2), (19, 2)]
activity


Processing stopwords

In [42]:
# Standard English stopwords from NLTK.
stopword_list = stopwords.words("english")
# Project-specific stopwords: whitespace-separated words in extra-stopwords.txt.
# The context manager closes the file handle, and the file is read as UTF-8
# like the interview texts in load_data.
with open("extra-stopwords.txt", "r", encoding="utf-8") as extra_file:
    extra_stopwords = extra_file.read().split()
print(extra_stopwords)
# Combined list: standard stopwords followed by the custom ones.
stopword_list = stopword_list+extra_stopwords
print(stopword_list)

['long', 'raiskin', 'stockford', 'drescher', 'rubin', 'laskaya', 'pitt', 'bole', 'grime', 'ree', 'gani', 'hunt', 'anyway', 'interview', 'sort', 'believe', 'guess', 'definitely', 'number', 'literally', 'important', 'go', 'know', 'woman', 'year', 'remember', 'get', 'think', 'say', 'really', 'people', 'time', 'come', 'thing', 'lot', 'want', 'kind', 'live', 'back', 'take', 'mean', 'make', 'see', 'start', 'talk', 'little', 'feel', 'way', 'tell', 'call', 'first', 'still', 'much', 'move', 'right', 'part', 'happen', 'also', 'life', 'together', 'different', 'look', 'place', 'never', 'actually', 'group', 'always', 'even', 'try', 'big', 'find', 'maybe', 'need', 'day', 'like', 'meet', 'great', 'let', 'name', 'well', 'many', 'leave', 'stuff', 'become', 'pretty', 'give', 'high', 'hard', 'end', 'probably', 'use', 'put', 'point', 'whole', 'change']
['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he'

In [43]:
from nltk import FreqDist

# Count how often each non-stopword token occurs across all documents and
# print the 50 most frequent ones (useful for spotting further stopwords).

# Build the lookup set once: testing membership in the stopword list would
# scan the whole list for every token.
stopword_set = set(stopword_list)

alltexts_no_stopwords = []
for text in data_bigrams_trigrams:
    new_word_list = [word for word in text if word not in stopword_set]
    # extend() appends in place instead of copying the accumulated list.
    alltexts_no_stopwords.extend(new_word_list)

fdist = FreqDist(alltexts_no_stopwords)
# most_common() yields (token, count) pairs; only the tokens are kept.
most_common_list = [word for word, _count in fdist.most_common(50)]
for word in most_common_list:
    print(word)


work
lesbian
school
community
friend
good
gay
love
man
house
old
job
family
kid
person
couple
class
home
young
ask
child
parent
experience
relationship
play
student
grow
sure
fun
interesting
guy
college
help
run
wonderful
stay
course
mother
town
early
bring
keep
ever
hear
bit
book
teach
partner
mom
story


Remove stopwords - both standard and custom list. 

In [44]:
# Drop every stopword (standard + custom) from each document, then rebuild the
# dictionary and the bag-of-words corpus from the filtered documents.

# Build the lookup set once: testing membership in the stopword list would
# scan the whole list for every token.
stopword_set = set(stopword_list)
data_no_stopwords = [
    [word for word in text if word not in stopword_set]
    for text in data_bigrams_trigrams
]
id2word=corpora.Dictionary(data_no_stopwords)
texts=data_no_stopwords
corpus = [id2word.doc2bow(text) for text in texts]
#for text in corpus:
    #print(id2word[text[0][0]])

In [45]:
#TF-IDF REMOVAL
#
# For every document, drop the terms whose TF-IDF weight is below `low_value`
# and the terms that are absent from the TF-IDF vector (weight 0). The removed
# terms are collected in `words`, and each entry of `corpus` is replaced by its
# filtered bag-of-words. Both the dropped low-weight terms (with their weight)
# and the surviving terms are printed per document.

# Rebuild the bag-of-words corpus before fitting, so that re-running this cell
# fits the model on the unfiltered counts instead of an already-filtered corpus.
corpus = [id2word.doc2bow(text) for text in texts]
tfidf = TfidfModel(corpus, id2word=id2word)

low_value = 0.05 #threshold number (0.03 is the commented-out alternative)
words  = []
words_missing_in_tfidf = []
for i, bow in enumerate(corpus):
    print("\nnew text ****\n")
    # Compute the (term_id, weight) pairs once per document.
    weights = tfidf[bow]

    low_value_words = []
    for term_id, weight in weights:
        if weight < low_value:
            print(id2word[term_id]+ " "+ str(weight))
            low_value_words.append(term_id)

    # The words with tf-idf score 0 will be missing from the TF-IDF vector.
    # This is computed before `drops` so that `words` records this document's
    # missing terms, not those of the previous document.
    tfidf_ids = {term_id for term_id, _weight in weights}
    words_missing_in_tfidf = [
        term_id for term_id, _count in bow if term_id not in tfidf_ids
    ]

    drops = low_value_words+words_missing_in_tfidf
    for item in drops:
        words.append(id2word[item])

    # Set lookup keeps the filter linear in the document size.
    drop_ids = set(drops)
    new_bow = [b for b in bow if b[0] not in drop_ids]
    for nb in new_bow:
        print(id2word[nb[0]])
    corpus[i] = new_bow


new text ****

activity 0.03161774399284589
adult 0.036501485247966176
age 0.0381413906180411
agree_record_project 0.03238868919818853
almost 0.021226209128109488
available_oral_history 0.0329959588538497
background 0.027571142084025325
backyard 0.04498336624239994
bad 0.020527122209284935
bear 0.041744646134173335
bedroom 0.04359868563593874
bit 0.012692021578008963
care 0.018917800010237268
check 0.032191955515794415
childhood 0.03911924252976018
choir 0.047645287544759315
city 0.048640543387119636
clothe 0.040816023429183126
color 0.039773603722524704
contribute 0.04404527009672012
cry 0.04233818049108653
dad 0.025483432304614476
decide 0.029477529140832594
difficult 0.02985717056233935
dinner 0.03238868919818853
drop 0.033848718667046016
early 0.013444405669163068
elementary 0.04498336624239994
father 0.023559749778056742
finish 0.03258821352219379
follow 0.02921033342912563
friend 0.014881741448230536
fun 0.02836980895902602
garden 0.04547696878259687
good 0.010165917209692845
gr

job 0.01518200267491131
miss 0.02406654127381205
next 0.022431576860184686
raise 0.016561406028733232
seem 0.01240679246511887
sex 0.020790843971927778
sure 0.008212555907395213
word 0.015990132975809004
accept 0.021851401439405522
explain 0.023465408297958425
fill 0.02556673131570451
show 0.013537836521183927
summer 0.0356230092958901
truck 0.022898160431901092
bag 0.03644718345461407
camera 0.03699142334282975
line 0.02035312344727154
mind 0.018770895576197544
person 0.02093162928676464
set 0.017012197142368637
sock 0.04649836060246915
chop_wood 0.048050039175669315
question 0.025107487549177004
sneak 0.042908075637606895
story 0.011129753838624442
totally 0.01933549135794939
anniversary 0.034524024130797984
center 0.03809847100265337
education 0.020790843971927778
essay 0.04515423548399912
hand 0.01663514984517318
student 0.012803532917630641
afraid 0.02422260619723448
campus 0.03496825331611563
carry 0.025748105338452423
invite 0.020903314258464718
sense 0.015783996367576148
stick 

process 0.03308142437613985
funny 0.032279689844834265
story 0.03846561535141457
totally 0.033412759347128757
science 0.0458075186531538
student 0.022125187110590396
campus 0.03021350249680753
coast 0.0448132731920711
deal 0.02358317603077067
member 0.03409498639671533
pass 0.02775217986581653
sad 0.04357027595299685
involve 0.03936538412679368
course 0.017812054261634236
guy 0.037444477723351285
idea 0.02006669353057314
health 0.03909908947716402
issue 0.023022012526113318
share 0.03275628294626522
nice 0.04932298792230202
office 0.033750528407347914
theater 0.0441806740221849
director 0.040803415572705115
challenge 0.03980917011162241
consider 0.040052454637325444
discover 0.04960394341028823
faculty 0.04918477710263359
heterosexual 0.0438727976657234
major 0.03886869458527471
path 0.04877547445276277
positive 0.04722827206722685
problem 0.030923583131265933
professional 0.0479846764575205
sexuality 0.040803415572705115
army
bedroom
fun
horse
patch
shoe
wake
wear
white
class
ear
expl

begin
community
date
pay
short
teach
transition
completely
experience
sexual
support
chair
learning
simple
build
burn
develop
food
eventually
career
degree
lead
practice
stick
table
therapist
therapy
bottom
willing
anywhere
folk
supervisor
crisis
incest
hierarchy
contract
organize
self
abuse
former
pocket
challenge
discover
satisfying
act
coursework
decision
shit
flyer
highlight
licensing
sweetie
actualize
advisor
clientele
committee
digression
facilitate
fading
finish_degree
grandfathere
key
midlife
plus
recover
rounded
spanish
suggest
survival
thesis
timer

new text ****

care 0.030286135211674343
decide 0.023595778388137468
family 0.017049712159905014
good 0.005424984987627765
home 0.018444529055008325
old 0.012255828115763557
school 0.044031266469351044
away 0.026180418969228042
community 0.020139626019639388
ever 0.04001693269017971
lesbian 0.004322431526611771
man 0.014738033633841418
marry 0.032862552643570825
month 0.02711237653466836
okay 0.03471058102055221
work 0.01125977449

official
environmental
accountant
finance
absence
account
blatantly
broaden
budget
cake
centerfold
coverage
custody
dessert
domestic_partner
effect
encounter
espresso
fundamentalist
homophobia
inaudible
ironic
joint
lab
laboratory
lunchroom
menu
miser
oblivious
operation
overlook
partn
profession
pumpkin
quality
restricted
technician
treatment
tune
twilight
waitressing
wastewater
wednesday

new text ****

activity 0.03892808471078952
bit 0.015626544741829
care 0.023291785824659822
child 0.040898286536169226
family 0.013112212609006954
friend 0.04275261641166926
good 0.02503275922477832
grow 0.0338755172407672
hear 0.03348850801347194
home 0.014184907297791158
house 0.025708216475846236
middle 0.031525402914243446
parent 0.038593425405843595
roll 0.04816400522962659
school 0.03386258500877744
walk 0.020449143268084613
weekend 0.04337971589239579
world 0.04137825216761586
away 0.040268506177444964
community 0.023232802034313462
ever 0.030775330660883143
fact 0.04910726765517223
last 0.01

anymore 0.047451661179636784
child 0.018800693296251347
color 0.04502211027563685
family 0.024110417191112947
friend 0.04492141131255252
fun 0.03211347597172024
good 0.011507406994367595
kid 0.04872673139519598
middle 0.028984071531078626
neighbor 0.043573621039788006
old 0.017331267890171656
run 0.01630456247731952
watch 0.02666868358155279
wonderful 0.016118333428781738
class 0.016242243798398663
community 0.007119985089079713
date 0.027529563621131074
ever 0.014147232117763413
lesbian 0.01833737011086461
man 0.041682831493684354
matter 0.03043256076692747
month 0.01917013915433271
okay 0.024542543050067044
sign 0.032364917136026204
sit 0.01932035160934011
smart 0.041930680136093454
though 0.03169440113163062
work 0.002653787329676719
around 0.015218523291231571
buy 0.022760540981760752
experience 0.0138233754859692
figure 0.02090597933603477
fine 0.03043256076692747
gay 0.03217698180322618
information 0.04102058088533321
second 0.04805441994979545
sex 0.03288700988106505
single 0.03

buy 0.02334897421655581
die 0.02232485240189786
else 0.0340990898897534
figure 0.021446466183738418
form 0.03738208228834835
gay 0.022005906892171277
job 0.012317896844817374
keep 0.015315519266048554
next 0.01819982882014355
reason 0.027984057028932238
relationship 0.015792459645080123
second 0.024648390679695514
sex 0.03373724492699651
sleep 0.034104317621427724
stay 0.030280393717154068
straight 0.024237449572145714
support 0.019819844877271318
sure 0.026652983447766582
travel 0.03045948026007588
visit 0.02735815488015656
word 0.025947144490551022
ago 0.02628863527297629
struggle 0.03855770084831014
summer 0.028902679258110988
week 0.022506343685518578
workshop 0.04470013937697179
collective 0.03091114168564008
negative 0.03982454073034765
person 0.011321898955812221
somewhere 0.028634901990906247
eventually 0.02748143129532356
funny 0.030311659817034535
money 0.019361850925521572
recently 0.04119795835887847
story 0.018060220726866253
student 0.0415525148047095
area 0.0283714439006

character
create
direct
production
successful
crew
approach
jigsaw
spiritual
percent
healing
budget
nursing
foam
accomplished
acquiesce
angel
bevy
breakaway
brrrm
burlap
coma
consummate
convinced
convincing
creme
decade
designer
endorsement
equivalent
fictional
flashback
investment
investor
lesbo
maker
ovarian
playwright
profit
prospectus
realtor
rental
resident
selling
success
underneath

new text ****

baby 0.04760549853595309
family 0.009053558273004147
friend 0.01265112747116909
fun 0.012058738915801687
good 0.0028807182933658555
hear 0.011561365263695816
home 0.019588438450044345
interested 0.020486606402212428
love 0.02251886228261282
middle 0.021767270015739446
mom 0.034900651995510575
new 0.03041982136278792
old 0.006507960544397022
small 0.04863660576772061
street 0.04192615572839325
wonderful 0.024209995174197994
world 0.014285171709969017
write 0.029361951963616746
away 0.013902049871960405
class 0.01219805526777404
community 0.016041497597311775
company 0.02552341451701798


private 0.0441497248280804
special 0.03814613741293017
student 0.018833492126094056
coast 0.03814613741293017
couple 0.020609936510967517
invite 0.030747951134171628
society 0.04258968660726185
somehow 0.03517487296187258
stand 0.02525207066074562
ask 0.011344459435938748
difference 0.03517487296187258
yet 0.025957283390033708
guy 0.04781046186254982
send 0.02607815482394795
vacation 0.04896814641057605
anywhere 0.03634135529876277
coffee 0.04117846037702281
issue 0.019596891518709688
music 0.02436117531354887
dance 0.026953064816447728
floor 0.032892414957617126
court 0.04456684166267098
heterosexual 0.03734558199472196
professor 0.035400933359457036
space 0.0317813938388772
driver 0.04636379163255174
service 0.033282086615807414
ton 0.04896814641057605
credit 0.04589369918356605
surprised 0.04543807343668738
basis 0.04954887450577245
reaction 0.04684929966692643
super 0.04117846037702281
painful 0.04374403730199145
fun
exactly
married
marry
rent
wife
agent
finally
free
ring
sweet
sho

welcoming
result
anonymous
balance
finding
brand
formal
conscientious
objector
procedure
task_force
appoint
acknowledging
additional
advising
association
consultant
establish
examine
hierarchical
inherit
instruction
intentionally
investigate
recommend
survey
taunt
uproar

new text ****

anymore 0.022158534207424547
color 0.0420480103681283
early 0.014213208159715818
good 0.0035824144007400244
hear 0.014377525740112097
play 0.01876609032380274
read 0.04469117623358261
top 0.03209884783369785
wild 0.04519030120401231
begin 0.03601850509995663
class 0.045507909210975375
date 0.02571099776275409
draw 0.03601850509995663
exactly 0.026686468037302215
fact 0.021083083320844957
hall 0.04976278001892906
huge 0.026560977083974424
last 0.01631608284861964
legal 0.034665701110223324
lesbian 0.008563014081584322
position 0.03649824314599496
clear 0.03284253227975371
enough 0.019370055148909653
experience 0.03873066581273271
half 0.02643653627645543
job 0.011214268260332722
keep 0.027886634181065673

bit 0.011890748990460222
brother 0.022272049391712095
child 0.031120843883021266
church 0.03256228321327356
city 0.022784884528480317
dad 0.02387461249471743
dress 0.029447104628229935
family 0.019955022868990344
friend 0.009294818324210929
good 0.009524122619496861
grow 0.01288849451644114
hair 0.033471485069371906
horse 0.046397049089742855
house 0.009781111376734273
kitchen 0.0385815367736383
love 0.016544670367270416
middle 0.047977420353581844
one 0.013808277023414995
parent 0.02936699966401046
play 0.016630362035124528
school 0.042945247457645976
small 0.01786672435114816
walk 0.0466812658245319
watch 0.02207237587543328
class 0.04032875215783033
cop 0.04918172257893108
cut 0.031919316673888684
exactly 0.023649338633748115
fact 0.018683663053546358
fall 0.03878383173866906
lesbian 0.01264745984480938
movie 0.033471485069371906
okay 0.020312672482821564
sit 0.015990517921997006
strange 0.04308517730164791
turn 0.01656496140482691
around 0.01259563357620897
bring 0.0124994419085558

sexual 0.0410339991218531
spend 0.025057506107802986
sure 0.029434155891643922
bookstore 0.035721526297428065
feminist 0.034306018836140935
minute 0.040071719534601334
person 0.02500662182962805
education 0.037257642412973475
anti 0.03807771384073012
stand 0.03076374582070637
ask 0.01382057223145077
certainly 0.034306018836140935
everywhere 0.043127762880018176
involve 0.0408225956050351
thinking 0.04614116208110256
co 0.0497609533277875
idea 0.020809514076891678
suppose 0.035538393103645524
obviously 0.04647215237035421
nice 0.025574402925916083
create 0.03118819986647225
exist 0.041790968843929285
several 0.03786917435450609
challenge 0.04128280947556392
consider 0.04153509980703252
aware 0.04128280947556392
professor 0.043127762880018176
third 0.043127762880018176
identity 0.04750320239794164
read
write
direction
exciting
teach
attract
guilt
realize
rich
book
chair
famous
learning
bag
topic
original
course
opinion
paper
rather
seriously
embarrassed
naive
nowhere
convent
resign
tenur

wife
wrong
clear
straight
accept
actual
somewhere
sometimes
totally
connect
lead
march
measure
track
co
relate
yell
van
dance
female
kiss
parade
upset
bother
left
costume
bisexual
consider
mm_negative
socially
normal
disown
coach
oca
powerful
truth
cruise
demonstrative
bone
integration
leader
harassment
subject
participate
piss
reality
partly
backtrack
tense
climate
handed
crack
expense
honesty
interface
prejudiced
pronoun
purposefully
truthful
unsafe
used

new text ****

city 0.049416374227663276
difficult 0.03033340113298237
early 0.02731769571438673
good 0.010328066551465651
grade 0.035310909102105666
house 0.010606748073508745
learn 0.016808494705272643
read 0.021474039142818195
run 0.014633583955161051
school 0.009314062649797342
side 0.02505255533010476
strong 0.025405427318985926
wonderful 0.014466440652759757
class 0.014577652026829233
college 0.018763423072792683
community 0.012780582086207795
history 0.02426156984546739
lesbian 0.0027430097700922146
living 0.03193277412511911

room 0.04741084997300874
sit 0.02028842106651585
stop 0.026910145108846737
work 0.00557351707196026
around 0.015981065706579575
feeling 0.029042195016485757
figure 0.021953498577753878
next 0.018630104964394576
raise 0.027509500076288163
relationship 0.016165821323999
seem 0.020608434916306744
sex 0.03453485306981452
sexual 0.038035233545712636
support 0.02028842106651585
sure 0.01364155355946491
town 0.01731939684800412
understand 0.02429910438931214
ago 0.026910145108846737
beautiful 0.02958598971737969
drive 0.02370542498650437
girlfriend 0.03195742083044024
heart 0.03629650335976388
help 0.015859019812509357
interesting 0.01445999817523039
later 0.048497463971997
period 0.03211742409211757
struggle 0.03946927309528825
week 0.023038433458334194
actual 0.04985542950592759
affirmative 0.028131140938919975
bunch 0.032941246538571785
food 0.032941246538571785
front 0.033986600239778
happy 0.026218175469198402
hold 0.027755389996732664
lover 0.041881157316426276
basically 0.0290421950164

wind
monogamy
consciousness_raising
expression
chronology
brake
cadre
adorable
apprehension
assertive
bud
conventional
entice
honest
howl
lamppost
laundromat
mural
thursday
unlock
utility

new text ****

baby 0.03769837355777381
bit 0.03417682903130461
child 0.022362169144123305
decide 0.019844129391040174
friend 0.020036632069926008
hear 0.018310685948158364
home 0.015511911287842734
house 0.04216984626372018
learn 0.022275493322225035
love 0.02377665069606383
old 0.020614385753498138
beginning 0.04256982326091226
class 0.038638128645213224
community 0.025406240348176328
date 0.03274457747163956
exciting 0.04158577678113462
lesbian 0.01817589763029766
mostly 0.027183876201117643
okay 0.029191715979003646
stop 0.030480555613710738
teach 0.02557339077090098
work 0.037878033668425244
around 0.03620283428911172
bar 0.030749193224638288
die 0.025884717792993714
fine 0.03619749871109992
gay 0.012757457013116133
job 0.01428207801294131
next 0.021101928219250447
scene 0.04914524502818121
seco

bodied
wheelchair_basketball
embassy
forefront
frontier
fruitful
germane
implement
inbound
outbound
outreache
overlap
pioneering
whammy

new text ****

activity 0.042063794196531064
almost 0.02823904490274043
bit 0.016885283899955025
care 0.025167970442856645
learn 0.044021407417616634
love 0.011747008384697291
morning 0.038240615473692834
play 0.023615701966915623
small 0.025371380160611728
write 0.02297507917444832
away 0.021756094205943675
community 0.008368078476336709
lesbian 0.025143776087604618
night 0.024375745911185357
partner 0.021012413176938305
phone 0.03649404730522366
room 0.02653146199649283
short 0.0496514694642176
sign 0.038038305289401685
turn 0.023522830760005345
work 0.015594906700228202
bring 0.03549929872143288
else 0.01953323734300741
experience 0.04873960665389995
gay 0.03781742594855038
next 0.04170217763528016
seem 0.023065264938319754
book 0.023897113432108798
help 0.01774964936071644
period 0.03594629571966663
amazing 0.0309261349818099
develop 0.04655411340

ocean
environment
experience
feeling
male
proud
support
bookstore
feminist
southern
struggle
dump
encourage
pull
throw
household
mountain
wood
writing
known
special
talent
touch
bank
energy
sense
adventurous
bus
idea
honor
machine
serious
courage
belly
wise
quite
consider
forest
opportunity
spirit
driver
maintain
acceptance
communal
express
flirt
fairly
occasionally
gut
grab
independence
backpack
fishing
deeply
season
dominate
hole
particularly
driving
fair_amount
claim
commune
admire
boat
perhaps
invigorate
admirable
earthquake
politicize
proclivity
puller
salmon

new text ****

anymore 0.03322071663308845
child 0.026324579117933938
decide 0.02336036145752153
family 0.03375921169036883
friend 0.007862324726226496
interested 0.03819557247517801
love 0.013994848137809077
new 0.02835761249893628
old 0.024267101515873395
school 0.014530654357413913
small 0.03022630109440694
wear 0.04369917450016798
community 0.009969345696063264
company 0.04758628197657419
lesbian 0.008558612576883753
liv

school 0.035120156613237585
thank 0.04096387117768047
wait 0.03485084439969863
watch 0.03008424848934981
young 0.0475095573134978
away 0.020881988825814858
community 0.02409561011948544
cut 0.043505450425765
exactly 0.03223362016312585
exciting 0.039440493756727625
fact 0.025465494297774102
huge 0.032082044176456416
lesbian 0.0034476517333488004
married 0.03193173662295203
marry 0.026211782855629102
night 0.023396389486087736
pay 0.028286584653511023
sit 0.04358957253660997
turn 0.022577742329676572
wife 0.03855331848032709
work 0.011974673923403904
bar 0.02916293639970981
clear 0.039669307651392004
enough 0.023396389486087736
figure 0.02358349167611453
form 0.041107006577687684
keep 0.016841628734159442
next 0.040026688575110186
political 0.026764492551632497
spend 0.02495079868543362
sure 0.029308810500474392
travel 0.03349460433332134
understand 0.026103252931315334
able 0.018748440784800226
country 0.02665258161124732
gathering 0.049154268861193534
instead 0.04954904624328331
later

regret
retirement
granddad
folk
health
medicine
trouble
awful
alley
youngster
affect
necessarily
service
pay_attention
gender
butch
visual
joy
disabled
collect
disturb
severe
appreciation
modern
gender_identity
dementia
non_binary
trendy
suppress
attractive
enormously
perceive
generate
visually
prevention
sexual_minority
distress
surgeon
dysphoria
adolescent
testosterone
beforehand
bemoan
bisexually
competency
concentrate
constitute
conventionally
cure
custodial
discredit
disempowere
dose
femaleness
foresee
inability
increasingly
insure
jeeper
landscaping
leisure
maternal
objectification
paternally
politicking
puberty
schemata
signature
sociologically
stigmatize
sufficient
surely
waste
wonderfulness
yuck

new text ****

agree_record_project 0.03166811686752464
almost 0.02075397578488865
available_oral_history 0.0322618761983794
care 0.03699384670585305
church 0.03398328100145671
cousin 0.041008813131221684
dinner 0.03166811686752464
early 0.026290598383912313
eat 0.0322618761983794
fam

street 0.03866955960531274
college 0.02896204787705474
exciting 0.04843546851181193
girl 0.03127326840291316
huge 0.03939881813062377
lesbian 0.00846787710114939
living 0.04928943559343965
man 0.014436302432510971
night 0.028732274327833186
open 0.027081274966893325
room 0.03127326840291316
sell 0.04815800496322097
stop 0.03550108007890237
teach 0.029785644499158513
bring 0.04184387189734648
else 0.023024293734224942
experience 0.019150179774504627
realize 0.029905852215005774
single 0.04843546851181193
sure 0.017996554212410715
town 0.022848531359912605
girlfriend 0.04215967440635115
later 0.042653364747248755
period 0.0423707579431629
show 0.029666088318845193
describe 0.03458820244520842
basically 0.03831377671049452
study 0.03519257069651979
area 0.03831377671049452
campus 0.03831377671049452
couple 0.015351706039441878
thought 0.043907761901718606
involve 0.0499192881631709
social 0.03534628603124797
building 0.04460074228303005
dance 0.04015301334906486
active 0.04734596008857555

wonderful
write
begin
boss
though
environment
job
natural
seem
book
nervous
series
show
immediately
eventually
general
shower
cassette
collection
marching
role
deal
extremely
twenty
rather
station_wagon
afterwards
familiar
opening
awful
music
serious
song
depressed
paint
perform
performance
theater
direct
singer
guitar_player
significant
aware
basketball
politically
comfort
forty
economy
awareness
identity
unemployment
brain
crush
demonstration
tear_gas
softball
translate
softball_team
radar
evolve
feedback
cheerleader
nursing
recession
pressure
mid
monologue
phrase
frankly
atmosphere
clerical
lyric
bumpy
asn
hooking
hush
instigator
nurse_aide
rack
watercolor

new text ****

activity 0.03367197938009912
bit 0.013516634487368558
friend 0.005282874191341922
good 0.0072176044510961
learn 0.03523904467398644
least 0.027799139708510986
middle 0.027268814398759885
one 0.04708906905079816
play 0.018904320089653894
side 0.02626136759879352
walk 0.01768808138338211
circle 0.03367197938009912
co

post
milestone
unit
season
wisdom
edge
victory
cutting
strategy
patience
cherish
instantaneous
beaming
complacent
connectivity
impressive
inquiry
quietly
separately

new text ****

age 0.020047404461341425
agree_record_project 0.03404748184622501
available_oral_history 0.0346858529284991
black 0.03512727775313963
brother 0.024990410864537273
early 0.02826592673467721
good 0.007124377680724456
grandmother 0.047806080066167274
home 0.01211116633784789
house 0.01097492142359151
kid 0.0452509810998044
learn 0.017391938354787122
love 0.00928199517755827
mother 0.03836855799869734
outside 0.030873428386578246
recording 0.034904918172104914
run 0.015141533761374063
thank 0.01686141926489065
top 0.03191762447082167
wild 0.04493516624103769
away 0.03438151312893208
door 0.023798168533686413
fight 0.03265711023046576
full 0.026916594556915533
hang 0.025217902020667232
independent 0.0467859077621694
knee 0.04948182979953738
married 0.02628728113709221
marry 0.04315684506398361
month 0.01780270458

emotional
clearly
kiss
hierarchy
magazine
textbook
influence
expand
dynamic
farm
literature
aware
complicated
employment
human
gender
awareness
brain
et_cetera
surprised
guilty
intense
mm_affirmative
dramatic
account
goodbye
sensitive
reading
journal
fail
writer
cycle
playing
tension
income
alpha
commune
interaction
openly
intellectual
literary
typing
exclusion
periodically
overlap
intrigue
cetera
counterculture
hugely
invent
analyze
articulating
cello
dawn
deserve
flatten
frequency
marginal
marginally
median
patron
pique

new text ****

bear 0.023115822872759154
bit 0.014056246722106858
check 0.035652166710866864
close 0.038251460058287196
decide 0.01632298762710727
family 0.011794577659357212
friend 0.021975110181383435
good 0.01125861940729312
grow 0.015235697847542083
kid 0.01589109008972591
middle 0.028357442332742244
mom 0.022733517469626895
mother 0.040422430428109016
old 0.016956569722161474
school 0.010153254337315556
strong 0.027694441707825687
world 0.01861009362615775
write

celebratory
video
simultaneous
dammit
ugly
outcome
marriage_license
sodomy
discriminatory
mayor
organized
initial
perceive
devastating
affirm
assassinate
loved
steering
qualified
solidify
advisory_committee
bedrock
deputy
fetus
greener
harvey
mushy
nondisclosure
reform
roey
signing
spearheading
upsetting
zygote

new text ****

bit 0.034763399885341106
check 0.02939121546577743
difficult 0.02725955969855991
early 0.024549451407996584
family 0.009723307313278964
follow 0.026668998198056387
friend 0.009058007658597305
good 0.012375293074724496
grow 0.03768037565627967
hear 0.012416632668635987
home 0.021037519292823995
learn 0.015105202441776381
least 0.02383218029257546
love 0.008061575044740083
one 0.02691294755150458
run 0.02630137343852683
side 0.022513849489825453
strong 0.022830963123231914
wonderful 0.03900144275322874
arrest 0.04461752767123503
community 0.005742729587657564
dangerous 0.04350008844507393
door 0.04133825064368403
ear 0.04870725367237189
ever 0.011410659917075813
gi

question 0.01674867887056724
regular 0.039317603940308865
sometimes 0.019040114145879417
story 0.01484885117130679
topic 0.04042203006926126
wood 0.034849493226961306
hand 0.022193919815740756
hire 0.026189692314295885
state 0.025926358061505464
student 0.034163874155247624
daughter 0.02687154517777652
sense 0.02105834651411771
ask 0.020578800890130113
choice 0.03459846504723628
doctor 0.030008903060405476
less 0.027888325915513667
push 0.03296153152972285
yet 0.02354320051777694
advice 0.0356320223225662
certainly 0.025540792347057903
imagine 0.02632332144266281
retirement 0.03130656441664805
soon 0.030008903060405476
grandchild 0.04935240687231319
hit 0.03231678612295004
land 0.024563662115231952
send 0.02365283064981945
medicine 0.046657476370092384
president 0.03896876311700404
public 0.027012170720261856
obviously 0.03459846504723628
upset 0.03862875466270982
insurance 0.04042203006926126
nice 0.019040114145879417
vote 0.034849493226961306
hospital 0.03387236299513535
quite 0.0229

connection 0.032210486287828584
fast 0.04158215715032688
hope 0.033923431637223896
politic 0.0368781021284369
scary 0.036663894725140866
pregnant 0.04472373395525752
push 0.040744962445611654
bus 0.044381436396924545
conversation 0.03603737344958969
lovely 0.04860193752085607
mile 0.0481707225382563
weird 0.03846033149992403
health 0.037314991643111195
public 0.03339073855166748
bed 0.043394957498068626
clearly 0.049967159313039466
park 0.049967159313039466
paint 0.049967159313039466
hospital 0.041870874747928326
team 0.040475024084653456
easy 0.030806762328972257
necessarily 0.04158215715032688
service 0.037314991643111195
decision 0.03563245475543205
supportive 0.040744962445611654
note 0.049499498995445194
participate 0.04404605441603895
baby
bear
child
glass
mother
walk
write
apart
boy
girl
half
letter
realize
second
bath
repeat
happy
somewhat
writing
copy
daughter
test
village
birth
surprise
wonder
flag
funeral
pool
swam
comic
pregnancy
overnight
prepare
shoulder
nurse
retrospect


inform
deliver
reader
assembly
careful
conscientious
ill
mental
translation
latin
tolerate
alternate
eyed
redo
romantically
auditorium
chit
pledge
affectionate
quietly
excellent
aloud
chatting
corduroy
divisible
drafter
drove
drugstore
elaborate
embellish
flirtation
haired
knowing
lan
oldsmobile
yearbook

new text ****

almost 0.017908683794667243
anymore 0.03536810126875412
baby 0.023623425629555748
bad 0.017318859841711016
block 0.031488050308987554
brother 0.04011464902107865
close 0.01457037337876503
clothe 0.03443673116274783
difficult 0.025190679286028774
elementary 0.03795274404358982
english 0.0442911091118548
family 0.008985351151553935
follow 0.0246449388000353
good 0.0028590157211202745
home 0.009720432159202643
learn 0.0139587841648605
love 0.007449737036766181
memory 0.0246449388000353
moment 0.023996265642155238
old 0.019376793855592576
parent 0.02644675538395426
read 0.03566666531397798
run 0.024305215139150137
thank 0.02706597820559148
wait 0.023026930020291957
wear 0.0

fear
lie
track
land
review
coffee
press
separate
newspaper
left
spelling
court
gather
downtown
crash
normal
physically
fairly
fiction
convince
speaker
operation
report
page
monthly
pressure
theory
reactionary
sky
possibility
slightly
novel
sponsor
adjust
climate
liberation
rainy
separatism
disperse
desert
layout
aberration
faraway
global
madness

new text ****

almost 0.02207191138527567
bad 0.04268994238807013
block 0.03880807010072005
close 0.03591508943412632
early 0.027960125029060673
fun 0.02950012908954094
good 0.014094601785189257
hear 0.042425054159283125
home 0.023960277561926886
least 0.027143202901816075
love 0.018363151374426793
moment 0.0295746719805785
old 0.007960437551134581
play 0.018458261705028785
run 0.014977721447935988
type 0.03741400043481755
enjoy 0.03347455898389308
full 0.026625390924989483
hippie 0.03768238706114947
last 0.016048442804240153
month 0.017610101759625383
mostly 0.0419892305275309
rain 0.047820815720168906
rent 0.032683736080743875
sit 0.01774809

eye 0.044054994015575194
gay 0.012967042458521438
incredible 0.04995262600216329
job 0.029033421284402106
keep 0.01804942550836718
next 0.021448600523942155
political 0.028683907133050184
raise 0.03167133405191932
seem 0.023726226384033103
support 0.02335779854965365
sure 0.015705345374173656
travel 0.035896668629222646
culture 0.03624988526824102
focus 0.045440467093939764
interesting 0.049942830439795326
road 0.044324225910987156
sound 0.03812026476036994
speak 0.03268091646853474
water 0.04756464145330358
crazy 0.044324225910987156
meeting 0.030055020071451683
person 0.013342921533793786
piece 0.03438280677358702
set 0.032533407955717915
somewhere 0.033746392887245825
art 0.04889273813530674
sometimes 0.027291751268044034
story 0.021284071601598672
head 0.03504098256909566
lead 0.04251419056460015
program 0.029926258609903996
study 0.030712072481781413
carry 0.04923959016301643
couple 0.013397222745338281
ask 0.01474863834809468
everywhere 0.04602383800232853
imagine 0.0377313673572

raid
someday
bar
buy
concerned
gay
male
physical
reason
section
accept
housing
detail
egg
test
closet
pass
lie
protection
context
idea
police
public
listen
card
blow
paint
property
quote
alley
nurse
restaurant
concern
positive
sexuality
formally
act
assault
illegal
include
low
employment
discrimination
credit
radical
example
status
lawyer
progress
rating
physician
risk
recall
accommodation
nursing
spray
purchase
mid
civil
arc
return
edge
careful
moral_turpitude
perhaps
ordinance
achieve
honest
spatter
prohibit

new text ****

almost 0.03169475222488168
background 0.041168939382254216
bad 0.030650882990292002
dress 0.046933086073989085
good 0.0050598802193900835
home 0.034406402205441805
house 0.015589231876585261
play 0.026505635195381728
school 0.041067935625824836
walk 0.02480035411116088
community 0.028176325525809728
ever 0.03732376893604004
fall 0.03090702695987737
graduate 0.031036279953589745
lesbian 0.0201576463631236
man 0.013746155063405818
matter 0.04014417296407413
work 0.0

psychology
immigration
recording_studio_interviewer_curator
government
prejudice
immigrate
homosexual
extreme
luck
trek
socialist
ship
seamstress
urge
deviant_behavior
bullying
fascist
fitter
machinist
ten
turner
unsteady

new text ****

age 0.03626636126762073
brother 0.045208409417153005
city 0.04624937607535136
dad 0.048461335432324656
family 0.04050524619102498
friend 0.03773374823920339
good 0.012888214794806332
hear 0.02586253560142899
house 0.03970792987184411
middle 0.048692933999389884
mother 0.03470494119030979
read 0.040195620487957336
strong 0.0475544869810796
world 0.03195563443373969
class 0.027286798006601304
college 0.03512182444452678
door 0.04305160696561537
ever 0.0237672005137598
fact 0.03792460558439558
history 0.04541338718182541
lesbian 0.030806588079707466
okay 0.04123121306931258
work 0.004458334680612649
bring 0.02537170591486715
die 0.036560314416925116
gay 0.03603799301889074
keep 0.02508147376497809
support 0.0324579866134354
sure 0.04364828207852378
unders

old 0.008208224864465626
strong 0.026812286772432373
type 0.03857859906261392
wonderful 0.015267539116269855
write 0.037033030936934545
begin 0.0365304470205671
circle 0.03390085796825959
college 0.03960494534836841
date 0.02607643596011813
fact 0.02138274356875671
full 0.027454168745468917
huge 0.02693849629483353
lesbian 0.0057898151959312845
okay 0.023247083061363156
open 0.037033030936934545
picture 0.0294119809687392
transition 0.041273619670539635
turn 0.01895796990829023
work 0.025137091276292694
experience 0.02618743766325873
eye 0.03451653324501623
job 0.0454946402047102
keep 0.02828299535769966
raise 0.024814091549543813
relationship 0.014581877867458664
shoot 0.04772030889333031
stay 0.02795922946047605
sure 0.012304940401092214
visit 0.02526099683691401
able 0.015742600435226756
focus 0.03560203396151931
girlfriend 0.028826200547983228
help 0.028610274154188462
instead 0.041605104440843456
period 0.02897052652891624
show 0.020283852363536956
speak 0.02560508666429449
term 0

teach
fill
introduce
teaching
encourage
skill
yogurt
eventually
project
connect
program
ed
chance
shop
busy
cabinet
bi
suppose
tool
partially
option
union
gain
credit
construction
advisor
slow
snuck
trade
product
expertise
evolve
passion
passionate
hundred
vary
design
woodshop
ego
networking
nontraditional
panel
addition
coordinator
fruit
cup
woodworke
limited
funky
offshoot
woodworker
soysage
subbing

new text ****

bit 0.022982995189283553
early 0.024345429049106925
friend 0.03593091822928747
good 0.012272446113173505
grow 0.02491148435696727
home 0.02086268343845087
interested 0.043638555996946375
mother 0.03304682047901364
old 0.013862618096275673
parent 0.028380954494251796
run 0.026082791435948226
young 0.022457792240731498
class 0.025983098781998292
ever 0.022631659404345753
lesbian 0.009778240512960673
night 0.03317845612376488
picture 0.04967298853978261
work 0.0042453259038140925
around 0.024345429049106925
experience 0.02211357834611512
real 0.03611265685806315
second 0.0384

secular
segregate
sensuous
tailor
unappeale

new text ****

adult 0.03298456106042971
age 0.03446646127476625
almost 0.019181060340731994
bit 0.011469143183521671
care 0.017095066826121476
city 0.02197700946229747
decide 0.026637364294938234
difficult 0.02698042720220598
dinner 0.02926803359554954
evening 0.04537213551798992
experiment 0.044160653018168995
follow 0.026395912934589153
freedom 0.046711367288967805
friend 0.03137839408497557
grow 0.012431512022303323
hear 0.012289488807493974
home 0.02082209925881625
house 0.01886861239165663
interested 0.04355366591074052
love 0.007979026111754136
mom 0.037098661126113375
morning 0.02597451703462005
mother 0.016491267248346788
old 0.006917825572134254
play 0.016040705554180884
run 0.013016026289920149
small 0.017233230637383124
walk 0.030017403846700895
wear 0.024914657948801922
white 0.027910911680187403
wild 0.038627348745032444
young 0.01120705254791296
away 0.01477758745666212
boy 0.044774151046614835
door 0.020457477569188004
enjoy 

natural_food
website
migration
generous
transportation
replacement
intimate

new text ****

age 0.04053692980794056
close 0.03670825812118662
early 0.028577611885108125
family 0.045274967333568755
friend 0.03163283240236634
fun 0.030151626246494343
grow 0.029242069630375577
hear 0.028907995003802898
love 0.037537386719152924
old 0.01627248050826096
small 0.04053692980794056
thank 0.03409469642422695
world 0.03571859058177023
community 0.013370033781182386
lesbian 0.0459123167203137
man 0.03913637867168106
month 0.035998025961670094
buy 0.04274014594087592
figure 0.039257617320040496
next 0.03331466867269671
relationship 0.028907995003802898
support 0.036280097563320436
town 0.030970838259093806
help 0.028359367353926843
show 0.04021193347748111
question 0.03728884661645686
story 0.03305911696297884
break 0.04569557392784328
ask 0.022908068029507782
deal 0.04053692980794056
agree_record_project
available_oral_history
background
bear
decide
interested
irish
moment
outside
parent
roll
wal

deal
therapist
blood
forever
greet
song
sing
perform
performance
theater
singe
audience
comedy
musical
boyfriend
sore
personal
routine
selection
memorize
boarding
crowd
attorney
queer
final
pain
ban
hood
genetically
cheese
unreal
genitalia
menopause
cope
resident
sting
publicize
grand
inherit
unity
kit
segregated
creatively
fan
disgusting
performer
hardcore
shaft
rehearse
layer
sexy
bloody
thread
pantie
parody
mood
broadcasting
cre
shtick
bedtime
cease
clit
clitoris
copyright
cramp
depart
desist
dysmenorrhea
entertainer
finale
irritable
periodic
slut
survivable
tender
tylenol

new text ****

baby 0.02715639123775714
bit 0.012309806138157069
city 0.02358787588998053
decide 0.028589824475578014
fun 0.041273194303489316
good 0.006573182966360548
hear 0.026380562581981837
house 0.020251640154866538
one 0.014294912237789007
roll 0.037941181304581365
school 0.00889174720924529
thank 0.031113789545579657
away 0.015860752095460014
class 0.013916676492371252
community 0.01830163313858123
cut 0.

house 0.011185541296879523
learn 0.03545141458271912
love 0.01892025215831358
middle 0.027433151306263736
one 0.015790951246250913
outside 0.0314659207902842
parent 0.03358368746199792
read 0.022645843002787727
wait 0.029240986645945075
walk 0.03558935902471034
away 0.01752066461843624
class 0.01537313110744012
ever 0.026780444482086514
girl 0.04273274817410405
lesbian 0.008678074235893701
open 0.018502340217278344
phone 0.02938946473349473
poster 0.04872352722932316
teach 0.020350006740378995
wrong 0.03079601477061604
around 0.014404189268529512
buy 0.021542638131276475
enough 0.019630328168804348
environment 0.03801077942188212
eye 0.03449010923830977
finally 0.021901474226171655
fine 0.028804132754813073
gay 0.010151736957087997
information 0.03882559428862608
keep 0.014130671706645356
miss 0.03603149281340013
political 0.044912551346649725
raise 0.02479509521185064
stay 0.013968912686937111
understand 0.021901474226171655
book 0.01924487483044272
drive 0.021366374087052026
explain 

world 0.016413976621545635
boring 0.04837442015460135
class 0.028031667810312782
community 0.006144011237263291
door 0.022113410757618454
girl 0.019479932114455572
mostly 0.01972169497294741
okay 0.021178367426907097
work 0.013740112349264633
bring 0.03909644685029252
cool 0.0256218320753617
else 0.028683390117561682
honestly 0.04393956712324217
job 0.041446154929073276
next 0.030618576144337974
political 0.02047360604460071
raise 0.02260593068016224
real 0.038959864228911144
second 0.02073367377022656
support 0.016672009268351794
sure 0.0112099461378723
beautiful 0.024312285966678365
book 0.035091481008506786
famous 0.04773524283298016
feminist 0.026130773022585495
help 0.013032148950097508
later 0.013284263080927606
laugh 0.03730526363090124
speak 0.023326536578496225
affirmative 0.04623352807252503
hold 0.02280799071328186
mind 0.0256218320753617
person 0.009523727632354258
alone 0.039521061309309234
copy 0.044921401470900325
invite 0.02853253354773553
stand 0.023432636214800708
uni

healing
grandkid
gut
ball
tune
abandon
certificate
consistent
yak
blessing
bond
lesson
thereafter
blast
wisdom
bean
spill
ceremony
indeed
commitment_ceremony
gig
mellow
thorough
bitch
knowingly
umpteen
annot
consistency
macklemore
plight

new text ****

age 0.03568028911545954
almost 0.03971314449966166
bit 0.02374611895502351
decide 0.027575469722302123
early 0.025153790854933356
friend 0.018561981399920404
good 0.012679938488105233
learn 0.030954101313898048
love 0.01652005735416882
mother 0.034144101920443704
parent 0.029323311253698293
thank 0.030009885590347472
young 0.023203477250183343
fall 0.03872613418788544
full 0.04790604577496954
girl 0.03731173584952766
lesbian 0.005051457838268651
turn 0.03308063641070677
work 0.008772574077883247
job 0.019846424202174393
later 0.0254445914964656
person 0.01824168624579321
question 0.032821351650479114
study 0.04198780520081672
sister 0.045923060297265665
easy 0.048837725025597925
agree_record_project
available_oral_history
bear
cousin
di

straight 0.030130049494531214
able 0.042389248218486705
girlfriend 0.03880937508551897
help 0.019259334214045635
later 0.019631916688688404
struggle 0.04793183505124317
build 0.03494678788532996
mind 0.03786477801973353
basically 0.03526909902028094
eventually 0.03416270687408612
forget 0.04084158724186388
hire 0.03959802470408488
couple 0.028263506598503227
share 0.0382373604943763
self 0.044331036809581566
bedroom
curious
difficult
friend
house
kitchen
school
window
graduate
style
attract
box
buy
cute
duplex
finally
information
keep
visit
bath
lover
ride
survive
art
foot
housing
anniversary
distance
roommate
starflower
undergraduate
bank
excuse
steady
bicycle
relative
band
originally
paint
drag
mill
temp
brief
bungalow
heterosexual
master_degree
difficulty
employment
twice
cheap
economic
credit
employ
thesis
regularly
average
era
chemistry
square
loan
classic
improve
heel
purchase
washer
pleasure
affordable
anyways
remodel
upgrade
temporary
complex
accomplish
fortieth
demolish
trim
g

draw
movie
rain
environment
minute
summer
tent
wash
water
bunch
dry
set
winter
money
camp
carpentry
anti
land
peace
alien
crew
dyke
space
inheritance
carpenter
flat
sub
boom
seasonal
influx
tricky
proximity
dysfunctional
infrastructure
rain_rain
stapler
occupy
frog
nuke
ammunition
columbian
diem
displace
flat_machine
raining
refill
whoosh

new text ****

anymore 0.018036222732430565
bad 0.01766375928429083
bear 0.017960807491768375
close 0.014860537610270204
cry 0.036432356231169055
curious 0.04448130054222608
dress 0.02704700987382379
eat 0.0283932968589789
family 0.009164291487804656
father 0.020273360514679543
friend 0.012805862214514232
good 0.011663808345222444
home 0.009914011393862743
house 0.008983899073387583
interested 0.020737176147152828
love 0.007598096118993908
old 0.006587558794184045
parent 0.026973433887812946
play 0.03054979916348502
read 0.03637695531562376
side 0.04243895057303186
small 0.016410491830667968
write 0.014860537610270204
away 0.028144169057713025
breast

senior
stop
transition
feminist
series
southern
speak
struggle
affirmative
piece
practically
amaze
forget
program
abortion
action
control
push
thousand
assist
worry
conversation
folk
therefore
particular
sake
realistic
serious
tour
at_all
chorus
rally
negotiate
ridiculous
true
movement
setting
domestic_violence
gain
assisted_living
trans
convert
condo
privileged
commitment
circumstance
truth
supportive
broaden
profession
remove
essentially
grab
demographic
attempt
reality
theory
correct
devote
lecture
advise
income
dialogue
theoretically
unusual
outcome
courthouse
discriminatory
mayor
evidence
climate
perfectly
growth
preserve
appropriately
heartbeat
grassroot
conviction
disappear
bureaucracy
disparity
orange
womb
enact
cater
reproductive
assisted
contrary
disappearance
fetal
institutional
older
reconstruct
savior
stubborn

new text ****

age 0.030359906401067376
anymore 0.03336755775729284
background 0.04389233216083136
brother 0.03784562416718966
decide 0.04692723632510158
early 0.02

drink 0.03178414511668227
else 0.026672068479514314
enough 0.016642186667923758
experience 0.011092086303202112
figure 0.016775275133315174
finally 0.018567617374522484
free 0.02742352717281619
job 0.009634972347672188
keep 0.023959383079377665
next 0.028471556406681416
raise 0.02102076946529695
real 0.018113970531500642
spend 0.017747860176617676
straight 0.01895836272601821
support 0.015502943378520609
sure 0.010423888174150514
able 0.026672068479514314
ago 0.04112557153289288
drive 0.018113970531500642
girlfriend 0.024419548669341144
help 0.012118315436480197
later 0.037058252857368815
light 0.03339772138292186
store 0.02789387637776891
teaching 0.03442037720415076
term 0.017747860176617676
truck 0.0290637733766856
collective 0.0483569555952934
hold 0.02120866075074998
individual 0.041306799779264755
lose 0.03955528376477118
meeting 0.01994799610149582
person 0.017711819596587422
somewhere 0.022398019104760202
eventually 0.02149578243272107
general 0.03524494569081457
th 0.048456662

bit 0.035460215553911205
early 0.012520766690385036
good 0.006311674942898775
hear 0.012665518112031105
home 0.010729603135820913
kitchen 0.03835221289794222
learn 0.030815957952186387
love 0.008223165447953129
new 0.016662513018465868
parent 0.014596222927769551
play 0.033063026446201606
value 0.03449771394414326
wonderful 0.013261069319116445
world 0.015649458078642993
away 0.015229746697872937
community 0.011715679692884718
ever 0.011639381119408385
exciting 0.028764919594796375
hang 0.022341207531285247
huge 0.02339822180890064
kill 0.035849414062978195
lesbian 0.02514456982012865
man 0.00857345023133352
mostly 0.018803111874413164
okay 0.04038387294720924
sit 0.031790944528255365
turn 0.03293300265209771
view 0.03475712579469443
work 0.008733423437982769
bring 0.012425146774089747
enough 0.017063560779105504
experience 0.011372933892549942
feeling 0.02275383598656819
gay 0.03529737844241316
incredible 0.03399381065171073
job 0.009878926341836078
keep 0.01228301297066985
next 0.014

person 0.005641877627497707
piece 0.014538314402522156
wet 0.0365123996045099
empty 0.03139583384255813
head 0.014816615319321812
area 0.028275859946785747
couple 0.0056648381755143445
stand 0.013881546293328413
university 0.014269215220751254
chance 0.049470379404453625
control 0.024735189702226813
doctor 0.01818799003067828
less 0.03380547384238947
ready 0.01862807065802688
skin 0.03139583384255813
yet 0.014269215220751254
advice 0.02159608651709855
extremely 0.024735189702226813
plan 0.015634971546825752
course 0.016669737370559257
guy 0.008760775153409422
hit 0.01958676672201678
paper 0.01829579967135475
send 0.0286713211202879
sister 0.042609923718612636
idea 0.028169674266936368
review 0.03555370354052
embarrassed 0.030377067329724523
awful 0.020673646677307533
bed 0.021276849174323462
horrendous 0.040337998315799636
horrible 0.017373359831563732
floor 0.018081589706752707
upset 0.02341236543330652
quick 0.028657115086559153
curtain 0.0365123996045099
desk 0.027916649490525195
re

twenty_thirty
sadly
compensation
determined
comp
island
ass
accomplish
formulate
remarry
calling
dawning
seminal
waterworks

new text ****

bear 0.029149761364379988
bit 0.01772535807544697
church 0.04854009870803359
dinner 0.045233228615603596
early 0.018776118771369803
friend 0.04156693153014442
fun 0.019810280783088843
good 0.014197464257338753
hear 0.018993187737860358
home 0.032180186378289594
house 0.014580553476281295
kid 0.020039151817574166
least 0.03645505883755411
mom 0.028667662529700332
moment 0.03972067744006793
morning 0.040143156982897346
old 0.032074131451878506
parent 0.0437769382789712
run 0.020116038575553306
school 0.012803565014048644
strong 0.034923540094010566
tree 0.04758163829899868
write 0.04823626390787848
away 0.022838500223531094
college 0.025793116949221793
community 0.026353217679136082
date 0.03396507968497566
ever 0.01745439458520335
exciting 0.04313582068225491
graduate 0.029028122947342798
last 0.02155408589015679
lesbian 0.03770675079827861
man 0.03

college 0.023798511313671798
divorce 0.04669615054150113
fight 0.04003098777367809
full 0.03299428087827992
last 0.019887288454613333
lesbian 0.02087451169026032
phone 0.03534716968727621
room 0.025697672866975108
sit 0.021993497600299786
sudden 0.04636356486198503
teach 0.02447527193544285
though 0.036079609177219606
wife 0.03890482370610609
experience 0.015735964941328028
figure 0.023798511313671798
raise 0.029821449482426033
reason 0.03105308315597713
relationship 0.017524426929557257
second 0.027351592538789275
sure 0.014788015020528957
interesting 0.015675243239708835
later 0.017524426929557257
laugh 0.049212618163682796
light 0.04738020949676375
period 0.034816631978503425
road 0.04173530113020418
show 0.02437703098501406
sorry 0.040264867289388544
southern 0.046036972044478035
week 0.024974626133760284
person 0.012563577518260314
main 0.04809110104165128
story 0.04008178910347785
couple 0.012614707061280324
ask 0.027774376204757496
deal 0.02457404826154126
therapist 0.0492126181

originally
uniform
organize
theme
birthday
lace
ballroom
biology
chuckle
dykey
facility
friendly
purpose
socialize
shake
butch
plaid
flyer
kinda
respect
ninety
chemistry
tough
campground
product
avenue
grab
variety
planning
extreme
wholesale
mid
alcoholic
themed
ugly
distribution
nail
latin
recovery
flannel_shirt
technology
dip
butchy
lipstick
hysterical
campsite
manufacture
associat
autonomy
campout
command
hazard
migrating
regulatory
winner

new text ****

care 0.03949947524809272
friend 0.010357458366631803
good 0.014150637494181018
mother 0.03810434958291092
college 0.03856206726544026
community 0.026266298222229034
lesbian 0.011274715370825186
okay 0.04526988096321127
pay 0.04625223419410021
work 0.04895036193725458
around 0.0280712857027271
finally 0.04268220368757638
gay 0.019783988059915485
stay 0.027223006562971386
visit 0.049191647691910374
later 0.028395815231419867
describe 0.04605311733979602
hold 0.04875328696434309
lose 0.045463740568147774
question 0.03662817841826705
a

## Generating the Model

In [53]:
# Number of topics for the LDA model; also used to build the saved model's file name.
num_topics = 20

In [50]:
# Train the LDA model on the prepared corpus and dictionary.
# random_state is a fixed seed; alpha="auto" asks gensim to learn the
# document-topic prior instead of using a fixed value.
lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                           id2word=id2word,
                                           num_topics=num_topics,
                                           random_state=100,
                                           update_every=1,
                                           chunksize=100,
                                           passes=70,
                                           alpha="auto")

# Create the output folder first: on a fresh checkout "models/" may not exist
# yet and writing the model file into a missing directory would fail.
os.makedirs("models", exist_ok=True)
lda_model.save("models/num_topics_"+str(num_topics)+".model")

Background reading on pyLDAvis: https://neptune.ai/blog/pyldavis-topic-modelling-exploration-tool-that-every-nlp-data-scientist-should-know

## Visualizing the Data

In [54]:
# Reload the model that was saved for the current num_topics value.
lda_model = gensim.models.ldamodel.LdaModel.load(
    f"models/num_topics_{num_topics}.model"
)

In [55]:
# Switch pyLDAvis to notebook rendering, then build the interactive view
# from the model, the corpus and the id2word dictionary.
pyLDAvis.enable_notebook()
# mds="mmds" and R=30 are passed straight to pyLDAvis; presumably the topic
# layout method and the number of terms listed per topic — confirm in its docs.
vis = pyLDAvis.gensim_models.prepare(lda_model, corpus, id2word, mds="mmds", R=30)
# Bare expression on the cell's last line so the notebook displays the result.
vis

  default_term_info = default_term_info.sort_values(


For making a Binder notebook, see: https://www.youtube.com/watch?v=owSGVOov9pQ

In [91]:
# One bucket per topic (num_topics buckets in total); each bucket collects
# (document index, probability) pairs for the documents assigned to that topic.
topic_dict = {i: [] for i in range(num_topics)}

# Loop over all the documents to group the probability of each topic.
for docID, doc in enumerate(corpus):
    topic_vector = lda_model[doc]
    for topicID, prob in topic_vector:
        topic_dict[topicID].append((docID, prob))

# Keep the five highest-probability document indices for every topic.
# The results are stored per topic; previously each iteration overwrote
# docs_top_5, so only the last topic's list survived the loop.
top_docs_by_topic = {}
for topicID, probs in topic_dict.items():
    doc_probs = sorted(probs, key=lambda x: x[1], reverse=True)
    docs_top_5 = [dp[0] for dp in doc_probs[:5]]
    top_docs_by_topic[topicID] = docs_top_5

In [93]:
# Report the strongest documents for each topic; topics are numbered from 1
# in the printed headings.
for topicID, probs in topic_dict.items():
    print(f"Topic {topicID + 1} top documents:")
    doc_probs = sorted(probs, key=lambda pair: pair[1], reverse=True)
    # The slice width sets how many top-scoring documents are listed (here 5).
    for p in doc_probs[:5]:
        print(p)

Topic 1 top documents:
(92, 0.7267653)
(68, 0.50812095)
(32, 0.372207)
(258, 0.3277254)
(41, 0.29305917)
Topic 2 top documents:
(72, 0.23073047)
(79, 0.1922666)
(215, 0.19149719)
(182, 0.17418672)
(355, 0.16248797)
Topic 3 top documents:
(59, 0.81092376)
(97, 0.8033117)
(88, 0.803253)
(98, 0.6893622)
(87, 0.6840705)
Topic 4 top documents:
(46, 0.736441)
(77, 0.51440734)
(45, 0.48379076)
(665, 0.46761337)
(34, 0.4634188)
Topic 5 top documents:
(94, 0.71506315)
(95, 0.67164725)
(18, 0.64156026)
(566, 0.5616994)
(91, 0.52420306)
Topic 6 top documents:
(82, 0.5434546)
(84, 0.524566)
(554, 0.38239452)
(551, 0.3708746)
(153, 0.29526973)
Topic 7 top documents:
(246, 0.3357264)
(310, 0.32365546)
(487, 0.29709834)
(247, 0.28422758)
(680, 0.2753085)
Topic 8 top documents:
(93, 0.6024506)
(15, 0.53430915)
(67, 0.4847168)
(702, 0.39576358)
(10, 0.38731843)
Topic 9 top documents:
(184, 0.14429308)
(217, 0.13262096)
(87, 0.119003266)
(395, 0.103931084)
(216, 0.06638894)
Topic 10 top documents:
(259,

In [97]:
# Show the opening 200 characters of every entry in data.
for d in data:
    print(d[:200])

 
 
Long:  This interview is part of the Eugene Lesbian Oral History Project. 
The recordings will be made available through the University of 
Oregon Libraries’ Special Collections and University Arc
 eighth grade, all in the same school 
and that was Meadow Park Elementary. And then you'd go to high  
school in eighth grade and that was South Torrance High. And my 
mother made all my clothes, alm
hen I remember seeing the 
maid who was cleaning rooms. We both looked at each other. And 
we went— because she knew Dolly Parton was there too.  
 And another delivery was to some agent and he had a 
y, 1970, that's when I came out as a lesbian. And I 
remember early times of stopping racist comments and jokes. I was 
living in Los Angeles, and I had some black friends, and there was  
an early un
 candy. And that was as far as Ruth could give us a ride.  
 So we had to hitchhike to the airport, but we decided to hitchhike 
to WomanShare  if I remember this right. And we finally visited 
Wo

In [89]:
import pandas as pd

##dominant topic for each document
def format_topics_sentences(ldamodel=lda_model,
                            corpus=corpus,
                            texts=data_bigrams_trigrams,
                            n=1):
    """
    Build a table of the ``n`` highest-weighted topics for every document.

    Parameters
    ----------
    ldamodel
        Trained topic model. ``ldamodel[corpus]`` must yield, per document,
        a sequence of ``(topic_num, weight)`` pairs, and
        ``ldamodel.show_topic(topic_num)`` must yield ``(word, weight)`` pairs.
    corpus
        The documents to score, in whatever form ``ldamodel`` accepts.
    texts
        Sequence indexed by document position; ``texts[i]`` is attached to
        every row produced for document ``i``.
    n : int
        Maximum number of topics to keep per document (``n <= 0`` keeps none).

    Returns
    -------
    pandas.DataFrame
        One row per (document, topic) pair with the columns ``Document``,
        ``Dominant_Topic``, ``Perc_Contribution``, ``Topic_Keywords`` and
        ``original_texts``. Documents appear in corpus order and, within a
        document, topics appear in descending weight. ``Document`` and
        ``Dominant_Topic`` hold integers. When no rows are produced the
        frame is empty but still carries all five columns.
    """
    columns = [
        "Document",
        "Dominant_Topic",
        "Perc_Contribution",
        "Topic_Keywords",
        "original_texts",
    ]
    # Negative n must select nothing (as range(n) did), not slice from the end.
    keep = max(n, 0)

    # The keyword string depends only on the topic, so compute it once per
    # topic instead of once per output row.
    keywords_by_topic = {}

    # Rows are collected in a plain list and the frame is built once at the
    # end: DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every call.
    records = []
    for doc_index, doc_topics in enumerate(ldamodel[corpus]):
        ranked = sorted(doc_topics, key=lambda pair: pair[1], reverse=True)
        for topic_num, prop_topic in ranked[:keep]:
            if topic_num not in keywords_by_topic:
                keywords_by_topic[topic_num] = ", ".join(
                    word for word, _ in ldamodel.show_topic(topic_num)
                )
            records.append(
                (
                    doc_index,  # position in corpus doubles as the document label
                    int(topic_num),
                    round(float(prop_topic), 4),
                    keywords_by_topic[topic_num],
                    texts[doc_index],
                )
            )

    return pd.DataFrame(records, columns=columns)

In [90]:
# Show the two highest-weighted topics for every document (n=2).
format_topics_sentences(ldamodel=lda_model, corpus=corpus, texts=data_bigrams_trigrams, n=2)

Unnamed: 0,Document,Dominant_Topic,Perc_Contribution,Topic_Keywords,original_texts
0,0.0,19.0,0.0336,"child, baby, daughter, cool, adopt, pregnant, ...","[long, interview, part, recording, make, avail..."
1,0.0,17.0,0.0353,"class, teach, middle, sign, department, surger...","[long, interview, part, recording, make, avail..."
2,1.0,16.0,0.0118,"law, doctor, office, lawyer, person, nurse, ho...","[eighth_grade, same, school, then, go, high, s..."
3,1.0,14.0,0.0212,"job, fun, land, marry, city, wedding, beautifu...","[eighth_grade, same, school, then, go, high, s..."
4,2.0,19.0,0.0167,"child, baby, daughter, cool, adopt, pregnant, ...","[remember, see, maid, clean, room, look, other..."
...,...,...,...,...,...
1415,707.0,14.0,0.1686,"job, fun, land, marry, city, wedding, beautifu...","[party, raiskin, know, summarize, say, generat..."
1416,708.0,19.0,0.0142,"child, baby, daughter, cool, adopt, pregnant, ...","[ound, too, busy, organize, party, mostly, jus..."
1417,708.0,16.0,0.0141,"law, doctor, office, lawyer, person, nurse, ho...","[ound, too, busy, organize, party, mostly, jus..."
1418,709.0,15.0,0.0219,"straight, movement, seem, hire, night, quite, ...","[think, struggle, lesbian, community, pandemic..."


In [139]:
# Spot-check the first document at several stages of the pipeline.
# First 100 entries of the first corpus document
# (presumably (token id, count) pairs, judging by the printed output).
print(corpus[0][0:100])
# Entry stored under id 2 in id2word.
print(id2word[2])
# Slice [1000:2000] of the first entry of `data` ...
print(data[0][1000:2000])
# ... and the same slice of the first entry of `lemmatized_texts`.
print(lemmatized_texts[0][1000:2000])

[(3, 2), (9, 6), (16, 2), (20, 4), (26, 1), (27, 4), (28, 3), (34, 2), (38, 1), (42, 3), (61, 1), (65, 3), (72, 2), (76, 1), (86, 2), (95, 3), (102, 2), (112, 3), (118, 8), (119, 3), (132, 1), (134, 2), (140, 1), (141, 3), (144, 2), (153, 1), (154, 12), (156, 1), (161, 1), (164, 3), (167, 2), (169, 3), (170, 2), (194, 10), (203, 2), (215, 24), (218, 2), (220, 3), (225, 4), (234, 2), (237, 1), (245, 2), (252, 2), (255, 1), (258, 6), (266, 1), (267, 1), (272, 2), (275, 2), (277, 1), (280, 8), (285, 1), (286, 4), (288, 3), (307, 5), (320, 4), (322, 6), (323, 11), (325, 1), (327, 7), (329, 1), (333, 2), (334, 1), (336, 7), (345, 4), (348, 1), (349, 4), (353, 2), (354, 5), (358, 2), (361, 5), (362, 2), (363, 1), (368, 4), (373, 5), (378, 3), (380, 2), (394, 14), (395, 4), (397, 2), (399, 4), (401, 2), (410, 4), (413, 2), (414, 4), (418, 2), (419, 2), (423, 6), (424, 2), (433, 12), (434, 2), (441, 1), (444, 4), (445, 2), (448, 4), (452, 6), (458, 8), (462, 2), (467, 2), (475, 22)]
able
ect. 

## Saving Models

https://www.youtube.com/watch?v=xADAr8pPQMI&list=PL2VXyKi-KpYttggRATQVmgFcQst3z6OlX&index=12

In [136]:
# Fit a 20-topic LDA model on every document except the last one;
# corpus[-1] is left out of training so it can be scored as a test document.
lda_model = gensim.models.ldamodel.LdaModel(
    corpus=corpus[:-1],
    id2word=id2word,
    num_topics=20,
    random_state=100,  # fixed seed
    update_every=1,
    chunksize=100,
    passes=30,
    alpha="auto",
)


In [132]:
# Use the last document of the corpus as the test document.
test_doc = corpus[-1]

# Indexing the model with a document gives its (topic id, weight) pairs,
# as the printed output shows.
vector = lda_model[test_doc]
print (vector)

def Sort(sub_li):
    """
    Reorder ``sub_li`` in place by each item's second element, largest first.

    Items with equal second elements end up in the reverse of their original
    relative order. The same list object that was passed in is returned.
    """
    ascending = sorted(sub_li, key=lambda pair: pair[1])
    # Slice assignment keeps the caller's list object and rewrites its contents.
    sub_li[:] = ascending[::-1]
    return sub_li
# Sort reorders `vector` in place and returns it, so new_vector is the same
# list, now ordered from highest to lowest weight.
new_vector = Sort(vector)
print (new_vector)

[(0, 0.23957382), (1, 0.114857174), (2, 0.068196125), (3, 0.012150145), (4, 0.22865918), (5, 0.036370933), (7, 0.024131227), (8, 0.035303768), (9, 0.046170343), (10, 0.037606623), (11, 0.01454134), (12, 0.049493156), (15, 0.017285148), (16, 0.019996101), (18, 0.055496957)]
[(0, 0.23957382), (4, 0.22865918), (1, 0.114857174), (2, 0.068196125), (18, 0.055496957), (12, 0.049493156), (9, 0.046170343), (10, 0.037606623), (5, 0.036370933), (8, 0.035303768), (7, 0.024131227), (16, 0.019996101), (15, 0.017285148), (11, 0.01454134), (3, 0.012150145)]


In [20]:
# Write the trained model to disk.
# NOTE(review): the recorded output of this cell is a NameError, i.e. it was
# run in a session where lda_model was not defined; run the training cell first.
lda_model.save("models/test_model.model")

NameError: name 'lda_model' is not defined

In [134]:
# Load a model back from the same path the save cell writes to.
new_model = gensim.models.ldamodel.LdaModel.load("models/test_model.model")

In [135]:
# Score the last corpus document again, this time with the reloaded model.
test_doc = corpus[-1]

# Indexing the model with a document gives its (topic id, weight) pairs,
# as the printed output shows.
vector = new_model[test_doc]
print (vector)

def Sort(sub_li):
    """
    Reorder ``sub_li`` in place by each item's second element, largest first.

    Items with equal second elements end up in the reverse of their original
    relative order. The same list object that was passed in is returned.
    """
    ascending = sorted(sub_li, key=lambda pair: pair[1])
    # Slice assignment keeps the caller's list object and rewrites its contents.
    sub_li[:] = ascending[::-1]
    return sub_li
# Sort reorders `vector` in place and returns it, so new_vector is the same
# list, now ordered from highest to lowest weight.
new_vector = Sort(vector)
print (new_vector)

[(0, 0.2395575), (1, 0.11485863), (2, 0.068188906), (3, 0.0121496385), (4, 0.22869575), (5, 0.036369193), (7, 0.024140326), (8, 0.035302002), (9, 0.046137094), (10, 0.037605748), (11, 0.014545072), (12, 0.049502864), (15, 0.017285211), (16, 0.020000387), (18, 0.055493727)]
[(0, 0.2395575), (4, 0.22869575), (1, 0.11485863), (2, 0.068188906), (18, 0.055493727), (12, 0.049502864), (9, 0.046137094), (10, 0.037605748), (5, 0.036369193), (8, 0.035302002), (7, 0.024140326), (16, 0.020000387), (15, 0.017285211), (11, 0.014545072), (3, 0.0121496385)]
