In [164]:
import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
import numpy as np
import textblob
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import os
from os.path import isfile, join
from os import listdir
import pickle
import pandas as pd

In [2]:
doc_a = "Brocolli is good to eat. My brother likes to eat good brocolli, but not my mother. It helps you with heart problems."
doc_b = "My mother spends a lot of time driving my brother around to baseball practice. Baseball if one of my brothers favorite activities"
doc_c = "Some health experts suggest that driving may cause increased tension and blood pressure. Also, driving may cause skin cancer."
doc_d = "I often feel pressure to perform well at school, but my mother never seems to drive my brother to do better."
doc_e = "Health professionals say that brocolli is good for your health. It can also counter the negatives of stress."

# compile sample documents into a list
doc_set = [doc_a, doc_b, doc_c, doc_d, doc_e]

In [3]:
blob  = textblob.blob.TextBlob("".join(doc_set)) # tokenize the blob

In [4]:
blob.tokenize()

WordList(['Brocolli', 'is', 'good', 'to', 'eat', '.', 'My', 'brother', 'likes', 'to', 'eat', 'good', 'brocolli', ',', 'but', 'not', 'my', 'mother', '.', 'It', 'helps', 'you', 'with', 'heart', 'problems.My', 'mother', 'spends', 'a', 'lot', 'of', 'time', 'driving', 'my', 'brother', 'around', 'to', 'baseball', 'practice', '.', 'Baseball', 'if', 'one', 'of', 'my', 'brothers', 'favorite', 'activitiesSome', 'health', 'experts', 'suggest', 'that', 'driving', 'may', 'cause', 'increased', 'tension', 'and', 'blood', 'pressure', '.', 'Also', ',', 'driving', 'may', 'cause', 'skin', 'cancer.I', 'often', 'feel', 'pressure', 'to', 'perform', 'well', 'at', 'school', ',', 'but', 'my', 'mother', 'never', 'seems', 'to', 'drive', 'my', 'brother', 'to', 'do', 'better.Health', 'professionals', 'say', 'that', 'brocolli', 'is', 'good', 'for', 'your', 'health', '.', 'It', 'can', 'also', 'counter', 'the', 'negatives', 'of', 'stress', '.'])

In [5]:
no_features=1000

In [6]:
# LDA can only use raw term counts for LDA because it is a probabilistic graphical model
tf_vectorizer = CountVectorizer(max_df=0.95, min_df=1, max_features=no_features, stop_words='english')
tf = tf_vectorizer.fit_transform(doc_set)
tf_feature_names = tf_vectorizer.get_feature_names()

In [7]:
tf

<5x38 sparse matrix of type '<class 'numpy.int64'>'
	with 47 stored elements in Compressed Sparse Row format>

In [8]:
tf_feature_names

['activities',
 'baseball',
 'better',
 'blood',
 'brocolli',
 'brother',
 'brothers',
 'cancer',
 'cause',
 'counter',
 'drive',
 'driving',
 'eat',
 'experts',
 'favorite',
 'feel',
 'good',
 'health',
 'heart',
 'helps',
 'increased',
 'likes',
 'lot',
 'mother',
 'negatives',
 'perform',
 'practice',
 'pressure',
 'problems',
 'professionals',
 'say',
 'school',
 'skin',
 'spends',
 'stress',
 'suggest',
 'tension',
 'time']

In [9]:
lda_model = LatentDirichletAllocation(n_topics=3, max_iter=5, learning_method='online', learning_offset=50.,random_state=0).fit(tf)


In [10]:
lda_W = lda_model.transform(tf) # documents as the rows, topics as the columns
lda_H = lda_model.components_ # topics as the rows, words as the columns

In [11]:
lda_W

array([[ 0.02908081,  0.94264018,  0.02827901],
       [ 0.02893521,  0.94213988,  0.02892491],
       [ 0.02695629,  0.02646135,  0.94658236],
       [ 0.0416116 ,  0.91698643,  0.04140197],
       [ 0.92633112,  0.03686845,  0.03680043]])

In [12]:
lda_H[0]

array([ 0.95211094,  0.84706215,  0.96155505,  0.74732936,  1.08121608,
        0.8494759 ,  0.8734277 ,  0.82473556,  0.85012361,  1.11074376,
        0.93095234,  0.80411069,  0.64691762,  0.86434555,  0.99410168,
        0.71602261,  1.20676853,  1.4730662 ,  0.75393777,  0.68147528,
        0.9095761 ,  0.90751579,  0.74243208,  0.71969034,  1.04923259,
        0.78538113,  0.70509772,  0.80305647,  0.753064  ,  1.11442345,
        1.08382812,  0.84827887,  0.77094035,  0.78994215,  1.02729989,
        0.69760789,  0.70406667,  0.85035124])

In [13]:

def display_topics(model, feature_names, no_top_words):
    for topic_idx, topic in enumerate(model.components_):
        print ("Topic %d:" % (topic_idx))
        print (" ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))

In [14]:
no_top_words = 10

display_topics(lda_model, tf_feature_names, no_top_words)

Topic 0:
health good professionals counter say brocolli negatives stress favorite better
Topic 1:
mother brother good baseball brocolli eat lot spends school pressure
Topic 2:
cause driving experts cancer suggest blood skin tension increased stress


In [15]:
def display_topics(H, W, feature_names, documents, no_top_words, no_top_documents):
    for topic_idx, topic in enumerate(H):
        print ("Topic %d:" % (topic_idx))
        print (" ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))
        top_doc_indices = np.argsort( W[:,topic_idx] )[::-1][0:no_top_documents]
        for doc_index in top_doc_indices:
            print (f'DOC {doc_index}:',documents[doc_index])

In [16]:
no_top_words = 5
no_top_documents = 2
display_topics(lda_H, lda_W, tf_feature_names, doc_set, no_top_words, no_top_documents)


Topic 0:
health good professionals counter say
DOC 4: Health professionals say that brocolli is good for your health. It can also counter the negatives of stress.
DOC 3: I often feel pressure to perform well at school, but my mother never seems to drive my brother to do better.
Topic 1:
mother brother good baseball brocolli
DOC 0: Brocolli is good to eat. My brother likes to eat good brocolli, but not my mother. It helps you with heart problems.
DOC 1: My mother spends a lot of time driving my brother around to baseball practice. Baseball if one of my brothers favorite activities
Topic 2:
cause driving experts cancer suggest
DOC 2: Some health experts suggest that driving may cause increased tension and blood pressure. Also, driving may cause skin cancer.
DOC 3: I often feel pressure to perform well at school, but my mother never seems to drive my brother to do better.


In [78]:
def display_percent_of_each_topic(lda_W, model, feature_names, no_top_words):
    """Print out the percent of each topic, along with the top five words from each topic.
    
    """
    for topic_idx, topic in enumerate(lda_W):
        print(f"""Document {topic_idx} has the following percent of each topic 1 =  {topic[0]:.2%},\
        topic 2 = {topic[1]:.2%}, topic 3 = {topic[2]:.2%}""")
        
    print()
    print('Topics')
    for topic_idx, topic in enumerate(model.components_):
        print ("Topic %d:" % (topic_idx))
        print (" ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))
    

In [None]:
return_percent_of_topic

In [84]:
lda_W[1]

array([ 0.00606061,  0.00606061,  0.00606061,  0.00606061,  0.00606061,
        0.00606061,  0.00606061,  0.00606061,  0.00606061,  0.00606061,
        0.00606061,  0.00606061,  0.00606061,  0.91515152,  0.00606061])

In [79]:
display_percent_of_each_topic(lda_W,lda_model,tf_feature_names, 10)

Document 0 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 3.33%
Document 1 has the following percent of each topic 1 =  0.61%,        topic 2 = 0.61%, topic 3 = 0.61%
Document 2 has the following percent of each topic 1 =  1.11%,        topic 2 = 1.11%, topic 3 = 1.11%
Document 3 has the following percent of each topic 1 =  3.33%,        topic 2 = 53.33%, topic 3 = 3.33%
Document 4 has the following percent of each topic 1 =  1.67%,        topic 2 = 1.67%, topic 3 = 1.67%
Document 5 has the following percent of each topic 1 =  2.22%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 6 has the following percent of each topic 1 =  35.56%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 7 has the following percent of each topic 1 =  2.22%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 8 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 9 has the following percent of each topic 1 =  1.67%,        t

Document 1249 has the following percent of each topic 1 =  17.78%,        topic 2 = 1.11%, topic 3 = 1.11%
Document 1250 has the following percent of each topic 1 =  1.67%,        topic 2 = 1.67%, topic 3 = 1.67%
Document 1251 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 3.33%
Document 1252 has the following percent of each topic 1 =  1.67%,        topic 2 = 1.67%, topic 3 = 1.67%
Document 1253 has the following percent of each topic 1 =  26.67%,        topic 2 = 1.67%, topic 3 = 1.67%
Document 1254 has the following percent of each topic 1 =  1.67%,        topic 2 = 1.67%, topic 3 = 1.67%
Document 1255 has the following percent of each topic 1 =  13.33%,        topic 2 = 0.83%, topic 3 = 0.83%
Document 1256 has the following percent of each topic 1 =  6.67%,        topic 2 = 6.67%, topic 3 = 6.67%
Document 1257 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 1258 has the following percent of 

Document 3105 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 3.33%
Document 3106 has the following percent of each topic 1 =  35.56%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 3107 has the following percent of each topic 1 =  11.85%,        topic 2 = 0.74%, topic 3 = 11.85%
Document 3108 has the following percent of each topic 1 =  1.67%,        topic 2 = 1.67%, topic 3 = 26.67%
Document 3109 has the following percent of each topic 1 =  1.33%,        topic 2 = 21.33%, topic 3 = 21.33%
Document 3110 has the following percent of each topic 1 =  1.33%,        topic 2 = 21.33%, topic 3 = 1.33%
Document 3111 has the following percent of each topic 1 =  3.33%,        topic 2 = 53.33%, topic 3 = 3.33%
Document 3112 has the following percent of each topic 1 =  0.74%,        topic 2 = 11.85%, topic 3 = 0.74%
Document 3113 has the following percent of each topic 1 =  6.67%,        topic 2 = 6.67%, topic 3 = 6.67%
Document 3114 has the following perce

Document 4465 has the following percent of each topic 1 =  25.83%,        topic 2 = 0.83%, topic 3 = 0.83%
Document 4466 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 21.33%
Document 4467 has the following percent of each topic 1 =  0.83%,        topic 2 = 0.83%, topic 3 = 0.83%
Document 4468 has the following percent of each topic 1 =  6.67%,        topic 2 = 6.67%, topic 3 = 6.67%
Document 4469 has the following percent of each topic 1 =  6.67%,        topic 2 = 6.67%, topic 3 = 6.67%
Document 4470 has the following percent of each topic 1 =  2.22%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 4471 has the following percent of each topic 1 =  1.11%,        topic 2 = 1.11%, topic 3 = 1.11%
Document 4472 has the following percent of each topic 1 =  0.67%,        topic 2 = 0.67%, topic 3 = 20.67%
Document 4473 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 3.33%
Document 4474 has the following percent of 

Document 6799 has the following percent of each topic 1 =  0.67%,        topic 2 = 0.67%, topic 3 = 0.67%
Document 6800 has the following percent of each topic 1 =  0.95%,        topic 2 = 15.24%, topic 3 = 0.95%
Document 6801 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 6802 has the following percent of each topic 1 =  0.95%,        topic 2 = 15.24%, topic 3 = 0.95%
Document 6803 has the following percent of each topic 1 =  1.11%,        topic 2 = 1.11%, topic 3 = 1.11%
Document 6804 has the following percent of each topic 1 =  6.67%,        topic 2 = 6.67%, topic 3 = 6.67%
Document 6805 has the following percent of each topic 1 =  0.83%,        topic 2 = 13.33%, topic 3 = 0.83%
Document 6806 has the following percent of each topic 1 =  0.74%,        topic 2 = 22.96%, topic 3 = 0.74%
Document 6807 has the following percent of each topic 1 =  34.44%,        topic 2 = 1.11%, topic 3 = 17.78%
Document 6808 has the following percent 

Document 8260 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 8261 has the following percent of each topic 1 =  0.95%,        topic 2 = 0.95%, topic 3 = 15.24%
Document 8262 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 8263 has the following percent of each topic 1 =  6.67%,        topic 2 = 6.67%, topic 3 = 6.67%
Document 8264 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 3.33%
Document 8265 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 8266 has the following percent of each topic 1 =  2.22%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 8267 has the following percent of each topic 1 =  0.61%,        topic 2 = 0.61%, topic 3 = 18.79%
Document 8268 has the following percent of each topic 1 =  1.67%,        topic 2 = 1.67%, topic 3 = 1.67%
Document 8269 has the following percent of e

Document 10818 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 3.33%
Document 10819 has the following percent of each topic 1 =  21.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 10820 has the following percent of each topic 1 =  1.67%,        topic 2 = 26.67%, topic 3 = 1.67%
Document 10821 has the following percent of each topic 1 =  0.83%,        topic 2 = 0.83%, topic 3 = 0.83%
Document 10822 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 10823 has the following percent of each topic 1 =  61.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 10824 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 3.33%
Document 10825 has the following percent of each topic 1 =  0.51%,        topic 2 = 0.51%, topic 3 = 38.97%
Document 10826 has the following percent of each topic 1 =  2.22%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 10827 has the following 

Document 12894 has the following percent of each topic 1 =  1.33%,        topic 2 = 21.33%, topic 3 = 41.33%
Document 12895 has the following percent of each topic 1 =  2.22%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 12896 has the following percent of each topic 1 =  1.67%,        topic 2 = 1.67%, topic 3 = 1.67%
Document 12897 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 3.33%
Document 12898 has the following percent of each topic 1 =  1.67%,        topic 2 = 1.67%, topic 3 = 76.67%
Document 12899 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 3.33%
Document 12900 has the following percent of each topic 1 =  1.67%,        topic 2 = 1.67%, topic 3 = 1.67%
Document 12901 has the following percent of each topic 1 =  1.67%,        topic 2 = 26.67%, topic 3 = 1.67%
Document 12902 has the following percent of each topic 1 =  1.67%,        topic 2 = 1.67%, topic 3 = 1.67%
Document 12903 has the following 

Document 14871 has the following percent of each topic 1 =  1.33%,        topic 2 = 21.33%, topic 3 = 1.33%
Document 14872 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 14873 has the following percent of each topic 1 =  2.22%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 14874 has the following percent of each topic 1 =  2.22%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 14875 has the following percent of each topic 1 =  2.22%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 14876 has the following percent of each topic 1 =  2.22%,        topic 2 = 35.56%, topic 3 = 2.22%
Document 14877 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 3.33%
Document 14878 has the following percent of each topic 1 =  1.67%,        topic 2 = 1.67%, topic 3 = 1.67%
Document 14879 has the following percent of each topic 1 =  0.95%,        topic 2 = 0.95%, topic 3 = 0.95%
Document 14880 has the following pe

Document 16923 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 16924 has the following percent of each topic 1 =  2.22%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 16925 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 1.33%
Document 16926 has the following percent of each topic 1 =  2.22%,        topic 2 = 2.22%, topic 3 = 2.22%
Document 16927 has the following percent of each topic 1 =  1.33%,        topic 2 = 1.33%, topic 3 = 21.33%
Document 16928 has the following percent of each topic 1 =  26.67%,        topic 2 = 1.67%, topic 3 = 1.67%
Document 16929 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 53.33%
Document 16930 has the following percent of each topic 1 =  0.95%,        topic 2 = 0.95%, topic 3 = 43.81%
Document 16931 has the following percent of each topic 1 =  3.33%,        topic 2 = 3.33%, topic 3 = 3.33%
Document 16932 has the following 

KeyboardInterrupt: 

In [19]:
doc_set

['Brocolli is good to eat. My brother likes to eat good brocolli, but not my mother. It helps you with heart problems.',
 'My mother spends a lot of time driving my brother around to baseball practice. Baseball if one of my brothers favorite activities',
 'Some health experts suggest that driving may cause increased tension and blood pressure. Also, driving may cause skin cancer.',
 'I often feel pressure to perform well at school, but my mother never seems to drive my brother to do better.',
 'Health professionals say that brocolli is good for your health. It can also counter the negatives of stress.']

In [65]:
# Test random forest with text - need to conver to categorical representation

In [37]:
df_train = pd.DataFrame({'niche':['software','hardware','software','software'],
                  'employees':[1,4,3,4],'service_level':['gold','silver','gold','bronze'],
                        'revenue':[123,321,14,234],})

In [59]:
df_train = pd.get_dummies( df_train,drop_first=True)

In [60]:
df_train

Unnamed: 0,employees,revenue,niche_hardware,niche_software,service_level_bronze,service_level_gold,service_level_silver
0,1,123,0,1,0,1,0
1,4,321,1,0,0,0,1
2,3,14,0,1,0,1,0
3,4,234,0,1,1,0,0


In [56]:
rf=  RandomForestRegressor(n_estimators=100)

In [62]:
rf.fit(X = df_train.loc[:,('employees', 'niche_hardware','niche_software','service_level_bronze',
                          'service_level_gold','service_level_silver')], y= df_train.revenue)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=100, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [64]:
rf.predict(np.array([[1,10,1,0,0,1]]))

array([ 206.58])


# Founder Topic Model
- Get all text from all founders

In [89]:
vc_zero_tweets = os.listdir("../data/raw/founders_tweets/vc_invest=0/")

In [90]:
mypath_vc_zero  = "../data/raw/founders_tweets/vc_invest=0/"
mypath_vc_one = "../data/raw/founders_tweets/vc_invest=1/"

In [123]:
print(len(listdir(mypath_vc_zero))) # vc invest zero companies

92


In [125]:
print(len(listdir(mypath_vc_one))) # vc invest one companies

123


In [167]:
r = ['here','now','there']

In [169]:
r.index('now')

1

In [172]:
vc_doc_list = []
vc_founder_company_name_handle_list = [] # founder names index corresponds to their doc in the vc doc list

# vc invest one
for f in listdir(mypath_vc_zero):

    if isfile(join(mypath_vc_zero, f)):
        founder_text = ""
        with open(join(mypath_vc_zero, f),'rb') as fp:
            founder_tweet = pickle.load(fp)
            joined_text = founder_text.join(founder_tweet )
            vc_doc_list.append(joined_text)
            vc_founder_company_name_handle_list.append(f)

            
# vc invest zero
for f in listdir(mypath_vc_one):
    if isfile(join(mypath_vc_one, f)):
        founder_text = ""
        with open(join(mypath_vc_one, f),'rb') as fp:
            founder_tweet = pickle.load(fp)
            joined_text = founder_text.join(founder_tweet )
            vc_doc_list.append(joined_text)
            vc_founder_company_name_handle_list.append(f)
            
    

In [128]:
len(vc_doc_list) # total number of companies

215

## Fit an LDA model

In [177]:
no_features=1_000

In [178]:
# LDA can only use raw term counts for LDA because it is a probabilistic graphical model
tf_vectorizer = CountVectorizer(max_df=0.95, min_df=1, max_features=no_features, stop_words='english')
tf = tf_vectorizer.fit_transform(vc_doc_list)
tf_feature_names = tf_vectorizer.get_feature_names()

In [179]:
lda_model = LatentDirichletAllocation(n_topics=5, max_iter=5, 
                                      learning_method='online', learning_offset=50.,random_state=0).fit(tf)


In [180]:
lda_W = lda_model.transform(tf) # documents as the rows, topics as the columns
lda_H = lda_model.components_ # topics as the rows, words as the columns

In [181]:

def display_topics(model, feature_names, no_top_words):
    for topic_idx, topic in enumerate(model.components_):
        print ("Topic %d:" % (topic_idx))
        print (" ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))

In [182]:
no_top_words = 15

display_topics(lda_model, tf_feature_names, no_top_words)

Topic 0:
la en pour khan amir et le est je au du une tu pas på
Topic 1:
new favorite organic wise cancer thanks iorganic help connecting amp remix free love just isoflow
Topic 2:
amp crowdfunding great new data conference don just cloud year free marketing webinar check miss
Topic 3:
just new like amp day today time great good love thanks don know people best
Topic 4:
woopra thanks lethalweapon amp just fresno new diversity great love summer thank like don know


## Determine the percent of each topic per founder

In [161]:
def return_percent_of_topic(lda_W):
    c = 0
    for topic_idx, topic in enumerate(lda_W):
        for topic_percent in range(len(topic)):
            print(f"""Document {topic_idx} has the following percent of each topic  =  {topic[topic_percent]:.2%}""")
            c +=1
            if c ==10:
                break

In [162]:
return_percent_of_topic(lda_W)

Document 0 has the following percent of each topic  =  0.00%
Document 0 has the following percent of each topic  =  61.36%
Document 0 has the following percent of each topic  =  0.00%
Document 0 has the following percent of each topic  =  0.00%
Document 0 has the following percent of each topic  =  0.00%
Document 0 has the following percent of each topic  =  0.00%
Document 0 has the following percent of each topic  =  0.00%
Document 0 has the following percent of each topic  =  0.00%
Document 0 has the following percent of each topic  =  0.00%
Document 0 has the following percent of each topic  =  0.05%
Document 1 has the following percent of each topic  =  0.21%
Document 1 has the following percent of each topic  =  0.21%
Document 1 has the following percent of each topic  =  0.21%
Document 1 has the following percent of each topic  =  0.21%
Document 1 has the following percent of each topic  =  0.21%
Document 1 has the following percent of each topic  =  0.21%
Document 1 has the foll

Document 66 has the following percent of each topic  =  0.00%
Document 66 has the following percent of each topic  =  0.00%
Document 66 has the following percent of each topic  =  0.00%
Document 66 has the following percent of each topic  =  0.00%
Document 66 has the following percent of each topic  =  0.00%
Document 66 has the following percent of each topic  =  0.00%
Document 66 has the following percent of each topic  =  0.00%
Document 67 has the following percent of each topic  =  0.00%
Document 67 has the following percent of each topic  =  0.00%
Document 67 has the following percent of each topic  =  0.00%
Document 67 has the following percent of each topic  =  0.00%
Document 67 has the following percent of each topic  =  0.00%
Document 67 has the following percent of each topic  =  0.00%
Document 67 has the following percent of each topic  =  0.00%
Document 67 has the following percent of each topic  =  0.00%
Document 67 has the following percent of each topic  =  0.00%
Document

Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  0.01%
Document 159 has the following percent of each topic  =  99.87%
Document 160 has the following percent of each topic  