# NLP
Author Brian Tam, 10/16/2020

3. Prepping the words for NLP by:
    - Tokenizing with sklearn and spaCy
    - Lemmatisation
    - Count vectorizing words
    - Topic modeling

In [37]:
# Get pandas and postgres to work together
import psycopg2 as pg
import pandas as pd
import numpy as np
import pickle

# Import spacy to do NLP
import spacy

# Split the data into training and test sets
from sklearn.model_selection import train_test_split

# Import sklearn to do CountVectorizing
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()

import matplotlib.pyplot as plt

# Topic Modeling
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity

# Text Preprocessing
import re
import string

In [39]:
# Postgres info to connect
connection_args = {
    'host': 'localhost',  # We are connecting to our _local_ version of psql
    'dbname': 'myers_briggs',    # DB that we are connecting to
}

connection = pg.connect(**connection_args)  # What is that "**" there??

query = "SELECT * FROM cleaned_posts;"

df = pd.read_sql(query, connection)
df

Unnamed: 0,type,E/I,N/S,F/T,P/J,post,post_no_links,spacy_post,clean_post
0,INFJ,0,1,1,0,"""['http://www.youtube.com/watch?v=qsXHcwe3krw'...",What has been the most life-changing experienc...,life change experience life perc experience im...,life change experience life perc experience im...
1,ENTP,1,1,0,1,"""[""""I'm finding the lack of me in these posts ...",I'm finding the lack of me in these posts very...,find lack post alarming sex boring position ex...,find lack post alarming sex boring position ex...
2,INTP,0,1,0,1,"""['Good one _____ https://www.youtube.com/w...","Of course, to which I say I know; that's my bl...",course know blessing curse absolutely positive...,course know blessing curse absolutely positive...
3,INTJ,0,1,0,0,"""['Dear INTP, I enjoyed our conversation the...","Dear INTP, I enjoyed our conversation the ot...",dear intp enjoy conversation day esoteric gabb...,dear intp enjoy conversation day esoteric gabb...
4,ENTJ,1,1,0,0,"""[""""You're fired."""", """"That's another silly mi...",You're fired. That's another silly misconcepti...,fire silly misconception approach logically ke...,fire silly misconception approach logically ke...
...,...,...,...,...,...,...,...,...,...
8670,ENTP,1,1,0,1,"""['I think generally people experience post tr...",I think generally people experience post traum...,think generally people experience post trauma ...,think generally people experience post trauma ...
8671,INTJ,0,1,0,0,"""[""""Here's a planned stress relieving activity...",Here's a planned stress relieving activity tha...,planned stress relieve activity work ... day g...,planned stress relieve activity work day guy ...
8672,INFJ,0,1,1,0,"""[""""I'm not sure about a method for picking ou...",I'm not sure about a method for picking out IN...,sure method pick infj musical artist throw cau...,sure method pick infj musical artist throw cau...
8673,ISFP,0,0,1,1,"""['https://www.youtube.com/watch?v=t8edHB_h908...",Especially on websites that have become a have...,especially website haven neo nazis perc. nerd ...,especially website haven neo nazis perc nerd l...


#### Remove nulls

In [40]:
df[df.isnull().any(axis=1)]

Unnamed: 0,type,E/I,N/S,F/T,P/J,post,post_no_links,spacy_post,clean_post
3632,INFJ,0,1,1,0,"""['ENTP https://www.youtube.com/watch?v=oJwW...",,,


Remove nulls and blank posts

In [41]:
# Drop nulls
df = df.dropna()

In [42]:
df.to_csv('cleaned_df.csv')

# SpaCy and Regex preprocessing

In [17]:

# Load English tokenizer, tagger, parser, NER and word vectors
parser = spacy.load('en_core_web_sm')

# Creating our tokenizer function
def spacy_tokenizer(sentence):
    # Creating our token object, which is used to create documents with linguistic annotations.
    mytokens = parser(sentence)
    
    # Lemmatizing each token and converting each token into lowercase
    mytokens = [ word.lemma_.lower().strip() if word.lemma_ != "-PRON-" else word.lower_ for word in mytokens ]
    
    # Removing stop words
    mytokens = [ word for word in mytokens if word not in stop_words and word not in punctuations ]
    
    # return preprocessed list of tokens
    return ' '.join(mytokens)

In [18]:
alphanumeric = lambda x: re.sub('\w*\d\w*', '', x)
punc_lower = lambda x: re.sub('[%s]' % re.escape(string.punctuation), '', x.lower())

In [19]:
df['spacy_post'] = df.post_no_links.apply(spacy_tokenizer) #.map(alphanumeric).map(punc_lower)
df

KeyboardInterrupt: 

In [None]:
df['clean_post'] = df.spacy_post.map(alphanumeric).map(punc_lower)

In [None]:
df

# Spacy-made-easy template

In [114]:
nlp('LOVE')[0].pos_

'NOUN'

In [43]:
import spacy 
nlp = spacy.load('en_core_web_sm')

# Create an nlp object
doc = nlp("He went to play basketball")
 
# Iterate over the tokens
for token in doc:
    # Print the token and its part-of-speech tag
    print(token.text, "-->", token.pos_)

He --> PRON
went --> VERB
to --> PART
play --> VERB
basketball --> NOUN


In [108]:
nlp.pipe_names

['tagger', 'parser', 'ner']

In [44]:
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS as stopwords 
from sklearn.feature_extraction.text import CountVectorizer 
from sklearn.metrics import accuracy_score 
from sklearn.base import TransformerMixin 
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

import string
punctuations = string.punctuation

from spacy.lang.en import English
parser = English()

#Custom transformer using spaCy 
class predictors(TransformerMixin):
    def transform(self, X, **transform_params):
        return [clean_text(text) for text in X]
    def fit(self, X, y=None, **fit_params):
        return self
    def get_params(self, deep=True):
        return {}

# Basic utility function to clean the text 
def clean_text(text):     
    return text.strip().lower()



In [45]:
#Create spacy tokenizer that parses a sentence and generates tokens
#these can also be replaced by word vectors 
def spacy_tokenizer(sentence):
    tokens = parser(sentence)
    tokens = [tok.lemma_.lower().strip() if tok.lemma_ != "-PRON-" else tok.lower_ for tok in tokens]
    tokens = [tok for tok in tokens if (tok not in stopwords and tok not in punctuations)]     
    return tokens
    

#create vectorizer object to generate feature vectors, we will use custom spacy’s tokenizer
vectorizer = CountVectorizer(tokenizer = spacy_tokenizer, ngram_range=(1,1))
classifier = LinearSVC()

In [55]:
list(zip(X_train,y_train))

[('haha sure send   g use tapatalk wow wonder kind potential state hold   send   g use tapatalk lucid dream year hear carlos castaneda incentive time endless adventure appeal find  lately opposite problem dream activity commonplace parallel waking life coast night second glance  lucky wish work young age remember semi lucid manifest different dream situation fully lucid kid struggle increase  thank point forum great stuff  agree good regard work fiction order guard mind good nugget information  mean hope find like minded soul  send   g use tapatalk hello infp fascinate work carlos castaneda especially intrigue practice dream study lucid dreaming astral  huzzah yup exactly look thank share  ohmagod great like thing d yup watch avatar way describe fire way like look anger d feel like lately usually cheer remember need  honor huh maybe like japan haha start sure word infp view life quest quest lol right wrong like think simply extension human race wrong happen time worry know  guess impor

In [63]:
pipe.predict([x[0] for x in test])

array([0, 0, 1, ..., 0, 0, 1])

In [62]:
# Create the  pipeline to clean, tokenize, vectorize, and classify 
pipe = Pipeline([("cleaner", predictors()),
                 ('vectorizer', vectorizer),
                 ('classifier', classifier)])

# Load sample data
train = list(zip(X_train,y_train))
test =  list(zip(X_test,y_test))

# Create model and measure accuracy
pipe.fit([x[0] for x in train], [x[1] for x in train]) 
pred_data = pipe.predict([x[0] for x in test]) 
# for (sample, pred) in zip(test, pred_data):
#     print(sample, pred)
print("Accuracy:", accuracy_score([x[1] for x in test], pred_data))

Accuracy: 0.9533141210374639


# Train-Test Split

In [46]:
df.columns

Index(['type', 'E/I', 'N/S', 'F/T', 'P/J', 'post', 'post_no_links',
       'spacy_post', 'clean_post'],
      dtype='object')

In [56]:
# Split the data into X and y data sets
X = df.clean_post
y = df['type']

In [50]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.25, random_state=34)

# CountVectorizer

In [155]:
# The first document-term matrix has default Count Vectorizer values - counts of unigrams
cv1 = CountVectorizer(max_features=5000, ngram_range=(1,2), binary=True, stop_words='english')

X_train_cv1 = cv1.fit_transform(X_train)
X_test_cv1  = cv1.transform(X_test)

pd.DataFrame(X_train_cv1.toarray(), columns=cv1.get_feature_names())

  and should_run_async(code)


Unnamed: 0,abandon,ability,able,absolute,absolutely,absolutely love,absorb,abstract,absurd,abuse,...,young,young age,youre,youth,youtube,youtube video,yup,zero,zombie,zone
0,0,1,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0
3,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6500,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
6501,0,0,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6502,0,0,0,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
6503,0,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [175]:
!pip install wordcloud

  and should_run_async(code)


Collecting wordcloud
  Downloading wordcloud-1.8.0-cp38-cp38-macosx_10_6_x86_64.whl (162 kB)
[K     |████████████████████████████████| 162 kB 2.8 MB/s eta 0:00:01
Installing collected packages: wordcloud
Successfully installed wordcloud-1.8.0


### Try using TF-IDF instead of Count Vectorizer

In [177]:
more_stop_words = ['isfj','esfj','istj','isfp','estj','esfp','enfp','istp','infp','estp','intp','entp','enfj','intj','entj','infj', 'fe', 'fi', 'ni', 'ne', 'ti', 'se', 'te', 'si']

  and should_run_async(code)


In [178]:
# Create TF-IDF versions of the Count Vectorizers created earlier in the exercise
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf1 = TfidfVectorizer(max_features=5000, stop_words=more_stop_words)
X_train_tfidf1 = tfidf1.fit_transform(X_train)
X_test_tfidf1  = tfidf1.transform(X_test)

  and should_run_async(code)


In [158]:
len(pd.DataFrame(X_train_tfidf1.toarray(), columns=tfidf1.get_feature_names()).columns)

  and should_run_async(code)


5000

# Topic Modeling

### Latent Semantic Analysis (LSA) and nonnegative matrix factorization(NMF)

In [167]:
# Acronynms: Latent Semantic Analysis (LSA) is just another name for 
#  Signular Value Decomposition (SVD) applied to Natural Language Processing (NLP)
from sklearn.decomposition import LatentDirichletAllocation
TopicModel = NMF(40)
doc_topic = TopicModel.fit_transform(pd.DataFrame(X_train_tfidf1.toarray(), columns=tfidf1.get_feature_names()))

  and should_run_async(code)


In [168]:
def display_topics(model, feature_names, no_top_words, topic_names=None):
    """
    Takes in model and feature names and outputs 
    a list of string of the top words from each topic.
    """
    topics = []
    for ix, topic in enumerate(model.components_):
        topics.append(str(", ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]])))
    return topics

  and should_run_async(code)


In [169]:
topics = display_topics(TopicModel, tfidf1.get_feature_names(), 15)

  and should_run_async(code)


In [170]:
topic_word = pd.DataFrame(TopicModel.components_.round(3),
             index =  topics,
             columns = tfidf1.get_feature_names())
topic_word.head(2)

  and should_run_async(code)


Unnamed: 0,abandon,ability,able,abnormal,abortion,abroad,absence,absent,absolute,absolutely,...,yrs,yummy,yup,zealand,zelda,zen,zero,zodiac,zombie,zone
"thing, time, lot, way, happen, usually, find, try, tend, come, work, different, hard, use, idea",0.0,0.005,0.062,0.001,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025
"fe, fi, ni, ne, ti, se, te, si, dom, user, inferior, function, dominant, type, loop",0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.0,...,0.0,0.0,0.018,0.0,0.0,0.0,0.011,0.0,0.0,0.0


The Vt matrix shows us the documents we started with, and how each document is made up of the 2 resulting topics. In this case:
- The first four documents seem to be about thinking
- The last three documents seem to be about feeling

In [171]:
X_test_topic_array = TopicModel.transform(pd.DataFrame(X_test_tfidf1.toarray(), columns=tfidf1.get_feature_names()))

  and should_run_async(code)


## Organize Topic DataFrames

In [172]:
X_train_topics = pd.DataFrame(doc_topic.round(5),
             index = X_train.index,
             columns = topics)
X_test_topics = pd.DataFrame(X_test_topic_array.round(5),
             index = X_test.index,
             columns = topics)
X_train_topics

  and should_run_async(code)


Unnamed: 0,"thing, time, lot, way, happen, usually, find, try, tend, come, work, different, hard, use, idea","fe, fi, ni, ne, ti, se, te, si, dom, user, inferior, function, dominant, type, loop","like, sound, look, guy, kind, stuff, maybe, girl, cool, nice, lot, act, picture, real, weird","tapatalk, send, iphone, use, gt, sm, nexus, ipad, samsung, ban, ipod, pretty, talk, know, lounge","welcome, forum, perc, enjoy, hope, hey, fellow, site, stay, greeting, fun, happy, cafe, join, nice","day, sleep, time, today, night, hour, eat, dream, week, year, month, walk, look, morning, wake","point, question, mean, like, simply, yes, use, argument, fact, consider, logic, understand, case, answer, problem","sx, sp, enneagram, tritype, instinct, wing, instinctual, fix, type, stacking, dom, core, variant, description, typing","thank, help, response, appreciate, reply, guy, advice, sorry, hey, yes, question, input, helpful, try, answer","feel, like, time, feeling, wish, way, day, hurt, emotion, sad, happy, tired, hard, lonely, lately",...,"school, high, class, college, year, math, teacher, grade, english, study, student, science, major, physics, language","woman, man, sex, female, male, girl, gender, attractive, good, look, feminine, attract, yes, guy, gay","mom, sister, dad, brother, mother, husband, family, parent, child, father, old, kid, young, year, test","job, work, year, good, time, money, career, pay, need, business, know, degree, new, day, company","want, try, tell, help, ask, need, person, good, right, problem, care, question, talk, answer, life","god, believe, religion, atheist, religious, belief, bible, christian, human, faith, exist, science, world, church, evidence","life, self, experience, world, find, dream, people, live, feeling, deep, know, value, mind, real, reality","hi, personality, bit, infps, lot, new, ok, yes, meet, nice, perc, thanks, interested, wonder, happy","pretty, definitely, yeah, actually, lot, sure, probably, kind, bit, guess, good, tend, sort, usually, little","youtube, song, video, crazy, good, lyric, version, link, machine, watch, leave, official, time, happy, pay"
1632,0.00795,0.01511,0.01049,0.00000,0.00000,0.00251,0.00000,0.00000,0.01683,0.01716,...,0.00005,0.00000,0.00407,0.00000,0.00901,0.02744,0.04209,0.00000,0.00238,0.00000
1454,0.01025,0.03114,0.01184,0.00416,0.00000,0.00000,0.02378,0.00143,0.00000,0.00000,...,0.02547,0.02930,0.00000,0.03662,0.00828,0.01841,0.02573,0.00000,0.00000,0.00000
5949,0.01961,0.00000,0.00615,0.00000,0.01208,0.02023,0.00000,0.00107,0.00000,0.03209,...,0.00174,0.01436,0.02206,0.03462,0.00000,0.00000,0.01668,0.00319,0.04550,0.00825
1077,0.01433,0.00000,0.01367,0.00056,0.00530,0.00000,0.01700,0.00000,0.00573,0.00368,...,0.01022,0.00000,0.00244,0.00406,0.01280,0.04026,0.00207,0.00797,0.01486,0.00069
5096,0.02036,0.00000,0.01193,0.00000,0.00255,0.00000,0.00000,0.00324,0.00773,0.05171,...,0.00446,0.01234,0.00444,0.02282,0.01983,0.00476,0.01855,0.00197,0.02553,0.00007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5668,0.00000,0.00000,0.01262,0.00000,0.00985,0.00000,0.00000,0.00000,0.01930,0.00110,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.30879
324,0.01884,0.01473,0.01923,0.00000,0.02091,0.00745,0.00740,0.00000,0.00936,0.03589,...,0.00443,0.00930,0.01380,0.01332,0.02355,0.00000,0.01667,0.00146,0.00756,0.00000
3157,0.01211,0.01665,0.02794,0.00000,0.00000,0.00430,0.00251,0.00139,0.00714,0.02296,...,0.00291,0.00416,0.03750,0.00000,0.01358,0.00000,0.01376,0.00454,0.00401,0.00169
5994,0.02563,0.01795,0.01522,0.00804,0.05370,0.01629,0.02170,0.00000,0.02201,0.00003,...,0.01208,0.00000,0.00118,0.00000,0.02055,0.00269,0.00938,0.00661,0.01184,0.00000


In [173]:
X_test_topics

  and should_run_async(code)


Unnamed: 0,"thing, time, lot, way, happen, usually, find, try, tend, come, work, different, hard, use, idea","fe, fi, ni, ne, ti, se, te, si, dom, user, inferior, function, dominant, type, loop","like, sound, look, guy, kind, stuff, maybe, girl, cool, nice, lot, act, picture, real, weird","tapatalk, send, iphone, use, gt, sm, nexus, ipad, samsung, ban, ipod, pretty, talk, know, lounge","welcome, forum, perc, enjoy, hope, hey, fellow, site, stay, greeting, fun, happy, cafe, join, nice","day, sleep, time, today, night, hour, eat, dream, week, year, month, walk, look, morning, wake","point, question, mean, like, simply, yes, use, argument, fact, consider, logic, understand, case, answer, problem","sx, sp, enneagram, tritype, instinct, wing, instinctual, fix, type, stacking, dom, core, variant, description, typing","thank, help, response, appreciate, reply, guy, advice, sorry, hey, yes, question, input, helpful, try, answer","feel, like, time, feeling, wish, way, day, hurt, emotion, sad, happy, tired, hard, lonely, lately",...,"school, high, class, college, year, math, teacher, grade, english, study, student, science, major, physics, language","woman, man, sex, female, male, girl, gender, attractive, good, look, feminine, attract, yes, guy, gay","mom, sister, dad, brother, mother, husband, family, parent, child, father, old, kid, young, year, test","job, work, year, good, time, money, career, pay, need, business, know, degree, new, day, company","want, try, tell, help, ask, need, person, good, right, problem, care, question, talk, answer, life","god, believe, religion, atheist, religious, belief, bible, christian, human, faith, exist, science, world, church, evidence","life, self, experience, world, find, dream, people, live, feeling, deep, know, value, mind, real, reality","hi, personality, bit, infps, lot, new, ok, yes, meet, nice, perc, thanks, interested, wonder, happy","pretty, definitely, yeah, actually, lot, sure, probably, kind, bit, guess, good, tend, sort, usually, little","youtube, song, video, crazy, good, lyric, version, link, machine, watch, leave, official, time, happy, pay"
7048,0.02964,0.00000,0.02280,0.00000,0.00383,0.02088,0.00390,0.0000,0.00511,0.03171,...,0.00000,0.01711,0.00755,0.00000,0.00326,0.00000,0.00575,0.00156,0.00664,0.00000
4211,0.01051,0.00653,0.01693,0.00338,0.01124,0.01248,0.00540,0.0000,0.00343,0.00552,...,0.00693,0.06790,0.02904,0.01863,0.01342,0.00497,0.02189,0.00385,0.00577,0.00000
6663,0.03238,0.00000,0.01346,0.00013,0.00295,0.00270,0.01111,0.0000,0.00121,0.00275,...,0.00404,0.00362,0.00000,0.02916,0.02097,0.02077,0.08384,0.00000,0.00105,0.00000
3835,0.00413,0.00000,0.01762,0.00000,0.00184,0.00000,0.02594,0.0000,0.03696,0.00300,...,0.00139,0.01009,0.00000,0.01285,0.01884,0.00000,0.01719,0.00848,0.00400,0.00282
8608,0.04372,0.00411,0.01123,0.00000,0.00000,0.01267,0.00844,0.0000,0.02201,0.01094,...,0.00075,0.00000,0.01744,0.02354,0.01980,0.00000,0.00068,0.04265,0.01302,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4630,0.01561,0.01542,0.01454,0.00217,0.00000,0.00433,0.00000,0.0000,0.00291,0.00781,...,0.00000,0.01038,0.00000,0.00000,0.00000,0.00000,0.00178,0.00000,0.01164,0.00000
4907,0.01603,0.00000,0.01323,0.00000,0.01667,0.00018,0.04013,0.0000,0.00292,0.00314,...,0.00000,0.00000,0.00697,0.00000,0.02424,0.00125,0.02673,0.00096,0.00619,0.00302
2164,0.02154,0.00000,0.00147,0.00109,0.27620,0.00000,0.00000,0.0000,0.00010,0.01231,...,0.01039,0.00000,0.00000,0.00000,0.00156,0.00889,0.00000,0.00000,0.02087,0.00000
7526,0.00260,0.00215,0.01355,0.00285,0.00255,0.05514,0.00000,0.0045,0.00423,0.00371,...,0.05441,0.00754,0.03081,0.00000,0.00676,0.00046,0.00014,0.00000,0.00000,0.00000


# Export as csv's

In [174]:
X_train_topics.to_csv('X_train_topics.csv')
X_test_topics.to_csv('X_test_topics.csv')
y_train.to_csv('y_train.csv')
y_test.to_csv('y_test.csv')

  and should_run_async(code)


# Appendix

### This was LDA 

In [133]:
# X_train_topics = pd.DataFrame(doc_topic.round(5),
#              index = X_train.index,
#              columns = topics)
# X_test_topics = pd.DataFrame(X_test_topic_array.round(5),
#              index = X_test.index,
#              columns = topics)
# X_train_topics

Unnamed: 0,"album, way, vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican.1","gt, en, mastermind, minus, spite, cheating, versus, cinema, tablet, mystical, eating, galaxy, riddle, liking, steak","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican.2","china, vulnerability, sane, cliff, pushy, linger, cherry, moron, tis, unconsciously, ipad, steak, vagina, rap, penis","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican.3","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican.4","iam, sane, al, playlist, marathon, supervisor, anatomy, waiting, poverty, cos, coz, virginity, region, cartoon, parrot","tapatalk, send, iphone, ipad, nexus, moron, linger, sm, bpd, ban, hsp, lounge, samsung, ptsd, processing","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican.5","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican.6","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican.7","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican.8","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican.9","wat, cherry, moron, ppl, definately, detect, coz, lets, doll, clothing, girls, instagram, tablet, educational, pump","like, think, people, know, thing, feel, time, good, type, want, love, friend, way, infp, infj","tmlt, extp, extj, enfx, esxp, ixfj, xstj, overanalyze, enxp, xsfj, ixtj, boob, xstp, inxp, inxj","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican.10","vulnerability, china, sane, unconsciously, pushy, moron, linger, cherry, tis, cliff, ipad, wat, nexus, twitch, mexican.11"
1632,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.94020,0.00315,0.00315,0.00315
1454,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.94517,0.00289,0.00289,0.00289
5949,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.00289,0.94506,0.00289,0.00289,0.00289
1077,0.00314,0.00314,0.00314,0.00314,0.00314,0.00314,0.00314,0.00314,0.00314,0.00314,0.00314,0.00314,0.00314,0.00314,0.00314,0.00314,0.94042,0.00314,0.00314,0.00314
5096,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.93783,0.00327,0.00327,0.00327
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5668,0.00590,0.00590,0.00590,0.00590,0.00590,0.00590,0.00590,0.00590,0.00590,0.00590,0.00590,0.00590,0.00590,0.00590,0.00590,0.00590,0.88782,0.00590,0.00590,0.00590
324,0.00315,0.00315,0.00315,0.00315,0.00315,0.01100,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.00315,0.93230,0.00315,0.00315,0.00315
3157,0.00308,0.00308,0.00308,0.00308,0.00308,0.00308,0.00308,0.00308,0.00308,0.00308,0.00308,0.00308,0.00308,0.00308,0.00308,0.00308,0.94153,0.00308,0.00308,0.00308
5994,0.00291,0.00291,0.00291,0.00291,0.00291,0.00291,0.00291,0.00291,0.00291,0.00291,0.00291,0.00291,0.00291,0.00291,0.00291,0.00291,0.94464,0.00291,0.00291,0.00291


In [143]:
import pyLDAvis
pyLDAvis(X_train_topics)

  and should_run_async(code)


TypeError: 'module' object is not callable