In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction import text 

from __future__ import print_function
import pyLDAvis
import pyLDAvis.sklearn
pyLDAvis.enable_notebook()



In [2]:
# Load the review data CSV into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer='../../yelp-data/new_data/final_data/manip_data.csv'
)

In [3]:
# Row subsets of `data` whose rating1 / rating3 / rating5 column equals 1.
review_text_1 = data.loc[data['rating1'] == 1]
review_text_3 = data.loc[data['rating3'] == 1]
review_text_5 = data.loc[data['rating5'] == 1]

In [4]:
# Extra stop words; the vectorizer cells union these with scikit-learn's
# built-in English stop-word set.
myList = (
    "ve vegas yelp montreal scottsdale don didn did said wasn "
    "just got told las los know http left le la"
).split()

# Fit and visualize LDA models for all reviews (8 topics)

In [5]:
# The 'text' column of every row, with missing values replaced by ''.
review_text = data.loc[:, 'text'].fillna(value='')

In [11]:
# Term counts over all reviews: accents stripped, text lower-cased, and
# scikit-learn's English stop words plus `myList` removed.
# stop_words is passed as a list: newer scikit-learn releases validate the
# argument type and accept only 'english', a list, or None, while older
# releases treat a list the same way as the original frozenset.
tf_vectorizer = CountVectorizer(strip_accents = 'unicode',
                                stop_words = sorted(text.ENGLISH_STOP_WORDS.union(myList)),
                                lowercase = True)

dtm_tf = tf_vectorizer.fit_transform(review_text)
print(dtm_tf.shape)

(156695, 100095)


In [12]:
# TF-IDF counterpart configured with the count vectorizer's own parameters.
tfidf_vectorizer = TfidfVectorizer(**tf_vectorizer.get_params(deep=True))
dtm_tfidf = tfidf_vectorizer.fit_transform(raw_documents=review_text)
print(dtm_tfidf.shape)

(156695, 100095)


In [13]:
# Fit one 8-topic LDA model per document-term matrix, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf = LatentDirichletAllocation(8, random_state=0)
lda_tf.fit(dtm_tf)
# for TFIDF DTM
lda_tfidf = LatentDirichletAllocation(8, random_state=0)
lda_tfidf.fit(dtm_tfidf)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=8, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [14]:
# Count-based model: build the pyLDAvis data, save it as HTML, show it inline.
viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer,
)
pyLDAvis.save_html(data=viz_1, fileobj='steak-lda-viz/8-topics/ldavis-1.html')
pyLDAvis.display(data=viz_1)

In [15]:
# TF-IDF-based model: build the pyLDAvis data, save it as HTML, show it inline.
viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf, dtm=dtm_tfidf, vectorizer=tfidf_vectorizer,
)
pyLDAvis.save_html(data=viz_2, fileobj='steak-lda-viz/8-topics/ldavis-2.html')
pyLDAvis.display(data=viz_2)

#### Using different mds functions

In [16]:
# Count-based model laid out with mds='pcoa'; saved as HTML and shown inline.
viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='pcoa',
)
pyLDAvis.save_html(data=viz_3, fileobj='steak-lda-viz/8-topics/ldavis-3.html')
pyLDAvis.display(data=viz_3)

In [17]:
# Count-based model laid out with mds='mmds'; saved as HTML and shown inline.
viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='mmds',
)
pyLDAvis.save_html(data=viz_4, fileobj='steak-lda-viz/8-topics/ldavis-4.html')
pyLDAvis.display(data=viz_4)

In [18]:
# Count-based model laid out with mds='tsne'; saved as HTML and shown inline.
viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='tsne',
)
pyLDAvis.save_html(data=viz_5, fileobj='steak-lda-viz/8-topics/ldavis-5.html')
pyLDAvis.display(data=viz_5)

### 10 Topics?

In [19]:
# Refit both LDA models with 10 topics, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf = LatentDirichletAllocation(10, random_state=0)
lda_tf.fit(dtm_tf)
# for TFIDF DTM
lda_tfidf = LatentDirichletAllocation(10, random_state=0)
lda_tfidf.fit(dtm_tfidf)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=10, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [20]:
# Count-based model: build the pyLDAvis data, save it as HTML, show it inline.
viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer,
)
pyLDAvis.save_html(data=viz_1, fileobj='steak-lda-viz/10-topics/ldavis-1.html')
pyLDAvis.display(data=viz_1)

In [21]:
# TF-IDF-based model: build the pyLDAvis data, save it as HTML, show it inline.
viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf, dtm=dtm_tfidf, vectorizer=tfidf_vectorizer,
)
pyLDAvis.save_html(data=viz_2, fileobj='steak-lda-viz/10-topics/ldavis-2.html')
pyLDAvis.display(data=viz_2)

#### Using different mds functions

In [22]:
# Count-based model laid out with mds='pcoa'; saved as HTML and shown inline.
viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='pcoa',
)
pyLDAvis.save_html(data=viz_3, fileobj='steak-lda-viz/10-topics/ldavis-3.html')
pyLDAvis.display(data=viz_3)

In [23]:
# Count-based model laid out with mds='mmds'; saved as HTML and shown inline.
viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='mmds',
)
pyLDAvis.save_html(data=viz_4, fileobj='steak-lda-viz/10-topics/ldavis-4.html')
pyLDAvis.display(data=viz_4)

In [24]:
# Count-based model laid out with mds='tsne'; saved as HTML and shown inline.
viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='tsne',
)
pyLDAvis.save_html(data=viz_5, fileobj='steak-lda-viz/10-topics/ldavis-5.html')
pyLDAvis.display(data=viz_5)

### 5 Topics?

In [25]:
# Refit both LDA models with 5 topics, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf = LatentDirichletAllocation(5, random_state=0)
lda_tf.fit(dtm_tf)
# for TFIDF DTM
lda_tfidf = LatentDirichletAllocation(5, random_state=0)
lda_tfidf.fit(dtm_tfidf)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=5, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [26]:
# Count-based model: build the pyLDAvis data, save it as HTML, show it inline.
viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer,
)
pyLDAvis.save_html(data=viz_1, fileobj='steak-lda-viz/5-topics/ldavis-1.html')
pyLDAvis.display(data=viz_1)

In [27]:
# TF-IDF-based model: build the pyLDAvis data, save it as HTML, show it inline.
viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf, dtm=dtm_tfidf, vectorizer=tfidf_vectorizer,
)
pyLDAvis.save_html(data=viz_2, fileobj='steak-lda-viz/5-topics/ldavis-2.html')
pyLDAvis.display(data=viz_2)

#### Using different mds functions

In [28]:
# Count-based model laid out with mds='pcoa'; saved as HTML and shown inline.
viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='pcoa',
)
pyLDAvis.save_html(data=viz_3, fileobj='steak-lda-viz/5-topics/ldavis-3.html')
pyLDAvis.display(data=viz_3)

In [29]:
# Count-based model laid out with mds='mmds'; saved as HTML and shown inline.
viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='mmds',
)
pyLDAvis.save_html(data=viz_4, fileobj='steak-lda-viz/5-topics/ldavis-4.html')
pyLDAvis.display(data=viz_4)

In [30]:
# Count-based model laid out with mds='tsne'; saved as HTML and shown inline.
viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='tsne',
)
pyLDAvis.save_html(data=viz_5, fileobj='steak-lda-viz/5-topics/ldavis-5.html')
pyLDAvis.display(data=viz_5)

### How about bigrams? 5 Topics

In [6]:
# Bigram-only counts over all reviews (replaces the unigram tf_vectorizer/dtm_tf).
# ngram_range is given as a tuple and stop_words as a list: those are the
# documented argument types, and newer scikit-learn releases reject a list
# range or a frozenset of stop words during parameter validation.
# NOTE(review): the recorded run produced about 3.4M bigram columns and the
# LDA fit on them was interrupted; pruning (e.g. min_df / max_features) is
# probably needed before this section can finish -- confirm.
tf_vectorizer = CountVectorizer(ngram_range=(2, 2),
                                strip_accents = 'unicode',
                                stop_words = sorted(text.ENGLISH_STOP_WORDS.union(myList)),
                                lowercase = True)

dtm_tf = tf_vectorizer.fit_transform(review_text)
print(dtm_tf.shape)

(156695, 3415144)


In [7]:
# TF-IDF counterpart configured with the bigram count vectorizer's parameters.
tfidf_vectorizer = TfidfVectorizer(**tf_vectorizer.get_params(deep=True))
dtm_tfidf = tfidf_vectorizer.fit_transform(raw_documents=review_text)
print(dtm_tfidf.shape)

(156695, 3415144)


In [8]:
# Fit 5-topic LDA models on the bigram matrices, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf = LatentDirichletAllocation(5, random_state=0)
lda_tf.fit(dtm_tf)
# for TFIDF DTM
lda_tfidf = LatentDirichletAllocation(5, random_state=0)
lda_tfidf.fit(dtm_tfidf)



KeyboardInterrupt: 

In [None]:
# Count-based model: build the pyLDAvis data, save it as HTML, show it inline.
viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer,
)
pyLDAvis.save_html(data=viz_1, fileobj='steak-lda-viz/bigrams/ldavis-1.html')
pyLDAvis.display(data=viz_1)

In [None]:
# TF-IDF-based model: build the pyLDAvis data, save it as HTML, show it inline.
viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf, dtm=dtm_tfidf, vectorizer=tfidf_vectorizer,
)
pyLDAvis.save_html(data=viz_2, fileobj='steak-lda-viz/bigrams/ldavis-2.html')
pyLDAvis.display(data=viz_2)

#### Using different mds functions

In [None]:
# Count-based model laid out with mds='pcoa'; saved as HTML and shown inline.
viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='pcoa',
)
pyLDAvis.save_html(data=viz_3, fileobj='steak-lda-viz/bigrams/ldavis-3.html')
pyLDAvis.display(data=viz_3)

In [None]:
# Count-based model laid out with mds='mmds'; saved as HTML and shown inline.
viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='mmds',
)
pyLDAvis.save_html(data=viz_4, fileobj='steak-lda-viz/bigrams/ldavis-4.html')
pyLDAvis.display(data=viz_4)

In [None]:
# Count-based model laid out with mds='tsne'; saved as HTML and shown inline.
viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf, dtm=dtm_tf, vectorizer=tf_vectorizer, mds='tsne',
)
pyLDAvis.save_html(data=viz_5, fileobj='steak-lda-viz/bigrams/ldavis-5.html')
pyLDAvis.display(data=viz_5)

# Fit and visualize LDA models for reviews with a rating of 1 (8 topics)

In [5]:
# Text of the rows where rating1 == 1, with missing text replaced by ''.
# Selected straight from `data` so the cell gives the same result when re-run;
# indexing the previous review_text_1 with 'text' only works the first time.
review_text_1 = data.loc[data['rating1'] == 1, 'text'].fillna('')

In [6]:
# Term counts over the rating-1 reviews: accents stripped, text lower-cased,
# and scikit-learn's English stop words plus `myList` removed.
# stop_words is passed as a list: newer scikit-learn releases validate the
# argument type and accept only 'english', a list, or None, while older
# releases treat a list the same way as the original frozenset.
tf_vectorizer1 = CountVectorizer(strip_accents = 'unicode',
                                stop_words = sorted(text.ENGLISH_STOP_WORDS.union(myList)),
                                lowercase = True)

dtm_tf1 = tf_vectorizer1.fit_transform(review_text_1)
print(dtm_tf1.shape)

(13244, 30165)


In [7]:
# TF-IDF counterpart configured with the rating-1 count vectorizer's parameters.
tfidf_vectorizer1 = TfidfVectorizer(**tf_vectorizer1.get_params(deep=True))
dtm_tfidf1 = tfidf_vectorizer1.fit_transform(raw_documents=review_text_1)
print(dtm_tfidf1.shape)

(13244, 30165)


In [8]:
# Fit 8-topic LDA models on the rating-1 matrices, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf1 = LatentDirichletAllocation(8, random_state=0)
lda_tf1.fit(dtm_tf1)
# for TFIDF DTM
lda_tfidf1 = LatentDirichletAllocation(8, random_state=0)
lda_tfidf1.fit(dtm_tfidf1)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=8, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [9]:
# Rating-1 count-based model: build the pyLDAvis data, save as HTML, show inline.
rating1_viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1,
)
pyLDAvis.save_html(data=rating1_viz_1, fileobj='steak-lda-viz/8-topics/rating1-ldavis-1.html')
pyLDAvis.display(data=rating1_viz_1)

In [10]:
# Rating-1 TF-IDF-based model: build the pyLDAvis data, save as HTML, show inline.
rating1_viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf1, dtm=dtm_tfidf1, vectorizer=tfidf_vectorizer1,
)
pyLDAvis.save_html(data=rating1_viz_2, fileobj='steak-lda-viz/8-topics/rating1-ldavis-2.html')
pyLDAvis.display(data=rating1_viz_2)

#### Using different mds functions

In [11]:
# Rating-1 count-based model laid out with mds='pcoa'; saved and shown inline.
rating1_viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1, mds='pcoa',
)
pyLDAvis.save_html(data=rating1_viz_3, fileobj='steak-lda-viz/8-topics/rating1-ldavis-3.html')
pyLDAvis.display(data=rating1_viz_3)

In [12]:
# Rating-1 count-based model laid out with mds='mmds'; saved and shown inline.
rating1_viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1, mds='mmds',
)
pyLDAvis.save_html(data=rating1_viz_4, fileobj='steak-lda-viz/8-topics/rating1-ldavis-4.html')
pyLDAvis.display(data=rating1_viz_4)

In [13]:
# Rating-1 count-based model laid out with mds='tsne'; saved and shown inline.
rating1_viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1, mds='tsne',
)
pyLDAvis.save_html(data=rating1_viz_5, fileobj='steak-lda-viz/8-topics/rating1-ldavis-5.html')
pyLDAvis.display(data=rating1_viz_5)

# Fit and visualize LDA models for reviews with a rating of 3 (8 topics)

In [14]:
# Text of the rows where rating3 == 1, with missing text replaced by ''.
# Selected straight from `data` so the cell gives the same result when re-run;
# indexing the previous review_text_3 with 'text' only works the first time.
review_text_3 = data.loc[data['rating3'] == 1, 'text'].fillna('')

In [15]:
# Term counts over the rating-3 reviews: accents stripped, text lower-cased,
# and scikit-learn's English stop words plus `myList` removed.
# stop_words is passed as a list: newer scikit-learn releases validate the
# argument type and accept only 'english', a list, or None, while older
# releases treat a list the same way as the original frozenset.
tf_vectorizer3 = CountVectorizer(strip_accents = 'unicode',
                                stop_words = sorted(text.ENGLISH_STOP_WORDS.union(myList)),
                                lowercase = True)

dtm_tf3 = tf_vectorizer3.fit_transform(review_text_3)
print(dtm_tf3.shape)

(21870, 41881)


In [16]:
# TF-IDF counterpart configured with the rating-3 count vectorizer's parameters.
tfidf_vectorizer3 = TfidfVectorizer(**tf_vectorizer3.get_params(deep=True))
dtm_tfidf3 = tfidf_vectorizer3.fit_transform(raw_documents=review_text_3)
print(dtm_tfidf3.shape)

(21870, 41881)


In [17]:
# Fit 8-topic LDA models on the rating-3 matrices, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf3 = LatentDirichletAllocation(8, random_state=0)
lda_tf3.fit(dtm_tf3)
# for TFIDF DTM
lda_tfidf3 = LatentDirichletAllocation(8, random_state=0)
lda_tfidf3.fit(dtm_tfidf3)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=8, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [18]:
# Rating-3 count-based model: build the pyLDAvis data, save as HTML, show inline.
rating3_viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3,
)
pyLDAvis.save_html(data=rating3_viz_1, fileobj='steak-lda-viz/8-topics/rating3-ldavis-1.html')
pyLDAvis.display(data=rating3_viz_1)

In [19]:
# Rating-3 TF-IDF-based model: build the pyLDAvis data, save as HTML, show inline.
rating3_viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf3, dtm=dtm_tfidf3, vectorizer=tfidf_vectorizer3,
)
pyLDAvis.save_html(data=rating3_viz_2, fileobj='steak-lda-viz/8-topics/rating3-ldavis-2.html')
pyLDAvis.display(data=rating3_viz_2)

#### Using different mds functions

In [20]:
# Rating-3 count-based model laid out with mds='pcoa'; saved and shown inline.
rating3_viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3, mds='pcoa',
)
pyLDAvis.save_html(data=rating3_viz_3, fileobj='steak-lda-viz/8-topics/rating3-ldavis-3.html')
pyLDAvis.display(data=rating3_viz_3)

In [21]:
# Rating-3 count-based model laid out with mds='mmds'; saved and shown inline.
rating3_viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3, mds='mmds',
)
pyLDAvis.save_html(data=rating3_viz_4, fileobj='steak-lda-viz/8-topics/rating3-ldavis-4.html')
pyLDAvis.display(data=rating3_viz_4)

In [22]:
# Rating-3 count-based model laid out with mds='tsne'; saved and shown inline.
rating3_viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3, mds='tsne',
)
pyLDAvis.save_html(data=rating3_viz_5, fileobj='steak-lda-viz/8-topics/rating3-ldavis-5.html')
pyLDAvis.display(data=rating3_viz_5)

# Fit and visualize LDA models for reviews with a rating of 5 (8 topics)

In [23]:
# Text of the rows where rating5 == 1, with missing text replaced by ''.
# Selected straight from `data` so the cell gives the same result when re-run;
# indexing the previous review_text_5 with 'text' only works the first time.
review_text_5 = data.loc[data['rating5'] == 1, 'text'].fillna('')

In [24]:
# Term counts over the rating-5 reviews: accents stripped, text lower-cased,
# and scikit-learn's English stop words plus `myList` removed.
# stop_words is passed as a list: newer scikit-learn releases validate the
# argument type and accept only 'english', a list, or None, while older
# releases treat a list the same way as the original frozenset.
tf_vectorizer5 = CountVectorizer(strip_accents = 'unicode',
                                stop_words = sorted(text.ENGLISH_STOP_WORDS.union(myList)),
                                lowercase = True)

dtm_tf5 = tf_vectorizer5.fit_transform(review_text_5)
print(dtm_tf5.shape)

(60846, 59197)


In [25]:
# TF-IDF counterpart configured with the rating-5 count vectorizer's parameters.
tfidf_vectorizer5 = TfidfVectorizer(**tf_vectorizer5.get_params(deep=True))
dtm_tfidf5 = tfidf_vectorizer5.fit_transform(raw_documents=review_text_5)
print(dtm_tfidf5.shape)

(60846, 59197)


In [26]:
# Fit 8-topic LDA models on the rating-5 matrices, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf5 = LatentDirichletAllocation(8, random_state=0)
lda_tf5.fit(dtm_tf5)
# for TFIDF DTM
lda_tfidf5 = LatentDirichletAllocation(8, random_state=0)
lda_tfidf5.fit(dtm_tfidf5)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=8, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [27]:
# Rating-5 count-based model: build the pyLDAvis data, save as HTML, show inline.
rating5_viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5,
)
pyLDAvis.save_html(data=rating5_viz_1, fileobj='steak-lda-viz/8-topics/rating5-ldavis-1.html')
pyLDAvis.display(data=rating5_viz_1)

In [28]:
# Rating-5 TF-IDF-based model: build the pyLDAvis data, save as HTML, show inline.
rating5_viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf5, dtm=dtm_tfidf5, vectorizer=tfidf_vectorizer5,
)
pyLDAvis.save_html(data=rating5_viz_2, fileobj='steak-lda-viz/8-topics/rating5-ldavis-2.html')
pyLDAvis.display(data=rating5_viz_2)

#### Using different mds functions

In [29]:
# Rating-5 count-based model laid out with mds='pcoa'; saved and shown inline.
rating5_viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5, mds='pcoa',
)
pyLDAvis.save_html(data=rating5_viz_3, fileobj='steak-lda-viz/8-topics/rating5-ldavis-3.html')
pyLDAvis.display(data=rating5_viz_3)

In [30]:
# Rating-5 count-based model laid out with mds='mmds'; saved and shown inline.
rating5_viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5, mds='mmds',
)
pyLDAvis.save_html(data=rating5_viz_4, fileobj='steak-lda-viz/8-topics/rating5-ldavis-4.html')
pyLDAvis.display(data=rating5_viz_4)

In [31]:
# Rating-5 count-based model laid out with mds='tsne'; saved and shown inline.
rating5_viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5, mds='tsne',
)
pyLDAvis.save_html(data=rating5_viz_5, fileobj='steak-lda-viz/8-topics/rating5-ldavis-5.html')
pyLDAvis.display(data=rating5_viz_5)

# How about 5 topics?

## Rating 1

In [32]:
# Refit the rating-1 LDA models with 5 topics, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf1 = LatentDirichletAllocation(5, random_state=0)
lda_tf1.fit(dtm_tf1)
# for TFIDF DTM
lda_tfidf1 = LatentDirichletAllocation(5, random_state=0)
lda_tfidf1.fit(dtm_tfidf1)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=5, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [33]:
# Rating-1 count-based model: build the pyLDAvis data, save as HTML, show inline.
rating1_viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1,
)
pyLDAvis.save_html(data=rating1_viz_1, fileobj='steak-lda-viz/5-topics/rating1-ldavis-1.html')
pyLDAvis.display(data=rating1_viz_1)

In [34]:
# Rating-1 TF-IDF-based model: build the pyLDAvis data, save as HTML, show inline.
rating1_viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf1, dtm=dtm_tfidf1, vectorizer=tfidf_vectorizer1,
)
pyLDAvis.save_html(data=rating1_viz_2, fileobj='steak-lda-viz/5-topics/rating1-ldavis-2.html')
pyLDAvis.display(data=rating1_viz_2)

#### Using different mds functions

In [35]:
# Rating-1 count-based model laid out with mds='pcoa'; saved and shown inline.
rating1_viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1, mds='pcoa',
)
pyLDAvis.save_html(data=rating1_viz_3, fileobj='steak-lda-viz/5-topics/rating1-ldavis-3.html')
pyLDAvis.display(data=rating1_viz_3)

In [36]:
# Rating-1 count-based model laid out with mds='mmds'; saved and shown inline.
rating1_viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1, mds='mmds',
)
pyLDAvis.save_html(data=rating1_viz_4, fileobj='steak-lda-viz/5-topics/rating1-ldavis-4.html')
pyLDAvis.display(data=rating1_viz_4)

In [37]:
# Rating-1 count-based model laid out with mds='tsne'; saved and shown inline.
rating1_viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1, mds='tsne',
)
pyLDAvis.save_html(data=rating1_viz_5, fileobj='steak-lda-viz/5-topics/rating1-ldavis-5.html')
pyLDAvis.display(data=rating1_viz_5)

## Rating 3

In [38]:
# Refit the rating-3 LDA models with 5 topics, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf3 = LatentDirichletAllocation(5, random_state=0)
lda_tf3.fit(dtm_tf3)
# for TFIDF DTM
lda_tfidf3 = LatentDirichletAllocation(5, random_state=0)
lda_tfidf3.fit(dtm_tfidf3)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=5, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [39]:
# Rating-3 count-based model: build the pyLDAvis data, save as HTML, show inline.
rating3_viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3,
)
pyLDAvis.save_html(data=rating3_viz_1, fileobj='steak-lda-viz/5-topics/rating3-ldavis-1.html')
pyLDAvis.display(data=rating3_viz_1)

In [40]:
# Rating-3 TF-IDF-based model: build the pyLDAvis data, save as HTML, show inline.
rating3_viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf3, dtm=dtm_tfidf3, vectorizer=tfidf_vectorizer3,
)
pyLDAvis.save_html(data=rating3_viz_2, fileobj='steak-lda-viz/5-topics/rating3-ldavis-2.html')
pyLDAvis.display(data=rating3_viz_2)

#### Using different mds functions

In [41]:
# Rating-3 count-based model laid out with mds='pcoa'; saved and shown inline.
rating3_viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3, mds='pcoa',
)
pyLDAvis.save_html(data=rating3_viz_3, fileobj='steak-lda-viz/5-topics/rating3-ldavis-3.html')
pyLDAvis.display(data=rating3_viz_3)

In [42]:
# Rating-3 count-based model laid out with mds='mmds'; saved and shown inline.
rating3_viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3, mds='mmds',
)
pyLDAvis.save_html(data=rating3_viz_4, fileobj='steak-lda-viz/5-topics/rating3-ldavis-4.html')
pyLDAvis.display(data=rating3_viz_4)

In [43]:
# Rating-3 count-based model laid out with mds='tsne'; saved and shown inline.
rating3_viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3, mds='tsne',
)
pyLDAvis.save_html(data=rating3_viz_5, fileobj='steak-lda-viz/5-topics/rating3-ldavis-5.html')
pyLDAvis.display(data=rating3_viz_5)

## Rating 5

In [44]:
# Refit the rating-5 LDA models with 5 topics, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf5 = LatentDirichletAllocation(5, random_state=0)
lda_tf5.fit(dtm_tf5)
# for TFIDF DTM
lda_tfidf5 = LatentDirichletAllocation(5, random_state=0)
lda_tfidf5.fit(dtm_tfidf5)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=5, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [45]:
# Rating-5 count-based model: build the pyLDAvis data, save as HTML, show inline.
rating5_viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5,
)
pyLDAvis.save_html(data=rating5_viz_1, fileobj='steak-lda-viz/5-topics/rating5-ldavis-1.html')
pyLDAvis.display(data=rating5_viz_1)

In [46]:
# Rating-5 TF-IDF-based model: build the pyLDAvis data, save as HTML, show inline.
rating5_viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf5, dtm=dtm_tfidf5, vectorizer=tfidf_vectorizer5,
)
pyLDAvis.save_html(data=rating5_viz_2, fileobj='steak-lda-viz/5-topics/rating5-ldavis-2.html')
pyLDAvis.display(data=rating5_viz_2)

#### Using different mds functions

In [47]:
# Rating-5 count-based model laid out with mds='pcoa'; saved and shown inline.
rating5_viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5, mds='pcoa',
)
pyLDAvis.save_html(data=rating5_viz_3, fileobj='steak-lda-viz/5-topics/rating5-ldavis-3.html')
pyLDAvis.display(data=rating5_viz_3)

In [48]:
# Rating-5 count-based model laid out with mds='mmds'; saved and shown inline.
rating5_viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5, mds='mmds',
)
pyLDAvis.save_html(data=rating5_viz_4, fileobj='steak-lda-viz/5-topics/rating5-ldavis-4.html')
pyLDAvis.display(data=rating5_viz_4)

In [49]:
# Rating-5 count-based model laid out with mds='tsne'; saved and shown inline.
rating5_viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5, mds='tsne',
)
pyLDAvis.save_html(data=rating5_viz_5, fileobj='steak-lda-viz/5-topics/rating5-ldavis-5.html')
pyLDAvis.display(data=rating5_viz_5)

# How about 10 topics?

## Rating 1

In [50]:
# Refit the rating-1 LDA models with 10 topics, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf1 = LatentDirichletAllocation(10, random_state=0)
lda_tf1.fit(dtm_tf1)
# for TFIDF DTM
lda_tfidf1 = LatentDirichletAllocation(10, random_state=0)
lda_tfidf1.fit(dtm_tfidf1)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=10, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [51]:
# Rating-1 count-based model: build the pyLDAvis data, save as HTML, show inline.
rating1_viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1,
)
pyLDAvis.save_html(data=rating1_viz_1, fileobj='steak-lda-viz/10-topics/rating1-ldavis-1.html')
pyLDAvis.display(data=rating1_viz_1)

In [52]:
# Rating-1 TF-IDF-based model: build the pyLDAvis data, save as HTML, show inline.
rating1_viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf1, dtm=dtm_tfidf1, vectorizer=tfidf_vectorizer1,
)
pyLDAvis.save_html(data=rating1_viz_2, fileobj='steak-lda-viz/10-topics/rating1-ldavis-2.html')
pyLDAvis.display(data=rating1_viz_2)

#### Using different mds functions

In [53]:
# Rating-1 count-based model laid out with mds='pcoa'; saved and shown inline.
rating1_viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1, mds='pcoa',
)
pyLDAvis.save_html(data=rating1_viz_3, fileobj='steak-lda-viz/10-topics/rating1-ldavis-3.html')
pyLDAvis.display(data=rating1_viz_3)

In [54]:
# Rating-1 count-based model laid out with mds='mmds'; saved and shown inline.
rating1_viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1, mds='mmds',
)
pyLDAvis.save_html(data=rating1_viz_4, fileobj='steak-lda-viz/10-topics/rating1-ldavis-4.html')
pyLDAvis.display(data=rating1_viz_4)

In [55]:
# Rating-1 count-based model laid out with mds='tsne'; saved and shown inline.
rating1_viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf1, dtm=dtm_tf1, vectorizer=tf_vectorizer1, mds='tsne',
)
pyLDAvis.save_html(data=rating1_viz_5, fileobj='steak-lda-viz/10-topics/rating1-ldavis-5.html')
pyLDAvis.display(data=rating1_viz_5)

## Rating 3

In [56]:
# Refit the rating-3 LDA models with 10 topics, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf3 = LatentDirichletAllocation(10, random_state=0)
lda_tf3.fit(dtm_tf3)
# for TFIDF DTM
lda_tfidf3 = LatentDirichletAllocation(10, random_state=0)
lda_tfidf3.fit(dtm_tfidf3)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=10, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [57]:
# Rating-3 count-based model: build the pyLDAvis data, save as HTML, show inline.
rating3_viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3,
)
pyLDAvis.save_html(data=rating3_viz_1, fileobj='steak-lda-viz/10-topics/rating3-ldavis-1.html')
pyLDAvis.display(data=rating3_viz_1)

In [58]:
# Rating-3 TF-IDF-based model: build the pyLDAvis data, save as HTML, show inline.
rating3_viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf3, dtm=dtm_tfidf3, vectorizer=tfidf_vectorizer3,
)
pyLDAvis.save_html(data=rating3_viz_2, fileobj='steak-lda-viz/10-topics/rating3-ldavis-2.html')
pyLDAvis.display(data=rating3_viz_2)

#### Using different mds functions

In [59]:
# Rating-3 count-based model laid out with mds='pcoa'; saved and shown inline.
rating3_viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3, mds='pcoa',
)
pyLDAvis.save_html(data=rating3_viz_3, fileobj='steak-lda-viz/10-topics/rating3-ldavis-3.html')
pyLDAvis.display(data=rating3_viz_3)

In [60]:
# Rating-3 count-based model laid out with mds='mmds'; saved and shown inline.
rating3_viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3, mds='mmds',
)
pyLDAvis.save_html(data=rating3_viz_4, fileobj='steak-lda-viz/10-topics/rating3-ldavis-4.html')
pyLDAvis.display(data=rating3_viz_4)

In [61]:
# Rating-3 count-based model laid out with mds='tsne'; saved and shown inline.
rating3_viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf3, dtm=dtm_tf3, vectorizer=tf_vectorizer3, mds='tsne',
)
pyLDAvis.save_html(data=rating3_viz_5, fileobj='steak-lda-viz/10-topics/rating3-ldavis-5.html')
pyLDAvis.display(data=rating3_viz_5)

## Rating 5

In [62]:
# Refit the rating-5 LDA models with 10 topics, seeded for repeatability.
# The topic count is passed positionally because its keyword differs between
# scikit-learn releases (`n_topics` in old ones, `n_components` in newer ones,
# where `n_topics` was eventually removed).
# for TF DTM
lda_tf5 = LatentDirichletAllocation(10, random_state=0)
lda_tf5.fit(dtm_tf5)
# for TFIDF DTM
lda_tfidf5 = LatentDirichletAllocation(10, random_state=0)
lda_tfidf5.fit(dtm_tfidf5)



LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7, learning_method=None,
             learning_offset=10.0, max_doc_update_iter=100, max_iter=10,
             mean_change_tol=0.001, n_jobs=1, n_topics=10, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=0)

In [63]:
# Rating-5 count-based model: build the pyLDAvis data, save as HTML, show inline.
rating5_viz_1 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5,
)
pyLDAvis.save_html(data=rating5_viz_1, fileobj='steak-lda-viz/10-topics/rating5-ldavis-1.html')
pyLDAvis.display(data=rating5_viz_1)

In [64]:
# Rating-5 TF-IDF-based model: build the pyLDAvis data, save as HTML, show inline.
rating5_viz_2 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tfidf5, dtm=dtm_tfidf5, vectorizer=tfidf_vectorizer5,
)
pyLDAvis.save_html(data=rating5_viz_2, fileobj='steak-lda-viz/10-topics/rating5-ldavis-2.html')
pyLDAvis.display(data=rating5_viz_2)

#### Using different mds functions

In [65]:
# Rating-5 count-based model laid out with mds='pcoa'; saved and shown inline.
rating5_viz_3 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5, mds='pcoa',
)
pyLDAvis.save_html(data=rating5_viz_3, fileobj='steak-lda-viz/10-topics/rating5-ldavis-3.html')
pyLDAvis.display(data=rating5_viz_3)

In [66]:
# Rating-5 count-based model laid out with mds='mmds'; saved and shown inline.
rating5_viz_4 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5, mds='mmds',
)
pyLDAvis.save_html(data=rating5_viz_4, fileobj='steak-lda-viz/10-topics/rating5-ldavis-4.html')
pyLDAvis.display(data=rating5_viz_4)

In [67]:
# Rating-5 count-based model laid out with mds='tsne'; saved and shown inline.
rating5_viz_5 = pyLDAvis.sklearn.prepare(
    lda_model=lda_tf5, dtm=dtm_tf5, vectorizer=tf_vectorizer5, mds='tsne',
)
pyLDAvis.save_html(data=rating5_viz_5, fileobj='steak-lda-viz/10-topics/rating5-ldavis-5.html')
pyLDAvis.display(data=rating5_viz_5)