In [1]:
# import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gensim
import logging
import io
import os
import re
import itertools
import sys

# make the parent of the notebook's working directory importable so that
# the project-local `src` package can be found by the import below
nb_dir = os.path.dirname(os.getcwd())
if sys.path.count(nb_dir) == 0:
    sys.path.append(nb_dir)
    
from src.lda import lda_funcs

from gensim import corpora
from gensim.models.coherencemodel import CoherenceModel

In [2]:
# set up logging
# root logging configuration: INFO level, timestamped "time : level : message" records
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
)

# file handler pointing at the training log file
# NOTE(review): within the visible cells this handler is never attached to a
# logger (no addHandler call) - confirm whether lda_funcs attaches its own
# handler, or whether logging.getLogger().addHandler(handler) is needed here
handler = logging.FileHandler(filename='../models/training_output.log')

# The handler above is something I needed with respect to logging.
# Gensim performs various calculations while training the LDA model that I am using, but the only way to see them
# is in the logging outputs.
# Specifically, I need to capture the perplexity values during training to verify that perplexity is decreasing.
# This metric is needed to compare models and to do hyperparameter tuning. 


# The following blog post was helpful to me in figuring out how to make the log handler I needed.
# https://fangpenlin.com/posts/2012/08/26/good-logging-practice-in-python/

In [3]:
# load the processed reviews, using the first csv column as the row index
# NOTE(review): the preview shows leftover 'Unnamed: 0.x' columns - presumably
# from earlier to_csv/read_csv round trips that wrote the index; verify upstream
df = pd.read_csv(filepath_or_buffer='../data/processed/reviews.csv', index_col=0)

# preview the first two rows
df.head(n=2)

Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,...,Topic Mixtures Coded,Coded Topic,Coded Fit,Coded Subtopic,Coded Subtopic Fit,Topic Mixtures Valence,Valence Topic,Valence Fit,Valence Subtopic,Valence Subtopic Fit
0,0,10108,10108,21738,B000KV61FC,A1Y1YYH71TPYC6,thefinfan54,2,2,5,...,"{0: 0.1729274, 1: 0.2710636, 2: 0.0019230774, ...",4.0,0.370688,1.0,0.271064,"{0: 0.040954687, 1: 0.26253843, 2: 0.002500002...",3.0,0.684001,1.0,0.262538
1,1,10109,10109,21739,B000KV61FC,A1SLLKDKCZ5IPL,C. Guariglia,2,2,2,...,"{0: 0.0031253225, 1: 0.003125363, 2: 0.0031250...",4.0,0.971874,,,"{0: 0.0044658356, 1: 0.00446429, 2: 0.00446429...",3.0,0.825877,,


In [4]:
# number of reviews per product, ordered from fewest to most reviews
review_counts = (
    df.loc[:, 'ProductId']
    .value_counts()
    .sort_values(ascending=True)
)

# the most-reviewed products sit at the bottom of the series
review_counts.tail(n=12)

B002IEZJMA    487
B006MONQMC    491
B005ZBZLT4    506
B003GTR8IO    530
B005K4Q34S    541
B0013A0QXC    542
B000NMJWZO    542
B000KV61FC    556
B001EO5Q64    567
B0026RQTGE    630
Name: ProductId, dtype: int64

In [5]:
# product group to work with: the 10 products with the largest number of reviews
# (review_counts is sorted ascending, so these are its last ten index labels)
top_ten = review_counts.index[-10:].values

## LDA Model Grid Search & Tuning

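For each of the input text types (vanilla, coded, and valence coded), first run an initial grid search over 50 or 80 training passes and 6, 8, 10, or 12 topics, while also varying how many of the most frequent words are removed (2 or 10) and the review-frequency threshold above which words are dropped (0.5 or 1.0).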

NOTE: grid search can take hours to run fully - don't run the below cells unless you actually want to perform the searching and tuning. The csv results files can simply be loaded in where noted below.

In [None]:
# LOAD RESULTS
# read the saved final results for each input text type and key every
# frame by its 'product' column
vanilla_final_results = (
    pd.read_csv('../data/processed/vanilla_final_results.csv')
    .set_index('product')
)

coded_final_results = (
    pd.read_csv('../data/processed/coded_final_results.csv')
    .set_index('product')
)

valence_final_results = (
    pd.read_csv('../data/processed/valence_final_results.csv')
    .set_index('product')
)

### Vanilla Inputs Grid Search & Tuning

In [None]:
# CAUTION - GRID SEARCH CAN TAKE MANY HOURS TO RUN
# ONLY RUN ON FIRST PASS
# create an empty dataframe to house the results of the model tuning from
# the initial grid search (one row per product is collected via save_best)
vanilla_gs_results = pd.DataFrame(columns=['product', 'num_topics', 'chunk',
                                           'passes', 'per-word bounds',
                                           'perplexity', 'topic diff',
                                           'final perplexity', 'final topic diff',
                                           'perplexity decreasing', 'coherence',
                                           'top_n removed', 'n_above threshold'])

# for each of the top ten products, grid search over the combinations of
# n_passes, n_topics, top_n and n_above using the 'clean_vanilla' input text,
# then let save_best record the best result in vanilla_gs_results.
# Every product in top_ten must be searched here: the final-results cell
# further down filters vanilla_gs_results by each product in top_ten, and
# the coded / valence grid searches iterate over the full top_ten as well.
for product in top_ten:
    output = lda_funcs.tune_lda(df=df, product=product, n_passes=[50, 80],
                                n_topics=[6, 8, 10, 12], save_path='vanilla_outputs',
                                input_text='clean_vanilla', n_below=0,
                                top_n=[2, 10], n_above=[0.5, 1.0])
    vanilla_gs_results = lda_funcs.save_best(output, vanilla_gs_results,
                                             save_path='vanilla_outputs')

In [None]:
# display the full vanilla grid search results frame
vanilla_gs_results

In [None]:
# RUN ONLY ON FIRST PASS
# save off the results with the best model (highest coherence) for each product
# index=False keeps the row index out of the csv: this file is re-read with a
# plain pd.read_csv and saved again during manual tuning, and writing the
# index each time would add another 'Unnamed: 0' column on every round trip
vanilla_gs_results.to_csv('../data/interim/vanilla_gs_results.csv', index=False)

# examine the results
vanilla_gs_results[['product','coherence', 'num_topics',
                    'passes', 'top_n removed', 'n_above threshold']]

In [None]:
# LOAD RESULTS
# read the saved vanilla grid search results back in
vanilla_gs_results = pd.read_csv('../data/interim/vanilla_gs_results.csv')

# show only the columns needed to compare the models
vanilla_gs_results.loc[:, ['product', 'coherence', 'num_topics',
                           'passes', 'top_n removed', 'n_above threshold']]

Next, after reviewing the results of the initial grid search pass, manually tune the models for each of the products to try to reach a threshold of 0.5 for the final model coherence. 

In [None]:
# run this cell for each product that needs further tuning
# adjust n_passes / n_topics around the best model found in the previous results
output = lda_funcs.tune_lda(
    df=df,
    product='B0026RQTGE',
    n_passes=[80],
    n_topics=[5, 6, 7],
    save_path='vanilla_outputs',
    input_text='clean_vanilla',
    n_below=0,
    top_n=[2],
    n_above=[1.0],
)

In [None]:
# view the outputs of the most recent tune_lda run (the value bound to `output`)
output

In [None]:
# if the result is an improvement, run this cell to save it
# save_best folds the tuning output into the running results frame
vanilla_gs_results = lda_funcs.save_best(output, vanilla_gs_results,
                                         save_path='vanilla_outputs')

# save off the updated results dataframe
# index=False keeps the row index out of the csv: the file is loaded with a
# plain pd.read_csv, so writing the index would add another 'Unnamed: 0'
# column every time this load / tune / save cycle is repeated
vanilla_gs_results.to_csv('../data/interim/vanilla_gs_results.csv', index=False)

In [None]:
# finally, collect the best model for every product into one final dataframe
# start from an empty frame with the same columns as the grid search results
vanilla_final_results = pd.DataFrame(
    columns=[
        'product',
        'num_topics',
        'chunk',
        'passes',
        'per-word bounds',
        'perplexity',
        'topic diff',
        'final perplexity',
        'final topic diff',
        'perplexity decreasing',
        'coherence',
        'top_n removed',
        'n_above threshold',
    ]
)

# for each product, hand its grid search rows to save_best, which returns
# the updated final results frame
for product in top_ten:
    output = vanilla_gs_results.loc[vanilla_gs_results['product'] == product]
    vanilla_final_results = lda_funcs.save_best(
        output,
        vanilla_final_results,
        save_path='vanilla_outputs',
    )

# save the final results off to a csv
vanilla_final_results.to_csv('../data/processed/vanilla_final_results.csv')

In [None]:
# LOAD RESULTS
# read the final vanilla results back in, keyed by product
vanilla_final_results = (
    pd.read_csv('../data/processed/vanilla_final_results.csv')
    .set_index('product')
)

# show the coherence and the parameters of each product's best model
vanilla_final_results.loc[:, ['coherence', 'num_topics', 'passes',
                              'top_n removed', 'n_above threshold']]

The results above show that success with the vanilla review (i.e., no codewords, just clean text) was pretty poor - I was not able to achieve the 0.5 goal threshold with any of the products. 

### Coded Inputs Grid Search & Tuning

In [None]:
# CAUTION - GRID SEARCH CAN TAKE MANY HOURS TO RUN
# ONLY RUN ON FIRST PASS
# empty dataframe that collects the tuning results of the initial grid search
coded_gs_results = pd.DataFrame(
    columns=[
        'product',
        'num_topics',
        'chunk',
        'passes',
        'per-word bounds',
        'perplexity',
        'topic diff',
        'final perplexity',
        'final topic diff',
        'perplexity decreasing',
        'coherence',
        'top_n removed',
        'n_above threshold',
    ]
)

# grid search every one of the top ten products over the combinations of
# n_passes, n_topics, top_n and n_above on the 'clean_coded' input text;
# save_best then records the best result in coded_gs_results
for product in top_ten:
    output = lda_funcs.tune_lda(
        df=df,
        product=product,
        n_passes=[50, 80],
        n_topics=[6, 8, 10, 12],
        save_path='coded_outputs',
        input_text='clean_coded',
        n_below=0,
        top_n=[2, 10],
        n_above=[0.5, 1.0],
    )
    coded_gs_results = lda_funcs.save_best(
        output,
        coded_gs_results,
        save_path='coded_outputs',
    )

In [None]:
# display the full coded grid search results frame
coded_gs_results

In [None]:
# RUN ONLY ON FIRST PASS
# save off the results with the best model (highest coherence) for each product
# index=False keeps the row index out of the csv: this file is re-read with a
# plain pd.read_csv and saved again during manual tuning, and writing the
# index each time would add another 'Unnamed: 0' column on every round trip
coded_gs_results.to_csv('../data/interim/coded_gs_results.csv', index=False)

# examine the results
coded_gs_results[['product','coherence', 'num_topics',
                  'passes', 'top_n removed', 'n_above threshold']]

In [None]:
# LOAD RESULTS
# read the saved coded grid search results back in
coded_gs_results = pd.read_csv('../data/interim/coded_gs_results.csv')

# show only the columns needed to compare the models
coded_gs_results.loc[:, ['product', 'coherence', 'num_topics',
                         'passes', 'top_n removed', 'n_above threshold']]

Next, after reviewing the results of the initial grid search pass, manually tune the models for each of the products to try to reach a threshold of 0.5 for the final model coherence. 

In [None]:
# run this cell for a product that needs further tuning
# (narrow the search around the best parameters found so far)
output = lda_funcs.tune_lda(
    df=df,
    product='B000KV61FC',
    n_passes=[50],
    n_topics=[9, 10, 11],
    save_path='coded_outputs',
    input_text='clean_coded',
    n_below=0,
    top_n=[2],
    n_above=[0.5],
)

In [None]:
# view the key columns of the tuning output
output.loc[:, ['product', 'coherence', 'num_topics',
               'passes', 'top_n removed', 'n_above threshold']]

In [None]:
# if the result is an improvement, run this cell to save it
# save_best folds the tuning output into the running results frame
coded_gs_results = lda_funcs.save_best(output, coded_gs_results,
                                       save_path='coded_outputs')

# save off the updated results dataframe
# index=False keeps the row index out of the csv: the file is loaded with a
# plain pd.read_csv, so writing the index would add another 'Unnamed: 0'
# column every time this load / tune / save cycle is repeated
coded_gs_results.to_csv('../data/interim/coded_gs_results.csv', index=False)

In [None]:
# finally, collect the best model for every product into one final dataframe
# start from an empty frame with the same columns as the grid search results
coded_final_results = pd.DataFrame(
    columns=[
        'product',
        'num_topics',
        'chunk',
        'passes',
        'per-word bounds',
        'perplexity',
        'topic diff',
        'final perplexity',
        'final topic diff',
        'perplexity decreasing',
        'coherence',
        'top_n removed',
        'n_above threshold',
    ]
)

# for each product, hand its grid search rows to save_best, which returns
# the updated final results frame
for product in top_ten:
    output = coded_gs_results.loc[coded_gs_results['product'] == product]
    coded_final_results = lda_funcs.save_best(
        output,
        coded_final_results,
        save_path='coded_outputs',
    )

# save the final results off to a csv
coded_final_results.to_csv('../data/processed/coded_final_results.csv')

In [None]:
# LOAD RESULTS
# read the final coded results back in, keyed by product
coded_final_results = (
    pd.read_csv('../data/processed/coded_final_results.csv')
    .set_index('product')
)

# show the coherence and the parameters of each product's best model
coded_final_results.loc[:, ['coherence', 'num_topics', 'passes',
                            'top_n removed', 'n_above threshold']]

Using the coded review (i.e., with "GOODREVIEW" and "BADREVIEW" inserted following each positive or negative word), the coherence results are somewhat better than the uncoded reviews.

### Valence Coded Inputs Grid Search & Tuning

In [None]:
# CAUTION - GRID SEARCH CAN TAKE MANY HOURS TO RUN
# ONLY RUN ON FIRST PASS
# empty dataframe that collects the tuning results of the initial grid search
valence_gs_results = pd.DataFrame(
    columns=[
        'product',
        'num_topics',
        'chunk',
        'passes',
        'per-word bounds',
        'perplexity',
        'topic diff',
        'final perplexity',
        'final topic diff',
        'perplexity decreasing',
        'coherence',
        'top_n removed',
        'n_above threshold',
    ]
)

# grid search every one of the top ten products over the combinations of
# n_passes, n_topics, top_n and n_above on the 'clean_valence' input text;
# save_best then records the best result in valence_gs_results
for product in top_ten:
    output = lda_funcs.tune_lda(
        df=df,
        product=product,
        n_passes=[50, 80],
        n_topics=[6, 8, 10, 12],
        save_path='valence_outputs',
        input_text='clean_valence',
        n_below=0,
        top_n=[2, 10],
        n_above=[0.5, 1.0],
    )
    valence_gs_results = lda_funcs.save_best(
        output,
        valence_gs_results,
        save_path='valence_outputs',
    )

In [None]:
# display the full valence grid search results frame
valence_gs_results

In [None]:
# RUN ONLY ON FIRST PASS
# save off the results with the best model (highest coherence) for each product
# index=False keeps the row index out of the csv: this file is re-read with a
# plain pd.read_csv, and writing the index would add another 'Unnamed: 0'
# column each time the frame is loaded and saved again
valence_gs_results.to_csv('../data/interim/valence_gs_results.csv', index=False)

# examine the results
valence_gs_results[['product','coherence', 'num_topics',
                    'passes', 'top_n removed', 'n_above threshold']]

In [6]:
# LOAD RESULTS
# read the saved valence grid search results back in
valence_gs_results = pd.read_csv('../data/interim/valence_gs_results.csv')

# show only the columns needed to compare the models
valence_gs_results.loc[:, ['product', 'coherence', 'num_topics',
                           'passes', 'top_n removed', 'n_above threshold']]

Unnamed: 0,product,coherence,num_topics,passes,top_n removed,n_above threshold
0,B002IEZJMA,0.479089,8,80,2,0.5
1,B006MONQMC,0.492218,10,50,2,1.0
2,B005ZBZLT4,0.451033,10,50,2,1.0
3,B003GTR8IO,0.488773,12,50,2,1.0
4,B005K4Q34S,0.494551,12,80,2,0.5
5,B0013A0QXC,0.453215,10,80,10,0.5
6,B000NMJWZO,0.49467,6,80,2,0.5
7,B000KV61FC,0.490179,12,80,10,0.5
8,B001EO5Q64,0.518175,6,80,2,1.0
9,B0026RQTGE,0.463676,6,50,2,1.0


Next, after reviewing the results of the initial grid search pass, manually tune the models for each of the products to try to reach a threshold of 0.5 for the final model coherence. 

In [32]:
# run this cell for a product that needs further tuning
# (narrow the search around the best parameters found so far)
output = lda_funcs.tune_lda(
    df=df,
    product='B0026RQTGE',
    n_passes=[50],
    n_topics=[5, 7],
    save_path='valence_outputs',
    input_text='clean_valence',
    n_below=0,
    top_n=[2],
    n_above=[1.0],
)

2018-10-26 14:48:54,348 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2018-10-26 14:48:54,436 : INFO : built Dictionary(2511 unique tokens: ['summer', 'tzu', 'dollar', 'brick', 'town']...) from 630 documents (total 21018 corpus positions)
2018-10-26 14:48:54,443 : INFO : discarding 2 tokens: [('dog', 463), ('love', 343)]...
2018-10-26 14:48:54,453 : INFO : resulting dictionary: Dictionary(2509 unique tokens: ['summer', 'tzu', 'dollar', 'brick', 'town']...)
2018-10-26 14:48:54,460 : INFO : discarding 0 tokens: []...
2018-10-26 14:48:54,463 : INFO : keeping 2509 tokens which were in no less than 0 and no more than 630 (=100.0%) documents
2018-10-26 14:48:54,478 : INFO : resulting dictionary: Dictionary(2509 unique tokens: ['summer', 'tzu', 'dollar', 'brick', 'town']...)
2018-10-26 14:48:54,538 : INFO : storing corpus in Matrix Market format to /tmp/corpus.mm
2018-10-26 14:48:54,541 : INFO : saving sparse matrix to /tmp/corpus.mm
2018-10-26 14:48:54,542 : INFO : PROGRESS:

training LDA with 5 topics over 50 passes
removing top 2 words with 1.0 review freq threshold


2018-10-26 14:48:55,115 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:48:55,123 : INFO : topic #0 (0.200): 0.040*"GOODREVIEW" + 0.034*"BADREVIEW" + 0.033*"VGOODREVIEW" + 0.027*"greenies" + 0.017*"give" + 0.016*"treat" + 0.011*"eat" + 0.011*"year" + 0.010*"like" + 0.009*"one"
2018-10-26 14:48:55,127 : INFO : topic #1 (0.200): 0.056*"GOODREVIEW" + 0.036*"BADREVIEW" + 0.028*"VGOODREVIEW" + 0.023*"greenies" + 0.016*"treat" + 0.015*"get" + 0.013*"one" + 0.012*"teeth" + 0.010*"know" + 0.010*"good"
2018-10-26 14:48:55,130 : INFO : topic #2 (0.200): 0.081*"GOODREVIEW" + 0.049*"VGOODREVIEW" + 0.028*"BADREVIEW" + 0.016*"greenies" + 0.016*"teeth" + 0.014*"get" + 0.013*"treat" + 0.011*"good" + 0.011*"one" + 0.010*"price"
2018-10-26 14:48:55,135 : INFO : topic #3 (0.200): 0.040*"BADREVIEW" + 0.035*"GOODREVIEW" + 0.034*"VGOODREVIEW" + 0.028*"greenies" + 0.020*"teeth" + 0.014*"treat" + 0.013*"breath" + 0.011*"one" + 0.011*"give" + 0.010*"get"
2018-10-26 14:48

2018-10-26 14:48:57,716 : INFO : topic diff=0.427080, rho=0.447214
2018-10-26 14:48:57,726 : INFO : PROGRESS: pass 2, at document #210/630
2018-10-26 14:48:57,947 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:48:57,954 : INFO : topic #0 (0.200): 0.034*"BADREVIEW" + 0.028*"GOODREVIEW" + 0.026*"greenies" + 0.018*"give" + 0.016*"treat" + 0.015*"VGOODREVIEW" + 0.013*"year" + 0.012*"eat" + 0.012*"VBADREVIEW" + 0.010*"old"
2018-10-26 14:48:57,957 : INFO : topic #1 (0.200): 0.037*"GOODREVIEW" + 0.035*"BADREVIEW" + 0.020*"VGOODREVIEW" + 0.018*"greenies" + 0.014*"treat" + 0.013*"get" + 0.009*"one" + 0.008*"greenie" + 0.007*"purchase" + 0.007*"year"
2018-10-26 14:48:57,961 : INFO : topic #2 (0.200): 0.078*"GOODREVIEW" + 0.053*"VGOODREVIEW" + 0.030*"BADREVIEW" + 0.020*"greenies" + 0.019*"teeth" + 0.016*"get" + 0.014*"treat" + 0.014*"one" + 0.012*"price" + 0.011*"great"
2018-10-26 14:48:57,966 : INFO : topic #3 (0.200): 0.043*"BADREVIEW" + 0.038*"GOODREVIE

2018-10-26 14:48:59,832 : INFO : topic #4 (0.200): 0.037*"VGOODREVIEW" + 0.024*"greenies" + 0.015*"size" + 0.014*"order" + 0.012*"great" + 0.011*"price" + 0.010*"amazon" + 0.010*"BADREVIEW" + 0.010*"buy" + 0.010*"time"
2018-10-26 14:48:59,835 : INFO : topic diff=0.240226, rho=0.377964
2018-10-26 14:48:59,846 : INFO : PROGRESS: pass 4, at document #210/630
2018-10-26 14:49:00,040 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:00,046 : INFO : topic #0 (0.200): 0.035*"BADREVIEW" + 0.024*"greenies" + 0.023*"GOODREVIEW" + 0.017*"give" + 0.017*"VBADREVIEW" + 0.014*"treat" + 0.012*"eat" + 0.012*"year" + 0.009*"old" + 0.008*"greenie"
2018-10-26 14:49:00,048 : INFO : topic #1 (0.200): 0.034*"BADREVIEW" + 0.027*"GOODREVIEW" + 0.015*"greenies" + 0.012*"VGOODREVIEW" + 0.010*"treat" + 0.009*"get" + 0.008*"purchase" + 0.008*"greenie" + 0.007*"tub" + 0.007*"never"
2018-10-26 14:49:00,053 : INFO : topic #2 (0.200): 0.076*"GOODREVIEW" + 0.055*"VGOODREVIEW" + 

2018-10-26 14:49:01,867 : INFO : topic #3 (0.200): 0.044*"GOODREVIEW" + 0.043*"BADREVIEW" + 0.025*"VGOODREVIEW" + 0.021*"greenies" + 0.018*"treat" + 0.014*"teeth" + 0.013*"breath" + 0.012*"VBADREVIEW" + 0.011*"one" + 0.011*"give"
2018-10-26 14:49:01,871 : INFO : topic #4 (0.200): 0.028*"VGOODREVIEW" + 0.022*"greenies" + 0.015*"size" + 0.015*"order" + 0.010*"time" + 0.009*"amazon" + 0.009*"price" + 0.009*"buy" + 0.008*"petite" + 0.008*"receive"
2018-10-26 14:49:01,877 : INFO : topic diff=0.181902, rho=0.333333
2018-10-26 14:49:01,887 : INFO : PROGRESS: pass 6, at document #210/630
2018-10-26 14:49:02,098 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:02,103 : INFO : topic #0 (0.200): 0.035*"BADREVIEW" + 0.023*"greenies" + 0.020*"GOODREVIEW" + 0.019*"VBADREVIEW" + 0.016*"give" + 0.012*"treat" + 0.012*"eat" + 0.011*"year" + 0.008*"greenie" + 0.008*"problem"
2018-10-26 14:49:02,106 : INFO : topic #1 (0.200): 0.032*"BADREVIEW" + 0.019*"GOODREVIEW"

2018-10-26 14:49:03,755 : INFO : topic #2 (0.200): 0.075*"GOODREVIEW" + 0.057*"VGOODREVIEW" + 0.031*"BADREVIEW" + 0.024*"greenies" + 0.020*"teeth" + 0.018*"get" + 0.016*"treat" + 0.016*"one" + 0.013*"great" + 0.012*"price"
2018-10-26 14:49:03,760 : INFO : topic #3 (0.200): 0.044*"BADREVIEW" + 0.044*"GOODREVIEW" + 0.025*"VGOODREVIEW" + 0.019*"greenies" + 0.018*"treat" + 0.013*"teeth" + 0.013*"breath" + 0.012*"VBADREVIEW" + 0.011*"one" + 0.011*"give"
2018-10-26 14:49:03,763 : INFO : topic #4 (0.200): 0.022*"VGOODREVIEW" + 0.021*"greenies" + 0.015*"size" + 0.014*"order" + 0.009*"time" + 0.008*"petite" + 0.008*"amazon" + 0.008*"receive" + 0.008*"price" + 0.008*"buy"
2018-10-26 14:49:03,767 : INFO : topic diff=0.155481, rho=0.301511
2018-10-26 14:49:03,781 : INFO : PROGRESS: pass 8, at document #210/630
2018-10-26 14:49:03,978 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:03,985 : INFO : topic #0 (0.200): 0.035*"BADREVIEW" + 0.022*"greenies" + 0.

2018-10-26 14:49:05,547 : INFO : topic #1 (0.200): 0.029*"BADREVIEW" + 0.011*"GOODREVIEW" + 0.009*"greenies" + 0.009*"food" + 0.009*"tub" + 0.008*"purchase" + 0.007*"senior" + 0.007*"greenie" + 0.007*"treat" + 0.006*"VBADREVIEW"
2018-10-26 14:49:05,552 : INFO : topic #2 (0.200): 0.075*"GOODREVIEW" + 0.058*"VGOODREVIEW" + 0.031*"BADREVIEW" + 0.025*"greenies" + 0.020*"teeth" + 0.018*"get" + 0.016*"treat" + 0.016*"one" + 0.013*"great" + 0.012*"price"
2018-10-26 14:49:05,557 : INFO : topic #3 (0.200): 0.044*"BADREVIEW" + 0.043*"GOODREVIEW" + 0.024*"VGOODREVIEW" + 0.018*"greenies" + 0.018*"treat" + 0.013*"breath" + 0.013*"teeth" + 0.012*"VBADREVIEW" + 0.010*"give" + 0.010*"one"
2018-10-26 14:49:05,561 : INFO : topic #4 (0.200): 0.019*"greenies" + 0.016*"VGOODREVIEW" + 0.014*"order" + 0.014*"size" + 0.009*"time" + 0.008*"receive" + 0.008*"amazon" + 0.008*"petite" + 0.008*"could" + 0.007*"petco"
2018-10-26 14:49:05,565 : INFO : topic diff=0.139812, rho=0.277350
2018-10-26 14:49:05,574 : INFO 

2018-10-26 14:49:07,253 : INFO : topic #0 (0.200): 0.037*"BADREVIEW" + 0.026*"VBADREVIEW" + 0.021*"greenies" + 0.016*"GOODREVIEW" + 0.014*"give" + 0.011*"eat" + 0.010*"treat" + 0.009*"problem" + 0.009*"year" + 0.008*"greenie"
2018-10-26 14:49:07,255 : INFO : topic #1 (0.200): 0.028*"BADREVIEW" + 0.009*"food" + 0.009*"tub" + 0.009*"purchase" + 0.008*"GOODREVIEW" + 0.008*"greenies" + 0.007*"senior" + 0.007*"greenie" + 0.006*"VBADREVIEW" + 0.006*"treat"
2018-10-26 14:49:07,257 : INFO : topic #2 (0.200): 0.075*"GOODREVIEW" + 0.058*"VGOODREVIEW" + 0.031*"BADREVIEW" + 0.025*"greenies" + 0.020*"teeth" + 0.018*"get" + 0.016*"treat" + 0.016*"one" + 0.013*"great" + 0.012*"price"
2018-10-26 14:49:07,264 : INFO : topic #3 (0.200): 0.044*"BADREVIEW" + 0.042*"GOODREVIEW" + 0.023*"VGOODREVIEW" + 0.018*"treat" + 0.017*"greenies" + 0.013*"breath" + 0.012*"VBADREVIEW" + 0.012*"teeth" + 0.010*"give" + 0.010*"size"
2018-10-26 14:49:07,272 : INFO : topic #4 (0.200): 0.017*"greenies" + 0.014*"order" + 0.013

2018-10-26 14:49:08,781 : INFO : PROGRESS: pass 13, at document #630/630
2018-10-26 14:49:08,942 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:08,948 : INFO : topic #0 (0.200): 0.037*"BADREVIEW" + 0.026*"VBADREVIEW" + 0.021*"greenies" + 0.016*"GOODREVIEW" + 0.014*"give" + 0.012*"eat" + 0.009*"treat" + 0.009*"problem" + 0.009*"year" + 0.008*"greenie"
2018-10-26 14:49:08,952 : INFO : topic #1 (0.200): 0.026*"BADREVIEW" + 0.009*"tub" + 0.009*"food" + 0.009*"purchase" + 0.008*"senior" + 0.007*"greenies" + 0.007*"greenie" + 0.006*"GOODREVIEW" + 0.005*"VBADREVIEW" + 0.005*"regular"
2018-10-26 14:49:08,955 : INFO : topic #2 (0.200): 0.075*"GOODREVIEW" + 0.058*"VGOODREVIEW" + 0.031*"BADREVIEW" + 0.026*"greenies" + 0.020*"teeth" + 0.018*"get" + 0.017*"treat" + 0.016*"one" + 0.013*"great" + 0.012*"price"
2018-10-26 14:49:08,958 : INFO : topic #3 (0.200): 0.044*"BADREVIEW" + 0.041*"GOODREVIEW" + 0.022*"VGOODREVIEW" + 0.018*"treat" + 0.016*"greenies" + 

2018-10-26 14:49:10,222 : INFO : topic diff=0.100325, rho=0.229416
2018-10-26 14:49:10,477 : INFO : -6.544 per-word bound, 93.3 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:10,480 : INFO : PROGRESS: pass 15, at document #630/630
2018-10-26 14:49:10,626 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:10,633 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.027*"VBADREVIEW" + 0.021*"greenies" + 0.015*"GOODREVIEW" + 0.014*"give" + 0.012*"eat" + 0.010*"problem" + 0.009*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:10,636 : INFO : topic #1 (0.200): 0.025*"BADREVIEW" + 0.009*"tub" + 0.009*"food" + 0.009*"purchase" + 0.008*"senior" + 0.007*"greenie" + 0.006*"greenies" + 0.005*"quick" + 0.005*"literally" + 0.005*"regular"
2018-10-26 14:49:10,641 : INFO : topic #2 (0.200): 0.075*"GOODREVIEW" + 0.058*"VGOODREVIEW" + 0.032*"BADREVIEW" + 0.026*"greenies" + 0.020*"teeth" + 0.018*"get" + 0.017*"

2018-10-26 14:49:11,852 : INFO : topic #4 (0.200): 0.011*"order" + 0.011*"greenies" + 0.011*"size" + 0.010*"petco" + 0.008*"could" + 0.008*"amazon" + 0.007*"receive" + 0.007*"petite" + 0.006*"time" + 0.006*"petsmart"
2018-10-26 14:49:11,857 : INFO : topic diff=0.094647, rho=0.218218
2018-10-26 14:49:12,111 : INFO : -6.538 per-word bound, 92.9 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:12,114 : INFO : PROGRESS: pass 17, at document #630/630
2018-10-26 14:49:12,264 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:12,270 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.027*"VBADREVIEW" + 0.020*"greenies" + 0.015*"GOODREVIEW" + 0.014*"give" + 0.012*"eat" + 0.010*"problem" + 0.009*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:12,272 : INFO : topic #1 (0.200): 0.024*"BADREVIEW" + 0.010*"tub" + 0.009*"food" + 0.009*"purchase" + 0.008*"senior" + 0.006*"greenie" + 0.006*"quick" + 0.005*"l

2018-10-26 14:49:13,451 : INFO : topic #4 (0.200): 0.011*"order" + 0.011*"size" + 0.010*"petco" + 0.010*"greenies" + 0.008*"could" + 0.007*"receive" + 0.007*"amazon" + 0.007*"petite" + 0.006*"petsmart" + 0.005*"time"
2018-10-26 14:49:13,455 : INFO : topic diff=0.089573, rho=0.208514
2018-10-26 14:49:13,720 : INFO : -6.533 per-word bound, 92.6 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:13,722 : INFO : PROGRESS: pass 19, at document #630/630
2018-10-26 14:49:13,854 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:13,861 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.027*"VBADREVIEW" + 0.020*"greenies" + 0.014*"GOODREVIEW" + 0.014*"give" + 0.012*"eat" + 0.010*"problem" + 0.009*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:13,864 : INFO : topic #1 (0.200): 0.023*"BADREVIEW" + 0.010*"tub" + 0.010*"food" + 0.009*"purchase" + 0.008*"senior" + 0.006*"greenie" + 0.006*"quick" + 0.005*"l

2018-10-26 14:49:15,037 : INFO : topic #4 (0.200): 0.011*"order" + 0.010*"size" + 0.010*"petco" + 0.009*"greenies" + 0.008*"could" + 0.007*"receive" + 0.007*"petite" + 0.006*"amazon" + 0.006*"petsmart" + 0.005*"tax"
2018-10-26 14:49:15,041 : INFO : topic diff=0.085397, rho=0.200000
2018-10-26 14:49:15,280 : INFO : -6.529 per-word bound, 92.4 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:15,283 : INFO : PROGRESS: pass 21, at document #630/630
2018-10-26 14:49:15,422 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:15,428 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.027*"VBADREVIEW" + 0.020*"greenies" + 0.014*"GOODREVIEW" + 0.014*"give" + 0.012*"eat" + 0.010*"problem" + 0.009*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:15,430 : INFO : topic #1 (0.200): 0.022*"BADREVIEW" + 0.010*"tub" + 0.010*"food" + 0.010*"purchase" + 0.008*"senior" + 0.006*"greenie" + 0.006*"quick" + 0.005*"li

2018-10-26 14:49:16,640 : INFO : topic #4 (0.200): 0.011*"order" + 0.010*"petco" + 0.010*"size" + 0.008*"could" + 0.007*"receive" + 0.007*"greenies" + 0.007*"petite" + 0.006*"amazon" + 0.006*"petsmart" + 0.005*"tax"
2018-10-26 14:49:16,644 : INFO : topic diff=0.081565, rho=0.192450
2018-10-26 14:49:16,879 : INFO : -6.526 per-word bound, 92.1 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:16,880 : INFO : PROGRESS: pass 23, at document #630/630
2018-10-26 14:49:17,023 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:17,029 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.027*"VBADREVIEW" + 0.020*"greenies" + 0.014*"GOODREVIEW" + 0.014*"give" + 0.012*"eat" + 0.010*"problem" + 0.009*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:17,032 : INFO : topic #1 (0.200): 0.021*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"greenie" + 0.006*"quick" + 0.006*"li

2018-10-26 14:49:18,176 : INFO : topic #4 (0.200): 0.011*"order" + 0.010*"petco" + 0.010*"size" + 0.008*"could" + 0.007*"receive" + 0.007*"petite" + 0.006*"petsmart" + 0.005*"return" + 0.005*"tax" + 0.005*"amazon"
2018-10-26 14:49:18,180 : INFO : topic diff=0.078368, rho=0.185695
2018-10-26 14:49:18,424 : INFO : -6.523 per-word bound, 91.9 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:18,427 : INFO : PROGRESS: pass 25, at document #630/630
2018-10-26 14:49:18,560 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:18,566 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.027*"VBADREVIEW" + 0.020*"greenies" + 0.014*"give" + 0.014*"GOODREVIEW" + 0.012*"eat" + 0.010*"problem" + 0.009*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:18,570 : INFO : topic #1 (0.200): 0.020*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"greenie" + 0.006*"quick" + 0.006*"lite

2018-10-26 14:49:19,715 : INFO : topic #4 (0.200): 0.011*"order" + 0.011*"petco" + 0.009*"size" + 0.008*"could" + 0.007*"receive" + 0.006*"petite" + 0.006*"petsmart" + 0.005*"return" + 0.005*"tax" + 0.005*"diarrhea"
2018-10-26 14:49:19,719 : INFO : topic diff=0.075476, rho=0.179605
2018-10-26 14:49:19,958 : INFO : -6.519 per-word bound, 91.7 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:19,960 : INFO : PROGRESS: pass 27, at document #630/630
2018-10-26 14:49:20,091 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:20,098 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.027*"VBADREVIEW" + 0.020*"greenies" + 0.014*"give" + 0.014*"GOODREVIEW" + 0.012*"eat" + 0.010*"problem" + 0.008*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:20,100 : INFO : topic #1 (0.200): 0.019*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"greenie" + 0.006*"li

2018-10-26 14:49:21,238 : INFO : topic #4 (0.200): 0.011*"petco" + 0.010*"order" + 0.009*"size" + 0.008*"could" + 0.007*"receive" + 0.006*"petite" + 0.006*"petsmart" + 0.005*"return" + 0.005*"tax" + 0.005*"diarrhea"
2018-10-26 14:49:21,242 : INFO : topic diff=0.072839, rho=0.174078
2018-10-26 14:49:21,473 : INFO : -6.516 per-word bound, 91.5 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:21,475 : INFO : PROGRESS: pass 29, at document #630/630
2018-10-26 14:49:21,599 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:21,605 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.027*"VBADREVIEW" + 0.019*"greenies" + 0.014*"give" + 0.014*"GOODREVIEW" + 0.012*"eat" + 0.010*"problem" + 0.008*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:21,609 : INFO : topic #1 (0.200): 0.018*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"greenie" + 0.006*"li

2018-10-26 14:49:22,724 : INFO : topic #4 (0.200): 0.011*"petco" + 0.010*"order" + 0.008*"size" + 0.008*"could" + 0.007*"receive" + 0.006*"petite" + 0.006*"petsmart" + 0.006*"return" + 0.005*"tax" + 0.005*"diarrhea"
2018-10-26 14:49:22,728 : INFO : topic diff=0.070480, rho=0.169031
2018-10-26 14:49:22,976 : INFO : -6.514 per-word bound, 91.4 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:22,979 : INFO : PROGRESS: pass 31, at document #630/630
2018-10-26 14:49:23,110 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:23,116 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.027*"VBADREVIEW" + 0.019*"greenies" + 0.014*"give" + 0.014*"GOODREVIEW" + 0.011*"eat" + 0.010*"problem" + 0.008*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:23,119 : INFO : topic #1 (0.200): 0.018*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"greenie" + 0.006*"li

2018-10-26 14:49:24,177 : INFO : topic #4 (0.200): 0.011*"petco" + 0.010*"order" + 0.008*"could" + 0.007*"receive" + 0.007*"size" + 0.006*"petsmart" + 0.006*"return" + 0.006*"petite" + 0.005*"tax" + 0.005*"diarrhea"
2018-10-26 14:49:24,181 : INFO : topic diff=0.068370, rho=0.164399
2018-10-26 14:49:24,425 : INFO : -6.511 per-word bound, 91.2 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:24,426 : INFO : PROGRESS: pass 33, at document #630/630
2018-10-26 14:49:24,560 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:24,566 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.027*"VBADREVIEW" + 0.019*"greenies" + 0.014*"give" + 0.014*"GOODREVIEW" + 0.011*"eat" + 0.010*"problem" + 0.008*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:24,568 : INFO : topic #1 (0.200): 0.017*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"literally" + 0.006*"

2018-10-26 14:49:25,668 : INFO : topic #4 (0.200): 0.011*"petco" + 0.009*"order" + 0.008*"could" + 0.007*"receive" + 0.006*"size" + 0.006*"petsmart" + 0.006*"return" + 0.005*"tax" + 0.005*"petite" + 0.005*"diarrhea"
2018-10-26 14:49:25,672 : INFO : topic diff=0.066292, rho=0.160128
2018-10-26 14:49:25,895 : INFO : -6.509 per-word bound, 91.1 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:25,897 : INFO : PROGRESS: pass 35, at document #630/630
2018-10-26 14:49:26,030 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:26,037 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.026*"VBADREVIEW" + 0.019*"greenies" + 0.014*"give" + 0.014*"GOODREVIEW" + 0.011*"eat" + 0.010*"problem" + 0.008*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:26,040 : INFO : topic #1 (0.200): 0.016*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"literally" + 0.005*"

2018-10-26 14:49:27,172 : INFO : topic #4 (0.200): 0.011*"petco" + 0.009*"order" + 0.008*"could" + 0.007*"receive" + 0.006*"petsmart" + 0.006*"return" + 0.006*"tax" + 0.005*"diarrhea" + 0.005*"size" + 0.005*"end"
2018-10-26 14:49:27,177 : INFO : topic diff=0.064327, rho=0.156174
2018-10-26 14:49:27,410 : INFO : -6.507 per-word bound, 90.9 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:27,412 : INFO : PROGRESS: pass 37, at document #630/630
2018-10-26 14:49:27,543 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:27,549 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.026*"VBADREVIEW" + 0.019*"greenies" + 0.014*"give" + 0.014*"GOODREVIEW" + 0.011*"eat" + 0.010*"problem" + 0.008*"treat" + 0.008*"year" + 0.008*"greenie"
2018-10-26 14:49:27,552 : INFO : topic #1 (0.200): 0.016*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"literally" + 0.005*"gre

2018-10-26 14:49:28,605 : INFO : topic #4 (0.200): 0.011*"petco" + 0.008*"order" + 0.008*"could" + 0.007*"receive" + 0.006*"petsmart" + 0.006*"return" + 0.006*"tax" + 0.005*"diarrhea" + 0.005*"end" + 0.004*"reduce"
2018-10-26 14:49:28,609 : INFO : topic diff=0.062635, rho=0.152499
2018-10-26 14:49:28,837 : INFO : -6.505 per-word bound, 90.8 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:28,840 : INFO : PROGRESS: pass 39, at document #630/630
2018-10-26 14:49:28,966 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:28,975 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.026*"VBADREVIEW" + 0.019*"greenies" + 0.014*"give" + 0.013*"GOODREVIEW" + 0.011*"eat" + 0.010*"problem" + 0.008*"treat" + 0.008*"year" + 0.008*"make"
2018-10-26 14:49:28,977 : INFO : topic #1 (0.200): 0.015*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"literally" + 0.005*"gree

2018-10-26 14:49:30,072 : INFO : topic #4 (0.200): 0.011*"petco" + 0.008*"could" + 0.008*"order" + 0.006*"receive" + 0.006*"petsmart" + 0.006*"return" + 0.006*"tax" + 0.005*"diarrhea" + 0.005*"end" + 0.004*"reduce"
2018-10-26 14:49:30,076 : INFO : topic diff=0.061070, rho=0.149071
2018-10-26 14:49:30,304 : INFO : -6.503 per-word bound, 90.7 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:30,306 : INFO : PROGRESS: pass 41, at document #630/630
2018-10-26 14:49:30,435 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:30,441 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.026*"VBADREVIEW" + 0.019*"greenies" + 0.014*"give" + 0.013*"GOODREVIEW" + 0.011*"eat" + 0.010*"problem" + 0.008*"treat" + 0.008*"year" + 0.008*"make"
2018-10-26 14:49:30,444 : INFO : topic #1 (0.200): 0.015*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"literally" + 0.005*"gree

2018-10-26 14:49:31,497 : INFO : topic #4 (0.200): 0.011*"petco" + 0.008*"could" + 0.007*"order" + 0.006*"petsmart" + 0.006*"return" + 0.006*"tax" + 0.006*"receive" + 0.005*"diarrhea" + 0.005*"end" + 0.004*"reduce"
2018-10-26 14:49:31,502 : INFO : topic diff=0.059664, rho=0.145865
2018-10-26 14:49:31,718 : INFO : -6.502 per-word bound, 90.6 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:31,720 : INFO : PROGRESS: pass 43, at document #630/630
2018-10-26 14:49:31,838 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:31,845 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.026*"VBADREVIEW" + 0.019*"greenies" + 0.014*"give" + 0.013*"GOODREVIEW" + 0.011*"eat" + 0.010*"problem" + 0.008*"year" + 0.008*"treat" + 0.008*"make"
2018-10-26 14:49:31,848 : INFO : topic #1 (0.200): 0.014*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"literally" + 0.005*"gree

2018-10-26 14:49:32,894 : INFO : topic #4 (0.200): 0.011*"petco" + 0.008*"could" + 0.006*"order" + 0.006*"petsmart" + 0.006*"return" + 0.006*"tax" + 0.005*"diarrhea" + 0.005*"receive" + 0.005*"end" + 0.004*"reduce"
2018-10-26 14:49:32,898 : INFO : topic diff=0.058326, rho=0.142857
2018-10-26 14:49:33,131 : INFO : -6.500 per-word bound, 90.5 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:33,133 : INFO : PROGRESS: pass 45, at document #630/630
2018-10-26 14:49:33,250 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:33,256 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.026*"VBADREVIEW" + 0.019*"greenies" + 0.014*"give" + 0.013*"GOODREVIEW" + 0.011*"eat" + 0.010*"problem" + 0.008*"year" + 0.008*"treat" + 0.008*"make"
2018-10-26 14:49:33,259 : INFO : topic #1 (0.200): 0.014*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"literally" + 0.005*"gree

2018-10-26 14:49:34,259 : INFO : topic #4 (0.200): 0.011*"petco" + 0.008*"could" + 0.006*"petsmart" + 0.006*"return" + 0.006*"order" + 0.006*"tax" + 0.005*"diarrhea" + 0.005*"receive" + 0.005*"end" + 0.004*"reduce"
2018-10-26 14:49:34,263 : INFO : topic diff=0.057065, rho=0.140028
2018-10-26 14:49:34,487 : INFO : -6.499 per-word bound, 90.4 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:34,489 : INFO : PROGRESS: pass 47, at document #630/630
2018-10-26 14:49:34,600 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:34,606 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.026*"VBADREVIEW" + 0.019*"greenies" + 0.014*"give" + 0.013*"GOODREVIEW" + 0.011*"eat" + 0.010*"problem" + 0.008*"year" + 0.008*"make" + 0.008*"treat"
2018-10-26 14:49:34,609 : INFO : topic #1 (0.200): 0.014*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"literally" + 0.005*"gree

2018-10-26 14:49:35,614 : INFO : topic #4 (0.200): 0.011*"petco" + 0.007*"could" + 0.006*"petsmart" + 0.006*"return" + 0.006*"tax" + 0.005*"diarrhea" + 0.005*"order" + 0.005*"receive" + 0.005*"end" + 0.004*"reduce"
2018-10-26 14:49:35,619 : INFO : topic diff=0.055856, rho=0.137361
2018-10-26 14:49:35,835 : INFO : -6.498 per-word bound, 90.4 perplexity estimate based on a held-out corpus of 210 documents with 6808 words
2018-10-26 14:49:35,838 : INFO : PROGRESS: pass 49, at document #630/630
2018-10-26 14:49:35,952 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:35,958 : INFO : topic #0 (0.200): 0.038*"BADREVIEW" + 0.026*"VBADREVIEW" + 0.019*"greenies" + 0.014*"give" + 0.013*"GOODREVIEW" + 0.011*"eat" + 0.010*"problem" + 0.008*"year" + 0.008*"make" + 0.008*"treat"
2018-10-26 14:49:35,961 : INFO : topic #1 (0.200): 0.013*"BADREVIEW" + 0.010*"tub" + 0.010*"purchase" + 0.010*"food" + 0.008*"senior" + 0.006*"quick" + 0.006*"literally" + 0.005*"gree

training LDA with 7 topics over 50 passes
removing top 2 words with 1.0 review freq threshold


2018-10-26 14:49:38,024 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:38,033 : INFO : topic #3 (0.143): 0.035*"GOODREVIEW" + 0.032*"VGOODREVIEW" + 0.031*"BADREVIEW" + 0.025*"greenies" + 0.018*"teeth" + 0.014*"breath" + 0.013*"one" + 0.013*"treat" + 0.011*"VBADREVIEW" + 0.010*"get"
2018-10-26 14:49:38,035 : INFO : topic #0 (0.143): 0.039*"GOODREVIEW" + 0.031*"BADREVIEW" + 0.031*"VGOODREVIEW" + 0.022*"greenies" + 0.014*"give" + 0.012*"treat" + 0.011*"year" + 0.010*"like" + 0.010*"eat" + 0.009*"buy"
2018-10-26 14:49:38,040 : INFO : topic #5 (0.143): 0.049*"VGOODREVIEW" + 0.046*"GOODREVIEW" + 0.036*"BADREVIEW" + 0.030*"greenies" + 0.020*"treat" + 0.016*"teeth" + 0.015*"price" + 0.015*"buy" + 0.014*"like" + 0.012*"great"
2018-10-26 14:49:38,045 : INFO : topic #2 (0.143): 0.091*"GOODREVIEW" + 0.045*"VGOODREVIEW" + 0.024*"BADREVIEW" + 0.015*"greenies" + 0.014*"teeth" + 0.012*"amazon" + 0.012*"get" + 0.012*"treat" + 0.012*"good" + 0.011*"one"
2018-1

2018-10-26 14:49:40,592 : INFO : topic diff=0.431534, rho=0.447214
2018-10-26 14:49:40,603 : INFO : PROGRESS: pass 2, at document #210/630
2018-10-26 14:49:40,826 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:40,834 : INFO : topic #2 (0.143): 0.093*"GOODREVIEW" + 0.048*"VGOODREVIEW" + 0.025*"BADREVIEW" + 0.016*"greenies" + 0.016*"teeth" + 0.015*"get" + 0.013*"amazon" + 0.013*"treat" + 0.013*"price" + 0.013*"good"
2018-10-26 14:49:40,836 : INFO : topic #0 (0.143): 0.028*"BADREVIEW" + 0.020*"VBADREVIEW" + 0.014*"problem" + 0.014*"GOODREVIEW" + 0.011*"treat" + 0.010*"chew" + 0.009*"greenies" + 0.008*"year" + 0.008*"cause" + 0.008*"use"
2018-10-26 14:49:40,841 : INFO : topic #3 (0.143): 0.031*"BADREVIEW" + 0.030*"GOODREVIEW" + 0.021*"VGOODREVIEW" + 0.019*"VBADREVIEW" + 0.017*"greenies" + 0.016*"treat" + 0.013*"chew" + 0.013*"eat" + 0.013*"one" + 0.012*"teeth"
2018-10-26 14:49:40,845 : INFO : topic #5 (0.143): 0.065*"VGOODREVIEW" + 0.045*"GOODREV

2018-10-26 14:49:42,843 : INFO : topic diff=0.211907, rho=0.377964
2018-10-26 14:49:42,852 : INFO : PROGRESS: pass 4, at document #210/630
2018-10-26 14:49:43,071 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:43,079 : INFO : topic #2 (0.143): 0.093*"GOODREVIEW" + 0.049*"VGOODREVIEW" + 0.026*"BADREVIEW" + 0.018*"greenies" + 0.017*"teeth" + 0.016*"get" + 0.014*"treat" + 0.013*"amazon" + 0.013*"price" + 0.013*"good"
2018-10-26 14:49:43,082 : INFO : topic #1 (0.143): 0.028*"GOODREVIEW" + 0.019*"BADREVIEW" + 0.013*"VGOODREVIEW" + 0.012*"greenies" + 0.012*"tub" + 0.011*"treat" + 0.010*"problem" + 0.009*"regular" + 0.009*"purchase" + 0.008*"senior"
2018-10-26 14:49:43,087 : INFO : topic #5 (0.143): 0.070*"VGOODREVIEW" + 0.046*"GOODREVIEW" + 0.044*"greenies" + 0.032*"BADREVIEW" + 0.024*"great" + 0.023*"buy" + 0.019*"price" + 0.017*"treat" + 0.012*"like" + 0.011*"teeth"
2018-10-26 14:49:43,093 : INFO : topic #4 (0.143): 0.018*"size" + 0.013*"time" + 

2018-10-26 14:49:44,963 : INFO : topic diff=0.149211, rho=0.333333
2018-10-26 14:49:44,974 : INFO : PROGRESS: pass 6, at document #210/630
2018-10-26 14:49:45,187 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:45,195 : INFO : topic #4 (0.143): 0.018*"size" + 0.012*"time" + 0.011*"petite" + 0.011*"box" + 0.009*"order" + 0.009*"open" + 0.009*"crate" + 0.008*"diarrhea" + 0.008*"amazon" + 0.008*"cost"
2018-10-26 14:49:45,197 : INFO : topic #2 (0.143): 0.093*"GOODREVIEW" + 0.050*"VGOODREVIEW" + 0.026*"BADREVIEW" + 0.018*"greenies" + 0.017*"teeth" + 0.016*"get" + 0.014*"treat" + 0.014*"price" + 0.013*"amazon" + 0.013*"good"
2018-10-26 14:49:45,202 : INFO : topic #1 (0.143): 0.020*"GOODREVIEW" + 0.014*"BADREVIEW" + 0.013*"tub" + 0.010*"VGOODREVIEW" + 0.009*"problem" + 0.009*"regular" + 0.009*"greenies" + 0.009*"senior" + 0.008*"treat" + 0.008*"purchase"
2018-10-26 14:49:45,207 : INFO : topic #5 (0.143): 0.072*"VGOODREVIEW" + 0.045*"GOODREVIEW" + 0.0

2018-10-26 14:49:47,086 : INFO : topic diff=0.126025, rho=0.301511
2018-10-26 14:49:47,095 : INFO : PROGRESS: pass 8, at document #210/630
2018-10-26 14:49:47,305 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:47,312 : INFO : topic #6 (0.143): 0.048*"BADREVIEW" + 0.044*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 0.027*"greenies" + 0.022*"one" + 0.021*"teeth" + 0.020*"get" + 0.019*"give" + 0.016*"treat" + 0.015*"greenie"
2018-10-26 14:49:47,314 : INFO : topic #1 (0.143): 0.015*"GOODREVIEW" + 0.013*"tub" + 0.010*"BADREVIEW" + 0.009*"regular" + 0.009*"senior" + 0.009*"problem" + 0.008*"shipping" + 0.007*"VGOODREVIEW" + 0.007*"include" + 0.007*"sick"
2018-10-26 14:49:47,319 : INFO : topic #4 (0.143): 0.017*"size" + 0.011*"petite" + 0.011*"box" + 0.010*"time" + 0.009*"crate" + 0.009*"open" + 0.009*"order" + 0.009*"diarrhea" + 0.008*"cost" + 0.007*"amazon"
2018-10-26 14:49:47,324 : INFO : topic #2 (0.143): 0.093*"GOODREVIEW" + 0.050*"VGOODREVIEW" + 0.026*

2018-10-26 14:49:49,127 : INFO : topic diff=0.112156, rho=0.277350
2018-10-26 14:49:49,137 : INFO : PROGRESS: pass 10, at document #210/630
2018-10-26 14:49:49,346 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:49,353 : INFO : topic #6 (0.143): 0.049*"BADREVIEW" + 0.045*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 0.027*"greenies" + 0.022*"one" + 0.021*"teeth" + 0.020*"get" + 0.019*"give" + 0.016*"treat" + 0.015*"greenie"
2018-10-26 14:49:49,356 : INFO : topic #1 (0.143): 0.014*"tub" + 0.010*"GOODREVIEW" + 0.010*"senior" + 0.009*"regular" + 0.008*"problem" + 0.008*"shipping" + 0.007*"include" + 0.007*"BADREVIEW" + 0.007*"sick" + 0.007*"excellent"
2018-10-26 14:49:49,360 : INFO : topic #0 (0.143): 0.036*"VBADREVIEW" + 0.028*"BADREVIEW" + 0.015*"problem" + 0.011*"cause" + 0.010*"pet" + 0.010*"make" + 0.009*"chew" + 0.007*"owner" + 0.007*"case" + 0.007*"piece"
2018-10-26 14:49:49,365 : INFO : topic #3 (0.143): 0.027*"BADREVIEW" + 0.020*"GOODREVIEW" + 0.

2018-10-26 14:49:51,161 : INFO : topic diff=0.102111, rho=0.258199
2018-10-26 14:49:51,174 : INFO : PROGRESS: pass 12, at document #210/630
2018-10-26 14:49:51,357 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:51,364 : INFO : topic #0 (0.143): 0.037*"VBADREVIEW" + 0.028*"BADREVIEW" + 0.015*"problem" + 0.011*"cause" + 0.010*"pet" + 0.010*"make" + 0.009*"chew" + 0.007*"owner" + 0.007*"case" + 0.007*"piece"
2018-10-26 14:49:51,366 : INFO : topic #5 (0.143): 0.074*"VGOODREVIEW" + 0.044*"greenies" + 0.044*"GOODREVIEW" + 0.027*"BADREVIEW" + 0.026*"great" + 0.024*"buy" + 0.021*"price" + 0.016*"treat" + 0.012*"find" + 0.011*"like"
2018-10-26 14:49:51,371 : INFO : topic #2 (0.143): 0.093*"GOODREVIEW" + 0.051*"VGOODREVIEW" + 0.026*"BADREVIEW" + 0.020*"greenies" + 0.018*"teeth" + 0.017*"get" + 0.015*"treat" + 0.014*"price" + 0.013*"good" + 0.013*"amazon"
2018-10-26 14:49:51,374 : INFO : topic #3 (0.143): 0.026*"BADREVIEW" + 0.020*"VBADREVIEW" + 0.017*"

2018-10-26 14:49:53,144 : INFO : topic diff=0.094370, rho=0.242536
2018-10-26 14:49:53,154 : INFO : PROGRESS: pass 14, at document #210/630
2018-10-26 14:49:53,369 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:53,377 : INFO : topic #6 (0.143): 0.049*"BADREVIEW" + 0.045*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 0.028*"greenies" + 0.022*"one" + 0.021*"teeth" + 0.020*"get" + 0.019*"give" + 0.017*"treat" + 0.015*"greenie"
2018-10-26 14:49:53,379 : INFO : topic #5 (0.143): 0.074*"VGOODREVIEW" + 0.044*"greenies" + 0.044*"GOODREVIEW" + 0.026*"BADREVIEW" + 0.026*"great" + 0.024*"buy" + 0.021*"price" + 0.015*"treat" + 0.012*"find" + 0.011*"like"
2018-10-26 14:49:53,384 : INFO : topic #1 (0.143): 0.014*"tub" + 0.010*"senior" + 0.010*"regular" + 0.008*"shipping" + 0.008*"include" + 0.008*"sick" + 0.007*"problem" + 0.007*"quick" + 0.007*"excellent" + 0.007*"beg"
2018-10-26 14:49:53,388 : INFO : topic #2 (0.143): 0.093*"GOODREVIEW" + 0.051*"VGOODREVIEW" + 0.0

2018-10-26 14:49:55,177 : INFO : topic diff=0.088267, rho=0.229416
2018-10-26 14:49:55,187 : INFO : PROGRESS: pass 16, at document #210/630
2018-10-26 14:49:55,363 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:55,371 : INFO : topic #4 (0.143): 0.015*"size" + 0.011*"petite" + 0.011*"box" + 0.009*"crate" + 0.009*"open" + 0.009*"diarrhea" + 0.008*"cost" + 0.008*"order" + 0.008*"time" + 0.007*"amazon"
2018-10-26 14:49:55,374 : INFO : topic #1 (0.143): 0.015*"tub" + 0.010*"senior" + 0.010*"regular" + 0.008*"shipping" + 0.008*"include" + 0.008*"sick" + 0.007*"problem" + 0.007*"quick" + 0.007*"excellent" + 0.007*"beg"
2018-10-26 14:49:55,379 : INFO : topic #5 (0.143): 0.074*"VGOODREVIEW" + 0.044*"greenies" + 0.043*"GOODREVIEW" + 0.026*"great" + 0.026*"BADREVIEW" + 0.024*"buy" + 0.022*"price" + 0.015*"treat" + 0.012*"find" + 0.011*"like"
2018-10-26 14:49:55,385 : INFO : topic #3 (0.143): 0.024*"BADREVIEW" + 0.020*"VBADREVIEW" + 0.014*"chew" + 0.012*

2018-10-26 14:49:57,155 : INFO : topic diff=0.083133, rho=0.218218
2018-10-26 14:49:57,166 : INFO : PROGRESS: pass 18, at document #210/630
2018-10-26 14:49:57,354 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:57,361 : INFO : topic #0 (0.143): 0.037*"VBADREVIEW" + 0.027*"BADREVIEW" + 0.015*"problem" + 0.012*"cause" + 0.011*"pet" + 0.010*"make" + 0.009*"chew" + 0.007*"owner" + 0.007*"case" + 0.007*"piece"
2018-10-26 14:49:57,364 : INFO : topic #4 (0.143): 0.015*"size" + 0.011*"petite" + 0.011*"box" + 0.009*"crate" + 0.009*"open" + 0.009*"diarrhea" + 0.009*"cost" + 0.008*"order" + 0.008*"time" + 0.007*"amazon"
2018-10-26 14:49:57,368 : INFO : topic #6 (0.143): 0.050*"BADREVIEW" + 0.045*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 0.028*"greenies" + 0.022*"one" + 0.021*"teeth" + 0.020*"get" + 0.019*"give" + 0.017*"treat" + 0.015*"greenie"
2018-10-26 14:49:57,373 : INFO : topic #5 (0.143): 0.074*"VGOODREVIEW" + 0.044*"greenies" + 0.043*"GOODREVIEW" + 0.

2018-10-26 14:49:59,177 : INFO : topic diff=0.078946, rho=0.208514
2018-10-26 14:49:59,189 : INFO : PROGRESS: pass 20, at document #210/630
2018-10-26 14:49:59,395 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:49:59,402 : INFO : topic #5 (0.143): 0.074*"VGOODREVIEW" + 0.043*"greenies" + 0.042*"GOODREVIEW" + 0.026*"great" + 0.024*"BADREVIEW" + 0.024*"buy" + 0.022*"price" + 0.014*"treat" + 0.012*"find" + 0.011*"like"
2018-10-26 14:49:59,405 : INFO : topic #1 (0.143): 0.015*"tub" + 0.010*"senior" + 0.010*"regular" + 0.008*"shipping" + 0.008*"include" + 0.008*"sick" + 0.007*"quick" + 0.007*"beg" + 0.007*"problem" + 0.007*"excellent"
2018-10-26 14:49:59,409 : INFO : topic #6 (0.143): 0.050*"BADREVIEW" + 0.045*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 0.028*"greenies" + 0.022*"one" + 0.021*"teeth" + 0.020*"get" + 0.019*"give" + 0.017*"treat" + 0.015*"greenie"
2018-10-26 14:49:59,413 : INFO : topic #4 (0.143): 0.015*"size" + 0.012*"petite" + 0.011*"box" + 

2018-10-26 14:50:01,124 : INFO : topic diff=0.075467, rho=0.200000
2018-10-26 14:50:01,137 : INFO : PROGRESS: pass 22, at document #210/630
2018-10-26 14:50:01,356 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:01,369 : INFO : topic #1 (0.143): 0.015*"tub" + 0.010*"senior" + 0.010*"regular" + 0.008*"shipping" + 0.008*"include" + 0.008*"sick" + 0.007*"quick" + 0.007*"beg" + 0.007*"excellent" + 0.007*"formula"
2018-10-26 14:50:01,372 : INFO : topic #6 (0.143): 0.050*"BADREVIEW" + 0.045*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 0.028*"greenies" + 0.022*"one" + 0.021*"teeth" + 0.020*"get" + 0.019*"give" + 0.018*"treat" + 0.015*"greenie"
2018-10-26 14:50:01,375 : INFO : topic #5 (0.143): 0.074*"VGOODREVIEW" + 0.043*"greenies" + 0.042*"GOODREVIEW" + 0.026*"great" + 0.024*"BADREVIEW" + 0.023*"buy" + 0.023*"price" + 0.014*"treat" + 0.012*"find" + 0.011*"like"
2018-10-26 14:50:01,381 : INFO : topic #0 (0.143): 0.038*"VBADREVIEW" + 0.027*"BADREVIEW" + 0.015

2018-10-26 14:50:03,144 : INFO : topic diff=0.072189, rho=0.192450
2018-10-26 14:50:03,156 : INFO : PROGRESS: pass 24, at document #210/630
2018-10-26 14:50:03,356 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:03,363 : INFO : topic #1 (0.143): 0.015*"tub" + 0.010*"senior" + 0.010*"regular" + 0.008*"shipping" + 0.008*"include" + 0.008*"sick" + 0.007*"beg" + 0.007*"quick" + 0.007*"excellent" + 0.007*"formula"
2018-10-26 14:50:03,365 : INFO : topic #6 (0.143): 0.050*"BADREVIEW" + 0.045*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 0.028*"greenies" + 0.022*"one" + 0.021*"teeth" + 0.020*"get" + 0.019*"give" + 0.018*"treat" + 0.015*"greenie"
2018-10-26 14:50:03,371 : INFO : topic #5 (0.143): 0.074*"VGOODREVIEW" + 0.043*"greenies" + 0.042*"GOODREVIEW" + 0.026*"great" + 0.023*"buy" + 0.023*"BADREVIEW" + 0.023*"price" + 0.014*"treat" + 0.012*"find" + 0.011*"like"
2018-10-26 14:50:03,375 : INFO : topic #3 (0.143): 0.021*"VBADREVIEW" + 0.019*"BADREVIEW" + 0.013

2018-10-26 14:50:05,094 : INFO : topic diff=0.069441, rho=0.185695
2018-10-26 14:50:05,107 : INFO : PROGRESS: pass 26, at document #210/630
2018-10-26 14:50:05,315 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:05,322 : INFO : topic #2 (0.143): 0.094*"GOODREVIEW" + 0.051*"VGOODREVIEW" + 0.027*"BADREVIEW" + 0.020*"greenies" + 0.019*"teeth" + 0.017*"get" + 0.016*"treat" + 0.014*"good" + 0.013*"price" + 0.013*"breath"
2018-10-26 14:50:05,324 : INFO : topic #4 (0.143): 0.014*"size" + 0.012*"petite" + 0.011*"box" + 0.010*"crate" + 0.009*"open" + 0.009*"diarrhea" + 0.009*"cost" + 0.008*"order" + 0.007*"amazon" + 0.007*"arrive"
2018-10-26 14:50:05,330 : INFO : topic #1 (0.143): 0.015*"tub" + 0.011*"senior" + 0.010*"regular" + 0.008*"shipping" + 0.008*"include" + 0.008*"sick" + 0.007*"beg" + 0.007*"quick" + 0.007*"excellent" + 0.007*"formula"
2018-10-26 14:50:05,333 : INFO : topic #0 (0.143): 0.038*"VBADREVIEW" + 0.027*"BADREVIEW" + 0.015*"problem" +

2018-10-26 14:50:07,096 : INFO : topic diff=0.066978, rho=0.179605
2018-10-26 14:50:07,108 : INFO : PROGRESS: pass 28, at document #210/630
2018-10-26 14:50:07,305 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:07,312 : INFO : topic #2 (0.143): 0.094*"GOODREVIEW" + 0.051*"VGOODREVIEW" + 0.027*"BADREVIEW" + 0.020*"greenies" + 0.019*"teeth" + 0.017*"get" + 0.016*"treat" + 0.014*"good" + 0.013*"price" + 0.013*"breath"
2018-10-26 14:50:07,315 : INFO : topic #1 (0.143): 0.015*"tub" + 0.011*"senior" + 0.010*"regular" + 0.008*"shipping" + 0.008*"include" + 0.008*"sick" + 0.007*"beg" + 0.007*"quick" + 0.007*"excellent" + 0.007*"formula"
2018-10-26 14:50:07,320 : INFO : topic #4 (0.143): 0.014*"size" + 0.012*"petite" + 0.011*"box" + 0.010*"crate" + 0.009*"open" + 0.009*"diarrhea" + 0.009*"cost" + 0.008*"order" + 0.007*"amazon" + 0.007*"arrive"
2018-10-26 14:50:07,324 : INFO : topic #3 (0.143): 0.021*"VBADREVIEW" + 0.017*"BADREVIEW" + 0.013*"choke" + 0

2018-10-26 14:50:09,071 : INFO : topic diff=0.064746, rho=0.174078
2018-10-26 14:50:09,084 : INFO : PROGRESS: pass 30, at document #210/630
2018-10-26 14:50:09,281 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:09,289 : INFO : topic #6 (0.143): 0.051*"BADREVIEW" + 0.045*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 0.028*"greenies" + 0.022*"one" + 0.020*"teeth" + 0.020*"get" + 0.019*"give" + 0.018*"treat" + 0.014*"greenie"
2018-10-26 14:50:09,292 : INFO : topic #4 (0.143): 0.014*"size" + 0.012*"petite" + 0.011*"box" + 0.010*"crate" + 0.009*"open" + 0.009*"cost" + 0.009*"diarrhea" + 0.008*"order" + 0.007*"amazon" + 0.007*"arrive"
2018-10-26 14:50:09,296 : INFO : topic #0 (0.143): 0.038*"VBADREVIEW" + 0.027*"BADREVIEW" + 0.015*"problem" + 0.012*"cause" + 0.011*"pet" + 0.010*"make" + 0.009*"chew" + 0.008*"owner" + 0.007*"case" + 0.007*"die"
2018-10-26 14:50:09,300 : INFO : topic #5 (0.143): 0.074*"VGOODREVIEW" + 0.043*"greenies" + 0.041*"GOODREVIEW" + 0.

2018-10-26 14:50:11,002 : INFO : topic diff=0.062701, rho=0.169031
2018-10-26 14:50:11,014 : INFO : PROGRESS: pass 32, at document #210/630
2018-10-26 14:50:11,203 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:11,212 : INFO : topic #3 (0.143): 0.021*"VBADREVIEW" + 0.015*"BADREVIEW" + 0.013*"choke" + 0.012*"chew" + 0.011*"eat" + 0.010*"money" + 0.010*"try" + 0.010*"must" + 0.010*"sure" + 0.009*"cost"
2018-10-26 14:50:11,216 : INFO : topic #2 (0.143): 0.095*"GOODREVIEW" + 0.051*"VGOODREVIEW" + 0.027*"BADREVIEW" + 0.020*"greenies" + 0.019*"teeth" + 0.017*"get" + 0.016*"treat" + 0.014*"good" + 0.013*"price" + 0.013*"breath"
2018-10-26 14:50:11,222 : INFO : topic #0 (0.143): 0.038*"VBADREVIEW" + 0.026*"BADREVIEW" + 0.015*"problem" + 0.012*"cause" + 0.011*"pet" + 0.010*"make" + 0.009*"chew" + 0.008*"owner" + 0.007*"case" + 0.007*"die"
2018-10-26 14:50:11,226 : INFO : topic #6 (0.143): 0.051*"BADREVIEW" + 0.045*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 

2018-10-26 14:50:12,943 : INFO : topic diff=0.060821, rho=0.164399
2018-10-26 14:50:12,954 : INFO : PROGRESS: pass 34, at document #210/630
2018-10-26 14:50:13,155 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:13,162 : INFO : topic #0 (0.143): 0.038*"VBADREVIEW" + 0.026*"BADREVIEW" + 0.015*"problem" + 0.012*"cause" + 0.011*"pet" + 0.010*"make" + 0.008*"chew" + 0.008*"owner" + 0.007*"case" + 0.007*"die"
2018-10-26 14:50:13,164 : INFO : topic #2 (0.143): 0.095*"GOODREVIEW" + 0.051*"VGOODREVIEW" + 0.027*"BADREVIEW" + 0.020*"greenies" + 0.019*"teeth" + 0.017*"get" + 0.016*"treat" + 0.014*"good" + 0.013*"price" + 0.013*"breath"
2018-10-26 14:50:13,169 : INFO : topic #6 (0.143): 0.051*"BADREVIEW" + 0.045*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 0.028*"greenies" + 0.022*"one" + 0.020*"teeth" + 0.019*"get" + 0.019*"give" + 0.018*"treat" + 0.014*"greenie"
2018-10-26 14:50:13,172 : INFO : topic #1 (0.143): 0.015*"tub" + 0.011*"senior" + 0.010*"regular" + 

2018-10-26 14:50:14,870 : INFO : topic diff=0.059188, rho=0.160128
2018-10-26 14:50:14,881 : INFO : PROGRESS: pass 36, at document #210/630
2018-10-26 14:50:15,080 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:15,088 : INFO : topic #4 (0.143): 0.013*"size" + 0.011*"petite" + 0.011*"box" + 0.010*"crate" + 0.009*"open" + 0.009*"cost" + 0.009*"diarrhea" + 0.008*"order" + 0.007*"amazon" + 0.007*"arrive"
2018-10-26 14:50:15,091 : INFO : topic #3 (0.143): 0.022*"VBADREVIEW" + 0.014*"BADREVIEW" + 0.014*"choke" + 0.012*"chew" + 0.010*"eat" + 0.010*"money" + 0.010*"must" + 0.010*"try" + 0.010*"sure" + 0.009*"cost"
2018-10-26 14:50:15,095 : INFO : topic #2 (0.143): 0.095*"GOODREVIEW" + 0.051*"VGOODREVIEW" + 0.027*"BADREVIEW" + 0.020*"greenies" + 0.019*"teeth" + 0.017*"get" + 0.016*"treat" + 0.013*"good" + 0.013*"price" + 0.013*"breath"
2018-10-26 14:50:15,099 : INFO : topic #1 (0.143): 0.015*"tub" + 0.011*"senior" + 0.010*"regular" + 0.008*"shipping" 

2018-10-26 14:50:16,836 : INFO : topic diff=0.057520, rho=0.156174
2018-10-26 14:50:16,848 : INFO : PROGRESS: pass 38, at document #210/630
2018-10-26 14:50:17,046 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:17,054 : INFO : topic #0 (0.143): 0.038*"VBADREVIEW" + 0.026*"BADREVIEW" + 0.015*"problem" + 0.012*"cause" + 0.011*"pet" + 0.010*"make" + 0.008*"chew" + 0.008*"owner" + 0.007*"case" + 0.007*"die"
2018-10-26 14:50:17,056 : INFO : topic #2 (0.143): 0.095*"GOODREVIEW" + 0.051*"VGOODREVIEW" + 0.027*"BADREVIEW" + 0.020*"greenies" + 0.019*"teeth" + 0.017*"get" + 0.016*"treat" + 0.013*"good" + 0.013*"price" + 0.013*"breath"
2018-10-26 14:50:17,061 : INFO : topic #4 (0.143): 0.013*"size" + 0.011*"box" + 0.011*"petite" + 0.010*"crate" + 0.009*"open" + 0.009*"cost" + 0.009*"diarrhea" + 0.008*"order" + 0.007*"amazon" + 0.007*"arrive"
2018-10-26 14:50:17,065 : INFO : topic #5 (0.143): 0.075*"VGOODREVIEW" + 0.043*"greenies" + 0.040*"GOODREVIEW" + 0

2018-10-26 14:50:18,738 : INFO : topic diff=0.056084, rho=0.152499
2018-10-26 14:50:18,749 : INFO : PROGRESS: pass 40, at document #210/630
2018-10-26 14:50:18,919 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:18,926 : INFO : topic #5 (0.143): 0.075*"VGOODREVIEW" + 0.043*"greenies" + 0.039*"GOODREVIEW" + 0.027*"great" + 0.024*"price" + 0.024*"buy" + 0.021*"BADREVIEW" + 0.013*"treat" + 0.012*"find" + 0.011*"amazon"
2018-10-26 14:50:18,929 : INFO : topic #3 (0.143): 0.022*"VBADREVIEW" + 0.014*"choke" + 0.012*"BADREVIEW" + 0.011*"chew" + 0.011*"money" + 0.010*"must" + 0.010*"eat" + 0.010*"sure" + 0.010*"try" + 0.009*"cost"
2018-10-26 14:50:18,937 : INFO : topic #6 (0.143): 0.051*"BADREVIEW" + 0.045*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 0.028*"greenies" + 0.022*"one" + 0.020*"teeth" + 0.019*"get" + 0.019*"give" + 0.019*"treat" + 0.014*"greenie"
2018-10-26 14:50:18,940 : INFO : topic #1 (0.143): 0.015*"tub" + 0.011*"senior" + 0.010*"regular" + 0.0

2018-10-26 14:50:20,666 : INFO : topic diff=0.054756, rho=0.149071
2018-10-26 14:50:20,679 : INFO : PROGRESS: pass 42, at document #210/630
2018-10-26 14:50:20,858 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:20,866 : INFO : topic #0 (0.143): 0.038*"VBADREVIEW" + 0.026*"BADREVIEW" + 0.015*"problem" + 0.012*"cause" + 0.011*"pet" + 0.010*"make" + 0.008*"chew" + 0.008*"owner" + 0.007*"case" + 0.007*"die"
2018-10-26 14:50:20,868 : INFO : topic #4 (0.143): 0.013*"size" + 0.011*"box" + 0.011*"petite" + 0.010*"crate" + 0.009*"open" + 0.009*"cost" + 0.009*"diarrhea" + 0.008*"order" + 0.007*"amazon" + 0.007*"arrive"
2018-10-26 14:50:20,872 : INFO : topic #3 (0.143): 0.022*"VBADREVIEW" + 0.014*"choke" + 0.012*"BADREVIEW" + 0.011*"chew" + 0.011*"money" + 0.010*"must" + 0.010*"eat" + 0.010*"sure" + 0.010*"try" + 0.009*"cost"
2018-10-26 14:50:20,878 : INFO : topic #5 (0.143): 0.075*"VGOODREVIEW" + 0.043*"greenies" + 0.039*"GOODREVIEW" + 0.027*"great" + 

2018-10-26 14:50:22,600 : INFO : topic diff=0.053592, rho=0.145865
2018-10-26 14:50:22,612 : INFO : PROGRESS: pass 44, at document #210/630
2018-10-26 14:50:22,814 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:22,821 : INFO : topic #6 (0.143): 0.051*"BADREVIEW" + 0.045*"GOODREVIEW" + 0.038*"VGOODREVIEW" + 0.028*"greenies" + 0.022*"one" + 0.020*"teeth" + 0.019*"get" + 0.019*"give" + 0.019*"treat" + 0.014*"greenie"
2018-10-26 14:50:22,824 : INFO : topic #0 (0.143): 0.038*"VBADREVIEW" + 0.026*"BADREVIEW" + 0.015*"problem" + 0.012*"cause" + 0.011*"pet" + 0.010*"make" + 0.008*"chew" + 0.008*"owner" + 0.007*"case" + 0.007*"die"
2018-10-26 14:50:22,829 : INFO : topic #1 (0.143): 0.015*"tub" + 0.011*"senior" + 0.010*"regular" + 0.008*"shipping" + 0.008*"include" + 0.008*"sick" + 0.007*"beg" + 0.007*"quick" + 0.007*"excellent" + 0.007*"formula"
2018-10-26 14:50:22,832 : INFO : topic #3 (0.143): 0.021*"VBADREVIEW" + 0.014*"choke" + 0.011*"chew" + 0.01

2018-10-26 14:50:24,488 : INFO : topic diff=0.052415, rho=0.142857
2018-10-26 14:50:24,500 : INFO : PROGRESS: pass 46, at document #210/630
2018-10-26 14:50:24,670 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:24,677 : INFO : topic #2 (0.143): 0.095*"GOODREVIEW" + 0.051*"VGOODREVIEW" + 0.027*"BADREVIEW" + 0.020*"greenies" + 0.019*"teeth" + 0.017*"get" + 0.016*"treat" + 0.013*"good" + 0.013*"price" + 0.013*"breath"
2018-10-26 14:50:24,679 : INFO : topic #5 (0.143): 0.076*"VGOODREVIEW" + 0.043*"greenies" + 0.038*"GOODREVIEW" + 0.027*"great" + 0.024*"price" + 0.024*"buy" + 0.020*"BADREVIEW" + 0.013*"treat" + 0.012*"find" + 0.012*"amazon"
2018-10-26 14:50:24,684 : INFO : topic #4 (0.143): 0.012*"size" + 0.011*"box" + 0.011*"petite" + 0.010*"crate" + 0.009*"open" + 0.009*"cost" + 0.009*"diarrhea" + 0.008*"order" + 0.007*"amazon" + 0.007*"arrive"
2018-10-26 14:50:24,689 : INFO : topic #1 (0.143): 0.015*"tub" + 0.011*"senior" + 0.010*"regular" + 0.

2018-10-26 14:50:26,426 : INFO : topic diff=0.051293, rho=0.140028
2018-10-26 14:50:26,438 : INFO : PROGRESS: pass 48, at document #210/630
2018-10-26 14:50:26,630 : INFO : merging changes from 210 documents into a model of 630 documents
2018-10-26 14:50:26,637 : INFO : topic #1 (0.143): 0.015*"tub" + 0.011*"senior" + 0.010*"regular" + 0.008*"shipping" + 0.008*"include" + 0.008*"sick" + 0.007*"beg" + 0.007*"quick" + 0.007*"excellent" + 0.007*"formula"
2018-10-26 14:50:26,641 : INFO : topic #3 (0.143): 0.021*"VBADREVIEW" + 0.014*"choke" + 0.011*"chew" + 0.011*"money" + 0.010*"must" + 0.010*"BADREVIEW" + 0.010*"sure" + 0.010*"eat" + 0.010*"try" + 0.009*"cost"
2018-10-26 14:50:26,644 : INFO : topic #0 (0.143): 0.038*"VBADREVIEW" + 0.026*"BADREVIEW" + 0.015*"problem" + 0.012*"cause" + 0.011*"pet" + 0.010*"make" + 0.008*"chew" + 0.008*"owner" + 0.007*"case" + 0.007*"die"
2018-10-26 14:50:26,649 : INFO : topic #5 (0.143): 0.076*"VGOODREVIEW" + 0.043*"greenies" + 0.038*"GOODREVIEW" + 0.027*"g

2018-10-26 14:50:28,377 : INFO : topic diff=0.050295, rho=0.137361
2018-10-26 14:50:28,413 : INFO : saving LdaState object under ../models/valence_outputs/B0026RQTGE_7_50_2_1.0.state, separately None
2018-10-26 14:50:28,419 : INFO : saved ../models/valence_outputs/B0026RQTGE_7_50_2_1.0.state
2018-10-26 14:50:28,424 : INFO : saving LdaModel object under ../models/valence_outputs/B0026RQTGE_7_50_2_1.0, separately ['expElogbeta', 'sstats']
2018-10-26 14:50:28,425 : INFO : storing np array 'expElogbeta' to ../models/valence_outputs/B0026RQTGE_7_50_2_1.0.expElogbeta.npy
2018-10-26 14:50:28,431 : INFO : not storing attribute state
2018-10-26 14:50:28,436 : INFO : not storing attribute id2word
2018-10-26 14:50:28,437 : INFO : not storing attribute dispatcher
2018-10-26 14:50:28,441 : INFO : saved ../models/valence_outputs/B0026RQTGE_7_50_2_1.0
2018-10-26 14:50:28,449 : INFO : using ParallelWordOccurrenceAccumulator(processes=3, batch_size=64) to estimate probabilities from sliding windows
201

In [33]:
# View the `output` results table (defined in an earlier cell, not shown here).
# Each row carries a model's settings (num_topics, passes, top_n removed,
# n_above threshold) alongside its coherence, perplexity and top topic terms,
# so candidate models can be compared before deciding whether to save one.
output

Unnamed: 0,chunk,coherence,final perplexity,final topic diff,n_above threshold,num_topics,passes,per-word bounds,perplexity,perplexity decreasing,product,top_n removed,topic 0,topic 1,topic 2,topic 3,topic 4,topic 5,topic 6,topic diff
0,210.0,0.410135,90.4,0.055856,1.0,5,50,"{0: -7.275, 1: -6.796, 2: -6.71, 3: -6.662, 4:...","{0: 154.9, 1: 111.1, 2: 104.7, 3: 101.3, 4: 99...",True,B0026RQTGE,2,"[(BADREVIEW, 0.037841577), (VBADREVIEW, 0.0264...","[(BADREVIEW, 0.0133879), (tub, 0.010434948), (...","[(GOODREVIEW, 0.07489858), (VGOODREVIEW, 0.057...","[(BADREVIEW, 0.0377436), (GOODREVIEW, 0.015770...","[(petco, 0.009880665), (could, 0.007227627), (...",,,"{0: 1.062948, 1: 0.42708, 2: 0.304938, 3: 0.24..."
0,210.0,0.374401,96.4,0.044357,1.0,7,50,"{0: -7.588, 1: -6.918, 2: -6.798, 3: -6.743, 4...","{0: 192.4, 1: 121.0, 2: 111.3, 3: 107.1, 4: 10...",True,B0026RQTGE,2,"[(VBADREVIEW, 0.04082529), (BADREVIEW, 0.02648...","[(tub, 0.0150467), (senior, 0.011487498), (reg...","[(GOODREVIEW, 0.09583513), (VGOODREVIEW, 0.051...","[(VBADREVIEW, 0.021182273), (choke, 0.01510983...","[(box, 0.01150811), (size, 0.011173038), (crat...","[(VGOODREVIEW, 0.07625106), (greenies, 0.04323...","[(BADREVIEW, 0.05165809), (GOODREVIEW, 0.04479...","{0: 1.191916, 1: 0.431534, 2: 0.288143, 3: 0.2..."


In [25]:
# Run this cell only when the newest `output` improves on the stored results.
# save_best hands back the updated results frame, which replaces
# valence_gs_results and is then written to the interim CSV.
# NOTE(review): this cell rebinds valence_gs_results from its own previous
# value, so running it twice feeds the already-updated frame back in — confirm
# that save_best tolerates that.
valence_gs_results = lda_funcs.save_best(
    output,
    valence_gs_results,
    save_path="valence_outputs",
)
valence_gs_results.to_csv('../data/interim/valence_gs_results.csv')

2018-10-26 14:26:39,862 : INFO : loading LdaModel object from ../models/valence_outputs/B0013A0QXC_10_100_10_0.5
2018-10-26 14:26:39,867 : INFO : loading expElogbeta from ../models/valence_outputs/B0013A0QXC_10_100_10_0.5.expElogbeta.npy with mmap=None
2018-10-26 14:26:39,874 : INFO : setting ignored attribute state to None
2018-10-26 14:26:39,876 : INFO : setting ignored attribute id2word to None
2018-10-26 14:26:39,877 : INFO : setting ignored attribute dispatcher to None
2018-10-26 14:26:39,878 : INFO : loaded ../models/valence_outputs/B0013A0QXC_10_100_10_0.5
2018-10-26 14:26:39,880 : INFO : loading LdaState object from ../models/valence_outputs/B0013A0QXC_10_100_10_0.5.state
2018-10-26 14:26:39,888 : INFO : loaded ../models/valence_outputs/B0013A0QXC_10_100_10_0.5.state
2018-10-26 14:26:39,896 : INFO : saving LdaState object under ../models/valence_outputs/final_models/B0013A0QXC_10_100_10_0.5.state, separately None
2018-10-26 14:26:39,902 : INFO : saved ../models/valence_outputs/

best results for product B0013A0QXC:
index                                                                    0
chunk                                                              180.667
coherence                                                          0.45376
final perplexity                                                      19.3
final topic diff                                                  0.043849
n_above threshold                                                      0.5
num_topics                                                              10
passes                                                                 100
per-word bounds          {0: -8.802, 1: -5.601, 2: -5.112, 3: -4.886, 4...
perplexity               {0: 446.4, 1: 48.5, 2: 34.6, 3: 29.6, 4: 26.8,...
perplexity decreasing                                                 True
product                                                         B0013A0QXC
top_n removed                                                  

In [34]:
# Finally, collect the best model for every product into one results table.
# Start from an empty frame with the expected result columns; for each product
# in top_ten, hand that product's grid-search rows to lda_funcs.save_best,
# which returns the updated final-results frame. The finished table is then
# written to the processed-data CSV.
valence_final_results = pd.DataFrame(columns=['product', 'num_topics', 'chunk', 
                                              'passes', 'per-word bounds', 
                                              'perplexity', 'topic diff',
                                              'final perplexity', 'final topic diff', 
                                              'perplexity decreasing', 'coherence', 
                                              'top_n removed', 'n_above threshold'])

for product in top_ten:
    # Use a dedicated name for the per-product slice instead of `output`:
    # `output` holds the latest grid-search results that other cells display
    # and save, and overwriting it here would make re-running those cells
    # operate on the last product's slice instead.
    product_results = valence_gs_results[valence_gs_results['product'] == product]
    valence_final_results = lda_funcs.save_best(product_results, valence_final_results,
                                                save_path='valence_outputs')

valence_final_results.to_csv('../data/processed/valence_final_results.csv')

2018-10-26 14:51:00,420 : INFO : loading LdaModel object from ../models/valence_outputs/B002IEZJMA_8_80_2_0.5
2018-10-26 14:51:00,427 : INFO : loading expElogbeta from ../models/valence_outputs/B002IEZJMA_8_80_2_0.5.expElogbeta.npy with mmap=None
2018-10-26 14:51:00,433 : INFO : setting ignored attribute dispatcher to None
2018-10-26 14:51:00,434 : INFO : setting ignored attribute state to None
2018-10-26 14:51:00,438 : INFO : setting ignored attribute id2word to None
2018-10-26 14:51:00,440 : INFO : loaded ../models/valence_outputs/B002IEZJMA_8_80_2_0.5
2018-10-26 14:51:00,441 : INFO : loading LdaState object from ../models/valence_outputs/B002IEZJMA_8_80_2_0.5.state
2018-10-26 14:51:00,447 : INFO : loaded ../models/valence_outputs/B002IEZJMA_8_80_2_0.5.state
2018-10-26 14:51:00,454 : INFO : saving LdaState object under ../models/valence_outputs/final_models/B002IEZJMA_8_80_2_0.5.state, separately None
2018-10-26 14:51:00,462 : INFO : saved ../models/valence_outputs/final_models/B002I

best results for product B002IEZJMA:
level_0                                                                  0
Unnamed: 0                                                               0
product                                                         B002IEZJMA
num_topics                                                               8
chunk                                                              162.333
passes                                                                  80
per-word bounds          {0: -7.643, 1: -5.705, 2: -5.012, 3: -4.66, 4:...
perplexity               {0: 199.9, 1: 52.2, 2: 32.3, 3: 25.3, 4: 23.6,...
topic diff               {0: 0.605257, 1: 0.499302, 2: 0.435588, 3: 0.3...
final perplexity                                                      15.8
final topic diff                                                  0.055396
perplexity decreasing                                                 True
coherence                                                      

2018-10-26 14:51:00,673 : INFO : loading LdaModel object from ../models/valence_outputs/B005ZBZLT4_9_50_2_1.0
2018-10-26 14:51:00,678 : INFO : loading expElogbeta from ../models/valence_outputs/B005ZBZLT4_9_50_2_1.0.expElogbeta.npy with mmap=None
2018-10-26 14:51:00,682 : INFO : setting ignored attribute state to None
2018-10-26 14:51:00,684 : INFO : setting ignored attribute id2word to None
2018-10-26 14:51:00,685 : INFO : setting ignored attribute dispatcher to None
2018-10-26 14:51:00,687 : INFO : loaded ../models/valence_outputs/B005ZBZLT4_9_50_2_1.0
2018-10-26 14:51:00,689 : INFO : loading LdaState object from ../models/valence_outputs/B005ZBZLT4_9_50_2_1.0.state
2018-10-26 14:51:00,696 : INFO : loaded ../models/valence_outputs/B005ZBZLT4_9_50_2_1.0.state
2018-10-26 14:51:00,702 : INFO : saving LdaState object under ../models/valence_outputs/final_models/B005ZBZLT4_9_50_2_1.0.state, separately None
2018-10-26 14:51:00,705 : INFO : saved ../models/valence_outputs/final_models/B005Z

level_0                                                                 10
Unnamed: 0                                                             NaN
product                                                         B005ZBZLT4
num_topics                                                               9
chunk                                                              168.667
passes                                                                  50
per-word bounds          {0: -6.778, 1: -5.47, 2: -5.059, 3: -4.821, 4:...
perplexity               {0: 109.7, 1: 44.3, 2: 33.3, 3: 28.3, 4: 26.4,...
topic diff               {0: 0.818458, 1: 0.53425, 2: 0.434728, 3: 0.39...
final perplexity                                                      21.5
final topic diff                                                  0.117337
perplexity decreasing                                                 True
coherence                                                         0.470474
top_n removed            

2018-10-26 14:51:00,924 : INFO : loading LdaModel object from ../models/valence_outputs/B005K4Q34S_11_80_2_0.5
2018-10-26 14:51:00,929 : INFO : loading expElogbeta from ../models/valence_outputs/B005K4Q34S_11_80_2_0.5.expElogbeta.npy with mmap=None
2018-10-26 14:51:00,934 : INFO : setting ignored attribute state to None
2018-10-26 14:51:00,935 : INFO : setting ignored attribute id2word to None
2018-10-26 14:51:00,937 : INFO : setting ignored attribute dispatcher to None
2018-10-26 14:51:00,939 : INFO : loaded ../models/valence_outputs/B005K4Q34S_11_80_2_0.5
2018-10-26 14:51:00,943 : INFO : loading LdaState object from ../models/valence_outputs/B005K4Q34S_11_80_2_0.5.state
2018-10-26 14:51:00,947 : INFO : loaded ../models/valence_outputs/B005K4Q34S_11_80_2_0.5.state
2018-10-26 14:51:00,954 : INFO : saving LdaState object under ../models/valence_outputs/final_models/B005K4Q34S_11_80_2_0.5.state, separately None
2018-10-26 14:51:00,957 : INFO : saved ../models/valence_outputs/final_models

level_0                                                                 11
Unnamed: 0                                                             NaN
product                                                         B005K4Q34S
num_topics                                                              11
chunk                                                              180.333
passes                                                                  80
per-word bounds          {0: -8.623, 1: -4.717, 2: -4.429, 3: -4.335, 4...
perplexity               {0: 394.2, 1: 26.3, 2: 21.5, 3: 20.2, 4: 19.5,...
topic diff               {0: 0.813712, 1: 0.601044, 2: 0.494228, 3: 0.4...
final perplexity                                                      16.5
final topic diff                                                  0.060456
perplexity decreasing                                                 True
coherence                                                         0.498201
top_n removed            

2018-10-26 14:51:01,126 : INFO : saved ../models/valence_outputs/final_models/B0013A0QXC_10_100_10_0.5.state
2018-10-26 14:51:01,130 : INFO : saving LdaModel object under ../models/valence_outputs/final_models/B0013A0QXC_10_100_10_0.5, separately ['expElogbeta', 'sstats']
2018-10-26 14:51:01,133 : INFO : storing np array 'expElogbeta' to ../models/valence_outputs/final_models/B0013A0QXC_10_100_10_0.5.expElogbeta.npy
2018-10-26 14:51:01,141 : INFO : not storing attribute state
2018-10-26 14:51:01,144 : INFO : not storing attribute id2word
2018-10-26 14:51:01,147 : INFO : not storing attribute dispatcher
2018-10-26 14:51:01,151 : INFO : saved ../models/valence_outputs/final_models/B0013A0QXC_10_100_10_0.5
2018-10-26 14:51:01,192 : INFO : loading LdaModel object from ../models/valence_outputs/B000NMJWZO_6_80_2_0.5
2018-10-26 14:51:01,196 : INFO : loading expElogbeta from ../models/valence_outputs/B000NMJWZO_6_80_2_0.5.expElogbeta.npy with mmap=None
2018-10-26 14:51:01,200 : INFO : setting

Final model saved for product B0013A0QXC with 10 topics over 100 passes, removing top 10 tokens and token review threshold 0.5.
best results for product B000NMJWZO:
level_0                                                                  6
Unnamed: 0                                                               6
product                                                         B000NMJWZO
num_topics                                                               6
chunk                                                              180.667
passes                                                                  80
per-word bounds          {0: -6.93, 1: -5.465, 2: -5.134, 3: -5.011, 4:...
perplexity               {0: 122.0, 1: 44.2, 2: 35.1, 3: 32.3, 4: 30.3,...
topic diff               {0: 1.164703, 1: 0.755535, 2: 0.594189, 3: 0.4...
final perplexity                                                      28.6
final topic diff                                                  0.067313
perplexity

2018-10-26 14:51:01,403 : INFO : loading LdaModel object from ../models/valence_outputs/B001EO5Q64_6_80_2_1.0
2018-10-26 14:51:01,411 : INFO : loading expElogbeta from ../models/valence_outputs/B001EO5Q64_6_80_2_1.0.expElogbeta.npy with mmap=None
2018-10-26 14:51:01,415 : INFO : setting ignored attribute state to None
2018-10-26 14:51:01,416 : INFO : setting ignored attribute id2word to None
2018-10-26 14:51:01,417 : INFO : setting ignored attribute dispatcher to None
2018-10-26 14:51:01,419 : INFO : loaded ../models/valence_outputs/B001EO5Q64_6_80_2_1.0
2018-10-26 14:51:01,424 : INFO : loading LdaState object from ../models/valence_outputs/B001EO5Q64_6_80_2_1.0.state
2018-10-26 14:51:01,432 : INFO : loaded ../models/valence_outputs/B001EO5Q64_6_80_2_1.0.state
2018-10-26 14:51:01,440 : INFO : saving LdaState object under ../models/valence_outputs/final_models/B001EO5Q64_6_80_2_1.0.state, separately None
2018-10-26 14:51:01,445 : INFO : saved ../models/valence_outputs/final_models/B001E

Final model saved for product B001EO5Q64 with 6 topics over 80 passes, removing top 2 tokens and token review threshold 1.0.
best results for product B0026RQTGE:
level_0                                                                  9
Unnamed: 0                                                               9
product                                                         B0026RQTGE
num_topics                                                               6
chunk                                                                  210
passes                                                                  50
per-word bounds          {0: -7.439, 1: -6.873, 2: -6.77, 3: -6.717, 4:...
perplexity               {0: 173.5, 1: 117.2, 2: 109.1, 3: 105.2, 4: 10...
topic diff               {0: 1.151574, 1: 0.438448, 2: 0.31331, 3: 0.23...
final perplexity                                                     111.7
final topic diff                                                  0.033277
perplexity de

In [35]:
# LOAD RESULTS
# Read the saved final results back from disk, key the rows by product, and
# show each product's coherence next to the hyperparameters that produced it.
valence_final_results = (
    pd.read_csv('../data/processed/valence_final_results.csv')
    .set_index('product')
)
valence_final_results[['coherence', 'num_topics', 'passes', 'top_n removed', 'n_above threshold']]

Unnamed: 0_level_0,coherence,num_topics,passes,top_n removed,n_above threshold
product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
B002IEZJMA,0.479089,8,80,2,0.5
B006MONQMC,0.492218,10,50,2,1.0
B005ZBZLT4,0.470474,9,50,2,1.0
B003GTR8IO,0.488773,12,50,2,1.0
B005K4Q34S,0.498201,11,80,2,0.5
B0013A0QXC,0.45376,10,100,10,0.5
B000NMJWZO,0.49467,6,80,2,0.5
B000KV61FC,0.490179,12,80,10,0.5
B001EO5Q64,0.518175,6,80,2,1.0
B0026RQTGE,0.463676,6,50,2,1.0
