In [1]:
import os
import pickle
import numpy as np
from prettytable import PrettyTable
from collections import Counter

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
def print_topics(H, word_list, n_words=15, n_topics='all', max_width=80):
    """Print the highest-weighted words of each topic and tally them.

    Parameters
    ----------
    H : sequence of 1-D weight vectors (e.g. a 2-D array)
        One row per topic. Each row is indexed in parallel with
        ``word_list``: position ``j`` of a row is the weight of
        ``word_list[j]``.
    word_list : sequence of str
        Vocabulary terms.
    n_words : int, default 15
        Number of top-weighted terms shown per topic. ``0`` shows none.
    n_topics : int or 'all', default 'all'
        Number of leading rows of ``H`` to show. Values larger than
        ``len(H)`` are clamped to ``len(H)``.
    max_width : int, default 80
        Maximum column width handed to the PrettyTable.

    Returns
    -------
    collections.Counter
        For every displayed term, the number of times it appeared across
        the displayed topics.
    """
    pt = PrettyTable(['Topic', 'Words'])
    pt.valign['Topic'] = 'm'
    pt.align['Words'] = 'l'
    pt.max_width = max_width

    total = len(H)
    # Clamp so an over-large request prints what exists instead of raising
    # IndexError halfway through building the table.
    nt = total if n_topics == 'all' else min(n_topics, total)

    # Loop-invariant: convert the vocabulary to an array once.
    vocab = np.asarray(word_list)

    C = Counter()
    for ix in range(nt):
        # Reverse first, then truncate: same ordering as taking the last
        # n_words of the ascending sort and reversing, but n_words == 0
        # yields an empty selection instead of the `[-0:]` "everything" slice.
        top = np.argsort(H[ix])[::-1][:n_words]
        words = [str(w) for w in vocab[top]]
        pt.add_row([ix, ', '.join(words)])
        # Blank row acts as a visual separator between topics.
        pt.add_row(['', ''])
        # Count the terms themselves rather than re-splitting the joined
        # string, so a term containing ', ' is not broken apart.
        C.update(words)
    print(pt)

    return C

In [4]:
# Directory holding the pickled artifacts, and the two files read from it.
pickle_dir = 'pickles/'
feat_pickle, topic_pickle = (
    os.path.join(pickle_dir, fname) for fname in ('features.pkl', 'topics.pkl')
)

In [5]:
# Load the object pickled at `feat_pickle` into `features`.
# NOTE: pickle.load can execute arbitrary code during deserialization --
# only load pickle files from a trusted source.
with open(feat_pickle, 'rb') as f:
    features = pickle.load(f)

In [6]:
# Load the object pickled at `topic_pickle` into `topics`.
# NOTE: pickle.load can execute arbitrary code during deserialization --
# only load pickle files from a trusted source.
with open(topic_pickle, 'rb') as f:
    topics = pickle.load(f)

In [7]:
# Print the top terms of every row of topics['H'], looked up in the
# 'directions_vocab' entry of `features`, and keep the returned Counter
# of how often each printed term occurred.
word_counts = print_topics(topics['H'], features['directions_vocab'])

+-------+----------------------------------------------------------------------------------+
| Topic | Words                                                                            |
+-------+----------------------------------------------------------------------------------+
|       | baking powder, flour baking, flour baking powder, baking soda, comes clean, sift |
|   0   | flour, inserted center, preheat 350, flour mixture, sift flour baking, center    |
|       | comes, powder baking, powder baking soda, baking powder baking, center comes     |
|       | clean                                                                            |
|       |                                                                                  |
|       | 100 percent, percent power, 100 percent power, remove uncover, release steam,    |
|   1   | prick plastic, microwave plastic, microwave plastic wrap, plastic release,       |
|       | plastic release steam, tightly microwave, prick plastic rele

In [8]:
# Display the ten entries with the highest counts in the Counter returned
# by print_topics.
word_counts.most_common(10)

[('bread crumbs', 3),
 ('gas grill', 2),
 ('lightly browned', 2),
 ('onion garlic', 2),
 ('strain chilled', 2),
 ('taste adjust', 2),
 ('al dente', 2),
 ('let sit', 2),
 ('parmesan cheese', 2),
 ('glass garnish', 2)]