In [43]:
import pandas as pd
import pyarrow
import os
import numpy as np
import gensim
import nltk
import pickle
import ipdb
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from wordcloud import WordCloud
from tqdm import tqdm
from pprint import pprint
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer




In [126]:
pd.options.display.max_rows = 500
GEN_WORDCLOUD = False
DATA_DIR = "bz_desc_data/"
WORDCLOUD_DIR = "wordcloud/"
WORDCLOUD_TOPICS_DIR = "wordcloud_topics/"

nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')


[nltk_data] Downloading package stopwords to /home/fanpu/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/fanpu/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/fanpu/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [45]:
def get_available_tickers():
    files = os.listdir(DATA_DIR)
    return [file.rstrip(".parquet") for file in files]

In [46]:
tickers = get_available_tickers()

In [55]:
stop_words = stopwords.words('english')
stop_words.extend(
    ['product', 'company', 'include', 'service', 'market', 'business', 'u', 'customer', 'financial', 'also', 'state', 'may', 'use', 'provide', 'new', 'year', 'result', 'cost', 'certain']

#     ['business', 'also', 'financial', 'market', 'u', 'may', 'company', 'include', 
#      'service', 'provide', 'customers', 'cost', 'operate', 'management', 'risk', 'report', 'information',
#      'result', 'approximately', 'require', 'use', 'us', 'relate', 'certain', 'operation', 'system', 'regulation', 'state', 'new',
#      'program', 'segment', 'base', 'sale', 'regulatory', 'million', 'year', 'time', 'subject', 'continue', 'change', 'well', 
#      'december', 'requirement', 'addition', 'term', 'could', 'increase', 'plan', 'revenue', 'believe', 'affect', 'product', 'customer'
#     ]
)
lemmatizer = WordNetLemmatizer()

def save_wordcloud(ticker, tokens):
    # Create a WordCloud object
    wordcloud = WordCloud(background_color="white", max_words=5000, contour_width=3, contour_color='steelblue')
    # Generate a word cloud
    wordcloud.generate(",".join(tokens))
    # Visualize the word cloud
    wordcloud.to_file(f"{WORDCLOUD_DIR}/{ticker}.png")
    
def wordlist_for_ticker(ticker):
    """
    Returns:
        tokens, is_malformed
    """
    data = pd.read_parquet(f'{DATA_DIR}/{ticker}.parquet', engine='pyarrow')
    if len(data["word_list"]) == 0:
        return [], True
    tokens = np.concatenate(data["word_list"])
    if len(tokens) < 1000:
        return [], True
    def remove_numbers(tokens):
        return list(filter(lambda x : not x.isnumeric(), tokens))
    def remove_additional_stop_words(tokens):
        return list(filter(lambda x : not x in stop_words, tokens))
    def lemmatize(tokens):
        return [lemmatizer.lemmatize(token) for token in tokens]
    tokens = remove_numbers(tokens)
    tokens = lemmatize(tokens)
    tokens = remove_additional_stop_words(tokens)
    if GEN_WORDCLOUD:
        save_wordcloud(ticker, tokens)
    return tokens, False

In [56]:
clean_tickers = []
ticker_wordlist = {}
for ticker in tqdm(tickers):
    tokens, is_malformed = wordlist_for_ticker(ticker)
    if not is_malformed:
        clean_tickers.append(ticker)
        ticker_wordlist[ticker] = tokens
    else:
        # print(ticker, "is malformed, discarding")
        pass

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 726/726 [01:29<00:00,  8.15it/s]


In [57]:
len(tickers), len(clean_tickers)

(726, 536)

In [109]:
# Create Dictionary
id2word = corpora.Dictionary(ticker_wordlist.values())

# Term Document Frequency
corpus = [id2word.doc2bow(ticker_wordlist[ticker]) for ticker in clean_tickers]

In [115]:
# number of topics
num_topics = 50
# Build LDA model
lda_model = gensim.models.LdaMulticore(corpus=corpus,
                                       id2word=id2word,
                                       num_topics=num_topics)
# Print the Keyword in the top topics
pprint(lda_model.print_topics(num_topics=num_topics))
doc_lda = lda_model[corpus]

[(0,
  '0.005*"sale" + 0.005*"information" + 0.004*"store" + 0.004*"program" + '
  '0.004*"operation" + 0.004*"regulation" + 0.004*"report" + 0.004*"operate" + '
  '0.003*"segment" + 0.003*"base"'),
 (1,
  '0.004*"offer" + 0.004*"risk" + 0.004*"program" + 0.004*"information" + '
  '0.004*"sale" + 0.004*"report" + 0.003*"technology" + 0.003*"well" + '
  '0.003*"regulation" + 0.003*"segment"'),
 (2,
  '0.005*"management" + 0.004*"operation" + 0.004*"sale" + 0.004*"information" '
  '+ 0.003*"program" + 0.003*"million" + 0.003*"report" + 0.003*"regulation" + '
  '0.003*"offer" + 0.003*"risk"'),
 (3,
  '0.005*"operation" + 0.005*"operate" + 0.004*"base" + 0.004*"information" + '
  '0.004*"management" + 0.004*"continue" + 0.004*"system" + 0.004*"relate" + '
  '0.003*"sale" + 0.003*"could"'),
 (4,
  '0.005*"sale" + 0.004*"management" + 0.004*"risk" + 0.004*"segment" + '
  '0.004*"information" + 0.004*"report" + 0.004*"operate" + 0.003*"well" + '
  '0.003*"store" + 0.003*"million"'),
 (5,
  '0

In [85]:
import pyLDAvis
import pyLDAvis.gensim_models as gensim_models
import pickle 
# Visualize the topics
pyLDAvis.enable_notebook()
LDAvis_data_filepath = os.path.join('./results/ldavis_prepared_'+str(num_topics))
# # this is a bit time consuming - make the if statement True
# # if you want to execute visualization prep yourself
if 1 == 1:
    LDAvis_prepared = gensim_models.prepare(lda_model, corpus, id2word)
    with open(LDAvis_data_filepath, 'wb') as f:
        pickle.dump(LDAvis_prepared, f)
# load the pre-prepared pyLDAvis data from disk
with open(LDAvis_data_filepath, 'rb') as f:
    LDAvis_prepared = pickle.load(f)
pyLDAvis.save_html(LDAvis_prepared, './results/ldavis_prepared_'+ str(num_topics) +'.html')
LDAvis_prepared


  by='saliency', ascending=False).head(R).drop('saliency', 1)


In [94]:
len(corpus), len(clean_tickers)

(536, 536)

In [129]:
lda_model.show_topics(num_topics)

[(0,
  '0.005*"sale" + 0.005*"information" + 0.004*"store" + 0.004*"program" + 0.004*"operation" + 0.004*"regulation" + 0.004*"report" + 0.004*"operate" + 0.003*"segment" + 0.003*"base"'),
 (1,
  '0.004*"offer" + 0.004*"risk" + 0.004*"program" + 0.004*"information" + 0.004*"sale" + 0.004*"report" + 0.003*"technology" + 0.003*"well" + 0.003*"regulation" + 0.003*"segment"'),
 (2,
  '0.005*"management" + 0.004*"operation" + 0.004*"sale" + 0.004*"information" + 0.003*"program" + 0.003*"million" + 0.003*"report" + 0.003*"regulation" + 0.003*"offer" + 0.003*"risk"'),
 (3,
  '0.005*"operation" + 0.005*"operate" + 0.004*"base" + 0.004*"information" + 0.004*"management" + 0.004*"continue" + 0.004*"system" + 0.004*"relate" + 0.003*"sale" + 0.003*"could"'),
 (4,
  '0.005*"sale" + 0.004*"management" + 0.004*"risk" + 0.004*"segment" + 0.004*"information" + 0.004*"report" + 0.004*"operate" + 0.003*"well" + 0.003*"store" + 0.003*"million"'),
 (5,
  '0.006*"operation" + 0.005*"sale" + 0.004*"informati

In [144]:
lda_model.show_topics(num_topics)[0][1]
lda_model.get_topic_terms(topicid=0)

[(751, 0.00538095),
 (428, 0.0045953114),
 (817, 0.0042803967),
 (655, 0.0041637463),
 (575, 0.004079185),
 (710, 0.0040566637),
 (726, 0.0038943756),
 (574, 0.0038493956),
 (766, 0.0032444722),
 (86, 0.0031526014)]

In [148]:
def visualize_topics(topic_id):
    topic_terms = lda_model.get_topic_terms(topicid=topic_id, topn=30)
    freq_dict = {}
    for word_id, freq in topic_terms:
        freq_dict[id2word[word_id]] = freq
        
    wordcloud = WordCloud(background_color="white", max_words=5000, contour_width=3, contour_color='steelblue')
    wordcloud.generate_from_frequencies(freq_dict)
    # Visualize the word cloud
    # wordcloud.to_image()
    wordcloud.to_file(f"{WORDCLOUD_TOPICS_DIR}/{topic_id}.png")
    
for topic_id in range(num_topics):
    visualize_topics(topic_id)

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transpose

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  orientation = Image.ROTATE_90
  box_size = draw.textsize(word, font=transposed_font)
  Image.ROTATE_90)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = Image.ROTATE_90
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size

  orientation = Image.ROTATE_90
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.tex

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None els

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transpose

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  bo

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = Image.ROTATE_90
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = Image.ROTATE_90
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  orientation = (Image.ROTATE_90 if orientation is None else
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transp

  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size = draw.textsize(word, font=transposed_font)
  box_size

In [153]:
def topics_for_ticker(ticker, ticker_index):
    print(ticker)
    topics = doc_lda[ticker_index]
    # Sort by frequency
    topics = sorted(topics, key = lambda x : x[1], reverse=True)
    return topics
    
    
tickers_and_topics = {}
for index, ticker in enumerate(clean_tickers):
    print("Ticker", ticker)
    topics = topics_for_ticker(ticker, index)
    print("Topics", topics[:5])
    tickers_and_topics[ticker] = topics

with open('tickers_and_topics.pkl', 'wb') as f:
    pickle.dump(tickers_and_topics, f)


    

Ticker CAH
CAH
Topics [(3, 0.34416974), (30, 0.14602631), (47, 0.07745895), (17, 0.054704323), (28, 0.044807393)]
Ticker FLS
FLS
Topics [(34, 0.15019295), (32, 0.095036276), (24, 0.0907074), (27, 0.08565929), (5, 0.082257345)]
Ticker HBAN
HBAN
Topics [(21, 0.8796873), (16, 0.1189329)]
Ticker TSLA
TSLA
Topics [(35, 0.38598737), (44, 0.24209556), (13, 0.13674687), (19, 0.03620344), (11, 0.03221067)]
Ticker EMN
EMN
Topics [(45, 0.40489975), (48, 0.1822674), (35, 0.09805401), (34, 0.043634426), (22, 0.03665843)]
Ticker HLT
HLT
Topics [(9, 0.70488447), (0, 0.075089775), (18, 0.06816977), (48, 0.041182492), (21, 0.016095016)]
Ticker ETN
ETN
Topics [(24, 0.4821354), (35, 0.29704514), (45, 0.14261487), (3, 0.031256173), (49, 0.0118473)]
Ticker VRSK
VRSK
Topics [(1, 0.4205645), (5, 0.1362908), (17, 0.0644071), (19, 0.043782502), (21, 0.03818069)]
Ticker NLSN
NLSN
Topics [(36, 0.2064993), (24, 0.19451758), (9, 0.071239285), (14, 0.06828498), (1, 0.065455645)]
Ticker AWK
AWK
Topics [(34, 0.196043

Topics [(19, 0.37930006), (39, 0.24949035), (9, 0.10446195), (14, 0.035898242), (24, 0.032928016)]
Ticker EQIX
EQIX
Topics [(5, 0.2721256), (19, 0.20634495), (44, 0.15090893), (39, 0.111410275), (24, 0.06768079)]
Ticker SRE
SRE
Topics [(13, 0.97577333), (38, 0.013307728)]
Ticker INTU
INTU
Topics [(5, 0.2660641), (25, 0.19292545), (14, 0.09986668), (21, 0.08770525), (19, 0.08535608)]
Ticker XEL
XEL
Topics [(13, 0.70458114), (34, 0.20953926), (9, 0.040911335), (26, 0.016326187), (29, 0.011493682)]
Ticker SBUX
SBUX
Topics [(48, 0.8892307), (22, 0.06743604), (0, 0.015758157), (9, 0.010035177)]
Ticker EMR
EMR
Topics [(35, 0.18569063), (24, 0.14932516), (38, 0.1376105), (33, 0.08203736), (34, 0.081429034)]
Ticker GL
GL
Topics [(16, 0.29897952), (29, 0.23630695), (28, 0.17696144), (2, 0.06680227), (21, 0.062480103)]
Ticker REGN
REGN
Topics [(20, 0.3995804), (3, 0.18435796), (49, 0.12408247), (47, 0.09726692), (7, 0.05030414)]
Ticker AGN
AGN
Topics [(47, 0.24179904), (3, 0.23873141), (1, 0.185

Topics [(19, 0.22605534), (5, 0.19914007), (39, 0.16714852), (42, 0.09637416), (36, 0.062372386)]
Ticker TT
TT
Topics [(32, 0.15528518), (34, 0.1284972), (27, 0.08592415), (46, 0.06440272), (48, 0.047457322)]
Ticker AFL
AFL
Topics [(17, 0.46600693), (16, 0.18733476), (29, 0.13325362), (5, 0.060120467), (10, 0.03532799)]
Ticker AKAM
AKAM
Topics [(19, 0.2830736), (36, 0.20905474), (39, 0.19335605), (14, 0.06421265), (42, 0.038514607)]
Ticker ANET
ANET
Topics [(19, 0.43815756), (39, 0.21996693), (44, 0.16257952), (48, 0.06584543), (14, 0.053416017)]
Ticker NSM
NSM
Topics [(39, 0.12642172), (24, 0.087472975), (32, 0.08399575), (35, 0.0703936), (34, 0.054852694)]
Ticker WYNN
WYNN
Topics [(9, 0.41715163), (40, 0.23478349), (27, 0.13913022), (41, 0.07305333), (7, 0.04064933)]
Ticker MCHP
MCHP
Topics [(39, 0.23634283), (42, 0.13814627), (19, 0.12778924), (32, 0.07410527), (24, 0.056887552)]
Ticker PAYC
PAYC
Topics [(19, 0.25144193), (5, 0.19848238), (33, 0.078270055), (21, 0.065165296), (35, 0

Topics [(33, 0.21034774), (48, 0.16232517), (24, 0.06347333), (9, 0.04979572), (8, 0.04076048)]
Ticker CI
CI
Topics [(30, 0.1728202), (17, 0.14612715), (20, 0.11568456), (1, 0.06441954), (21, 0.06023208)]
Ticker TMUS
TMUS
Topics [(39, 0.33750716), (18, 0.100858636), (14, 0.08925786), (8, 0.08508106), (29, 0.082887575)]
Ticker APOL
APOL
Topics [(8, 0.4992266), (7, 0.112378836), (21, 0.10112896), (20, 0.069015756), (23, 0.0344494)]
Ticker JBL
JBL
Topics [(32, 0.14130414), (39, 0.12445107), (33, 0.10944037), (35, 0.1002036), (41, 0.054579202)]
Ticker MRNA
MRNA
Topics [(47, 0.55126894), (7, 0.1900484), (32, 0.12596764), (3, 0.09650789), (1, 0.022473255)]
Ticker DRE
DRE
Topics [(36, 0.30297917), (48, 0.25649595), (37, 0.12666714), (46, 0.032422964), (26, 0.02591767)]
Ticker ECL
ECL
Topics [(33, 0.12880386), (9, 0.103578396), (5, 0.090521365), (35, 0.08494194), (32, 0.06313273)]
Ticker ES
ES
Topics [(13, 0.51276773), (32, 0.22220175), (38, 0.21534291), (29, 0.024065496)]
Ticker LVS
LVS
Topic

Topics [(43, 0.3961783), (24, 0.24056478), (33, 0.10361856), (9, 0.04886696), (35, 0.024740767)]
Ticker IVZ
IVZ
Topics [(37, 0.52645445), (21, 0.20872264), (15, 0.05535369), (4, 0.038854975), (0, 0.034730475)]
Ticker IRM
IRM
Topics [(34, 0.18126778), (48, 0.14620872), (37, 0.082752086), (5, 0.08093119), (14, 0.070265554)]
Ticker WST
WST
Topics [(32, 0.95796984)]
Ticker TJX
TJX
Topics [(0, 0.7191046), (48, 0.16601545), (24, 0.059260435), (4, 0.02016883), (34, 0.017423958)]
Ticker CDNS
CDNS
Topics [(39, 0.7249534), (33, 0.16327521), (14, 0.044849016), (19, 0.03166209)]
Ticker HES
HES
Topics [(49, 0.4771669), (29, 0.21660465), (45, 0.0619331), (13, 0.043792505), (7, 0.02923366)]
Ticker ALLE
ALLE
Topics [(3, 0.2618364), (33, 0.083986275), (38, 0.073144406), (36, 0.04151276), (35, 0.039149504)]
Ticker VFC
VFC
Topics [(48, 0.9460665), (14, 0.022275155), (35, 0.019468552)]
Ticker CMCSA
CMCSA
Topics [(36, 0.74243325), (18, 0.09968326), (39, 0.069681756), (31, 0.03357125), (20, 0.0226389)]
Tick