In [2]:
import nltk
from nltk.corpus import stopwords

In [3]:
import re
import numpy as np
import pandas as pd
from pprint import pprint

# Gensim
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel

# spacy for lemmatization
import spacy

# Plotting tools

import pyLDAvis
import pyLDAvis.gensim_models as gensimvis
import matplotlib.pyplot as plt
%matplotlib inline

# Enable logging for gensim - optional
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.ERROR)

import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)

  from imp import reload


In [4]:
stop_words = stopwords.words('english')
stop_words.extend(['from', 'subject', 're', 'edu', 'use'])

In [5]:
df = pd.read_json('news_data_1pp.json')

In [6]:
title_data = df.title.values.tolist()
text_data = df.text.values.tolist()

In [7]:
# Remove Emails
title_data = [re.sub('\S*@\S*\s?', '', sent) for sent in title_data]

# Remove new line characters
title_data = [re.sub('\s+', ' ', sent) for sent in title_data]

# Remove distracting single quotes
title_data = [re.sub("\'", "", sent) for sent in title_data]

pprint(title_data[:10])

['Cloverleaf Networks Acquires Ryver to Enhance its Top-to-Bottom Technology '
 'Stack for Businesses',
 'Ingalls Shipbuilding Successfully Completes Acceptance Trials for Lenah '
 'Sutcliffe Higbee (DDG 123)',
 'STONERIDGE, INC. TO BROADCAST ITS THIRD-QUARTER 2022 CONFERENCE CALL ON THE '
 'WEB',
 'Volatile Brazil Is Lone Bull Case for Bruised Emerging Markets',
 'Civitas Resources (CIVI) Gains But Lags Market: What You Should Know',
 'Hasbro’s Entertainment One to Produce Dungeons & Dragons Documentary Timed '
 'to Global Franchise’s 50th Anniversary',
 'CCAGW PAC Endorses New York Reps. Chris Jacobs and Nicole Malliotakis',
 'JBM Brokers Sale of The Addison at Clermont',
 'Onto Innovation Schedules 2022 Third Quarter Financial Results Conference '
 'Call for October 27, 2022',
 'UPDATE 1-Canada to ban Irans IRGC leaders from entry, expand sanctions']


In [8]:
# Remove Emails
text_data = [re.sub('\S*@\S*\s?', '', sent) for sent in text_data]

# Remove new line characters
text_data = [re.sub('\s+', ' ', sent) for sent in text_data]

# Remove distracting single quotes
text_data = [re.sub("\'", "", sent) for sent in text_data]

# Remove urls 

text_data = [re.sub(r'http\S+', r'<URL>', text) for text in text_data]

print(text_data[:1])

['SCOTTSDALE, Ariz., Oct. 18, 2022 /PRNewswire/ -- Cloverleaf Networks announced today the acquisition of the Ryver workforce collaboration platform that will add workgroup efficiency, artificial intelligence (AI), and internet of things (IoT) functionality to the companys connectivity, cyber security and wide area networking solutions forming a top-to-bottom, wire-to-workgroup business stack. The acquisition resulted from extensive strategic analysis and due diligence to find the perfect addition to Cloverleaf Networks already powerful connectivity offerings. With Ryver, IT managers can control their entire online system and enable workforce collaboration through one company and one platform, saving businesses millions of dollars. "Cloverleaf is already helping companies operate more profitably with our connectivity aggregation platform (CLOĒ) ending the nightmare of providing, managing, and securing multi-office internet services. Ryver lets remote teams work like they are in one bui

In [9]:
def sent_to_words(sentences):
    for sentence in sentences:
        yield(gensim.utils.simple_preprocess(str(sentence), deacc=True))  # deacc=True removes punctuations

title_data_words = list(sent_to_words(title_data))
text_data_words = list(sent_to_words(text_data))

print(title_data_words[:1])

[['cloverleaf', 'networks', 'acquires', 'ryver', 'to', 'enhance', 'its', 'top', 'to', 'bottom', 'technology', 'stack', 'for', 'businesses']]


In [10]:
# Build the bigram and trigram models
title_bigram = gensim.models.Phrases(title_data_words, min_count=5, threshold=100) # higher threshold fewer phrases.
title_trigram = gensim.models.Phrases(title_bigram[title_data_words], threshold=100)  

# Faster way to get a sentence clubbed as a trigram/bigram
title_bigram_mod = gensim.models.phrases.Phraser(title_bigram)
title_trigram_mod = gensim.models.phrases.Phraser(title_trigram)

# See trigram example
print(title_trigram_mod[title_bigram_mod[title_data_words[0]]])

['cloverleaf', 'networks', 'acquires', 'ryver', 'to', 'enhance', 'its', 'top', 'to', 'bottom', 'technology', 'stack', 'for', 'businesses']


In [11]:
# Build the bigram and trigram models
text_bigram = gensim.models.Phrases(text_data_words, min_count=5, threshold=100) # higher threshold fewer phrases.
text_trigram = gensim.models.Phrases(text_bigram[text_data_words], threshold=100)  

# Faster way to get a sentence clubbed as a trigram/bigram
text_bigram_mod = gensim.models.phrases.Phraser(text_bigram)
text_trigram_mod = gensim.models.phrases.Phraser(text_trigram)

# See trigram example
print(text_trigram_mod[text_bigram_mod[text_data_words[0]]])

['scottsdale_ariz', 'oct_prnewswire', 'cloverleaf_networks', 'announced', 'today', 'the', 'acquisition', 'of', 'the', 'ryver', 'workforce', 'collaboration', 'platform', 'that', 'will', 'add', 'workgroup', 'efficiency', 'artificial_intelligence_ai', 'and', 'internet', 'of', 'things_iot', 'functionality', 'to', 'the', 'companys', 'connectivity', 'cyber_security', 'and', 'wide', 'area', 'networking', 'solutions', 'forming', 'top', 'to', 'bottom', 'wire', 'to', 'workgroup', 'business', 'stack', 'the', 'acquisition', 'resulted', 'from', 'extensive', 'strategic', 'analysis', 'and', 'due_diligence', 'to', 'find', 'the', 'perfect', 'addition', 'to', 'cloverleaf_networks', 'already', 'powerful', 'connectivity', 'offerings', 'with', 'ryver', 'it', 'managers', 'can', 'control', 'their', 'entire', 'online', 'system', 'and', 'enable', 'workforce', 'collaboration', 'through', 'one', 'company', 'and', 'one', 'platform', 'saving', 'businesses', 'millions', 'of', 'dollars', 'cloverleaf', 'is', 'already

In [12]:
# Define functions for stopwords, bigrams, trigrams and lemmatization
def remove_stopwords(texts):
    return [[word for word in simple_preprocess(str(doc)) if word not in stop_words] for doc in texts]


def make_bigrams(texts, bigram_mod):
    return [bigram_mod[doc] for doc in texts]


def make_trigrams(texts, bigram_mod, trigram_mod):
    return [trigram_mod[bigram_mod[doc]] for doc in texts]


def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']):
    """https://spacy.io/api/annotation"""
    texts_out = []
    for sent in texts:
        doc = nlp(" ".join(sent))
        texts_out.append(
            [token.lemma_ for token in doc if token.pos_ in allowed_postags])
    return texts_out

nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])




In [13]:
# Remove Stop Words
title_data_words_nostops = remove_stopwords(title_data_words)

# Form Bigrams
title_data_words_bigrams = make_bigrams(title_data_words_nostops, title_bigram_mod)


# Do lemmatization keeping only noun, adj, vb, adv
title_data_lemmatized = lemmatization(title_data_words_bigrams, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])

print(title_data_lemmatized[:1])

[['cloverleaf', 'network', 'acquire', 'ryver', 'enhance', 'top', 'bottom', 'technology', 'stack', 'business']]


In [14]:
# Remove Stop Words
text_data_words_nostops = remove_stopwords(text_data_words)

# Form Bigrams
text_data_words_bigrams = make_bigrams(text_data_words_nostops, text_bigram_mod)

# Do lemmatization keeping only noun, adj, vb, adv
text_data_lemmatized = lemmatization(text_data_words_bigrams, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])

print(text_data_lemmatized[:1])

[['scottsdale_ariz', 'oct_prnewswire', 'cloverleaf_network', 'announce', 'today', 'acquisition', 'ryver', 'workforce', 'collaboration', 'platform', 'add', 'workgroup', 'efficiency', 'artificial_intelligence', 'ai', 'internet', 'thing', 'functionality', 'company', 'connectivity', 'cyber_security', 'wide', 'area', 'networking', 'solution', 'form', 'top', 'bottom', 'wire', 'workgroup', 'business', 'stack', 'acquisition', 'result', 'extensive', 'strategic', 'analysis', 'due_diligence', 'find', 'perfect', 'addition', 'cloverleaf_network', 'already', 'powerful', 'connectivity', 'offering', 'ryver', 'manager', 'control', 'entire', 'online', 'system', 'enable', 'workforce', 'collaboration', 'company', 'platform', 'save', 'business', 'dollar', 'cloverleaf', 'already', 'help', 'company', 'operate', 'profitably', 'connectivity', 'aggregation', 'platform', 'cloe', 'end', 'nightmare', 'provide', 'manage', 'secure', 'office', 'internet', 'service', 'ryver', 'let', 'remote', 'team', 'work', 'building

Create the dictonary and corpus needed for topic modelling 

In [15]:
# Create Dictionary
title_id2word = corpora.Dictionary(title_data_lemmatized)

# Create Corpus
title_texts = title_data_lemmatized

# Term Document Frequency
title_corpus = [title_id2word.doc2bow(text) for text in title_texts]

# View
print(title_corpus[:1])

[[(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1), (6, 1), (7, 1), (8, 1), (9, 1)]]


In [16]:
# Create Dictionary
text_id2word = corpora.Dictionary(text_data_lemmatized)

# Create Corpus
text_texts = text_data_lemmatized

# Term Document Frequency
text_corpus = [text_id2word.doc2bow(text) for text in text_texts]

# View
print(text_corpus[:1])

[[(0, 1), (1, 3), (2, 1), (3, 1), (4, 1), (5, 1), (6, 2), (7, 1), (8, 1), (9, 1), (10, 1), (11, 1), (12, 1), (13, 2), (14, 1), (15, 4), (16, 1), (17, 1), (18, 1), (19, 1), (20, 5), (21, 9), (22, 5), (23, 1), (24, 1), (25, 8), (26, 1), (27, 3), (28, 3), (29, 1), (30, 2), (31, 1), (32, 1), (33, 1), (34, 1), (35, 1), (36, 2), (37, 1), (38, 1), (39, 1), (40, 1), (41, 1), (42, 1), (43, 1), (44, 3), (45, 1), (46, 1), (47, 1), (48, 1), (49, 1), (50, 1), (51, 1), (52, 3), (53, 1), (54, 1), (55, 1), (56, 1), (57, 1), (58, 1), (59, 1), (60, 1), (61, 1), (62, 1), (63, 1), (64, 3), (65, 1), (66, 1), (67, 1), (68, 1), (69, 1), (70, 3), (71, 1), (72, 2), (73, 1), (74, 1), (75, 1), (76, 1), (77, 1), (78, 1), (79, 1), (80, 1), (81, 1), (82, 1), (83, 1), (84, 1), (85, 2), (86, 1), (87, 1), (88, 1), (89, 5), (90, 1), (91, 1), (92, 1), (93, 1), (94, 2), (95, 1), (96, 1), (97, 1), (98, 1), (99, 1), (100, 1), (101, 1), (102, 1), (103, 7), (104, 3), (105, 2), (106, 1), (107, 1), (108, 1), (109, 2), (110, 1)

In [17]:
# Human readable format of corpus (term-frequency)
[[(title_id2word[id], freq) for id, freq in cp] for cp in title_corpus[:1]]

[[('acquire', 1),
  ('bottom', 1),
  ('business', 1),
  ('cloverleaf', 1),
  ('enhance', 1),
  ('network', 1),
  ('ryver', 1),
  ('stack', 1),
  ('technology', 1),
  ('top', 1)]]

In [18]:
# Human readable format of corpus (term-frequency)
[[(text_id2word[id], freq) for id, freq in cp] for cp in text_corpus[:1]]

[[('academic', 1),
  ('acquisition', 3),
  ('add', 1),
  ('addition', 1),
  ('aggregation', 1),
  ('ai', 1),
  ('already', 2),
  ('analysis', 1),
  ('announce', 1),
  ('area', 1),
  ('artificial_intelligence', 1),
  ('automation', 1),
  ('base', 1),
  ('bottom', 2),
  ('building', 1),
  ('business', 4),
  ('cash', 1),
  ('client', 1),
  ('cloe', 1),
  ('cloud_compute', 1),
  ('cloverleaf', 5),
  ('cloverleaf_network', 9),
  ('collaboration', 5),
  ('combine', 1),
  ('communication', 1),
  ('company', 8),
  ('connect', 1),
  ('connectivity', 3),
  ('control', 3),
  ('countless', 1),
  ('cyber_security', 2),
  ('datum', 1),
  ('day', 1),
  ('deal', 1),
  ('development', 1),
  ('device', 1),
  ('dollar', 2),
  ('due_diligence', 1),
  ('efficiency', 1),
  ('efficient', 1),
  ('efficiently', 1),
  ('empower', 1),
  ('enable', 1),
  ('end', 1),
  ('enhance', 3),
  ('entire', 1),
  ('even', 1),
  ('extensive', 1),
  ('faherty', 1),
  ('find', 1),
  ('form', 1),
  ('fully', 1),
  ('functionali

Building the topic model

In [19]:
# Build LDA model
title_lda_model = gensim.models.ldamodel.LdaModel(corpus=title_corpus,
                                                  id2word=title_id2word,
                                                  num_topics=20,
                                                  random_state=100,
                                                  update_every=1,
                                                  chunksize=100,
                                                  passes=10,
                                                  alpha='auto',
                                                  per_word_topics=True)


In [20]:
# Print the Keyword in the 10 topics
pprint(title_lda_model.print_topics())
title_doc_lda = title_lda_model[title_corpus]

[(0,
  '0.307*"report" + 0.089*"plan" + 0.058*"health" + 0.058*"price" + '
  '0.050*"declare" + 0.045*"record" + 0.041*"end" + 0.037*"sell" + '
  '0.023*"source" + 0.020*"warn"'),
 (1,
  '0.138*"launch" + 0.063*"group" + 0.062*"research" + 0.052*"investment" + '
  '0.048*"fund" + 0.047*"cagr" + 0.046*"continue" + 0.046*"project" + '
  '0.043*"reach" + 0.039*"size"'),
 (2,
  '0.160*"dividend" + 0.112*"partner" + 0.101*"expand" + 0.056*"pay" + '
  '0.054*"lead" + 0.046*"management" + 0.035*"event" + 0.033*"highlight" + '
  '0.031*"accelerate" + 0.019*"infrastructure"'),
 (3,
  '0.317*"company" + 0.178*"broader_market" + 0.013*"form" + 0.011*"renewable" '
  '+ 0.000*"open" + 0.000*"credit" + 0.000*"data_breach" + 0.000*"settle" + '
  '0.000*"online" + 0.000*"drizly"'),
 (4,
  '0.157*"lag" + 0.113*"inflation" + 0.075*"host" + 0.072*"last" + '
  '0.066*"analyst" + 0.062*"dollar" + 0.051*"resource" + 0.039*"challenge" + '
  '0.032*"face" + 0.024*"leave"'),
 (5,
  '0.421*"announce" + 0.084*"m

In [21]:
# Build LDA model
text_lda_model = gensim.models.ldamodel.LdaModel(corpus=text_corpus,
                                                 id2word=text_id2word,
                                                 num_topics=20,
                                                 random_state=100,
                                                 update_every=1,
                                                 chunksize=100,
                                                 passes=10,
                                                 alpha='auto',
                                                 per_word_topics=True)


In [22]:
# Print the Keyword in the 10 topics
pprint(text_lda_model.print_topics())
text_doc_lda = text_lda_model[text_corpus]

[(0,
  '0.021*"community" + 0.016*"program" + 0.016*"year" + 0.015*"family" + '
  '0.013*"work" + 0.012*"support" + 0.012*"education" + 0.012*"life" + '
  '0.012*"people" + 0.012*"school"'),
 (1,
  '0.118*"automotive" + 0.071*"lighting" + 0.044*"reinsurance" + '
  '0.024*"resistance" + 0.010*"argentina" + 0.007*"imf" + 0.000*"terminal" + '
  '0.000*"ampere" + 0.000*"bowman" + 0.000*"sensor"'),
 (2,
  '0.037*"zack" + 0.032*"stock" + 0.029*"earning" + 0.029*"estimate" + '
  '0.026*"industry" + 0.026*"year" + 0.023*"rank" + 0.021*"research" + '
  '0.020*"report" + 0.015*"share"'),
 (3,
  '0.043*"statement" + 0.035*"look" + 0.035*"forward" + 0.034*"company" + '
  '0.021*"information" + 0.017*"include" + 0.013*"future" + 0.012*"risk" + '
  '0.012*"result" + 0.011*"factor"'),
 (4,
  '0.054*"health" + 0.053*"patient" + 0.041*"care" + 0.038*"treatment" + '
  '0.030*"study" + 0.030*"medical" + 0.020*"disease" + 0.018*"research" + '
  '0.017*"clinical" + 0.015*"cancer"'),
 (5,
  '0.042*"company"

#### Computing Model perplexity and coherence score 



In [39]:
# Compute Perplexity
print('\nPerplexity: ', title_lda_model.log_perplexity(title_corpus))  # a measure of how good the model is. lower the better.



Perplexity:  -20.18116362866162


In [24]:

# Compute Coherence Score
title_coherence_model_lda = CoherenceModel(model=title_lda_model, texts= title_data_lemmatized, dictionary=title_id2word, coherence='u_mass')
title_coherence_lda = title_coherence_model_lda.get_coherence()
print('\nCoherence Score: ', title_coherence_lda)


Coherence Score:  -15.812422728688782


In [26]:
# Compute Perplexity
print('\nPerplexity: ', text_lda_model.log_perplexity(text_corpus))  # a measure of how good the model is. lower the better.



Perplexity:  -10.267502555393607


In [25]:

# Compute Coherence Score
text_coherence_model_lda = CoherenceModel(model=text_lda_model, texts= text_data_lemmatized, dictionary=text_id2word, coherence='u_mass')
text_coherence_lda = text_coherence_model_lda.get_coherence()
print('\nCoherence Score: ', text_coherence_lda)


Coherence Score:  -2.166778707269944


Visualising topic keywords

In [40]:
def plot_lda_vis(lda_model, bow_corpus, dic):
    pyLDAvis.enable_notebook()
    vis = gensimvis.prepare(lda_model, bow_corpus, dic)
    return vis

In [41]:
# Visualize the topics
plot_lda_vis(title_lda_model, title_corpus, title_id2word)

  by='saliency', ascending=False).head(R).drop('saliency', 1)


In [28]:
# Visualize the topics
plot_lda_vis(text_lda_model, text_corpus, text_id2word)

  by='saliency', ascending=False).head(R).drop('saliency', 1)


Building LDA Mallet Model

In [33]:
import little_mallet_wrapper as lmw

In [52]:
# title_data_lemmatized[0].join
" ".join(title_data_lemmatized[0])

'cloverleaf network acquire ryver enhance top bottom technology stack business'

In [53]:
path_to_mallet = 'C:/Users/jimmy/Documents/GitHub/Financial News Scraping/news_scraping_yahoo/mallet-2.0.8/bin/mallet'
output_directory_path = './title-lmw-output'
num_topics = 20
training_data = [" ".join(data) for data in title_data_lemmatized]


['cloverleaf network acquire ryver enhance top bottom technology stack business',
 'ingall shipbuilde successfully complete acceptance trial',
 'third quarter conference_call web']

In [56]:
topic_keys, topic_distributions = lmw.quick_train_topic_model(path_to_mallet, 
                                                              output_directory_path, 
                                                              num_topics, 
                                                              training_data)

Importing data...
Complete
Training topic model...
Complete


FileNotFoundError: [Errno 2] No such file or directory: './title-lmw-output/mallet.topic_keys.20'

In [32]:
# Download File: http://mallet.cs.umass.edu/dist/mallet-2.0.8.zip
mallet_path = 'C:/Users/jimmy/Documents/GitHub/Financial News Scraping/news_scraping_yahoo/mallet-2.0.8/bin/mallet'  # update this path
title_ldamallet = LdaMallet(
    mallet_path, corpus=title_corpus, num_topics=20, id2word=title_id2word)

text_ldamallet = gensim.models.wrappers.LdaMallet(
    mallet_path, corpus=text_corpus, num_topics=20, id2word=text_id2word)

CalledProcessError: Command 'C:/Users/jimmy/Documents/GitHub/Financial News Scraping/news_scraping_yahoo/mallet-2.0.8/bin/mallet import-file --preserve-case --keep-sequence --remove-stopwords --token-regex "\S+" --input C:\Users\jimmy\AppData\Local\Temp\31f01c_corpus.txt --output C:\Users\jimmy\AppData\Local\Temp\31f01c_corpus.mallet' returned non-zero exit status 1.

In [None]:
# Show Topics
pprint(title_ldamallet.show_topics(formatted=False))

# Compute Coherence Score
title_coherence_model_ldamallet = CoherenceModel(
    model=title_ldamallet, texts=title_data_lemmatized, dictionary=title_id2word, coherence='c_v')
title_coherence_ldamallet = title_coherence_model_ldamallet.get_coherence()
print('\nCoherence Score: ', title_coherence_ldamallet)


In [None]:
# Show Topics
pprint(text_ldamallet.show_topics(formatted=False))

# Compute Coherence Score
text_coherence_model_ldamallet = CoherenceModel(
    model=text_ldamallet, texts=text_data_lemmatized, dictionary=text_id2word, coherence='c_v')
text_coherence_ldamallet = text_coherence_model_ldamallet.get_coherence()
print('\nCoherence Score: ', text_coherence_ldamallet)


Finding the optimal k number of topics

In [None]:
def compute_coherence_values(dictionary, corpus, texts, limit, start=2, step=3):
    """
    Compute c_v coherence for various number of topics

    Parameters:
    ----------
    dictionary : Gensim dictionary
    corpus : Gensim corpus
    texts : List of input texts
    limit : Max num of topics

    Returns:
    -------
    model_list : List of LDA topic models
    coherence_values : Coherence values corresponding to the LDA model with respective number of topics
    """
    coherence_values = []
    model_list = []
    for num_topics in range(start, limit, step):
        model = gensim.models.wrappers.LdaMallet(mallet_path, corpus=corpus, num_topics=num_topics, id2word=id2word)
        model_list.append(model)
        coherencemodel = CoherenceModel(model=model, texts=texts, dictionary=dictionary, coherence='c_v')
        coherence_values.append(coherencemodel.get_coherence())

    return model_list, coherence_values

In [None]:
# Can take a long time to run.
model_list, coherence_values = compute_coherence_values(
    dictionary=id2word, corpus=corpus, texts=data_lemmatized, start=2, limit=40, step=6)


Finding the dominant topic in each sentence

In [37]:
def format_topics_sentences(ldamodel, corpus, texts):
    # Init output
    sent_topics_df = pd.DataFrame()

    # Get main topic in each document
    for i, row in enumerate(ldamodel[corpus]):
        row = sorted(row, key=lambda x: (x[1]), reverse=True)
        # Get the Dominant topic, Perc Contribution and Keywords for each document
        for j, (topic_num, prop_topic) in enumerate(row):
            if j == 0:  # => dominant topic
                wp = ldamodel.show_topic(topic_num)
                topic_keywords = ", ".join([word for word, prop in wp])
                sent_topics_df = sent_topics_df.append(pd.Series([int(topic_num), round(prop_topic,4), topic_keywords]), ignore_index=True)
            else:
                break
    sent_topics_df.columns = ['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords']

    # Add original text to the end of the output
    contents = pd.Series(texts)
    sent_topics_df = pd.concat([sent_topics_df, contents], axis=1)
    return(sent_topics_df)

In [38]:
df_topic_sents_keywords = format_topics_sentences(title_lda_model, title_corpus, title_data)

TypeError: '<' not supported between instances of 'int' and 'tuple'