In [1]:
# !pip install bertopic
# pip install py-newscollector

In [2]:
# import packages. 
from newscollector import *
import pickle
from bertopic import BERTopic
from bertopic.representation import KeyBERTInspired
from transformers.pipelines import pipeline
from bertopic.representation import TextGeneration
from sklearn.feature_extraction.text import CountVectorizer
from bertopic.representation import MaximalMarginalRelevance
from bertopic.representation import ZeroShotClassification
import pandas as pd

In [3]:
# newsletter = NewsCollector()
# print("collected",len(newsletter.sources),"articles")
# with open('news_articles_26_01.pkl', 'wb') as file:
#     pickle.dump(newsletter.sources, file)
    

In [4]:
# reading the data. 
with open('data/news_articles_26_01.pkl', 'rb') as file:
    newsletter_sources=pickle.load(file)
# newsletter_sources[0]

In [5]:
# getting relevant info and converting to dataframe. 
titles,article,keywords,source=[],[],[],[]
for i in newsletter_sources:
    titles.append(i["title"])
    article.append(i["body"])
    keywords.append(i["keywords"])
    source.append(i["source"])

df = pd.DataFrame({'title': titles, 'article': article, 'keywords': keywords,'source':source})
df=df.drop_duplicates(["article"]).reset_index(drop=True)
df

Unnamed: 0,title,article,keywords,source
0,"Live updates: Israel-Hamas war rages, hostages...","Bill Burns, director of the CIA, is expected t...","[expected, war, held, deal, rages, stateheres,...",CNN
1,"January 25, 2024 Israel-Hamas war","Palestinians flee Khan Younis, moving toward R...","[war, hamas, 25, sides, reported, truce, pales...",CNN
2,January 25 - 2024 campaign updates,Former President Donald Trump arrives for a ca...,"[campaign, 25, primary, republican, nonincumbe...",CNN
3,Today’s new in 10 minutes,"CNN —\n\nJanuary 26, 2024\n\nToday on CNN 10, ...","[cnn, yearly, average, minutes, todays, intern...",CNN
4,U.S. and China are working to make the busines...,"The flags of China, U.S. and the Chinese Commu...","[trade, foreign, rules, beijing, business, min...",CNBC
...,...,...,...,...
99,Intel sinks after first quarter outlook disapp...,Intel (INTC) stock fell more than 10% in prema...,"[expected, quarter, intel, company, business, ...",Yahoo Finance
100,Intel Plunges After Forecast Casts Doubt on Co...,(Bloomberg) -- Intel Corp. tumbled in premarke...,"[sales, forecast, quarter, plunges, intel, gel...",Yahoo Finance
101,The S&P 500 Just Hit An All-Time High. Here's ...,"Over the last year, a big contributor to the m...","[bargain, companys, sp, stocks, hit, motley, h...",Yahoo Finance
102,The Dow will surge 24% after the Fed's first r...,The Dow will surge 24% after the Fed's first r...,"[unlikely, feds, cut, average, surge, rate, jo...",Yahoo Finance


In [6]:
df.to_csv("data/data.csv", index=False)

## Baseline

In [7]:
# remember to convert texts to list (list of texts. )
docs=list(df["article"])
topic_model = BERTopic()
topics, probs = topic_model.fit_transform(docs)
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,37,-1_the_to_in_and,"[the, to, in, and, of, said, for, that, on, he]",[Superdry has parted ways with its fourth fina...
1,0,34,0_the_to_in_of,"[the, to, in, of, and, is, for, that, this, as]",[ilbusca\n\nAll financial numbers in this arti...
2,1,33,1_the_to_of_and,"[the, to, of, and, in, that, for, by, is, on]",[Pierre Bretagne woke at 4am to feed the cows ...


In [8]:
# print the topic models. 
topic_model.get_document_info(docs)

Unnamed: 0,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,"Bill Burns, director of the CIA, is expected t...",1,1_the_to_of_and,"[the, to, of, and, in, that, for, by, is, on]",[Pierre Bretagne woke at 4am to feed the cows ...,the - to - of - and - in - that - for - by - i...,0.721071,False
1,"Palestinians flee Khan Younis, moving toward R...",-1,-1_the_to_in_and,"[the, to, in, and, of, said, for, that, on, he]",[Superdry has parted ways with its fourth fina...,the - to - in - and - of - said - for - that -...,0.000000,False
2,Former President Donald Trump arrives for a ca...,-1,-1_the_to_in_and,"[the, to, in, and, of, said, for, that, on, he]",[Superdry has parted ways with its fourth fina...,the - to - in - and - of - said - for - that -...,0.000000,False
3,"CNN —\n\nJanuary 26, 2024\n\nToday on CNN 10, ...",-1,-1_the_to_in_and,"[the, to, in, and, of, said, for, that, on, he]",[Superdry has parted ways with its fourth fina...,the - to - in - and - of - said - for - that -...,0.000000,False
4,"The flags of China, U.S. and the Chinese Commu...",-1,-1_the_to_in_and,"[the, to, in, and, of, said, for, that, on, he]",[Superdry has parted ways with its fourth fina...,the - to - in - and - of - said - for - that -...,0.000000,False
...,...,...,...,...,...,...,...,...
99,Intel (INTC) stock fell more than 10% in prema...,0,0_the_to_in_of,"[the, to, in, of, and, is, for, that, this, as]",[ilbusca\n\nAll financial numbers in this arti...,the - to - in - of - and - is - for - that - t...,0.883375,False
100,(Bloomberg) -- Intel Corp. tumbled in premarke...,0,0_the_to_in_of,"[the, to, in, of, and, is, for, that, this, as]",[ilbusca\n\nAll financial numbers in this arti...,the - to - in - of - and - is - for - that - t...,0.939735,False
101,"Over the last year, a big contributor to the m...",0,0_the_to_in_of,"[the, to, in, of, and, is, for, that, this, as]",[ilbusca\n\nAll financial numbers in this arti...,the - to - in - of - and - is - for - that - t...,0.939735,False
102,The Dow will surge 24% after the Fed's first r...,-1,-1_the_to_in_and,"[the, to, in, and, of, said, for, that, on, he]",[Superdry has parted ways with its fourth fina...,the - to - in - and - of - said - for - that -...,0.000000,False


In [9]:
topic_model.get_topic(0)

[('the', 0.11256817898759293),
 ('to', 0.06976722389031854),
 ('in', 0.06291393456322937),
 ('of', 0.06140871196215125),
 ('and', 0.060106360256650455),
 ('is', 0.05281049317050375),
 ('for', 0.04123908068076121),
 ('that', 0.04066472022559741),
 ('this', 0.0346841524098249),
 ('as', 0.033841847624522865)]

## Consider basic "pre-processing tricks"

In [10]:
vectorizer_model = CountVectorizer(ngram_range=(1, 3), stop_words="english") # use ngrams and stop words
topic_model.update_topics(docs, vectorizer_model=vectorizer_model)
topics, probs = topic_model.fit_transform(docs)
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,32,-1_said_year_china_2024,"[said, year, china, 2024, production, max, tai...",[Average private rents in Great Britain have c...
1,0,34,0_company_growth_year_stock,"[company, growth, year, stock, market, investo...",[ilbusca\n\nAll financial numbers in this arti...
2,1,19,1_said_uk_cruise_says,"[said, uk, cruise, says, data, information, pa...","[SUIXI, CHINA - DECEMBER 30: An employee works..."
3,2,19,2_billion_kenya_million_year,"[billion, kenya, million, year, percent, said,...",[Economy Penalties for illegal State actions b...


In [13]:
(topic_model.get_document_info(docs))

Unnamed: 0,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,"Bill Burns, director of the CIA, is expected t...",1,1_said_uk_cruise_says,"[said, uk, cruise, says, data, information, pa...","[SUIXI, CHINA - DECEMBER 30: An employee works...",said - uk - cruise - says - data - information...,0.983350,False
1,"Palestinians flee Khan Younis, moving toward R...",1,1_said_uk_cruise_says,"[said, uk, cruise, says, data, information, pa...","[SUIXI, CHINA - DECEMBER 30: An employee works...",said - uk - cruise - says - data - information...,0.993184,False
2,Former President Donald Trump arrives for a ca...,0,0_company_growth_year_stock,"[company, growth, year, stock, market, investo...",[ilbusca\n\nAll financial numbers in this arti...,company - growth - year - stock - market - inv...,0.832900,False
3,"CNN —\n\nJanuary 26, 2024\n\nToday on CNN 10, ...",-1,-1_said_year_china_2024,"[said, year, china, 2024, production, max, tai...",[Average private rents in Great Britain have c...,said - year - china - 2024 - production - max ...,0.000000,False
4,"The flags of China, U.S. and the Chinese Commu...",-1,-1_said_year_china_2024,"[said, year, china, 2024, production, max, tai...",[Average private rents in Great Britain have c...,said - year - china - 2024 - production - max ...,0.000000,False
...,...,...,...,...,...,...,...,...
99,Intel (INTC) stock fell more than 10% in prema...,0,0_company_growth_year_stock,"[company, growth, year, stock, market, investo...",[ilbusca\n\nAll financial numbers in this arti...,company - growth - year - stock - market - inv...,0.982223,False
100,(Bloomberg) -- Intel Corp. tumbled in premarke...,0,0_company_growth_year_stock,"[company, growth, year, stock, market, investo...",[ilbusca\n\nAll financial numbers in this arti...,company - growth - year - stock - market - inv...,1.000000,False
101,"Over the last year, a big contributor to the m...",0,0_company_growth_year_stock,"[company, growth, year, stock, market, investo...",[ilbusca\n\nAll financial numbers in this arti...,company - growth - year - stock - market - inv...,0.970782,False
102,The Dow will surge 24% after the Fed's first r...,0,0_company_growth_year_stock,"[company, growth, year, stock, market, investo...",[ilbusca\n\nAll financial numbers in this arti...,company - growth - year - stock - market - inv...,0.835092,False


In [14]:
topic_model.get_topic(0)

[('company', 0.014531743872859893),
 ('growth', 0.012137190517001815),
 ('year', 0.011716927443273358),
 ('stock', 0.011583153970270968),
 ('market', 0.010712955616062355),
 ('investors', 0.010560771992193027),
 ('share', 0.010414211826482157),
 ('billion', 0.009623606718469945),
 ('intel', 0.008398259008679684),
 ('revenue', 0.008356630074095801)]

## Consider "fine-tuning"

### Maximal Marginal Relevance

In [15]:
representation_model = MaximalMarginalRelevance(diversity=0.3) # identify the representation for tuning (here we use MMR)
topic_model = BERTopic(representation_model=representation_model, vectorizer_model=vectorizer_model)
topics, probs = topic_model.fit_transform(docs)
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,25,-1_cookies_taiwan_oil_global,"[cookies, taiwan, oil, global, rents, change, ...",[Taiwan People's Party (TPP) presidential cand...
1,0,42,0_billion_kenya_market_government,"[billion, kenya, market, government, company, ...",[Pierre Bretagne woke at 4am to feed the cows ...
2,1,37,1_company_investors_intel_2023,"[company, investors, intel, 2023, dividend, st...",[ilbusca\n\nAll financial numbers in this arti...


In [18]:
topic_model.get_topic(0)

[('billion', 0.0098279825703271),
 ('kenya', 0.00817176422933932),
 ('market', 0.0056021318091189235),
 ('government', 0.0053817504504362615),
 ('company', 0.0052092074141379795),
 ('says', 0.004814784743998926),
 ('bond', 0.004630506581364003),
 ('bank', 0.004563227918726576),
 ('sector', 0.004198035176964921),
 ('2023', 0.0038629427347616086)]

In [19]:
(topic_model.get_document_info(docs))

Unnamed: 0,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,"Bill Burns, director of the CIA, is expected t...",0,0_billion_kenya_market_government,"[billion, kenya, market, government, company, ...",[Pierre Bretagne woke at 4am to feed the cows ...,billion - kenya - market - government - compan...,0.758054,False
1,"Palestinians flee Khan Younis, moving toward R...",0,0_billion_kenya_market_government,"[billion, kenya, market, government, company, ...",[Pierre Bretagne woke at 4am to feed the cows ...,billion - kenya - market - government - compan...,0.757863,False
2,Former President Donald Trump arrives for a ca...,1,1_company_investors_intel_2023,"[company, investors, intel, 2023, dividend, st...",[ilbusca\n\nAll financial numbers in this arti...,company - investors - intel - 2023 - dividend ...,0.781510,False
3,"CNN —\n\nJanuary 26, 2024\n\nToday on CNN 10, ...",0,0_billion_kenya_market_government,"[billion, kenya, market, government, company, ...",[Pierre Bretagne woke at 4am to feed the cows ...,billion - kenya - market - government - compan...,0.738855,False
4,"The flags of China, U.S. and the Chinese Commu...",-1,-1_cookies_taiwan_oil_global,"[cookies, taiwan, oil, global, rents, change, ...",[Taiwan People's Party (TPP) presidential cand...,cookies - taiwan - oil - global - rents - chan...,0.000000,False
...,...,...,...,...,...,...,...,...
99,Intel (INTC) stock fell more than 10% in prema...,1,1_company_investors_intel_2023,"[company, investors, intel, 2023, dividend, st...",[ilbusca\n\nAll financial numbers in this arti...,company - investors - intel - 2023 - dividend ...,0.968276,False
100,(Bloomberg) -- Intel Corp. tumbled in premarke...,1,1_company_investors_intel_2023,"[company, investors, intel, 2023, dividend, st...",[ilbusca\n\nAll financial numbers in this arti...,company - investors - intel - 2023 - dividend ...,0.968276,False
101,"Over the last year, a big contributor to the m...",1,1_company_investors_intel_2023,"[company, investors, intel, 2023, dividend, st...",[ilbusca\n\nAll financial numbers in this arti...,company - investors - intel - 2023 - dividend ...,0.939438,False
102,The Dow will surge 24% after the Fed's first r...,1,1_company_investors_intel_2023,"[company, investors, intel, 2023, dividend, st...",[ilbusca\n\nAll financial numbers in this arti...,company - investors - intel - 2023 - dividend ...,0.797141,False


### KeyBERT

In [20]:
representation_model = KeyBERTInspired() # here keyBERT is our representation fine tuning, we define it
topic_model = BERTopic(representation_model=representation_model, vectorizer_model=vectorizer_model)
topics, probs = topic_model.fit_transform(docs)
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,33,-1_demand_ceo_market_rents,"[demand, ceo, market, rents, growth, financial...",[Taiwan People's Party (TPP) presidential cand...
1,0,37,0_fines_kenya_nairobi_bank,"[fines, kenya, nairobi, bank, kenyas, governme...",[Economy Penalties for illegal State actions b...
2,1,34,1_stocks_stock_dividend_investing,"[stocks, stock, dividend, investing, investors...",[ilbusca\n\nAll financial numbers in this arti...


In [23]:
topic_model.get_topic(0)

[('fines', 0.3665838),
 ('kenya', 0.3574194),
 ('nairobi', 0.35301816),
 ('bank', 0.3460089),
 ('kenyas', 0.32278898),
 ('government', 0.31521612),
 ('africa', 0.30328456),
 ('regulations', 0.30296445),
 ('sector', 0.28376648),
 ('loans', 0.28092253)]

In [25]:
(topic_model.get_document_info(docs))

Unnamed: 0,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,"Bill Burns, director of the CIA, is expected t...",0,0_fines_kenya_nairobi_bank,"[fines, kenya, nairobi, bank, kenyas, governme...",[Economy Penalties for illegal State actions b...,fines - kenya - nairobi - bank - kenyas - gove...,0.728923,False
1,"Palestinians flee Khan Younis, moving toward R...",0,0_fines_kenya_nairobi_bank,"[fines, kenya, nairobi, bank, kenyas, governme...",[Economy Penalties for illegal State actions b...,fines - kenya - nairobi - bank - kenyas - gove...,0.696118,False
2,Former President Donald Trump arrives for a ca...,0,0_fines_kenya_nairobi_bank,"[fines, kenya, nairobi, bank, kenyas, governme...",[Economy Penalties for illegal State actions b...,fines - kenya - nairobi - bank - kenyas - gove...,0.684165,False
3,"CNN —\n\nJanuary 26, 2024\n\nToday on CNN 10, ...",-1,-1_demand_ceo_market_rents,"[demand, ceo, market, rents, growth, financial...",[Taiwan People's Party (TPP) presidential cand...,demand - ceo - market - rents - growth - finan...,0.000000,False
4,"The flags of China, U.S. and the Chinese Commu...",-1,-1_demand_ceo_market_rents,"[demand, ceo, market, rents, growth, financial...",[Taiwan People's Party (TPP) presidential cand...,demand - ceo - market - rents - growth - finan...,0.000000,False
...,...,...,...,...,...,...,...,...
99,Intel (INTC) stock fell more than 10% in prema...,1,1_stocks_stock_dividend_investing,"[stocks, stock, dividend, investing, investors...",[ilbusca\n\nAll financial numbers in this arti...,stocks - stock - dividend - investing - invest...,0.875137,False
100,(Bloomberg) -- Intel Corp. tumbled in premarke...,1,1_stocks_stock_dividend_investing,"[stocks, stock, dividend, investing, investors...",[ilbusca\n\nAll financial numbers in this arti...,stocks - stock - dividend - investing - invest...,0.987720,False
101,"Over the last year, a big contributor to the m...",1,1_stocks_stock_dividend_investing,"[stocks, stock, dividend, investing, investors...",[ilbusca\n\nAll financial numbers in this arti...,stocks - stock - dividend - investing - invest...,0.960157,False
102,The Dow will surge 24% after the Fed's first r...,1,1_stocks_stock_dividend_investing,"[stocks, stock, dividend, investing, investors...",[ilbusca\n\nAll financial numbers in this arti...,stocks - stock - dividend - investing - invest...,0.849407,False


## Using a different model?

### all-MiniLM-L6-v2

In [26]:
embedding_model="all-MiniLM-L6-v2"
topic_model = BERTopic(representation_model=representation_model, 
                       vectorizer_model=vectorizer_model,
                       embedding_model=embedding_model) # specify the use of differed LLM here. 
topics, probs = topic_model.fit_transform(docs)

In [27]:
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,33,-1_rents_rent_demand_rental,"[rents, rent, demand, rental, spending, growth...",[With new leadership coming in after difficult...
1,0,34,0_stocks_stock_dividend_investors,"[stocks, stock, dividend, investors, investmen...",[ilbusca\n\nAll financial numbers in this arti...
2,1,19,1_commodity_epa_enforcement_emissions,"[commodity, epa, enforcement, emissions, polic...","[SUIXI, CHINA - DECEMBER 30: An employee works..."
3,2,18,2_bank kenya_nairobi_kenya_financial,"[bank kenya, nairobi, kenya, financial, treasu...",[Economy Penalties for illegal State actions b...


In [28]:
(topic_model.get_document_info(docs))

Unnamed: 0,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,"Bill Burns, director of the CIA, is expected t...",1,1_commodity_epa_enforcement_emissions,"[commodity, epa, enforcement, emissions, polic...","[SUIXI, CHINA - DECEMBER 30: An employee works...",commodity - epa - enforcement - emissions - po...,0.994079,False
1,"Palestinians flee Khan Younis, moving toward R...",1,1_commodity_epa_enforcement_emissions,"[commodity, epa, enforcement, emissions, polic...","[SUIXI, CHINA - DECEMBER 30: An employee works...",commodity - epa - enforcement - emissions - po...,0.966867,False
2,Former President Donald Trump arrives for a ca...,-1,-1_rents_rent_demand_rental,"[rents, rent, demand, rental, spending, growth...",[With new leadership coming in after difficult...,rents - rent - demand - rental - spending - gr...,0.000000,False
3,"CNN —\n\nJanuary 26, 2024\n\nToday on CNN 10, ...",-1,-1_rents_rent_demand_rental,"[rents, rent, demand, rental, spending, growth...",[With new leadership coming in after difficult...,rents - rent - demand - rental - spending - gr...,0.000000,False
4,"The flags of China, U.S. and the Chinese Commu...",-1,-1_rents_rent_demand_rental,"[rents, rent, demand, rental, spending, growth...",[With new leadership coming in after difficult...,rents - rent - demand - rental - spending - gr...,0.000000,False
...,...,...,...,...,...,...,...,...
99,Intel (INTC) stock fell more than 10% in prema...,0,0_stocks_stock_dividend_investors,"[stocks, stock, dividend, investors, investmen...",[ilbusca\n\nAll financial numbers in this arti...,stocks - stock - dividend - investors - invest...,0.924364,False
100,(Bloomberg) -- Intel Corp. tumbled in premarke...,0,0_stocks_stock_dividend_investors,"[stocks, stock, dividend, investors, investmen...",[ilbusca\n\nAll financial numbers in this arti...,stocks - stock - dividend - investors - invest...,0.957260,False
101,"Over the last year, a big contributor to the m...",0,0_stocks_stock_dividend_investors,"[stocks, stock, dividend, investors, investmen...",[ilbusca\n\nAll financial numbers in this arti...,stocks - stock - dividend - investors - invest...,0.909220,False
102,The Dow will surge 24% after the Fed's first r...,0,0_stocks_stock_dividend_investors,"[stocks, stock, dividend, investors, investmen...",[ilbusca\n\nAll financial numbers in this arti...,stocks - stock - dividend - investors - invest...,0.835250,False


In [29]:
topic_model.get_topic(0)

[('stocks', 0.47366685),
 ('stock', 0.46822774),
 ('dividend', 0.44935924),
 ('investors', 0.44175082),
 ('investment', 0.43116313),
 ('shares', 0.40257162),
 ('markets', 0.37172067),
 ('earnings', 0.35494322),
 ('financial', 0.3503764),
 ('market', 0.34380215)]

### longformer

In [30]:
topic_model = BERTopic(representation_model=representation_model, 
                       vectorizer_model=vectorizer_model,
                       embedding_model="allenai/longformer-base-4096", # specify the use of a different LLM here. 
                      calculate_probabilities=True)
topics, probs = topic_model.fit_transform(docs)
# topic_model.get_topic_info()

No sentence-transformers model found with name C:\Users\charl/.cache\torch\sentence_transformers\allenai_longformer-base-4096. Creating a new one with MEAN pooling.


In [31]:
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,10,-1_energy transition_climate change_geopolitic...,"[energy transition, climate change, geopolitic...","[SUIXI, CHINA - DECEMBER 30: An employee works..."
1,0,25,0_artificial intelligence_15 billion_fourth qu...,"[artificial intelligence, 15 billion, fourth q...",[Intel (INTC) stock fell more than 10% in prem...
2,1,19,1_transportation board_long term investment_ta...,"[transportation board, long term investment, t...",[After a portion of an Alaska Airlines Boeing ...
3,2,18,2_spokesperson_pollution_euro parking_london,"[spokesperson, pollution, euro parking, london...",[Hundreds of thousands of EU citizens were wro...
4,3,16,3_free cash flow_long term_share price_cash flow,"[free cash flow, long term, share price, cash ...",[ilbusca\n\nAll financial numbers in this arti...
5,4,16,4_nairobi_deposits_natural gas_infrastructure,"[nairobi, deposits, natural gas, infrastructur...",[Economy Nairobi collects more revenue than 30...


In [32]:
(topic_model.get_document_info(docs))

Unnamed: 0,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,"Bill Burns, director of the CIA, is expected t...",1,1_transportation board_long term investment_ta...,"[transportation board, long term investment, t...",[After a portion of an Alaska Airlines Boeing ...,transportation board - long term investment - ...,1.000000,False
1,"Palestinians flee Khan Younis, moving toward R...",0,0_artificial intelligence_15 billion_fourth qu...,"[artificial intelligence, 15 billion, fourth q...",[Intel (INTC) stock fell more than 10% in prem...,artificial intelligence - 15 billion - fourth ...,0.479184,False
2,Former President Donald Trump arrives for a ca...,0,0_artificial intelligence_15 billion_fourth qu...,"[artificial intelligence, 15 billion, fourth q...",[Intel (INTC) stock fell more than 10% in prem...,artificial intelligence - 15 billion - fourth ...,1.000000,False
3,"CNN —\n\nJanuary 26, 2024\n\nToday on CNN 10, ...",-1,-1_energy transition_climate change_geopolitic...,"[energy transition, climate change, geopolitic...","[SUIXI, CHINA - DECEMBER 30: An employee works...",energy transition - climate change - geopoliti...,0.455071,False
4,"The flags of China, U.S. and the Chinese Commu...",0,0_artificial intelligence_15 billion_fourth qu...,"[artificial intelligence, 15 billion, fourth q...",[Intel (INTC) stock fell more than 10% in prem...,artificial intelligence - 15 billion - fourth ...,1.000000,False
...,...,...,...,...,...,...,...,...
99,Intel (INTC) stock fell more than 10% in prema...,0,0_artificial intelligence_15 billion_fourth qu...,"[artificial intelligence, 15 billion, fourth q...",[Intel (INTC) stock fell more than 10% in prem...,artificial intelligence - 15 billion - fourth ...,1.000000,True
100,(Bloomberg) -- Intel Corp. tumbled in premarke...,0,0_artificial intelligence_15 billion_fourth qu...,"[artificial intelligence, 15 billion, fourth q...",[Intel (INTC) stock fell more than 10% in prem...,artificial intelligence - 15 billion - fourth ...,1.000000,True
101,"Over the last year, a big contributor to the m...",-1,-1_energy transition_climate change_geopolitic...,"[energy transition, climate change, geopolitic...","[SUIXI, CHINA - DECEMBER 30: An employee works...",energy transition - climate change - geopoliti...,0.387267,True
102,The Dow will surge 24% after the Fed's first r...,0,0_artificial intelligence_15 billion_fourth qu...,"[artificial intelligence, 15 billion, fourth q...",[Intel (INTC) stock fell more than 10% in prem...,artificial intelligence - 15 billion - fourth ...,0.520073,False


In [33]:
topic_model.get_topic(-1)

[('energy transition', 0.90015256),
 ('climate change', 0.9000958),
 ('geopolitical', 0.8944586),
 ('getty images', 0.89317113),
 ('reality labs', 0.89254904),
 ('central banks', 0.8910686),
 ('crisis', 0.89067936),
 ('commodities', 0.8900441),
 ('commodity', 0.8877039),
 ('economist', 0.8874501)]