In [19]:
####        \\      Load Relevant Libraries      //      ### 
from bertopic import BERTopic
import json
import pandas as pd
import pathlib
from umap import UMAP

In [20]:
print(pathlib.Path().absolute())

/Users/jenskoning/Documents/Python_projects/dsr_media_nlp/dsr_media_nlp/Topic_model


In [68]:
with open("all_news_items.json", "r", encoding="utf-8") as f:
    docs = json.load(f)
len(docs)

4492

In [69]:
pd_docs = pd.DataFrame(docs)

# List of words to check in the column names
words_to_remove = ['bitcoin', 'Bitcoin', 'crypto', 'cryptocurrency', 'museum', 'macclesfield', 'coins', 'fashion', 'ftx', 'dunhuang', 'heritage', 'archaeological', 'abortion', 'republican', 'hearings',
                   'liverpool', 'temple', 'art', 'cultural', 'nike', 'caves', 'murals','relics', 'exhibition', 'hotel', 'textile', 'ciff', 'migrants', 'meloni', 'gold',
                   'weather', 'meteorological', 'autograph', 'columbus', 'cashmere', 'xifeng', 'wuliangye', 'liquor', 'grottoes', 'ancient', 'furniture', 'tourism', 'tourists',
                   'cancer', 'blockchain', 'isis', 'covid', 'polyamide', 'wine', 'Deodorants']

# Find columns containing the specified words and remove them
pd_docs = pd_docs[~pd_docs['title'].str.lower().str.contains('|'.join(words_to_remove))]
len(pd_docs)

3487

In [70]:
# remove NA
pd_docs = pd_docs[pd_docs['summary'].notna()]

In [71]:
#### Load embedding model ####
from umap import UMAP

topic_model = BERTopic(embedding_model="all-MiniLM-L6-v2")


# Reduce influence of stop words / Improves the representation of topics
from sklearn.feature_extraction.text import CountVectorizer

# Set UMAP seed 
#umap_model = UMAP(random_state=88) (Huawei first)
#umap_model = UMAP(random_state=33) #(b3w and bri together first, also some huawei and nato/russia defence war)
# umap_model = UMAP(random_state=57) (not too interesting)
# umap_model = UMAP(random_state=250) (quite intersting mix of asean, huawei, nato/russia and some g7/b3w)
umap_model = UMAP(random_state=99) #(ONE OF THE BEST mixes so far)

# Define vectorizer model
vectorizer_model = CountVectorizer(stop_words="english")

# Create Topic Model
topic_model = BERTopic(
    vectorizer_model=vectorizer_model,
                       umap_model=umap_model)

In [72]:
topics, probs = topic_model.fit_transform(pd_docs['summary'])

In [73]:
topic_model.get_topic_info().head(20)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,885,-1_terrorist_china_new_global,"[terrorist, china, new, global, security, coun...","[By Djoomart Otorbaev (China Daily) 09:24, Sep..."
1,0,186,0_huawei_data_internet_chinese,"[huawei, data, internet, chinese, 5g, digital,...",[A newly released French report provides a com...
2,1,123,1_cultural_festival_chinese_art,"[cultural, festival, chinese, art, film, cultu...","[BEIJING, Aug. 21, 2023 /PRNewswire/ -- Prapha..."
3,2,84,2_g7_b3w_infrastructure_bri,"[g7, b3w, infrastructure, bri, initiative, pla...","[CARBIS BAY, England – The Group of Seven rich..."
4,3,77,3_ap_biden_g7_said,"[ap, biden, g7, said, countries, developing, p...",[By Associated Press \n2022/06/29 02:35 \n \n\...
5,4,61,4_kazakhstan_cooperation_sides_relations,"[kazakhstan, cooperation, sides, relations, bi...","[(Xinhua) 08:34, June 08, 2022 NUR-SULTAN, Jun..."
6,5,55,5_bri_belt_road_cooperation,"[bri, belt, road, cooperation, countries, init...","[BEIJING\n, \nSept. 7, 2023\n /PRNewswire/ -- ..."
7,6,52,6_market_insurance_report_research,"[market, insurance, report, research, analysis...","[The ""North America Deodorants Market - Growth..."
8,7,51,7_asean_chinaasean_trade_rcep,"[asean, chinaasean, trade, rcep, china, region...",[-- The 17th China-ASEAN Expo and China-ASEAN ...
9,8,51,8_uyghur_rights_human_xinjiang,"[uyghur, rights, human, xinjiang, central, asi...",[Xinhua | Updated: 2023-05-19 12:51 \n \n \n \...


In [74]:
# Attempt merging residual topics 1
topics_to_merge = [[-1,1]]
merge_topics = topic_model.merge_topics(pd_docs['summary'], topics_to_merge)

In [75]:
topic_model.get_topic_info().head(20)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,1008,-1_china_terrorist_new_chinese,"[china, terrorist, new, chinese, global, said,...",[By Djoomart Otorbaev | China Daily | Updated:...
1,0,186,0_huawei_data_internet_chinese,"[huawei, data, internet, chinese, 5g, digital,...",[A newly released French report provides a com...
2,1,84,1_g7_b3w_infrastructure_bri,"[g7, b3w, infrastructure, bri, initiative, pla...","[CARBIS BAY, England – The Group of Seven rich..."
3,2,77,2_ap_biden_g7_said,"[ap, biden, g7, said, countries, developing, p...",[By Associated Press \n2022/06/29 02:35 \n \n\...
4,3,61,3_kazakhstan_cooperation_sides_relations,"[kazakhstan, cooperation, sides, relations, bi...","[(Xinhua) 08:34, June 08, 2022 NUR-SULTAN, Jun..."
5,4,55,4_bri_belt_road_cooperation,"[bri, belt, road, cooperation, countries, init...","[BEIJING\n, \nSept. 8, 2023\n /PRNewswire/ -- ..."
6,5,52,5_market_insurance_report_research,"[market, insurance, report, research, analysis...","[The ""CBD Top Brands & Pricing Report: the UK ..."
7,6,51,6_uyghur_rights_human_xinjiang,"[uyghur, rights, human, xinjiang, central, asi...",[Xinhua | Updated: 2023-05-19 12:51 \n \n \n \...
8,7,51,7_asean_chinaasean_trade_rcep,"[asean, chinaasean, trade, rcep, china, region...",[-- The 17th China-ASEAN Expo and China-ASEAN ...
9,8,47,8_g7_biden_countries_president,"[g7, biden, countries, president, developing, ...",[Group of Seven leaders Sunday pledged to rais...


In [76]:
# Attempt merging residual topics 2
topics_to_merge = [[-1,5]]
merge_topics = topic_model.merge_topics(pd_docs['summary'], topics_to_merge)

In [77]:
topic_model.get_topic_info().head(20)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,1060,-1_china_market_new_terrorist,"[china, market, new, terrorist, global, said, ...","[New York, NY, April 10, 2021 (GLOBE NEWSWIRE)..."
1,0,186,0_huawei_data_internet_chinese,"[huawei, data, internet, chinese, 5g, digital,...",[A newly released French report provides a com...
2,1,84,1_g7_b3w_infrastructure_bri,"[g7, b3w, infrastructure, bri, initiative, pla...","[CARBIS BAY, England – The Group of Seven rich..."
3,2,77,2_ap_biden_g7_said,"[ap, biden, g7, said, countries, developing, p...",[By Associated Press \n2022/06/29 02:35 \n \n\...
4,3,61,3_kazakhstan_cooperation_sides_relations,"[kazakhstan, cooperation, sides, relations, bi...","[(Xinhua) 08:34, June 08, 2022 NUR-SULTAN, Jun..."
5,4,55,4_bri_belt_road_cooperation,"[bri, belt, road, cooperation, countries, init...","[BEIJING\n, \nSept. 8, 2023\n /PRNewswire/ -- ..."
6,5,51,5_uyghur_rights_human_xinjiang,"[uyghur, rights, human, xinjiang, central, asi...",[Xinhua | Updated: 2023-05-19 12:51 \n \n \n \...
7,6,51,6_asean_chinaasean_trade_rcep,"[asean, chinaasean, trade, rcep, china, region...",[-- The 17th China-ASEAN Expo and China-ASEAN ...
8,7,47,7_g7_biden_countries_president,"[g7, biden, countries, president, developing, ...",[Group of Seven leaders Sunday pledged to rais...
9,8,43,8_bitcoin_currency_yuan_money,"[bitcoin, currency, yuan, money, bank, amla, c...",['Bond King' Jeffrey Gundlach: Stock Market Wi...


In [78]:
# Attempt merging residual topics 3
topics_to_merge = [[3,4]]
merge_topics = topic_model.merge_topics(pd_docs['summary'], topics_to_merge)

In [79]:
topic_model.get_topic_info().head(20)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,1060,-1_china_market_new_global,"[china, market, new, global, terrorist, said, ...","[New York, NY, April 10, 2021 (GLOBE NEWSWIRE)..."
1,0,186,0_huawei_data_internet_chinese,"[huawei, data, internet, chinese, 5g, digital,...",[A newly released French report provides a com...
2,1,116,1_kazakhstan_cooperation_bri_road,"[kazakhstan, cooperation, bri, road, belt, chi...","[BEIJING\n, \nSept. 8, 2023\n /PRNewswire/ -- ..."
3,2,84,2_g7_b3w_infrastructure_bri,"[g7, b3w, infrastructure, bri, initiative, pla...","[CARBIS BAY, England – The Group of Seven rich..."
4,3,77,3_ap_biden_g7_said,"[ap, biden, g7, said, countries, developing, p...",[By Associated Press \n2022/06/29 02:35 \n \n\...
5,4,51,4_uyghur_rights_human_xinjiang,"[uyghur, rights, human, xinjiang, central, asi...",[Xinhua | Updated: 2023-05-19 12:51 \n \n \n \...
6,5,51,5_asean_chinaasean_trade_rcep,"[asean, chinaasean, trade, rcep, china, region...",[-- The 17th China-ASEAN Expo and China-ASEAN ...
7,6,47,6_g7_biden_countries_president,"[g7, biden, countries, president, developing, ...",[Group of Seven leaders Sunday pledged to rais...
8,7,43,7_bitcoin_currency_yuan_money,"[bitcoin, currency, yuan, money, bank, amla, c...",['Bond King' Jeffrey Gundlach: Stock Market Wi...
9,8,42,8_arab_chinaarab_expo_states,"[arab, chinaarab, expo, states, ningxia, yinch...","[(Xinhua) 08:20, August 20, 2021 \n -- Chines..."


In [80]:
# Attempt merging residual topics 4
topics_to_merge = [[-1,11]]
merge_topics = topic_model.merge_topics(pd_docs['summary'], topics_to_merge)

In [81]:
topic_model.get_topic_info().head(20)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,1098,-1_market_china_new_global,"[market, china, new, global, said, terrorist, ...",[Former Conga VP of Sales Ron Gupta tapped to ...
1,0,186,0_huawei_data_internet_chinese,"[huawei, data, internet, chinese, digital, 5g,...",[A newly released French report provides a com...
2,1,116,1_kazakhstan_cooperation_bri_road,"[kazakhstan, cooperation, bri, road, belt, chi...","[BEIJING\n, \nSept. 8, 2023\n /PRNewswire/ -- ..."
3,2,84,2_g7_b3w_infrastructure_bri,"[g7, b3w, infrastructure, bri, initiative, pla...","[CARBIS BAY, England – The Group of Seven rich..."
4,3,77,3_ap_biden_g7_said,"[ap, biden, g7, said, countries, developing, p...",[By Associated Press \n2022/06/29 02:35 \n \n\...
5,4,51,4_uyghur_rights_human_xinjiang,"[uyghur, rights, human, xinjiang, central, asi...",[Xinhua | Updated: 2023-05-19 12:51 \n \n \n \...
6,5,51,5_asean_chinaasean_trade_rcep,"[asean, chinaasean, trade, rcep, china, region...",[-- The 17th China-ASEAN Expo and China-ASEAN ...
7,6,47,6_g7_biden_countries_president,"[g7, biden, countries, president, developing, ...",[Group of Seven leaders Sunday pledged to rais...
8,7,43,7_bitcoin_currency_yuan_money,"[bitcoin, currency, yuan, money, bank, amla, c...",['Bond King' Jeffrey Gundlach: Stock Market Wi...
9,8,42,8_arab_chinaarab_expo_states,"[arab, chinaarab, expo, states, ningxia, yinch...","[(Xinhua) 08:20, August 20, 2021 \n -- Chines..."


In [82]:
# Attempt merging residual topics 5
topics_to_merge = [[3,6]]
merge_topics = topic_model.merge_topics(pd_docs['summary'], topics_to_merge)

In [83]:
topic_model.get_topic_info().head(20)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,1098,-1_market_china_new_global,"[market, china, new, global, said, terrorist, ...",[Former Conga VP of Sales Ron Gupta tapped to ...
1,0,186,0_huawei_data_internet_chinese,"[huawei, data, internet, chinese, digital, 5g,...",[A newly released French report provides a com...
2,1,124,1_ap_biden_g7_countries,"[ap, biden, g7, countries, developing, said, p...",[Group of Seven leaders on Sunday pledged to r...
3,2,116,2_kazakhstan_cooperation_bri_road,"[kazakhstan, cooperation, bri, road, belt, chi...","[BEIJING\n, \nSept. 7, 2023\n /PRNewswire/ -- ..."
4,3,84,3_g7_b3w_infrastructure_bri,"[g7, b3w, infrastructure, bri, initiative, pla...","[CARBIS BAY, England – The Group of Seven rich..."
5,4,51,4_asean_chinaasean_trade_rcep,"[asean, chinaasean, trade, rcep, china, region...",[-- The 17th China-ASEAN Expo and China-ASEAN ...
6,5,51,5_uyghur_rights_human_xinjiang,"[uyghur, rights, human, xinjiang, central, asi...",[Xinhua | Updated: 2023-05-19 12:51 \n \n \n \...
7,6,43,6_bitcoin_currency_yuan_money,"[bitcoin, currency, yuan, money, bank, amla, c...",['Bond King' Jeffrey Gundlach: Stock Market Wi...
8,7,42,7_arab_chinaarab_expo_states,"[arab, chinaarab, expo, states, ningxia, yinch...","[(Xinhua) 08:20, August 20, 2021 \n -- Chines..."
9,8,41,8_central_asian_chinacentral_asia,"[central, asian, chinacentral, asia, summit, c...","[XI'AN, May 19 (Xinhua) -- Chinese President X..."


In [84]:
# Attempt merging residual topics 6
topics_to_merge = [[-1,6]]
merge_topics = topic_model.merge_topics(pd_docs['summary'], topics_to_merge)

topic_model.get_topic_info().head(15)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,1141,-1_market_china_new_said,"[market, china, new, said, global, chinese, te...",[Former Conga VP of Sales Ron Gupta tapped to ...
1,0,186,0_huawei_data_internet_chinese,"[huawei, data, internet, chinese, digital, 5g,...",[A newly released French report provides a com...
2,1,124,1_ap_biden_g7_countries,"[ap, biden, g7, countries, said, developing, p...",[Group of Seven leaders on Sunday pledged to r...
3,2,116,2_kazakhstan_cooperation_bri_road,"[kazakhstan, cooperation, bri, road, belt, chi...","[BEIJING\n, \nSept. 8, 2023\n /PRNewswire/ -- ..."
4,3,84,3_g7_b3w_infrastructure_bri,"[g7, b3w, infrastructure, bri, initiative, pla...","[CARBIS BAY, England – The Group of Seven rich..."
5,4,51,4_uyghur_rights_human_xinjiang,"[uyghur, rights, human, xinjiang, central, asi...",[Xinhua | Updated: 2023-05-19 12:51 \n \n \n \...
6,5,51,5_asean_chinaasean_trade_rcep,"[asean, chinaasean, trade, rcep, china, region...",[-- The 17th China-ASEAN Expo and China-ASEAN ...
7,6,42,6_arab_chinaarab_expo_states,"[arab, chinaarab, expo, states, ningxia, yinch...","[(Xinhua) 08:20, August 20, 2021 \n -- Chines..."
8,7,41,7_central_asian_chinacentral_asia,"[central, asian, chinacentral, asia, summit, c...","[XI'AN, May 19 (Xinhua) -- Chinese President X..."
9,8,40,8_big_data_teaching_terminology,"[big, data, teaching, terminology, education, ...","[June 24, 2021 5:59 AM EDT News and research b..."


In [85]:
# Attempt merging residual topics 6
topics_to_merge = [[2,13]]
merge_topics = topic_model.merge_topics(pd_docs['summary'], topics_to_merge)

topic_model.get_topic_info().head(17)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,1141,-1_market_china_new_said,"[market, china, new, said, global, chinese, te...",[Former Conga VP of Sales Ron Gupta tapped to ...
1,0,186,0_huawei_data_chinese_internet,"[huawei, data, chinese, internet, digital, 5g,...",[A newly released French report provides a com...
2,1,149,1_bri_cooperation_kazakhstan_road,"[bri, cooperation, kazakhstan, road, belt, cou...","[BEIJING\n, \nSept. 8, 2023\n /PRNewswire/ -- ..."
3,2,124,2_ap_biden_g7_countries,"[ap, biden, g7, countries, said, developing, p...",[Group of Seven leaders on Sunday pledged to r...
4,3,84,3_g7_b3w_infrastructure_bri,"[g7, b3w, infrastructure, bri, initiative, pla...","[CARBIS BAY, England – The Group of Seven rich..."
5,4,51,4_uyghur_rights_human_xinjiang,"[uyghur, rights, human, xinjiang, central, asi...",[Xinhua | Updated: 2023-05-19 12:51 \n \n \n \...
6,5,51,5_asean_chinaasean_trade_rcep,"[asean, chinaasean, trade, rcep, china, region...",[-- The 17th China-ASEAN Expo and China-ASEAN ...
7,6,42,6_arab_chinaarab_expo_states,"[arab, chinaarab, expo, states, ningxia, yinch...","[(Xinhua) 08:20, August 20, 2021 \n -- Chines..."
8,7,41,7_central_asian_chinacentral_asia,"[central, asian, chinacentral, asia, summit, c...","[XI'AN, May 19 (Xinhua) -- Chinese President X..."
9,8,40,8_big_data_teaching_terminology,"[big, data, teaching, terminology, education, ...","[June 24, 2021 5:59 AM EDT News and research b..."


In [94]:
# Attempt merging residual topics 6
topics_to_merge = [[-1,14]]
merge_topics = topic_model.merge_topics(pd_docs['summary'], topics_to_merge)

In [96]:
topic_model.get_topic_info().head(16)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,1172,-1_china_market_new_said,"[china, market, new, said, global, chinese, te...",[Bank of England governor Andrew Bailey has si...
1,0,186,0_huawei_data_chinese_internet,"[huawei, data, chinese, internet, digital, 5g,...",[A newly released French report provides a com...
2,1,149,1_bri_cooperation_kazakhstan_road,"[bri, cooperation, kazakhstan, road, belt, cou...","[BEIJING\n, \nSept. 8, 2023\n /PRNewswire/ -- ..."
3,2,124,2_ap_biden_g7_countries,"[ap, biden, g7, countries, said, developing, p...",[Group of Seven leaders on Sunday pledged to r...
4,3,84,3_g7_b3w_infrastructure_bri,"[g7, b3w, infrastructure, bri, initiative, pla...","[CARBIS BAY, England – The Group of Seven rich..."
5,4,51,4_asean_chinaasean_trade_rcep,"[asean, chinaasean, trade, rcep, china, region...",[-- The 17th China-ASEAN Expo and China-ASEAN ...
6,5,51,5_uyghur_rights_human_xinjiang,"[uyghur, rights, human, xinjiang, central, asi...",[Xinhua | Updated: 2023-05-19 12:51 \n \n \n \...
7,6,42,6_arab_chinaarab_expo_states,"[arab, chinaarab, expo, states, ningxia, yinch...","[(Xinhua) 08:20, August 20, 2021 \n -- Chines..."
8,7,41,7_central_asian_chinacentral_asia,"[central, asian, chinacentral, asia, summit, c...","[XI'AN, May 19 (Xinhua) -- Chinese President X..."
9,8,40,8_big_data_teaching_terminology,"[big, data, teaching, terminology, education, ...","[June 24, 2021 5:59 AM EDT News and research b..."


In [98]:
topic_model.get_topic_info().head(40)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,1172,-1_china_market_new_said,"[china, market, new, said, global, chinese, te...",[Bank of England governor Andrew Bailey has si...
1,0,186,0_huawei_data_chinese_internet,"[huawei, data, chinese, internet, digital, 5g,...",[A newly released French report provides a com...
2,1,149,1_bri_cooperation_kazakhstan_road,"[bri, cooperation, kazakhstan, road, belt, cou...","[BEIJING\n, \nSept. 8, 2023\n /PRNewswire/ -- ..."
3,2,124,2_ap_biden_g7_countries,"[ap, biden, g7, countries, said, developing, p...",[Group of Seven leaders on Sunday pledged to r...
4,3,84,3_g7_b3w_infrastructure_bri,"[g7, b3w, infrastructure, bri, initiative, pla...","[CARBIS BAY, England – The Group of Seven rich..."
5,4,51,4_asean_chinaasean_trade_rcep,"[asean, chinaasean, trade, rcep, china, region...",[-- The 17th China-ASEAN Expo and China-ASEAN ...
6,5,51,5_uyghur_rights_human_xinjiang,"[uyghur, rights, human, xinjiang, central, asi...",[Xinhua | Updated: 2023-05-19 12:51 \n \n \n \...
7,6,42,6_arab_chinaarab_expo_states,"[arab, chinaarab, expo, states, ningxia, yinch...","[(Xinhua) 08:20, August 20, 2021 \n -- Chines..."
8,7,41,7_central_asian_chinacentral_asia,"[central, asian, chinacentral, asia, summit, c...","[XI'AN, May 19 (Xinhua) -- Chinese President X..."
9,8,40,8_big_data_teaching_terminology,"[big, data, teaching, terminology, education, ...","[June 24, 2021 5:59 AM EDT News and research b..."


In [99]:
# Labelling the topics
topic_labels_dict = {
    0: "Chinese Digital Alternative",
    1: "BRI Digital Cooperation",
    2: "B3W as Alternative to BRI",
    3: "B3W as Alternative to BRI",
    4: "China-ASEAN digital trade & RCEP",
    5: "Technology and HR Abuses in Xinjiang",
    6: "Ningxia Expo and China-Arab Cooperation",
    7: "Central Asia",
    8: "Technology Education",
    9: "BRI Dept Trap",
    10: "Strategic Competition",
    11: "CIIE Expo and Chinese Tech Export",
    12: "Xi Jinping and Chinese Communist Party",
    13: "GCC and China-Gulf Cooperation",
    14: "IOT and Digital Services"
}
topic_model.set_topic_labels(topic_labels_dict)

In [104]:
topic_model.get_topics()

{-1: [('china', 0.005636138282059337),
  ('market', 0.005477659467371687),
  ('new', 0.005370068715176352),
  ('said', 0.005230468733337455),
  ('global', 0.005112543843322383),
  ('chinese', 0.004700165478901688),
  ('terrorist', 0.004545543196846855),
  ('digital', 0.004455763841041751),
  ('2022', 0.0044450775989646404),
  ('government', 0.004316149889796274)],
 0: [('huawei', 0.019247379466945647),
  ('data', 0.013822909842716866),
  ('chinese', 0.012685817839151296),
  ('internet', 0.012636720371879372),
  ('digital', 0.011894386716473868),
  ('5g', 0.011746730773882302),
  ('technology', 0.010224562854786188),
  ('companies', 0.009242370634269729),
  ('china', 0.008512841120723386),
  ('cable', 0.008432176424509354)],
 1: [('bri', 0.024540100561874827),
  ('cooperation', 0.021225190240439885),
  ('kazakhstan', 0.018347483808922194),
  ('road', 0.017793561731519913),
  ('belt', 0.01757914325222168),
  ('countries', 0.013971816684847739),
  ('china', 0.013505058325487956),
  ('init

In [105]:
# labelling data set and exporting to CSV
pd_docs['topic'] = topic_model.topics_
pd_docs['topic_label'] = pd_docs.topic.map(topic_labels_dict)

# create csv file
pd_docs.to_csv('dsr_articles_berttopiced.csv', index=False)


In [128]:
# Define the custom names of the topics
topic_names = ["Chinese Digital Alternative", 
              "BRI Digital Cooperation", 
              "B3W as Alternative to BRI", 
              "B3W as Alternative to BRI", 
              "China-ASEAN digital trade & RCEP", 
              "Technology and HR Abuses in Xinjiang", 
              "Ningxia Expo and China-Arab Cooperation", 
              "Central Asia"]

In [136]:
# Visualize the bar chart with the specified topic names
topic_model_barchart = topic_model.visualize_barchart(
    topics=list(range(0, 8)),
    topics=topic_names,
    title='Most Common Topics (n = 975)' 
)

SyntaxError: keyword argument repeated: topics (3354089486.py, line 2)

In [134]:
topic_model_barchart = topic_model.visualize_barchart(
    topics=list(range(0, 8)),
    title='Most Common Topics (n = 975)' 
)

In [135]:
topic_model_barchart

In [116]:
topic_model.visualize_topics_over_time(topics_over_time, topics=[0, 1, 2, 3, 4, 5, 6, 7])


NameError: name 'topics_over_time' is not defined

In [119]:
top_15_topics = topic_model.get_topic_info().head(16)

In [120]:
top_15 = pd.DataFrame(top_15_topics)
top_15 = top_15.drop(top_15.index[0])

In [125]:
# create csv file
top_15.to_csv('top_15_topics.csv', index=False)
