In [99]:
import pandas as pd
from bertopic import BERTopic
import nltk
import re
import matplotlib.pyplot as plt
from mpmath.visualization import VisualizationMethods
from sklearn.feature_extraction.text import CountVectorizer

# Load the speeches; the text column is cast to str while loading so every
# document handed to the model is a string.
df = pd.read_csv('../data/wps_speeches.csv').astype({'only text': str})
docs = df['only text'].tolist()
years = df['year'].tolist()

# BERTopic model whose vectorizer uses scikit-learn's English stop-word list.
vectorizer_model = CountVectorizer(stop_words="english")
topic_model = BERTopic(vectorizer_model=vectorizer_model)

# Fit on the full corpus; fit_transform returns the topic assignments and the
# associated probabilities for the documents.
(topics, probs) = topic_model.fit_transform(docs)

In [27]:
# Summary table of the fitted topics: id, document count, name, top words and example documents.
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,819,-1_women_violence_sexual_security,"[women, violence, sexual, security, peace, con...","[ I would like to thank you, Mr. President, fo..."
1,0,273,0_women_gender_peace_resolution,"[women, gender, peace, resolution, peacekeepin...",[ I thank the Security Council for the invitat...
2,1,177,1_trafficking_human_persons_crime,"[trafficking, human, persons, crime, slavery, ...",[ Let me first thank the United Kingdom presid...
3,2,99,2_sexual_violence_conflict_united,"[sexual, violence, conflict, united, nations, ...",[ I would like to thank the United Kingdom lea...
4,3,57,3_asean_indonesia_women_malaysia,"[asean, indonesia, women, malaysia, violence, ...",[ I have the honour to speak on behalf of the ...
5,4,56,4_chile_civilians_protection_women,"[chile, civilians, protection, women, girls, c...",[ Chile thanks the Rwandan presidency for havi...
6,5,48,5_african_union_sahel_region,"[african, union, sahel, region, africa, joint,...","[ At the outset, I wish to congratulate the de..."
7,6,45,6_sexual_violence_european_union,"[sexual, violence, european, union, conflict, ...",[ I have the honour to speak on behalf of the ...
8,7,42,7_abuse_exploitation_peacekeeping_allegations,"[abuse, exploitation, peacekeeping, allegation...","[ Thank you, Mr. President, for the opportunit..."
9,8,37,8_women_peace_security_agenda,"[women, peace, security, agenda, womens, franc...",[ I thank the French presidency for putting th...


In [28]:
# Reduce the model to nr_topics=10 (author's note: down from 58 topics).
# The return value is ignored: the model itself is updated by the call.
topic_model.reduce_topics(docs, nr_topics=10)

# Re-read the per-document topic assignments from the updated model and
# display the new topic summary.
topics = topic_model.topics_
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,819,-1_women_violence_sexual_security,"[women, violence, sexual, security, peace, con...",[ We are grateful to the Secretary-General for...
1,0,1038,0_women_peace_security_resolution,"[women, peace, security, resolution, conflict,...",[ I am pleased to address the Security Council...
2,1,337,1_sexual_violence_conflict_united,"[sexual, violence, conflict, united, nations, ...","[ At the outset, I would like to thank you, Ma..."
3,2,177,2_trafficking_human_persons_crime,"[trafficking, human, persons, crime, victims, ...",[ Ijoin the previous speakers in commending yo...
4,3,77,3_peacebuilding_african_peace_women,"[peacebuilding, african, peace, women, securit...","[ Madam President, China welcomes the fact tha..."
5,4,57,4_women_ukraine_security_rights,"[women, ukraine, security, rights, peace, conf...",[ Allow me to thank the Secretary-General; Ms....
6,5,24,5_displaced_women_girls_displacement,"[displaced, women, girls, displacement, refuge...","[ I thank you, Madam President, for convening ..."
7,6,13,6_belgium_sexual_violence_resolution,"[belgium, sexual, violence, resolution, women,...",[ My delegation first would like to thank Nige...
8,7,13,7_syrian_syria_sexual_women,"[syrian, syria, sexual, women, violence, terro...",[ The Syrian Government has worked hard to str...
9,8,11,8_afghanistan_afghan_women_womens,"[afghanistan, afghan, women, womens, rights, t...","[ At the outset, I would like to thank the Uni..."


In [29]:
# Plot the topics of the current model with BERTopic's built-in topic visualization.
topic_model.visualize_topics()

In [45]:
# Topic representation per year (the `year` column supplies the timestamps),
# plotted for topic ids 0 through 9.
topics_over_time = topic_model.topics_over_time(docs, years)
topic_model.visualize_topics_over_time(topics_over_time, topics=list(range(10)))

In [None]:
## CHINA ONLY: does not work well (too little data? the model only ever finds two topics)

In [91]:
# Reload the speeches (text column cast to str) and keep only China's statements.
df = pd.read_csv('../data/wps_speeches.csv').astype({'only text': str})
china_df = df.loc[df['country/organization'] == 'China']
docs = china_df['only text'].tolist()
years = china_df['year'].tolist()

# Fresh model with the same configuration as the full-corpus run.
# NOTE: this rebinds topic_model, docs and years, so cells run afterwards
# operate on the China subset.
vectorizer_model = CountVectorizer(stop_words="english")
topic_model = BERTopic(vectorizer_model=vectorizer_model)

# Fit on the China subset. There is deliberately no reduce_topics() step and
# no visualize_topics() call here: both were disabled by the author.
(topics, probs) = topic_model.fit_transform(docs)

# This result is not displayed because it is not the last expression of the
# cell; the topic table is shown by a separate cell instead.
topic_model.get_topic_info()

# Topic representation per year for the China subset.
topics_over_time = topic_model.topics_over_time(docs, years)
topic_model.visualize_topics_over_time(topics_over_time)


In [83]:
# Topic summary of whichever model is currently bound to topic_model
# (the name is reassigned in several cells, so execution order matters).
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,13,-1_women_united_sexual_violence,"[women, united, sexual, violence, nations, sec...","[ I thank you, Madam President, for holding th..."
1,0,21,0_women_united_peace_resolution,"[women, united, peace, resolution, conflict, s...","[ Thank you, Madam President, for holding this..."
2,1,10,1_sexual_violence_united_conflict,"[sexual, violence, united, conflict, council, ...",[ I join others in thanking Margot Wallstrom a...


In [None]:
# WOMEN vs. MEN

In [107]:
# Whole-word pattern for sentences that mention men. The \b anchors keep
# "men"/"man" from matching inside "women"/"woman". The filter functions
# apply these patterns with re.IGNORECASE.
men_keywords = r"\b(men|man|boy|boys)\b"
# Whole-word pattern for sentences that mention women.
women_keywords = r"\b(women|woman|girl|girls)\b"

def filter_sentences(text, keyword_pattern):
    '''Return the sentences of ``text`` that contain a match for ``keyword_pattern``.

    The text is split with ``nltk.sent_tokenize``; each sentence is searched
    case-insensitively and matching sentences are returned in their original order.
    '''
    kept = []
    for candidate in nltk.sent_tokenize(text):
        if re.search(keyword_pattern, candidate, re.IGNORECASE):
            kept.append(candidate)
    return kept

def filter_sentences_only(text, keyword_pattern, exclude_pattern=None):
    '''Return sentences matching ``keyword_pattern`` but not ``exclude_pattern``.

    ``text`` is split with ``nltk.sent_tokenize`` and both patterns are applied
    case-insensitively (e.g. "women" occurs in the sentence but "men" does not).
    When ``exclude_pattern`` is falsy (``None`` or an empty string) no sentence
    is excluded.
    '''
    selected = []
    for candidate in nltk.sent_tokenize(text):
        # Must mention the keyword at all ...
        if not re.search(keyword_pattern, candidate, re.IGNORECASE):
            continue
        # ... and must not mention the excluded group, when one is given.
        if exclude_pattern and re.search(exclude_pattern, candidate, re.IGNORECASE):
            continue
        selected.append(candidate)
    return selected

def filter_text(df, keyword_pattern, text_column='only text'):
    '''Collect every sentence in a text column that matches ``keyword_pattern``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding one document per row in ``text_column``.
    keyword_pattern : str
        Regular expression passed on to ``filter_sentences``.
    text_column : str, default 'only text'
        Name of the column that contains the documents.

    Returns
    -------
    list of str
        Matching sentences of all documents, in row order.

    The input frame is left untouched: values are converted to ``str`` on a
    temporary Series instead of being written back into ``df``.
    '''
    texts = df[text_column].astype(str)
    return [
        sentence
        for text in texts
        for sentence in filter_sentences(text, keyword_pattern)
    ]

def filter_text_only(df, keyword_pattern, exclude_pattern=None, text_column='only text'):
    '''Collect sentences that match ``keyword_pattern`` but not ``exclude_pattern``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding one document per row in ``text_column``.
    keyword_pattern : str
        Regular expression a sentence must match.
    exclude_pattern : str, optional
        Regular expression a sentence must not match; passed on unchanged to
        ``filter_sentences_only``.
    text_column : str, default 'only text'
        Name of the column that contains the documents.

    Returns
    -------
    list of str
        Selected sentences of all documents, in row order.

    The input frame is left untouched: values are converted to ``str`` on a
    temporary Series instead of being written back into ``df``.
    '''
    texts = df[text_column].astype(str)
    return [
        sentence
        for text in texts
        for sentence in filter_sentences_only(text, keyword_pattern, exclude_pattern)
    ]

# Sentences that mention each group at all. Despite the df_ prefix, these are
# plain Python lists of sentence strings, not DataFrames.
df_men_sentences = filter_text(df, men_keywords)
df_women_sentences = filter_text(df, women_keywords)
# Sentences that mention one group and not the other.
df_men_sentences_only = filter_text_only(df, men_keywords, exclude_pattern=women_keywords)
df_women_sentences_only = filter_text_only(df, women_keywords, exclude_pattern=men_keywords)

In [113]:
def _percentage(part, total):
    '''Return ``part`` as a percentage of ``total``; 0.0 when ``total`` is 0.'''
    # Guard against an empty sentence list, which would otherwise raise
    # ZeroDivisionError.
    return (part / total) * 100 if total else 0.0


# Sentence counts for men: all mentions, mentions without women, and the
# remainder (men and women in the same sentence).
total_men_sentences = len(df_men_sentences)
men_without_women = len(df_men_sentences_only)
men_with_women = total_men_sentences - men_without_women

men_with_women_percentage = _percentage(men_with_women, total_men_sentences)
men_without_women_percentage = _percentage(men_without_women, total_men_sentences)

# The same breakdown for women.
total_women_sentences = len(df_women_sentences)
women_without_men = len(df_women_sentences_only)
women_with_men = total_women_sentences - women_without_men

women_with_men_percentage = _percentage(women_with_men, total_women_sentences)
women_without_men_percentage = _percentage(women_without_men, total_women_sentences)

print(f"Amount of sentences with occurrence of 'men, man, boy, or boys': {total_men_sentences}")
print(f"Amount of sentences with occurrence of 'women, woman, girl, or girls': {total_women_sentences}")
print(f"Amount of sentences with occurrence of 'men, man, boy, or boys' (no women mentioned): {men_without_women}")
print(f"Amount of sentences with occurrence of 'women, woman, girl, or girls' (no men mentioned): {women_without_men}")
print(f"Percentage of sentences with men and women mentioned together (of men): {men_with_women_percentage:.2f}%")
print(f"Percentage of sentences with men mentioned without women: {men_without_women_percentage:.2f}%")
print(f"Percentage of sentences with women and men mentioned together (of women): {women_with_men_percentage:.2f}%")
print(f"Percentage of sentences with women mentioned without men: {women_without_men_percentage:.2f}%")

Amount of sentences with occurrence of 'men, man, boy, or boys': 1428
Amount of sentences with occurrence of 'women, woman, girl, or girls': 31439
Amount of sentences with occurrence of 'men, man, boy, or boys' (no women mentioned): 276
Amount of sentences with occurrence of 'women, woman, girl, or girls' (no men mentioned): 30287
Percentage of sentences with men and women mentioned together (of men): 80.67%
Percentage of sentences with men mentioned without women: 19.33%
Percentage of sentences with women and men mentioned together (of women): 3.66%
Percentage of sentences with women mentioned without men: 96.34%


In [118]:
### TOPICS

[' Let me thank and congratulate the presidency of the Security Council for its initiative in holding this meeting on women and armed conflict.',
 'From rape and displacement to the denial of the right to food and health care, women bear more than their fair share of the suffering.',
 'For generations women have served as peace educators, both in their families and in their societies.',
 'We in the United Nations know at first hand the invaluable support women provide to our peacekeepers, by organizing committees, non- governmental organizations and church groups that help ease tensions and by persuading their menfolk to accept peace.',
 'Partly for that reason, we are making special efforts to recruit more women for our peacekeeping and peacemaking missions, and to make all our operations more aware of gender issues.',
 'In all these areas, we have seen examples of women playing an important role - not least on my own continent, Africa.',
 'And yet the potential contribution of women 

In [123]:
# New model for the sentence-level analysis; reuses the vectorizer_model
# created in an earlier cell.
topic_model = BERTopic(vectorizer_model=vectorizer_model)

## WOMEN ONLY
# Fit on the sentences that mention women but not men, then plot the topics.
topics, probs = topic_model.fit_transform(df_women_sentences_only)
topic_model.visualize_topics()

In [124]:
# Topic summary of the model fitted on the women-only sentences.
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,12148,-1_conflict_women_peacekeeping_participation,"[conflict, women, peacekeeping, participation,...",[The United States calls on all Member States ...
1,0,885,0_council_councils_security_resolutions,"[council, councils, security, resolutions, man...",[Let us not forget that the issue of counterin...
2,1,729,1_nations_united_unwomen_agencies,"[nations, united, unwomen, agencies, entity, c...","[In that context, my delegation welcomes the v..."
3,2,667,2_negotiations_table_processes_negotiating,"[negotiations, table, processes, negotiating, ...",[With women's participation at the negotiating...
4,3,607,3_excluded_underrepresented_negotiations_proce...,"[excluded, underrepresented, negotiations, pro...",[Women and women's civil society organizations...
...,...,...,...,...,...
306,305,10,305_conducts_courses_training_specialized,"[conducts, courses, training, specialized, bor...",[That would be in addition to cooperation betw...
307,306,10,306_kingdom_reconvening_presidency_debate,"[kingdom, reconvening, presidency, debate, ope...","[ At the outset, we would like to thank the Un..."
308,307,10,307_elected_candidates_municipal_seats,"[elected, candidates, municipal, seats, electi...",[In local elections in Mali at the end of last...
309,308,10,308_kimoon_ban_phumzile_mlambongcuka,"[kimoon, ban, phumzile, mlambongcuka, thank, e...",[We would like also to thank the Secretary-Gen...


In [125]:
# Reduce the women-only model to nr_topics=10 (author's note: down from 311 topics).
# The model is updated by the call; the return value is not used.
topic_model.reduce_topics(df_women_sentences_only, nr_topics=10)

In [126]:
# Re-read the per-sentence topic assignments after the reduction and show the new summary.
topics = topic_model.topics_
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,12148,-1_women_peace_conflict_security,"[women, peace, conflict, security, womens, vio...",[Resolution 1325 (2000) sought to promote a ge...
1,0,14595,0_women_peace_security_womens,"[women, peace, security, womens, conflict, res...","[As partners in development, peace and securit..."
2,1,2497,1_women_violence_girls_displaced,"[women, violence, girls, displaced, sexual, co...",[China condemns all acts of violence against w...
3,2,351,2_boko_girls_haram_yazidi,"[boko, girls, haram, yazidi, daesh, raped, ira...",[Groups such as Daesh and Boko Haram have merc...
4,3,325,3_trafficking_persons_children_especially,"[trafficking, persons, children, especially, c...",[Bangladesh remains committed to international...
5,4,156,4_cent_parliament_women_half,"[cent, parliament, women, half, quota, seats, ...","[Now, 66 per cent of Government officials are ..."
6,5,84,5_cent_agreements_signatories_peace,"[cent, agreements, signatories, peace, negotia...",[The fact that women have constituted less tha...
7,6,83,6_friends_behalf_delivered_statement,"[friends, behalf, delivered, statement, group,...",[Italy aligns itself with the statement delive...
8,7,36,7_georgia_georgias_regions_occupied,"[georgia, georgias, regions, occupied, abkhazi...",[While the Government of Georgia spares no eff...
9,8,12,8_blue_helmets_training_peacekeepers,"[blue, helmets, training, peacekeepers, number...",[In our View it is crucial to train Blue Helme...


In [127]:
# Plot the reduced women-only topics.
topic_model.visualize_topics()

In [120]:
# New model for the men-only sentences; reuses the vectorizer_model created
# in an earlier cell and rebinds topic_model.
topic_model = BERTopic(vectorizer_model=vectorizer_model)

## MEN ONLY
# Fit on the sentences that mention men but not women and show the topic summary.
topics, probs = topic_model.fit_transform(df_men_sentences_only)
topic_model.get_topic_info()

In [None]:
# Plot the topics of the men-only model.
topic_model.visualize_topics()

In [None]:
### Women are talked about much more than men, and with more variety.