# Natural Language Processing analysis of Mastodon's servers' rules

In [45]:
import pandas as pd
import numpy as np

from utils.data_preprocesser import *

## Data cleaning and preprocessing

In [46]:
df_mastodon1, df_mastodon2, df_reddit = load_instances_data(cols_to_keep_mastodon=["domain", "description", "active_month", 
                                                                      "languages", "rules", "total_users", 
                                                                      "total_posts"],
                                               cols_to_keep_reddit=["name", "title", "description", 
                                                                    "language", "subscribers", "active_user_count", 
                                                                    "rules"])
df_mastodon1 = DataCleaner(df_mastodon1).clean()
df_mastodon2 = DataCleaner(df_mastodon2).clean()

# df_reddit = DataCleaner(df_reddit)._clean_subscribers()
# df_reddit = DataCleaner(df_reddit)._clean_active_user_count()
# df_reddit = DataCleaner(df_reddit)._clean_description()


Cleaning column: description
Cleaning column: languages
Cleaning column: rules
Cleaning column: total_users
Cleaning column: total_posts
Cleaning column: description
Cleaning column: languages
Cleaning column: rules
Cleaning column: total_users
Cleaning column: total_posts


In [47]:
df_mastodon1.head(5)

Unnamed: 0,domain,description,active_month,languages,rules,total_users,total_posts
0,mastodon.social,The original server operated by the Mastodon g...,327503,[en],"[{'id': '1', 'text': 'Sexually explicit or vio...",2711879,129725014
1,mstdn.social,A general-purpose Mastodon server with a 500 c...,14546,[en],"[{'id': '2', 'text': 'Sexually explicit or vio...",260323,19904208
2,infosec.exchange,A Mastodon instance for info/cyber security-mi...,13319,[en],"[{'id': '1', 'text': 'Do unto others as you th...",75725,4296664
3,mas.to,"Hello! mas.to is a fast, up-to-date and fun Ma...",11889,[en],"[{'id': '9', 'text': 'No discrimination, inclu...",183608,10834206
4,mastodon.world,Generic Mastodon server for anyone to use.,9472,[en],"[{'id': '16', 'text': 'No illegal content, e.g...",191710,7158644


In [48]:
df_mastodon2.head(5)

Unnamed: 0,domain,description,active_month,languages,rules,total_users,total_posts
0,mastodon.social,The original server operated by the Mastodon g...,327503,[en],"[{'id': '1', 'text': 'Sexually explicit or vio...",2711879,129725014
1,mstdn.social,A general-purpose Mastodon server with a 500 c...,14546,[en],"[{'id': '2', 'text': 'Sexually explicit or vio...",260323,19904208
2,infosec.exchange,A Mastodon instance for info/cyber security-mi...,13319,[en],"[{'id': '1', 'text': 'Do unto others as you th...",75725,4296664
3,mas.to,"Hello! mas.to is a fast, up-to-date and fun Ma...",11889,[en],"[{'id': '9', 'text': 'No discrimination, inclu...",183608,10834206
4,piaille.fr,Piaille.fr est un serveur mastodon public fran...,9221,[fr],"[{'id': '1', 'text': 'Conformément aux lois fr...",40869,3220626


In [49]:
df_reddit.head()

Unnamed: 0,name,title,description,language,subscribers,active_user_count,rules
0,Home,Home,,en,307843,51,
1,AskReddit,Ask Reddit...,r/AskReddit is the place to ask and answer tho...,es,54701432,9986,Rule 1 - Questions must be clear and direct an...
2,NoStupidQuestions,No such thing as stupid questions,Ask away!\n\nDisclaimer: This is an anonymous ...,en,6021303,4872,Top level comments must contain a genuine huma...
3,BaldursGate3,Baldur's Gate 3,"A community all about Baldur's Gate III, the r...",en,3118378,1384,Be civil to one another.; Respect the opinions...
4,facepalm,now double verified,/r/facepalm - please sir can I have some more?,en,8145733,2345,"No uncivil, bigoted, misogynist, misandrist, r..."


In [50]:
datasets = [df_mastodon1, df_mastodon2, df_reddit]

In [51]:
from utils.rules_extractor import *
from utils.utils import *

In [52]:
df_mastodon1_en = df_mastodon1[df_mastodon1['languages'].apply(lambda x: x == ['en'] if isinstance(x, list) else False)]
df_mastodon2_en = df_mastodon2[df_mastodon2['languages'].apply(lambda x: x == ['en'] if isinstance(x, list) else False)]
df_reddit_en =  df_reddit[df_reddit['language'].apply(lambda x: x == 'en' if isinstance(x, str) else False)]

display(compare_languages(df_mastodon1, df_mastodon2, df_reddit))

Unnamed: 0,mastodon1,mastodon2,reddit
0,'en' : 234,'en' : 94,'en' : 99
1,'de' : 33,'de' : 16,'es' : 1
2,'fr' : 13,'fr' : 7,
3,'es' : 9,'ko' : 3,
4,'it' : 5,'es' : 3,
5,'ko' : 5,'nl' : 2,
6,'nl' : 4,'it' : 2,
7,'pt-BR' : 3,'he' : 1,
8,'pl' : 2,'gd' : 1,
9,'et' : 2,'pl' : 1,


In [53]:
df_mastodon1_en

Unnamed: 0,domain,description,active_month,languages,rules,total_users,total_posts
0,mastodon.social,The original server operated by the Mastodon g...,327503,[en],"[{'id': '1', 'text': 'Sexually explicit or vio...",2711879,129725014
1,mstdn.social,A general-purpose Mastodon server with a 500 c...,14546,[en],"[{'id': '2', 'text': 'Sexually explicit or vio...",260323,19904208
2,infosec.exchange,A Mastodon instance for info/cyber security-mi...,13319,[en],"[{'id': '1', 'text': 'Do unto others as you th...",75725,4296664
3,mas.to,"Hello! mas.to is a fast, up-to-date and fun Ma...",11889,[en],"[{'id': '9', 'text': 'No discrimination, inclu...",183608,10834206
4,mastodon.world,Generic Mastodon server for anyone to use.,9472,[en],"[{'id': '16', 'text': 'No illegal content, e.g...",191710,7158644
...,...,...,...,...,...,...,...
321,darticulate.com,"A Mastodon instance for Dartisans, run by myse...",10,[en],"[{'id': '2', 'text': 'This server is intended ...",43,22
322,dariox.club,A safe space for tech-centered LGBTQI+ folks a...,9,[en],"[{'id': '2', 'text': 'No illegal content', 'hi...",150,3872
326,kjas.no,Server for brukere med litt ekstra interesse f...,3,[en],"[{'id': '1', 'text': 'Vær ålreit med hverandre...",24,284
327,jaxbeach.social,Local server based in Jacksonville Beach Flori...,3,[en],"[{'id': '1', 'text': 'No Illegal content', 'hi...",17,37


### First Mastodon dataset

In [54]:
rules = df_mastodon1_en[['rules']].explode('rules').reset_index(drop=False)
rules = rules.rename(columns={"index": "server_id"})
rules = rules.dropna()
rules = pd.concat([rules.drop(['rules'], axis=1), rules['rules'].apply(pd.Series)], axis=1)
rules = rules.rename(columns={'id': "rule_id"})
rules

Unnamed: 0,server_id,rule_id,text,hint
0,0,1,Sexually explicit or violent media must be mar...,This includes content that is particularly pro...
1,0,2,"No racism, sexism, homophobia, transphobia, ab...",Transphobic behavior such as intentional misge...
2,0,3,No incitement of violence or promotion of viol...,Calling for people or groups to be assassinate...
3,0,4,"No harassment, block evasion, dogpiling, or do...",Repeat attempts to communicate with users who ...
4,0,7,Do not share information widely-known to be fa...,False and misleading information and links fro...
...,...,...,...,...
1912,329,77,③ 同意呜呜站的社区规则？/ Do you agree with the community...,
1913,329,78,🌸 缺项或错误将被拒绝 ... / Incomplete or nonsensical re...,
1914,329,79,"详细社区规则见： / For detailed community rules, pleas...",
1915,329,80,https://wxw.moe/about,


In [55]:
# remove rules that are not written in english
df_english = rules[rules['text'].apply(is_english)].reset_index(drop=True)

non_english_rules_pourcentage = 100 * (rules.shape[0] - df_english.shape[0]) / rules.shape[0]

print(f" We removed {rules.shape[0] - df_english.shape[0]} of the {rules.shape[0]} rules ({non_english_rules_pourcentage:.0f}%) that were not detected to be in english.")

 We removed 499 of the 1912 rules (26%) that were not detected to be in english.


In [56]:
# Detect the language of the description among the supposedly engish servers
server_descr_english = df_mastodon1_en[df_mastodon1_en['description'].apply(is_english)].index
server_descr_not_english = df_mastodon1_en[~df_mastodon1_en['description'].apply(is_english)].index

# Count them
print(f"{len(server_descr_english)} servers have their description in english;")
print(f"{len(server_descr_not_english)} servers have their description in another language.")

# Display the rules of the servers that have their description in another language than english
suspicious_server_ids = [serv_id for serv_id in df_english.server_id.unique() if serv_id in server_descr_not_english]
df_english[df_english['server_id'].isin(suspicious_server_ids)]

188 servers have their description in english;
46 servers have their description in another language.


Unnamed: 0,server_id,rule_id,text,hint
215,48,12,Treat others with respect,"Avoid any form of insult, bullying, or discrim..."
216,48,15,Comply with applicable law,Do not publish any illegal content.
217,48,16,Avoid misinformation,Do not share false or misleading information.
218,48,19,Mark sensitive content (CW),Please mark any content that may contain commo...
219,48,20,You must be older than 16 years,"According to European law, you must be at leas..."
...,...,...,...,...
1408,329,76,② 简答 ACGN 的含义？/ What does ACGN stand for?,
1409,329,77,③ 同意呜呜站的社区规则？/ Do you agree with the community...,
1410,329,78,🌸 缺项或错误将被拒绝 ... / Incomplete or nonsensical re...,
1411,329,79,"详细社区规则见： / For detailed community rules, pleas...",


In [57]:
# We decide to remove 4 servers based on their rules language
manually_excluded = [52, 180, 230, 329] #manually inspected their rules and decide to remove the server since most of their rules contained words not in english.
df_english = df_english[~df_english['server_id'].isin(manually_excluded)]
df_english

Unnamed: 0,server_id,rule_id,text,hint
0,0,1,Sexually explicit or violent media must be mar...,This includes content that is particularly pro...
1,0,3,No incitement of violence or promotion of viol...,Calling for people or groups to be assassinate...
2,0,4,"No harassment, block evasion, dogpiling, or do...",Repeat attempts to communicate with users who ...
3,0,7,Do not share information widely-known to be fa...,False and misleading information and links fro...
4,0,1008,"Content created by others must be attributed, ...",Content created by others must clearly provide...
...,...,...,...,...
1397,322,21,This instance is under the jurisdiction of Wes...,
1398,327,2,No incitement of violence or promotion of viol...,
1399,327,3,"No harassment, block evasion, dogpiling, or do...",
1400,327,4,Do not share information widely-known to be fa...,


In [59]:
# Process te rules for NLP
processor = RulesProcessor(df_mastodon1_en)
rules_df = processor.extract_rules()
standardized_df = processor.standardize_rules()

# Compute strictness metrics TODO: define metrics and compute them
standardized_df["strict_rule"] = standardized_df["text"].apply(contains_strict_words)
standardized_df

Unnamed: 0,server_id,rule_id,text,hint,strict_rule
0,0,1,"[sexually, explicit, or, violent, media, must,...","[this, includes, content, that, is, particular...",False
1,0,2,"[no, racism, sexism, homophobia, transphobia, ...","[transphobic, behavior, such, as, intentional,...",True
2,0,3,"[no, incitement, of, violence, or, promotion, ...","[calling, for, people, or, groups, to, be, ass...",True
3,0,4,"[no, harassment, block, evasion, dogpiling, or...","[repeat, attempts, to, communicate, with, user...",True
4,0,7,"[do, not, share, information, widely, known, t...","[false, and, misleading, information, and, lin...",False
...,...,...,...,...,...
1912,329,77,"[do, you, agree, with, the, community, rules, ...",[],False
1913,329,78,"[incomplete, or, nonsensical, responses, will,...",[],False
1914,329,79,"[for, detailed, community, rules, please, see]",[],False
1915,329,80,"[https, wxw, moe, about]",[],False


In [61]:
# Keep only the rows from standardized_df that have a matching server_id and rule_id in df_english
standardized_df = standardized_df.merge(df_english[["server_id", "rule_id"]], on=["server_id", "rule_id"], how='inner')
standardized_df

Unnamed: 0,server_id,rule_id,text,hint,strict_rule
0,0,1,"[sexually, explicit, or, violent, media, must,...","[this, includes, content, that, is, particular...",False
1,0,3,"[no, incitement, of, violence, or, promotion, ...","[calling, for, people, or, groups, to, be, ass...",True
2,0,4,"[no, harassment, block, evasion, dogpiling, or...","[repeat, attempts, to, communicate, with, user...",True
3,0,7,"[do, not, share, information, widely, known, t...","[false, and, misleading, information, and, lin...",False
4,0,1008,"[content, created, by, others, must, be, attri...","[content, created, by, others, must, clearly, ...",False
...,...,...,...,...,...
1390,322,21,"[this, instance, is, under, the, jurisdiction,...",[],False
1391,327,2,"[no, incitement, of, violence, or, promotion, ...",[],True
1392,327,3,"[no, harassment, block, evasion, dogpiling, or...",[],True
1393,327,4,"[do, not, share, information, widely, known, t...",[],False


In [63]:
# Natural language processing
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

# Download stopwords if not already done
nltk.download('stopwords')

# Set of English stopwords
stop_words = set(stopwords.words('english')).union(set(stopwords.words('french'))).union(set(stopwords.words('german'))).union(set(stopwords.words('spanish')))

# Remove stop words from the tokenized lists
standardized_df["text"] = standardized_df["text"].apply(lambda words: [word for word in words if word not in stop_words] if isinstance(words, list) else words)
standardized_df["hint"] = standardized_df["hint"].apply(lambda words: [word for word in words if word not in stop_words] if isinstance(words, list) else words)

# Lemmatize tokens
# No need to lemmatize for berTOPIC
#standardized_df["text"] = standardized_df["text"].apply(lemmatize)
#standardized_df["hint"] = standardized_df["hint"].apply(lemmatize)

standardized_df

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/eglantinevialaneix/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,server_id,rule_id,text,hint,strict_rule
0,0,1,"[sexually, explicit, violent, media, must, mar...","[includes, content, particularly, provocative,...",False
1,0,3,"[incitement, violence, promotion, violent, ide...","[calling, people, groups, assassinated, murder...",True
2,0,4,"[harassment, block, evasion, dogpiling, doxxin...","[repeat, attempts, communicate, users, blocked...",True
3,0,7,"[share, information, widely, known, false, mis...","[false, misleading, information, links, low, q...",False
4,0,1008,"[content, created, others, must, attributed, u...","[content, created, others, must, clearly, prov...",False
...,...,...,...,...,...
1390,322,21,"[instance, jurisdiction, western, australia, p...",[],False
1391,327,2,"[incitement, violence, promotion, violent, ide...",[],True
1392,327,3,"[harassment, block, evasion, dogpiling, doxxin...",[],True
1393,327,4,"[share, information, widely, known, false, mis...",[],False


In [64]:
#tokenize each text and each hint
nlp_df = standardized_df.copy()

nlp_df["document"] = nlp_df.apply(create_document, axis=1)

vocab = nlp_df["document"].explode().unique()
nlp_df["document"].explode().value_counts()

document
content          396
must             174
accounts         140
violent          133
users            127
                ... 
contributors       1
disrespectful      1
file               1
famichiki          1
jaxbeach           1
Name: count, Length: 2726, dtype: int64

In [65]:
# TF-IDF scores
# Join the tokens back into strings
nlp_df['document_str'] = nlp_df['document'].apply(lambda x: ' '.join(x))

# Create and fit TF-IDF Vectorizer
tfidf_vect = TfidfVectorizer()
tfidf_matrix = tfidf_vect.fit_transform(nlp_df['document_str'])

# Get feature names (words)
feature_names = tfidf_vect.get_feature_names_out()

tfidf = pd.DataFrame(tfidf_matrix.toarray(), columns=feature_names)
print(f"TF-IDF matrix shape: {tfidf_matrix.shape}")
    

TF-IDF matrix shape: (1395, 2711)


In [66]:
# Display top terms with highest average TF-IDF scores
mean_tfidf = tfidf_matrix.mean(axis=0).A1
term_scores = pd.DataFrame({'term': feature_names, 'score': mean_tfidf})
print("Top 10 terms by average TF-IDF score:")
print(term_scores.sort_values('score', ascending=False).head(10))

Top 10 terms by average TF-IDF score:
            term     score
540      content  0.042975
2623     violent  0.030025
1922   promotion  0.027371
2622    violence  0.027367
1136  harassment  0.026950
2582       users  0.026153
1599        must  0.023212
1248  incitement  0.022352
1214  ideologies  0.021237
784      doxxing  0.020395


In [88]:
# Display the top TF-IDF words for a few sample documents
sample_indices = np.random.choice(tfidf.index, size=5, replace=False)
for idx in sample_indices:
    print(f"Top TF-IDF words for document {idx}:")
    doc_tfidf = tfidf.iloc[idx]
    top_words = doc_tfidf.nlargest(5)
    print(top_words, '\n')

Top TF-IDF words for document 788:
nsfw          0.418596
borderline    0.361127
construed     0.361127
glance        0.347197
could         0.327563
Name: 788, dtype: float64 

Top TF-IDF words for document 1515:
gratuitous    0.412529
serves        0.412529
obscenity     0.396616
practical     0.396616
benefit       0.384273
Name: 1515, dtype: float64 

Top TF-IDF words for document 921:
ban            0.414406
rules          0.308624
authorities    0.257913
depending      0.257913
failure        0.257913
Name: 921, dtype: float64 

Top TF-IDF words for document 364:
accounts       0.488438
joke           0.338930
necessarily    0.338930
remote         0.338930
celebrity      0.321453
Name: 364, dtype: float64 

Top TF-IDF words for document 873:
intentionally    0.484170
false            0.449582
misleading       0.449582
share            0.444260
information      0.404926
Name: 873, dtype: float64 



In [68]:
# Topic modeling
from bertopic import BERTopic

topic_model = BERTopic()
topics, probs = topic_model.fit_transform(nlp_df['document_str'])

In [69]:
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,310,-1_spam_accounts_advertising_racism,"[spam, accounts, advertising, racism, transpho...",[respectful racism sexism homophobia transphob...
1,0,107,0_content_nsfw_warning_cw,"[content, nsfw, warning, cw, warnings, adult, ...","[nsfw without content warning, posts contain n..."
2,1,92,1_harassment_threats_harass_bullying,"[harassment, threats, harass, bullying, kind, ...","[harassment bullying doxxing users, harassment..."
3,2,78,2_ideologies_incitement_violence_promotion,"[ideologies, incitement, violence, promotion, ...",[incitement violence promotion violent ideolog...
4,3,69,3_illegal_laws_content_states,"[illegal, laws, content, states, united, copyr...","[content illegal united states, content illega..."
5,4,55,4_bots_bot_unlisted_automated,"[bots, bot, unlisted, automated, post, account...",[automated bot accounts must enable bot flag a...
6,5,46,5_respect_treat_respectful_everyone,"[respect, treat, respectful, everyone, welcomi...","[always kind polite treat others respect, trea..."
7,6,44,6_report_rules_admins_admin,"[report, rules, admins, admin, team, moderatio...","[please report violations rules admin, please ..."
8,7,38,7_impersonation_parody_clearly_impersonate,"[impersonation, parody, clearly, impersonate, ...",[impersonation individuals public figures orga...
9,8,36,8_dogpiling_doxxing_harassment_users,"[dogpiling, doxxing, harassment, users, contac...","[harassment dogpiling doxxing users, harassmen..."


### Second Mastodon dataset

In [89]:
rules = df_mastodon2_en[['rules']].explode('rules').reset_index(drop=False)
rules = rules.rename(columns={"index": "server_id"})
rules = rules.dropna()
rules = pd.concat([rules.drop(['rules'], axis=1), rules['rules'].apply(pd.Series)], axis=1)
rules = rules.rename(columns={'id': "rule_id"})
rules

Unnamed: 0,server_id,rule_id,text,hint
0,0,1,Sexually explicit or violent media must be mar...,This includes content that is particularly pro...
1,0,2,"No racism, sexism, homophobia, transphobia, ab...",Transphobic behavior such as intentional misge...
2,0,3,No incitement of violence or promotion of viol...,Calling for people or groups to be assassinate...
3,0,4,"No harassment, block evasion, dogpiling, or do...",Repeat attempts to communicate with users who ...
4,0,7,Do not share information widely-known to be fa...,False and misleading information and links fro...
...,...,...,...,...
803,139,77,③ 同意呜呜站的社区规则？/ Do you agree with the community...,
804,139,78,🌸 缺项或错误将被拒绝 ... / Incomplete or nonsensical re...,
805,139,79,"详细社区规则见： / For detailed community rules, pleas...",
806,139,80,https://wxw.moe/about,


In [90]:
# remove rules that are not written in english
df_english = rules[rules['text'].apply(is_english)].reset_index(drop=True)

non_english_rules_pourcentage = 100 * (rules.shape[0] - df_english.shape[0]) / rules.shape[0]

print(f" We removed {rules.shape[0] - df_english.shape[0]} of the {rules.shape[0]} rules ({non_english_rules_pourcentage:.0f}%) that were not detected to be in english.")

 We removed 221 of the 807 rules (27%) that were not detected to be in english.


In [91]:
# Detect the language of the description among the supposedly engish servers
server_descr_english = df_mastodon2_en[df_mastodon2_en['description'].apply(is_english)].index
server_descr_not_english = df_mastodon2_en[~df_mastodon2_en['description'].apply(is_english)].index

# Count them
print(f"{len(server_descr_english)} servers have their description in english;")
print(f"{len(server_descr_not_english)} servers have their description in another language.")

# Display the rules of the servers that have their description in another language than english
suspicious_server_ids = [serv_id for serv_id in df_english.server_id.unique() if serv_id in server_descr_not_english]
df_english[df_english['server_id'].isin(suspicious_server_ids)]

79 servers have their description in english;
15 servers have their description in another language.


Unnamed: 0,server_id,rule_id,text,hint
142,31,12,Treat others with respect,"Avoid any form of insult, bullying, or discrim..."
143,31,15,Comply with applicable law,Do not publish any illegal content.
144,31,16,Avoid misinformation,Do not share false or misleading information.
145,31,19,Mark sensitive content (CW),Please mark any content that may contain commo...
146,31,20,You must be older than 16 years,"According to European law, you must be at leas..."
151,33,3,No al machismo,
152,33,9,No ai bot*,"* in generale è una regola da tenere presente,..."
363,93,21,Don't be an asshole. You have an opinion? That...,
364,93,23,"Harassment, stalking, doxxing, transmisogyny, ...",
365,93,25,"Violent nationalist propaganda, Nazi symbolism...",


In [102]:
# We decide to remove some servers based on their rules language
manually_excluded = [33, 139] #manually inspected their rules and decide to remove the server since most of their rules contained words not in english.
df_english = df_english[~df_english['server_id'].isin(manually_excluded)]
df_english

Unnamed: 0,server_id,rule_id,text,hint
0,0,1,Sexually explicit or violent media must be mar...,This includes content that is particularly pro...
1,0,3,No incitement of violence or promotion of viol...,Calling for people or groups to be assassinate...
2,0,4,"No harassment, block evasion, dogpiling, or do...",Repeat attempts to communicate with users who ...
3,0,7,Do not share information widely-known to be fa...,False and misleading information and links fro...
4,0,1008,"Content created by others must be attributed, ...",Content created by others must clearly provide...
...,...,...,...,...
570,137,16,This instance is made for individuals older th...,
571,137,17,Brand accounts are not allowed. This instance ...,
572,137,18,Do not register your account via Tor/VPN/Hosti...,
573,137,20,English Only. If you sign up with the reason n...,


In [103]:
# Process te rules for NLP
processor = RulesProcessor(df_mastodon2_en)
rules_df = processor.extract_rules()
standardized_df = processor.standardize_rules()

# Compute strictness metrics TODO: define metrics and compute them
standardized_df["strict_rule"] = standardized_df["text"].apply(contains_strict_words)
standardized_df

Unnamed: 0,server_id,rule_id,text,hint,strict_rule
0,0,1,"[sexually, explicit, or, violent, media, must,...","[this, includes, content, that, is, particular...",False
1,0,2,"[no, racism, sexism, homophobia, transphobia, ...","[transphobic, behavior, such, as, intentional,...",True
2,0,3,"[no, incitement, of, violence, or, promotion, ...","[calling, for, people, or, groups, to, be, ass...",True
3,0,4,"[no, harassment, block, evasion, dogpiling, or...","[repeat, attempts, to, communicate, with, user...",True
4,0,7,"[do, not, share, information, widely, known, t...","[false, and, misleading, information, and, lin...",False
...,...,...,...,...,...
803,139,77,"[do, you, agree, with, the, community, rules, ...",[],False
804,139,78,"[incomplete, or, nonsensical, responses, will,...",[],False
805,139,79,"[for, detailed, community, rules, please, see]",[],False
806,139,80,"[https, wxw, moe, about]",[],False


In [104]:
# Keep only the rows from standardized_df that have a matching server_id and rule_id in df_english
standardized_df = standardized_df.merge(df_english[["server_id", "rule_id"]], on=["server_id", "rule_id"], how='inner')
standardized_df

Unnamed: 0,server_id,rule_id,text,hint,strict_rule
0,0,1,"[sexually, explicit, or, violent, media, must,...","[this, includes, content, that, is, particular...",False
1,0,3,"[no, incitement, of, violence, or, promotion, ...","[calling, for, people, or, groups, to, be, ass...",True
2,0,4,"[no, harassment, block, evasion, dogpiling, or...","[repeat, attempts, to, communicate, with, user...",True
3,0,7,"[do, not, share, information, widely, known, t...","[false, and, misleading, information, and, lin...",False
4,0,1008,"[content, created, by, others, must, be, attri...","[content, created, by, others, must, clearly, ...",False
...,...,...,...,...,...
568,137,16,"[this, instance, is, made, for, individuals, o...",[],False
569,137,17,"[brand, accounts, are, not, allowed, this, ins...",[],False
570,137,18,"[do, not, register, your, account, via, tor, v...",[],False
571,137,20,"[english, only, if, you, sign, up, with, the, ...",[],False


In [105]:
# Natural language processing
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

# Download stopwords if not already done
nltk.download('stopwords')

# Set of English stopwords
stop_words = set(stopwords.words('english')).union(set(stopwords.words('french'))).union(set(stopwords.words('german'))).union(set(stopwords.words('spanish')))

# Remove stop words from the tokenized lists
standardized_df["text"] = standardized_df["text"].apply(lambda words: [word for word in words if word not in stop_words] if isinstance(words, list) else words)
standardized_df["hint"] = standardized_df["hint"].apply(lambda words: [word for word in words if word not in stop_words] if isinstance(words, list) else words)

# Lemmatize tokens
# No need to lemmatize for berTOPIC
#standardized_df["text"] = standardized_df["text"].apply(lemmatize)
#standardized_df["hint"] = standardized_df["hint"].apply(lemmatize)

standardized_df

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/eglantinevialaneix/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,server_id,rule_id,text,hint,strict_rule
0,0,1,"[sexually, explicit, violent, media, must, mar...","[includes, content, particularly, provocative,...",False
1,0,3,"[incitement, violence, promotion, violent, ide...","[calling, people, groups, assassinated, murder...",True
2,0,4,"[harassment, block, evasion, dogpiling, doxxin...","[repeat, attempts, communicate, users, blocked...",True
3,0,7,"[share, information, widely, known, false, mis...","[false, misleading, information, links, low, q...",False
4,0,1008,"[content, created, others, must, attributed, u...","[content, created, others, must, clearly, prov...",False
...,...,...,...,...,...
568,137,16,"[instance, made, individuals, older, 18yo, acc...",[],False
569,137,17,"[brand, accounts, allowed, instance, individuals]",[],False
570,137,18,"[register, account, via, tor, vpn, hosting, pr...",[],False
571,137,20,"[english, sign, reason, english, application, ...",[],False


In [106]:
#tokenize each text and each hint
nlp_df = standardized_df.copy()

nlp_df["document"] = nlp_df.apply(create_document, axis=1)

vocab = nlp_df["document"].explode().unique()
nlp_df["document"].explode().value_counts()

document
content        177
must            79
promotion       56
accounts        52
violent         52
              ... 
bring            1
reflect          1
responsible      1
invites          1
australian       1
Name: count, Length: 1616, dtype: int64

In [107]:
# TF-IDF scores
# Join the tokens back into strings
nlp_df['document_str'] = nlp_df['document'].apply(lambda x: ' '.join(x))

# Create and fit TF-IDF Vectorizer
tfidf_vect = TfidfVectorizer()
tfidf_matrix = tfidf_vect.fit_transform(nlp_df['document_str'])

# Get feature names (words)
feature_names = tfidf_vect.get_feature_names_out()

tfidf = pd.DataFrame(tfidf_matrix.toarray(), columns=feature_names)
print(f"TF-IDF matrix shape: {tfidf_matrix.shape}")
    

TF-IDF matrix shape: (573, 1609)


In [108]:
# Display top terms with highest average TF-IDF scores
mean_tfidf = tfidf_matrix.mean(axis=0).A1
term_scores = pd.DataFrame({'term': feature_names, 'score': mean_tfidf})
print("Top 10 terms by average TF-IDF score:")
print(term_scores.sort_values('score', ascending=False).head(10))

Top 10 terms by average TF-IDF score:
            term     score
322      content  0.049851
1555     violent  0.029528
1134   promotion  0.028996
1554    violence  0.027639
945         must  0.026209
662   harassment  0.025530
1531       users  0.024257
737   incitement  0.022650
712   ideologies  0.021178
539     explicit  0.019201


In [109]:
# Display the top TF-IDF words for a few sample documents
sample_indices = np.random.choice(tfidf.index, size=5, replace=False)
for idx in sample_indices:
    print(f"Top TF-IDF words for document {idx}:")
    doc_tfidf = tfidf.iloc[idx]
    top_words = doc_tfidf.nlargest(5)
    print(top_words, '\n')

Top TF-IDF words for document 305:
characteristic    0.359952
considerate       0.322487
identity          0.322487
nationality       0.322487
orientation       0.310426
Name: 305, dtype: float64 

Top TF-IDF words for document 559:
fediblock    0.476019
added        0.238009
filters      0.238009
hatred       0.238009
honors       0.238009
Name: 559, dtype: float64 

Top TF-IDF words for document 443:
islamophobia    0.462459
casteism        0.414325
semitism        0.414325
anti            0.338033
homophobia      0.292854
Name: 443, dtype: float64 

Top TF-IDF words for document 164:
friendly    0.707107
polite      0.707107
1044123     0.000000
13          0.000000
16          0.000000
Name: 164, dtype: float64 

Top TF-IDF words for document 338:
people     0.515307
hit        0.398281
inquire    0.398281
lives      0.374032
pry        0.374032
Name: 338, dtype: float64 



In [110]:
# Topic modeling
from bertopic import BERTopic

topic_model = BERTopic()
topics, probs = topic_model.fit_transform(nlp_df['document_str'])

In [111]:
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,20,-1_languages_language_english_servers,"[languages, language, english, servers, deepl,...",[allowed languages german english main languag...
1,0,131,0_racism_hate_transphobia_speech,"[racism, hate, transphobia, speech, sexism, di...",[respectful racism sexism homophobia transphob...
2,1,97,1_content_sensitive_explicit_nsfw,"[content, sensitive, explicit, nsfw, media, wa...",[explicit nsfw content without content warning...
3,2,72,2_spam_accounts_advertising_commercial,"[spam, accounts, advertising, commercial, acco...",[commercial accounts advertising excessive pro...
4,3,65,3_illegal_content_united_post,"[illegal, content, united, post, laws, kingdom...","[content illegal united kingdom, content illeg..."
5,4,39,4_false_misleading_information_intentionally,"[false, misleading, information, intentionally...",[share intentionally false misleading informat...
6,5,36,5_violence_incitement_ideologies_violent,"[violence, incitement, ideologies, violent, pr...",[incitement violence promotion violent ideolog...
7,6,29,6_rules_admins_report_admin,"[rules, admins, report, admin, moderators, ser...",[hoosier social admins unilateral authority ju...
8,7,24,7_doxxing_dogpiling_users_harassment,"[doxxing, dogpiling, users, harassment, inform...","[harassment dogpiling doxxing users, harassmen..."
9,8,19,8_parody_impersonation_clearly_unless,"[parody, impersonation, clearly, unless, accou...",[impersonation individuals public figures orga...


### Reddit dataset

In [112]:
df_reddit_en

Unnamed: 0,name,title,description,language,subscribers,active_user_count,rules
0,Home,Home,,en,307843,51,
2,NoStupidQuestions,No such thing as stupid questions,Ask away!\n\nDisclaimer: This is an anonymous ...,en,6021303,4872,Top level comments must contain a genuine huma...
3,BaldursGate3,Baldur's Gate 3,"A community all about Baldur's Gate III, the r...",en,3118378,1384,Be civil to one another.; Respect the opinions...
4,facepalm,now double verified,/r/facepalm - please sir can I have some more?,en,8145733,2345,"No uncivil, bigoted, misogynist, misandrist, r..."
5,interestingasfuck,Interesting As Fuck,For anything truly interesting as fuck,en,13961838,4038,Posts MUST be INTERESTING AS FUCK!; No Politic...
...,...,...,...,...,...,...,...
95,SteamDeck,Steam Deck,The Unofficial Subreddit for the Valve Steam D...,en,903284,446,Be Kind Or Get Banned; Posts must be about or ...
96,college,College,The subreddit for discussion related to colleg...,en,2911405,76,Do not post spam or surveys.; Do not post anyt...
97,manga,"/r/manga: manga, on reddit.",Everything and anything manga! (manhwa/manhua...,en,4725267,1480,Disrespectful; Follow submission guidelines wh...
98,CrazyFuckingVideos,CrazyFuckingVideos,Crazy fucking videos for your viewing pleasure,en,2240152,799,Follow Reddit's TOS; Be civil; Must be a Crazy...


In [113]:
rules = df_reddit_en['rules']
rules = rules.dropna()
rules

2     Top level comments must contain a genuine huma...
3     Be civil to one another.; Respect the opinions...
4     No uncivil, bigoted, misogynist, misandrist, r...
5     Posts MUST be INTERESTING AS FUCK!; No Politic...
6     Only "Damnthatsinteresting" content; Use descr...
                            ...                        
95    Be Kind Or Get Banned; Posts must be about or ...
96    Do not post spam or surveys.; Do not post anyt...
97    Disrespectful; Follow submission guidelines wh...
98    Follow Reddit's TOS; Be civil; Must be a Crazy...
99    Don't be a dick; No hateful insults; Repost; N...
Name: rules, Length: 98, dtype: object

In [115]:
# remove rules that are not written in english
df_english = rules[rules.apply(is_english)].reset_index(drop=True)

non_english_rules_pourcentage = 100 * (rules.shape[0] - df_english.shape[0]) / rules.shape[0]

print(f" We removed {rules.shape[0] - df_english.shape[0]} of the {rules.shape[0]} rules ({non_english_rules_pourcentage:.0f}%) that were not detected to be in english.")

 We removed 1 of the 98 rules (1%) that were not detected to be in english.


In [118]:
def r_remove_empty(x):
    if isinstance(x, list) and len(x) > 0:
        return [item for item in x if item != '']
    return x

def r_standardize_text(df_column):
    df_column = df_column.apply(lambda x: x.strip() if isinstance(x, str) else x)
    df_column = df_column.str.lower()
    df_column = df_column.str.replace(r"[^a-zA-Z0-9\s]", " ", regex=True)
    df_column = df_column.str.replace(r"\s+", " ", regex=True).str.strip()
    df_column = df_column.str.split(" ")
    df_column = df_column.apply(r_remove_empty)
    return df_column

In [121]:
type(df_english)

pandas.core.series.Series

In [119]:
# Process te rules for NLP
standardized_df = df_english.apply(r_standardize_text)

# Compute strictness metrics TODO: define metrics and compute them
standardized_df["strict_rule"] = standardized_df["text"].apply(contains_strict_words)
standardized_df

AttributeError: 'str' object has no attribute 'apply'

In [None]:
# Keep only the rows from standardized_df that have a matching server_id and rule_id in df_english
standardized_df = standardized_df.merge(df_english[["server_id", "rule_id"]], on=["server_id", "rule_id"], how='inner')
standardized_df

Unnamed: 0,server_id,rule_id,text,hint,strict_rule
0,0,1,"[sexually, explicit, or, violent, media, must,...","[this, includes, content, that, is, particular...",False
1,0,3,"[no, incitement, of, violence, or, promotion, ...","[calling, for, people, or, groups, to, be, ass...",True
2,0,4,"[no, harassment, block, evasion, dogpiling, or...","[repeat, attempts, to, communicate, with, user...",True
3,0,7,"[do, not, share, information, widely, known, t...","[false, and, misleading, information, and, lin...",False
4,0,1008,"[content, created, by, others, must, be, attri...","[content, created, by, others, must, clearly, ...",False
...,...,...,...,...,...
1390,322,21,"[this, instance, is, under, the, jurisdiction,...",[],False
1391,327,2,"[no, incitement, of, violence, or, promotion, ...",[],True
1392,327,3,"[no, harassment, block, evasion, dogpiling, or...",[],True
1393,327,4,"[do, not, share, information, widely, known, t...",[],False


In [None]:
# Natural language processing
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

# Download stopwords if not already done
nltk.download('stopwords')

# Set of English stopwords
stop_words = set(stopwords.words('english')).union(set(stopwords.words('french'))).union(set(stopwords.words('german'))).union(set(stopwords.words('spanish')))

# Remove stop words from the tokenized lists
standardized_df["text"] = standardized_df["text"].apply(lambda words: [word for word in words if word not in stop_words] if isinstance(words, list) else words)
standardized_df["hint"] = standardized_df["hint"].apply(lambda words: [word for word in words if word not in stop_words] if isinstance(words, list) else words)

# Lemmatize tokens
# No need to lemmatize for berTOPIC
#standardized_df["text"] = standardized_df["text"].apply(lemmatize)
#standardized_df["hint"] = standardized_df["hint"].apply(lemmatize)

standardized_df

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/eglantinevialaneix/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,server_id,rule_id,text,hint,strict_rule
0,0,1,"[sexually, explicit, violent, media, must, mar...","[includes, content, particularly, provocative,...",False
1,0,3,"[incitement, violence, promotion, violent, ide...","[calling, people, groups, assassinated, murder...",True
2,0,4,"[harassment, block, evasion, dogpiling, doxxin...","[repeat, attempts, communicate, users, blocked...",True
3,0,7,"[share, information, widely, known, false, mis...","[false, misleading, information, links, low, q...",False
4,0,1008,"[content, created, others, must, attributed, u...","[content, created, others, must, clearly, prov...",False
...,...,...,...,...,...
1390,322,21,"[instance, jurisdiction, western, australia, p...",[],False
1391,327,2,"[incitement, violence, promotion, violent, ide...",[],True
1392,327,3,"[harassment, block, evasion, dogpiling, doxxin...",[],True
1393,327,4,"[share, information, widely, known, false, mis...",[],False


In [None]:
#tokenize each text and each hint
nlp_df = standardized_df.copy()

nlp_df["document"] = nlp_df.apply(create_document, axis=1)

vocab = nlp_df["document"].explode().unique()
nlp_df["document"].explode().value_counts()

document
content          396
must             174
accounts         140
violent          133
users            127
                ... 
contributors       1
disrespectful      1
file               1
famichiki          1
jaxbeach           1
Name: count, Length: 2726, dtype: int64

In [None]:
# TF-IDF scores
# Join the tokens back into strings
nlp_df['document_str'] = nlp_df['document'].apply(lambda x: ' '.join(x))

# Create and fit TF-IDF Vectorizer
tfidf_vect = TfidfVectorizer()
tfidf_matrix = tfidf_vect.fit_transform(nlp_df['document_str'])

# Get feature names (words)
feature_names = tfidf_vect.get_feature_names_out()

tfidf = pd.DataFrame(tfidf_matrix.toarray(), columns=feature_names)
print(f"TF-IDF matrix shape: {tfidf_matrix.shape}")
    

TF-IDF matrix shape: (1395, 2711)


In [None]:
# Display top terms with highest average TF-IDF scores
mean_tfidf = tfidf_matrix.mean(axis=0).A1
term_scores = pd.DataFrame({'term': feature_names, 'score': mean_tfidf})
print("Top 10 terms by average TF-IDF score:")
print(term_scores.sort_values('score', ascending=False).head(10))

Top 10 terms by average TF-IDF score:
            term     score
540      content  0.042975
2623     violent  0.030025
1922   promotion  0.027371
2622    violence  0.027367
1136  harassment  0.026950
2582       users  0.026153
1599        must  0.023212
1248  incitement  0.022352
1214  ideologies  0.021237
784      doxxing  0.020395


In [None]:
# Display the top TF-IDF words for a few sample documents
sample_indices = np.random.choice(tfidf.index, size=5, replace=False)
for idx in sample_indices:
    print(f"Top TF-IDF words for document {idx}:")
    doc_tfidf = tfidf.iloc[idx]
    top_words = doc_tfidf.nlargest(5)
    print(top_words, '\n')

Top TF-IDF words for document 788:
nsfw          0.418596
borderline    0.361127
construed     0.361127
glance        0.347197
could         0.327563
Name: 788, dtype: float64 

Top TF-IDF words for document 1515:
gratuitous    0.412529
serves        0.412529
obscenity     0.396616
practical     0.396616
benefit       0.384273
Name: 1515, dtype: float64 

Top TF-IDF words for document 921:
ban            0.414406
rules          0.308624
authorities    0.257913
depending      0.257913
failure        0.257913
Name: 921, dtype: float64 

Top TF-IDF words for document 364:
accounts       0.488438
joke           0.338930
necessarily    0.338930
remote         0.338930
celebrity      0.321453
Name: 364, dtype: float64 

Top TF-IDF words for document 873:
intentionally    0.484170
false            0.449582
misleading       0.449582
share            0.444260
information      0.404926
Name: 873, dtype: float64 



In [None]:
# Topic modeling
from bertopic import BERTopic

topic_model = BERTopic()
topics, probs = topic_model.fit_transform(nlp_df['document_str'])

In [None]:
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,310,-1_spam_accounts_advertising_racism,"[spam, accounts, advertising, racism, transpho...",[respectful racism sexism homophobia transphob...
1,0,107,0_content_nsfw_warning_cw,"[content, nsfw, warning, cw, warnings, adult, ...","[nsfw without content warning, posts contain n..."
2,1,92,1_harassment_threats_harass_bullying,"[harassment, threats, harass, bullying, kind, ...","[harassment bullying doxxing users, harassment..."
3,2,78,2_ideologies_incitement_violence_promotion,"[ideologies, incitement, violence, promotion, ...",[incitement violence promotion violent ideolog...
4,3,69,3_illegal_laws_content_states,"[illegal, laws, content, states, united, copyr...","[content illegal united states, content illega..."
5,4,55,4_bots_bot_unlisted_automated,"[bots, bot, unlisted, automated, post, account...",[automated bot accounts must enable bot flag a...
6,5,46,5_respect_treat_respectful_everyone,"[respect, treat, respectful, everyone, welcomi...","[always kind polite treat others respect, trea..."
7,6,44,6_report_rules_admins_admin,"[report, rules, admins, admin, team, moderatio...","[please report violations rules admin, please ..."
8,7,38,7_impersonation_parody_clearly_impersonate,"[impersonation, parody, clearly, impersonate, ...",[impersonation individuals public figures orga...
9,8,36,8_dogpiling_doxxing_harassment_users,"[dogpiling, doxxing, harassment, users, contac...","[harassment dogpiling doxxing users, harassmen..."


In [None]:
# try consensus clutering on topics shared between servers