In [6]:
import os
import ast
import warnings
import datetime

import pandas as pd
import numpy as np

from tqdm.auto import tqdm

# NLTK packages
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# Spacy
import spacy

warnings.filterwarnings("ignore")

# downloading corpus
nltk.download('stopwords')
nltk.download('punkt')

stopwords = set(stopwords.words("english"))

# Remove 'not' from stopword list
stopwords.remove('not')

# Load the spaCy English model
# https://github.com/explosion/spaCy/issues/6498
nlp = spacy.load("en_core_web_lg", disable=["parser", 'ner'])
nlp.add_pipe('sentencizer')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


<spacy.pipeline.sentencizer.Sentencizer at 0x18e308dc1d0>

In [7]:
json_datasets = [ds for ds in os.listdir("datasets") if not ds.endswith(".csv")]

df = (
    pd.concat((pd.read_json("datasets/"+d) for d in json_datasets), ignore_index=True)
    .replace('', np.nan)
    .loc[lambda x: x['posts'].isnull()]
    .rename(columns={'posts' : 'body_type'})
    .assign(body_type='post')
    .reset_index(drop=True)
)

df.head()

Unnamed: 0,post_id,title,author,timestamp,body,score,downs,total_comments,comments,body_type
0,1b3xroj,r/CasualConversation Welcome Thread - Month of...,AutoModerator,2024-03-01 16:00:09,Welcome to r/CasualConversation! Thank you for...,4.0,0.0,7.0,"[{'comment_id': 'kt1kilo', 'author': 'fluffyke...",post
1,1b0m6c1,r/CasualConversation is looking for new modera...,MajorParadox,2024-02-26 16:48:14,"Hello everyone,\n\nIt's that time again, [we'r...",4.0,0.0,0.0,[],post
2,1b5h6x7,Deleting social media was one of the best thin...,Glum-Net-9774,2024-03-03 13:29:12,I know that technically reddit is a social med...,69.0,0.0,13.0,"[{'comment_id': 'kt5a08d', 'author': 'BaclavaB...",post
3,1b5lu9j,Lonlieness is not about gender! My Opinion.,Intjfire,2024-03-03 16:56:09,Okay so I just wanted to get this off of my mi...,28.0,0.0,22.0,"[{'comment_id': 'kt6d4uc', 'author': 'theTeaEn...",post
4,1b5jo0m,Have you gave up any hobbies?,incombusty,2024-03-03 15:23:08,I used to be a doll collector. Then when I was...,33.0,0.0,76.0,"[{'comment_id': 'kt67rvd', 'author': 'SA_Dza',...",post


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2289 entries, 0 to 2288
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   post_id         2289 non-null   object        
 1   title           2289 non-null   object        
 2   author          2211 non-null   object        
 3   timestamp       2289 non-null   datetime64[ns]
 4   body            2248 non-null   object        
 5   score           2289 non-null   float64       
 6   downs           2289 non-null   float64       
 7   total_comments  2289 non-null   float64       
 8   comments        2289 non-null   object        
 9   body_type       2289 non-null   object        
dtypes: datetime64[ns](1), float64(3), object(6)
memory usage: 179.0+ KB


##### The dataset contains columns named `comments` and `replies`, both of which have nested JSON data. To make this data more accessible, we’ll preprocess the dataset. This involves extracting the `body` from both the `comments` and `replies` and adding these to the dataset. Each `body` will be associated with its corresponding `post_id`, which helps us identify the post that each comment or reply belongs to. Additionally, we’ll introduce a new column named `body_type`. This column will specify whether a particular body is a post, a comment, or a reply.

In [9]:
def convert_unix_to_datetime(x: object) -> datetime:
    """
    function to convert unix time format (of object datatype) to datetime format.
    """
    try:
        return datetime.datetime.fromtimestamp(int(x))
    except ValueError:
        return x

In [10]:
def expand_json_cols(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """
    function that converts comments or replies column to a 
    Pandas DataFrame with reference to its original post_id
    """
    
    return (
        df.loc[:, ['post_id', col]] # Select post_id and column (either `comments` or `replies`)
        
        # convert nested dictionary to its own rows with dictonary object
        .explode(column=col, ignore_index=True) 
        
        # remove those rows where the column has null values
        # some posts have no comments and some comments have no replies.
        .loc[lambda x: x[col].notnull()] 

        .reset_index(drop=True) 
        .pipe(lambda x: x.join(pd.json_normalize(x[col]))) # convert dictionary object to a dataframe and  
                                                           # join with its original datafrmae to get
                                                           # respective post_id
        .drop(col, axis=1)
        .assign(timestamp=lambda x: x['timestamp'].apply(convert_unix_to_datetime), # convert unix time fomat to datetime
                body_type=col)
    )

In [11]:
full_df = (
    # Unnest `comments` column and then concat with the whole dataframe
    df.pipe(lambda x: pd.concat([x, expand_json_cols(x, 'comments')], ignore_index=True))

    # Similarly, Unnest `replies` column and then concat with the whole dataframe
    .pipe(lambda x: pd.concat([x, expand_json_cols(x, 'replies')], ignore_index=True))

    # Dropping rows where comments/replied were deleted
    # Also removing rows with posts by AutoModerator
    .loc[lambda x: ~x['body'].eq("[deleted]")]
    .loc[lambda x: x['author'].ne('AutoModerator')]
    .drop(['author', 'score', 'downs', 'total_comments', 'comments', 
           'comment_id', 'replies', 'reply_id'], axis=1)
           
    # replace reddit channel or comment links in the body 
    .replace(r'\r+|\n+|\t+','', regex=True)
    .assign(body=lambda x: (x['body']
                            .replace(r'(r/.*?/comments/.*?) ', 'comments ', regex=True)
                            .replace(r'(r/.*?) ', 'channel ', regex=True)))

    # some body is empty. So these will be replaced with the title.
    .assign(body=lambda x: np.where(x['body'].isnull(), x['title'], x['body']))
    .reset_index(drop=True)
)

full_df.head()

Unnamed: 0,post_id,title,timestamp,body,body_type
0,1b0m6c1,r/CasualConversation is looking for new modera...,2024-02-26 16:48:14,"Hello everyone,It's that time again, [we're lo...",post
1,1b5h6x7,Deleting social media was one of the best thin...,2024-03-03 13:29:12,I know that technically reddit is a social med...,post
2,1b5lu9j,Lonlieness is not about gender! My Opinion.,2024-03-03 16:56:09,Okay so I just wanted to get this off of my mi...,post
3,1b5jo0m,Have you gave up any hobbies?,2024-03-03 15:23:08,I used to be a doll collector. Then when I was...,post
4,1b5c8ah,Is it me or has the world just stopped moving ...,2024-03-03 08:24:06,"This is strange, but I feel like:1) I don’t re...",post


In [12]:
# Ensuring no null values
full_df.loc[lambda x: x['body'].isnull()]

Unnamed: 0,post_id,title,timestamp,body,body_type


In [13]:
full_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 133038 entries, 0 to 133037
Data columns (total 5 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   post_id    133038 non-null  object
 1   title      2286 non-null    object
 2   timestamp  133038 non-null  object
 3   body       133038 non-null  object
 4   body_type  133038 non-null  object
dtypes: object(5)
memory usage: 5.1+ MB


In [14]:
full_df['body_type'].value_counts()

body_type
comments    112412
replies      18340
post          2286
Name: count, dtype: int64

In [15]:
# Checking if any reddit tags/links still exists in the body
(
    full_df['body'].str
    .extract(r'(r/.*?/comments/.*?) ', expand=False)
    .unique()
)

array([nan], dtype=object)

In [16]:
(
    full_df['body'].str
    .extract(r'(r/.*?) ', expand=False)
    .unique()
)

array([nan], dtype=object)

In [17]:
# Checking all body that belings to post_id '1b3xroj'
(
    full_df.query("post_id=='1b3xroj'")
)

Unnamed: 0,post_id,title,timestamp,body,body_type
2286,1b3xroj,,2024-03-02 22:37:27,hi ..been here a couple of days. great pla...,comments
2287,1b3xroj,,2024-03-02 22:39:22,I don't have much friends. I'm friendly with a...,comments


In [18]:
# Checking all body that belings to post_id 'b18zmk'
(
    full_df.query("post_id=='b18zmk'")
)

Unnamed: 0,post_id,title,timestamp,body,body_type
2285,b18zmk,Today I promised myself bacon and eggs if I co...,2019-03-15 01:55:17,Today I promised myself bacon and eggs if I co...,post
114679,b18zmk,,2019-03-15 06:17:18,Good for you! Whenever I see something with an...,comments
114680,b18zmk,,2019-03-15 06:18:22,Dig the shirt,comments
114681,b18zmk,,2019-03-15 06:48:41,Hope the meet up went well! Proud to see you b...,comments
114682,b18zmk,,2019-03-15 08:37:28,"As a woman who suffers from anxiety, I really ...",comments
114683,b18zmk,,2019-03-15 06:47:09,Treating yourself to something wonderful for o...,comments
114684,b18zmk,,2019-03-15 07:17:54,awww you are so awesome!!!! too bad those baco...,comments
114685,b18zmk,,2019-03-15 06:18:40,Good shit.,comments
114686,b18zmk,,2019-03-15 08:49:13,Your smile is so cute! Proud of you!!,comments
114687,b18zmk,,2019-03-15 06:24:09,Way to go! 😊 that's wonderful.,comments


##### The above clearly shows the structure of the conversation for a `post_id`. There will be a single post, followed by comments and replies to those comments.

# Topic Modelling

##### **Topic modeling** is a technique used to automatically identify topics present in a text object by using unsupervised machine learning to find clusters or groups of similar words within a body of text. We will use **BERTopic** for topic modelling.

##### **[BERTopic]("https://maartengr.github.io/BERTopic/index.html")** is an advanced technique in language topic modeling that leverages transformer models for superior language capabilities. It uses several components to produce topics from a collection of documents:

* **Transformer Embedding Model**: This is used to convert words into meaningful numerical representations that capture the semantic meaning of the words in their context.

* **UMAP Dimensionality Reduction**: This is a technique used to reduce the dimensionality of the word embeddings, making them easier to work with.

* **HDBSCAN Clustering**: This is a clustering algorithm used to group similar word embeddings together. Each cluster represents a potential topic.

* **Cluster Tagging using c-TF-IDF**: This is used to find the most representative terms for each topic, which are then used as the labels for the topics.

BERTopic takes advantage of these components to create a model that can identify and extract topics from a collection of documents. It’s a powerful tool for analyzing and understanding large collections of unstructured text data.

#### **Note**: Performing Topic Modelling with BERTopic on a dataset of over 130,000 texts took approximately 45 minutes. After running the topic modelling for the first time in this notebook, I saved the results as a CSV file on my local system. This step ensures that I won’t need to repeat the entire process in future sessions. Please note that the sections of this notebook related to topic modelling are commented out. This precaution is taken in case a restart of the notebook is required.

In [19]:
# from bertopic import BERTopic
# from sentence_transformers import SentenceTransformer
# from bertopic.vectorizers import ClassTfidfTransformer

# sentence_model = SentenceTransformer("all-MiniLM-L6-v2")

# ctfidf_model = ClassTfidfTransformer(bm25_weighting=True, 
#                                      reduce_frequent_words=True)

# topic_model = BERTopic(embedding_model=sentence_model,
#                        ctfidf_model=ctfidf_model,
#                        verbose=True)

In [20]:
# docs = full_df['body']
# topics, _ = topic_model.fit_transform(docs)

In [21]:
# topic_model.get_topic_info()

In [22]:
# topic_model.visualize_topics(width=1000, height=1000)

In [23]:
# full_df_with_topics = (
#     full_df.assign(topics=topics)
#     .merge(topic_model.get_topic_info()[['Topic', 'Name']], 
#              how='left',
#              left_on='topics',
#              right_on='Topic')
#     .drop(['Topic', 'topics'], axis=1)
#     .rename(columns={'Name': 'topic_name'})
# )

# full_df_with_topics.head()

In [24]:
# full_df_with_topics.to_csv("datasets/reddit_posts_with_topics.csv", index=False)

In [25]:
# Open saved df with topics
topic_df = pd.read_csv("datasets/reddit_posts_with_topics.csv")
topic_df.head()

Unnamed: 0,post_id,title,timestamp,body,body_type,topic_name
0,1b0m6c1,r/CasualConversation is looking for new modera...,2024-02-26 16:48:14,"Hello everyone,It's that time again, [we're lo...",post,22_subs_subreddits_trolls_subreddit
1,1b5h6x7,Deleting social media was one of the best thin...,2024-03-03 13:29:12,I know that technically reddit is a social med...,post,1_fb_deleted_instagram_facebook
2,1b5lu9j,Lonlieness is not about gender! My Opinion.,2024-03-03 16:56:09,Okay so I just wanted to get this off of my mi...,post,359_loneliness_lonely_epidemic_intiate
3,1b5jo0m,Have you gave up any hobbies?,2024-03-03 15:23:08,I used to be a doll collector. Then when I was...,post,-1_breakfast_cream_listening_bus
4,1b5c8ah,Is it me or has the world just stopped moving ...,2024-03-03 08:24:06,"This is strange, but I feel like:1) I don’t re...",post,-1_breakfast_cream_listening_bus


In [26]:
# Top 20 topics
# -1 topics are outliers and should be ignored.
(
    topic_df['topic_name']
    .value_counts()
    .nlargest(20)
)

topic_name
-1_breakfast_cream_listening_bus        52368
0_attacks_prescribed_symptoms_benzos     2555
1_fb_deleted_instagram_facebook          1490
2_nicotine_cigarette_smoked_smoker        953
3_bday_birthdays_25th_belated             940
4_masks_vaccinated_mask_wearing           867
5_cats_cat_kitty_kitties                  740
6_pete_stories_casconvo_sharing           648
7_removed_backspaced_bailing_erased       572
8_grammar_native_english_language         565
9_dreamt_dreaming_dreamed_dreams          554
10_crying_tear_cry_tears                  515
11_bully_bullied_bullies_bullying         496
12_upvote_upvotes_upvoted_downvotes       481
13_bachelors_bachelor_diploma_ged         481
14_channel_subreddit_sub_lego             469
15_teens_30s_20s_aging                    465
16_savings_invest_401k_saving             444
17_pregnancy_adoption_pregnant_adopt      412
18_bald_shaved_shave_shaving              405
Name: count, dtype: int64

# Data Pre-processing & Keyword Extraction

##### In this section, the texts will be preprocessed such as stopword removal, lemmatization, etc following by keyword extraction on the cleaned text using **keyBERT**.

In [27]:
# https://github.com/kootenpv/contractions
import contractions

# Remove stopwords function
def remove_sw(text: str) -> str:
    tokenized_text = word_tokenize(text)
    # Remove stopwords and those words that is of length<=2
    processed_text = (word for word in tokenized_text if not word in stopwords and len(word) > 2)

    return " ".join(processed_text)

# Lemmatization
# https://stackoverflow.com/a/75215495/15937542
def lemmatize_pipe(text_col: pd.Series) -> list:
    
    docs = nlp.pipe(text_col)
    lemmatized_col = [lemmatize_text_spacy(doc) for doc in docs]

    return lemmatized_col


def lemmatize_text_spacy(doc: spacy.tokens.doc.Doc) -> str:

    # Extract lemmatized words
    # The word refunded does not get lemmatized, hence adding this special condition
    lemmatized_words = (token.lemma_ for token in doc)

    # Join the lemmatized words to form the lemmatized text
    lemmatized_text = ' '.join(lemmatized_words)

    return lemmatized_text

In [28]:
cleaned_df = (
    topic_df
    .loc[lambda x: x['body'].notnull()]
    .assign(cleaned_text=lambda x: lemmatize_pipe(
        x['body'].str.lower() # lower case
        .str.replace(r'https*.*? *|www\..*? *|bit\..*? *', " ", 
                        regex=True) # Remove all url links
        .apply(contractions.fix) # Apply Contractions function
        .str.replace(r'[^a-z]', ' ', regex=True) # Remove all characters except alphabets
        .str.replace('\s+', " ", regex=True) # Remove all extra spaces
        .str.strip() # Remove leading and trailing spaces
        .apply(remove_sw) # Remove stopwords
    ))

    # remove those rows with just empty text after preprocessing
    .loc[lambda x:x['cleaned_text'].ne('')]
    .reset_index(drop=True)
)

In [29]:
cleaned_df.head()

Unnamed: 0,post_id,title,timestamp,body,body_type,topic_name,cleaned_text
0,1b0m6c1,r/CasualConversation is looking for new modera...,2024-02-26 16:48:14,"Hello everyone,It's that time again, [we're lo...",post,22_subs_subreddits_trolls_subreddit,hello everyone time look new mod join team doc...
1,1b5h6x7,Deleting social media was one of the best thin...,2024-03-03 13:29:12,I know that technically reddit is a social med...,post,1_fb_deleted_instagram_facebook,know technically reddit social medium not nega...
2,1b5lu9j,Lonlieness is not about gender! My Opinion.,2024-03-03 16:56:09,Okay so I just wanted to get this off of my mi...,post,359_loneliness_lonely_epidemic_intiate,okay want get mind keep see people post man lo...
3,1b5jo0m,Have you gave up any hobbies?,2024-03-03 15:23:08,I used to be a doll collector. Then when I was...,post,-1_breakfast_cream_listening_bus,use doll collector tell weird old donate every...
4,1b5c8ah,Is it me or has the world just stopped moving ...,2024-03-03 08:24:06,"This is strange, but I feel like:1) I don’t re...",post,-1_breakfast_cream_listening_bus,strange feel like not remember anything happen...


In [56]:
# Example of raw text vs clean text

print("Raw text:")
print(cleaned_df.loc[1, 'body'])

print()

print("Cleaned text:")
print(cleaned_df.loc[1, 'cleaned_text'])

Raw text:
I know that technically reddit is a social media but for me it doesn't negatively effect my mental health, in fact it helps me quite a lot and I have gained quite a bit of knowledge from surfing it  Almost two months ago I deleted Instagram and Tiktok and honestly it was super difficult the first few days but..  \- I had time to do things that I would otherwise have not done due to scrolling endlessly. I started working out and I am super proud of myself for that  \- I don't really care about my looks as much anymore  \- My screen time has decreased by quite a lot, I have social interactions in person way more now  So I guess what I'm saying is if you've been considering deleting social media for whatever reason, do it.

Cleaned text:
know technically reddit social medium not negatively effect mental health fact help quite lot gain quite bit knowledge surf almost two month ago delete instagram tiktok honestly super difficult first day time thing would otherwise not do due scr

In [30]:
cleaned_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 131952 entries, 0 to 131951
Data columns (total 7 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   post_id       131952 non-null  object
 1   title         2285 non-null    object
 2   timestamp     131952 non-null  object
 3   body          131952 non-null  object
 4   body_type     131952 non-null  object
 5   topic_name    131952 non-null  object
 6   cleaned_text  131952 non-null  object
dtypes: object(7)
memory usage: 7.0+ MB


##### **[KeyBERT]("https://maartengr.github.io/KeyBERT/api/keybert.html#:~:text=A%20minimal%20method%20for%20keyword,get%20a%20document%2Dlevel%20representation.")** is a minimal and easy-to-use keyword extraction technique that leverages BERT embeddings to create keywords and keyphrases that are most similar to a document.

##### KeyBERT uses BERT embeddings and simple cosine similarity to find the sub-phrases in a document that are the most similar to the document itself. First, document embeddings are extracted with BERT to get a document-level representation. Then, word embeddings are extracted for N-gram words/phrases. Finally, cosine similarity is used to find the words/phrases that are the most similar to the document. The most similar words could then be identified as the words that best describe the entire document.

##### **Note**: Similarly to Topic modelling section, extracting keywords using keyBERT is a time consuming process. This task took around 2.5 hours to complete. Therefore the following codes will be commented out too. The dataframe containing the kewords has been saved to my local system.

In [58]:
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer

sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
vectorizer = CountVectorizer(max_df=0.7, stop_words='english', ngram_range=(1, 3))


model = KeyBERT(model=sentence_model)

def extract_keywords(texts):

    kw_list = model.extract_keywords(
        texts,
        vectorizer=vectorizer,
        top_n=5,
        use_mmr=True,
        diversity=0.7
    )

    keywords=[",".join((k[0] for k in kw_tup)) for kw_tup in kw_list]
    
    return keywords

In [59]:
batch_size=64
nbr_of_batches=int(np.ceil(len(cleaned_df)/batch_size))

batches = np.array_split(cleaned_df['cleaned_text'].to_list(), nbr_of_batches)

keyword_list=[]

for batch in tqdm(batches):
    keywords_from_batch = extract_keywords(texts=batch)
    keyword_list.extend(keywords_from_batch)

  0%|          | 0/2062 [00:00<?, ?it/s]

In [60]:
cleaned_df['keywords']=keyword_list
cleaned_df.to_csv("datasets/reddit_posts_with_topics_keywords.csv", index=False)

In [61]:
df_with_topic_kw = pd.read_csv("datasets/reddit_posts_with_topics_keywords.csv")
df_with_topic_kw.head()

Unnamed: 0,post_id,title,timestamp,body,body_type,topic_name,cleaned_text,keywords
0,1b0m6c1,r/CasualConversation is looking for new modera...,2024-02-26 16:48:14,"Hello everyone,It's that time again, [we're lo...",post,22_subs_subreddits_trolls_subreddit,hello everyone time look new mod join team doc...,"modmail courteous professional,grow strict quo..."
1,1b5h6x7,Deleting social media was one of the best thin...,2024-03-03 13:29:12,I know that technically reddit is a social med...,post,1_fb_deleted_instagram_facebook,know technically reddit social medium not nega...,"consider delete social,instagram tiktok,health..."
2,1b5lu9j,Lonlieness is not about gender! My Opinion.,2024-03-03 16:56:09,Okay so I just wanted to get this off of my mi...,post,359_loneliness_lonely_epidemic_intiate,okay want get mind keep see people post man lo...,"man loneliness epidemic,understand gender thin..."
3,1b5jo0m,Have you gave up any hobbies?,2024-03-03 15:23:08,I used to be a doll collector. Then when I was...,post,-1_breakfast_cream_listening_bus,use doll collector tell weird old donate every...,"use doll collector,throw away hobby,day happy ..."
4,1b5c8ah,Is it me or has the world just stopped moving ...,2024-03-03 08:24:06,"This is strange, but I feel like:1) I don’t re...",post,-1_breakfast_cream_listening_bus,strange feel like not remember anything happen...,"good year covid,like remember happen,people tr..."


In [62]:
print(
    (
        df_with_topic_kw.loc[:5, ['body', 'keywords']]
        .T
        .to_json(indent=4)
    )
)

{
    "0":{
        "body":"Hello everyone,It's that time again, [we're looking for new mods to join our team!](https:\/\/docs.google.com\/forms\/d\/e\/1FAIpQLSeQNft07wRiSyPA5BvoFIJo9lpkx5WZhDsF5jnPCq9q7AL7HA\/viewform)*Here are a few responsibilities you'll be faced with:** removing threads & comments that break da rules* keep the modqueue clear* help enforce our rules and etiquette* help maintain a healthy, nice and friendly atmosphere* handle modmails in a courteous and professional manner* hang out in our mod discord server* discuss things with the team* brainstorm new or current ideas* able to learn and grow with us* and more!---We do not have strict quotas but there is always something to do and we do expect our mods to be active in helping keep the queues clear.Do you think you have what it takes to be a moderator on one of the most friendly communities on Reddit? Give it a shot and apply!We are using a Google Form for our app (we **do not** collect your email address), fill out

##### In the next notebook, we will analyze keyword frequncy for each of the topics.