In [1]:
# Standard Library Imports
import pandas as pd
import re
import time
import datetime
from collections import defaultdict, Counter

# Third-party Library Imports
import requests
from bs4 import BeautifulSoup
import praw
import nltk
import numpy as np
import matplotlib.pyplot as plt
import concurrent.futures
from nltk.corpus import stopwords
from gensim.models import Word2Vec
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.util import bigrams
from sklearn.feature_extraction.text import CountVectorizer
import spacy

In [2]:
# Set pandas display options to show the entire content of the "Post Text" column
pd.set_option('display.max_colwidth', None)

In [3]:
# Specify the path to your CSV file
csv_file_path = 'reddit_posts.csv'

# Read the CSV file into a DataFrame
df_reddit_post = pd.read_csv(csv_file_path)

# Now, 'df' contains your data from the CSV file

len(df_reddit_post)

3963

In [4]:
# Define a function to check if a post is an image-only post
def is_image_only_post(post):
    # Check if the post's content (Post Text) is empty (no text) and the URL is an image or an image-hosting site
    return not post["Post Text"] and post["Post URL"].endswith(('.jpg', '.jpeg', '.png', '.gif', '.gifv', '.webp'))

# Apply the function to the DataFrame to create a Boolean mask
image_only_mask = df_reddit_post.apply(is_image_only_post, axis=1)

# Filter out image-only posts
filtered_df_reddit_post = df_reddit_post[~image_only_mask]
print(f"Total number of posts after filter collected: {len(filtered_df_reddit_post)}")

Total number of posts after filter collected: 3963


In [5]:
# Create a new DataFrame with the selected columns
selected_columns = ["Title", "Post Text", "Subreddit"]
filtered_columns_df_reddit_post = filtered_df_reddit_post[selected_columns]
# Save the data to a CSV file
filtered_columns_df_reddit_post.to_csv("reddit_posts (no image-only post).csv", index=False)
filtered_columns_df_reddit_post.head()

Unnamed: 0,Title,Post Text,Subreddit
0,Daily Fasting Check-in!,"* **Type** of fast (water, juice, smoking, etc.)\n* **Context** of fast (start, end, day x of y, etc.)\n* **Length** of fast (8 hours, 3 days, etc.)\n* **Why?** What you hope to accomplish with your fast\n* **Notes** How is it going so far? Any concerns? Insights to share?\n\nBe sure to check back often as comments get posted throughout the day. Sort comments by ""new"" to be sure the newer comments get some love as well.",intermittentfasting
1,F/29/5’8” [246 > 134 = 112lbs]. I’ve been maintaining for a couple months now. AMA about IF and weight loss,,intermittentfasting
2,50 lbs down - 3 months,Started IF on June 19th and have now passed the 50lb mark this morning. Started on 16:8 eating breakfast and lunch only. Slowly moved up to 19:5. Exercise 2-3 days a week with either rowing or walking. I am using a eufy scale to weigh each morning and then manually measure every Sunday. I don't have a strict diet but I am trying to avoid as much sugar as possible. \n\nSW: 305\nCW: 255\nGW: 220,intermittentfasting
3,222 to 195 in 6 months,"Besides the weight I have zero aches and pains, can breathe better and my medical blood work went from 4 red flags to 0.",intermittentfasting
4,So far it seems to be working!,"[22F - 4'10"" - SW: 143.4, CW: 138.2, GW: 115]\n\nI started 16:8 with a calorie deficit on 9/13/23. I last weighed myself on 9/19/23. I went from 143.4lbs to 138.2lbs in almost a week. I’m probably below 138lbs now, but I’m not going to weigh myself again until the end of the month. With my height, 5lbs seems to make a big difference. \n\nI’m sure a lot of it was water weight and bloating. I’m still happy though, I feel better and it gives me even more motivation to stick with it.",intermittentfasting


In [6]:
# Iterate through all columns and convert text data to lowercase
for column in filtered_columns_df_reddit_post.columns:
    if filtered_columns_df_reddit_post[column].dtype == 'object':
        filtered_columns_df_reddit_post[column] = filtered_columns_df_reddit_post[column].str.lower()

# Print the resulting DataFrame
filtered_columns_df_reddit_post

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_columns_df_reddit_post[column] = filtered_columns_df_reddit_post[column].str.lower()


Unnamed: 0,Title,Post Text,Subreddit
0,daily fasting check-in!,"* **type** of fast (water, juice, smoking, etc.)\n* **context** of fast (start, end, day x of y, etc.)\n* **length** of fast (8 hours, 3 days, etc.)\n* **why?** what you hope to accomplish with your fast\n* **notes** how is it going so far? any concerns? insights to share?\n\nbe sure to check back often as comments get posted throughout the day. sort comments by ""new"" to be sure the newer comments get some love as well.",intermittentfasting
1,f/29/5’8” [246 > 134 = 112lbs]. i’ve been maintaining for a couple months now. ama about if and weight loss,,intermittentfasting
2,50 lbs down - 3 months,started if on june 19th and have now passed the 50lb mark this morning. started on 16:8 eating breakfast and lunch only. slowly moved up to 19:5. exercise 2-3 days a week with either rowing or walking. i am using a eufy scale to weigh each morning and then manually measure every sunday. i don't have a strict diet but i am trying to avoid as much sugar as possible. \n\nsw: 305\ncw: 255\ngw: 220,intermittentfasting
3,222 to 195 in 6 months,"besides the weight i have zero aches and pains, can breathe better and my medical blood work went from 4 red flags to 0.",intermittentfasting
4,so far it seems to be working!,"[22f - 4'10"" - sw: 143.4, cw: 138.2, gw: 115]\n\ni started 16:8 with a calorie deficit on 9/13/23. i last weighed myself on 9/19/23. i went from 143.4lbs to 138.2lbs in almost a week. i’m probably below 138lbs now, but i’m not going to weigh myself again until the end of the month. with my height, 5lbs seems to make a big difference. \n\ni’m sure a lot of it was water weight and bloating. i’m still happy though, i feel better and it gives me even more motivation to stick with it.",intermittentfasting
...,...,...,...
3958,anyone else have a fear of becoming obese?,"i know that fear of gaining weight is a pretty much universal thing for anorexia, but does anyone else have a fear of actual obesity? like i’m genuinely terrified that once i start eating more i just won’t stop. i’ll just keep eating and eating and never stop until i’m 300 pounds because i lost all my self control when i stopped restricting. plus hearing about how many people go from anorexia to binge eating disorder doesn’t help either. i know it’s not like that for everyone but what if it is for me? i’m so terrified of it that i literally have nightmares about it, and imaging it happening makes me feel sick with anxiety. i just hope i’m not the only one cus i feel crazy",anorexianervosa
3959,i am so alone.,"i’m a 16 year old male who’s been suffering from anorexia for two years now, entering my 3rd year and have been fake recovered to some extent like 5 times now but every time has me going back down the same path i was on when restricting.\n\ni’m approaching the weight i was at when i was at my worst and everyday i have to deal with constantly being body shamed by my family/friends who have no idea what i’m going through no matter how many attempts i’ve given to explain fully about what i’m going through.\n\nbecause i’m a boy i don’t get a lot of recognition for these struggles, i’m just “the skinny one who doesn’t eat a lot” in peoples eyes.\n\non top of this i live in a place where there’s virtually nowhere to go for eating disorders, there’s no programs or people to help me. i feel like i’m approaching the end of my road and at this point i am ready to stop fighting and give up. i want to eat, i want to recover, but i feel like it’s a forever cycle i’m stuck on of restricting, hitting rock bottom, binging, and then restricting again.",anorexianervosa
3960,chocolate ice cream!,"ok, so it's not a lot, and i didn't use to be as scared of liquid calories (i count ice cream towards liquid calories) as i am now. but i'm eating chocolate ice cream! and i'm so proud of myself! even if i do feel sick (because of motion sickness), i'm still going to eat this entire scoop of ice cream!",anorexianervosa
3961,do you experience oddly specific challenges right after recovering that tempt you back into your eating disorder?,"do y’all find that as soon as you get to a good place in recovery something comes along that wants to push you right back? whether it’s a triggering comment, a break up, a new relationship, someone close to you going on a diet, or some other triggering situation? \n\ni know that challenges will come but it’s just crazy how quickly and specific they seem to be to your own personal weaknesses. like for me as soon as i was getting better my now boyfriend asked me out. which was great but also what starting my eating disorder in the first place years ago so it was soooo hard to not relapse (& i kinda did a bit). \n\ni’d love to hear your stories with this if it’s similar! i just feel like i’ve noticed this theme.",anorexianervosa


In [7]:
nltk.download("stopwords")
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\weege\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\weege\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\weege\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\weege\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [8]:
# Get the list of English stopwords
stop_words = set(stopwords.words("english"))

# Function to filter out stopwords
def remove_stopwords(text):
    if isinstance(text, str):  # Check if the value is a string (not NaN)
        words = text.split()
        filtered_words = [word for word in words if word.lower() not in stop_words]
        return " ".join(filtered_words)
    else:
        return text  # Return the original value if it's NaN

# Apply the function to the "Title" and "Post Text" columns using .loc
filtered_columns_df_reddit_post["Stopword Dropped Title"] = filtered_columns_df_reddit_post["Title"].apply(remove_stopwords)
filtered_columns_df_reddit_post["Stopword Dropped Post Text"] = filtered_columns_df_reddit_post["Post Text"].apply(remove_stopwords)

# Create a clean copy of the DataFrame with the dropped columns
dropped_filtered_columns_dataframe = filtered_columns_df_reddit_post.drop(columns=["Title", "Post Text"]).copy()

# Display the DataFrame with the dropped columns
dropped_filtered_columns_dataframe

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_columns_df_reddit_post["Stopword Dropped Title"] = filtered_columns_df_reddit_post["Title"].apply(remove_stopwords)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_columns_df_reddit_post["Stopword Dropped Post Text"] = filtered_columns_df_reddit_post["Post Text"].apply(remove_stopwords)


Unnamed: 0,Subreddit,Stopword Dropped Title,Stopword Dropped Post Text
0,intermittentfasting,daily fasting check-in!,"* **type** fast (water, juice, smoking, etc.) * **context** fast (start, end, day x y, etc.) * **length** fast (8 hours, 3 days, etc.) * **why?** hope accomplish fast * **notes** going far? concerns? insights share? sure check back often comments get posted throughout day. sort comments ""new"" sure newer comments get love well."
1,intermittentfasting,f/29/5’8” [246 > 134 = 112lbs]. i’ve maintaining couple months now. ama weight loss,
2,intermittentfasting,50 lbs - 3 months,started june 19th passed 50lb mark morning. started 16:8 eating breakfast lunch only. slowly moved 19:5. exercise 2-3 days week either rowing walking. using eufy scale weigh morning manually measure every sunday. strict diet trying avoid much sugar possible. sw: 305 cw: 255 gw: 220
3,intermittentfasting,222 195 6 months,"besides weight zero aches pains, breathe better medical blood work went 4 red flags 0."
4,intermittentfasting,far seems working!,"[22f - 4'10"" - sw: 143.4, cw: 138.2, gw: 115] started 16:8 calorie deficit 9/13/23. last weighed 9/19/23. went 143.4lbs 138.2lbs almost week. i’m probably 138lbs now, i’m going weigh end month. height, 5lbs seems make big difference. i’m sure lot water weight bloating. i’m still happy though, feel better gives even motivation stick it."
...,...,...,...
3958,anorexianervosa,anyone else fear becoming obese?,"know fear gaining weight pretty much universal thing anorexia, anyone else fear actual obesity? like i’m genuinely terrified start eating won’t stop. i’ll keep eating eating never stop i’m 300 pounds lost self control stopped restricting. plus hearing many people go anorexia binge eating disorder doesn’t help either. know it’s like everyone me? i’m terrified literally nightmares it, imaging happening makes feel sick anxiety. hope i’m one cus feel crazy"
3959,anorexianervosa,alone.,"i’m 16 year old male who’s suffering anorexia two years now, entering 3rd year fake recovered extent like 5 times every time going back path restricting. i’m approaching weight worst everyday deal constantly body shamed family/friends idea i’m going matter many attempts i’ve given explain fully i’m going through. i’m boy don’t get lot recognition struggles, i’m “the skinny one doesn’t eat lot” peoples eyes. top live place there’s virtually nowhere go eating disorders, there’s programs people help me. feel like i’m approaching end road point ready stop fighting give up. want eat, want recover, feel like it’s forever cycle i’m stuck restricting, hitting rock bottom, binging, restricting again."
3960,anorexianervosa,chocolate ice cream!,"ok, lot, use scared liquid calories (i count ice cream towards liquid calories) now. i'm eating chocolate ice cream! i'm proud myself! even feel sick (because motion sickness), i'm still going eat entire scoop ice cream!"
3961,anorexianervosa,experience oddly specific challenges right recovering tempt back eating disorder?,"y’all find soon get good place recovery something comes along wants push right back? whether it’s triggering comment, break up, new relationship, someone close going diet, triggering situation? know challenges come it’s crazy quickly specific seem personal weaknesses. like soon getting better boyfriend asked out. great also starting eating disorder first place years ago soooo hard relapse (& kinda bit). i’d love hear stories it’s similar! feel like i’ve noticed theme."


In [10]:
# Load the spaCy English language model
nlp = spacy.load("en_core_web_sm")

# Create a function to process and modify text
def process_text(text):
    if isinstance(text, str):
        doc = nlp(text)
        modified_tokens = []
        for token in doc:
            if token.text.lower() == "i'm":
                modified_tokens.extend(["i", "am"])
            elif token.text.lower() == "emma's":
                modified_tokens.append("emma")
            else:
                modified_tokens.append(token.text)
        return " ".join(modified_tokens)
    else:
        return text

# Loop through all columns and apply the processing function
for column in dropped_filtered_columns_dataframe.columns:
    if dropped_filtered_columns_dataframe[column].dtype == 'object':
        dropped_filtered_columns_dataframe[column] = dropped_filtered_columns_dataframe[column].apply(process_text)

# Print the resulting DataFrame
dropped_filtered_columns_dataframe

Unnamed: 0,Subreddit,Stopword Dropped Title,Stopword Dropped Post Text
0,intermittentfasting,daily fasting check - in !,"* * * type * * fast ( water , juice , smoking , etc . ) * * * context * * fast ( start , end , day x y , etc . ) * * * length * * fast ( 8 hours , 3 days , etc . ) * * * why ? * * hope accomplish fast * * * notes * * going far ? concerns ? insights share ? sure check back often comments get posted throughout day . sort comments "" new "" sure newer comments get love well ."
1,intermittentfasting,f/29/5’8 ” [ 246 > 134 = 112lbs ] . i ’ ve maintaining couple months now . ama weight loss,
2,intermittentfasting,50 lbs - 3 months,started june 19th passed 50 lb mark morning . started 16:8 eating breakfast lunch only . slowly moved 19:5 . exercise 2 - 3 days week either rowing walking . using eufy scale weigh morning manually measure every sunday . strict diet trying avoid much sugar possible . sw : 305 cw : 255 gw : 220
3,intermittentfasting,222 195 6 months,"besides weight zero aches pains , breathe better medical blood work went 4 red flags 0 ."
4,intermittentfasting,far seems working !,"[ 22f - 4'10 "" - sw : 143.4 , cw : 138.2 , gw : 115 ] started 16:8 calorie deficit 9/13/23 . last weighed 9/19/23 . went 143.4lbs 138.2lbs almost week . i ’m probably 138lbs now , i ’m going weigh end month . height , 5lbs seems make big difference . i ’m sure lot water weight bloating . i ’m still happy though , feel better gives even motivation stick it ."
...,...,...,...
3958,anorexianervosa,anyone else fear becoming obese ?,"know fear gaining weight pretty much universal thing anorexia , anyone else fear actual obesity ? like i ’m genuinely terrified start eating wo n’t stop . i ’ll keep eating eating never stop i ’m 300 pounds lost self control stopped restricting . plus hearing many people go anorexia binge eating disorder does n’t help either . know it ’s like everyone me ? i ’m terrified literally nightmares it , imaging happening makes feel sick anxiety . hope i ’m one cus feel crazy"
3959,anorexianervosa,alone .,"i ’m 16 year old male who ’s suffering anorexia two years now , entering 3rd year fake recovered extent like 5 times every time going back path restricting . i ’m approaching weight worst everyday deal constantly body shamed family / friends idea i ’m going matter many attempts i ’ve given explain fully i ’m going through . i ’m boy do n’t get lot recognition struggles , i ’m “ the skinny one does n’t eat lot ” peoples eyes . top live place there ’s virtually nowhere go eating disorders , there ’s programs people help me . feel like i ’m approaching end road point ready stop fighting give up . want eat , want recover , feel like it ’s forever cycle i ’m stuck restricting , hitting rock bottom , binging , restricting again ."
3960,anorexianervosa,chocolate ice cream !,"ok , lot , use scared liquid calories ( i count ice cream towards liquid calories ) now . i 'm eating chocolate ice cream ! i 'm proud myself ! even feel sick ( because motion sickness ) , i 'm still going eat entire scoop ice cream !"
3961,anorexianervosa,experience oddly specific challenges right recovering tempt back eating disorder ?,"y’ all find soon get good place recovery something comes along wants push right back ? whether it ’s triggering comment , break up , new relationship , someone close going diet , triggering situation ? know challenges come it ’s crazy quickly specific seem personal weaknesses . like soon getting better boyfriend asked out . great also starting eating disorder first place years ago soooo hard relapse ( & kinda bit ) . i ’d love hear stories it ’s similar ! feel like i ’ve noticed theme ."


In [11]:
# Remove punctuation from specific columns
columns_to_clean = ['Stopword Dropped Post Text', 'Stopword Dropped Title']

for column in columns_to_clean:
    dropped_filtered_columns_dataframe[column] = dropped_filtered_columns_dataframe[column].str.replace(r'[^\w\s]', '', regex=True)

# Display the DataFrame
dropped_filtered_columns_dataframe

Unnamed: 0,Subreddit,Stopword Dropped Title,Stopword Dropped Post Text
0,intermittentfasting,daily fasting check in,type fast water juice smoking etc context fast start end day x y etc length fast 8 hours 3 days etc why hope accomplish fast notes going far concerns insights share sure check back often comments get posted throughout day sort comments new sure newer comments get love well
1,intermittentfasting,f2958 246 134 112lbs i ve maintaining couple months now ama weight loss,
2,intermittentfasting,50 lbs 3 months,started june 19th passed 50 lb mark morning started 168 eating breakfast lunch only slowly moved 195 exercise 2 3 days week either rowing walking using eufy scale weigh morning manually measure every sunday strict diet trying avoid much sugar possible sw 305 cw 255 gw 220
3,intermittentfasting,222 195 6 months,besides weight zero aches pains breathe better medical blood work went 4 red flags 0
4,intermittentfasting,far seems working,22f 410 sw 1434 cw 1382 gw 115 started 168 calorie deficit 91323 last weighed 91923 went 1434lbs 1382lbs almost week i m probably 138lbs now i m going weigh end month height 5lbs seems make big difference i m sure lot water weight bloating i m still happy though feel better gives even motivation stick it
...,...,...,...
3958,anorexianervosa,anyone else fear becoming obese,know fear gaining weight pretty much universal thing anorexia anyone else fear actual obesity like i m genuinely terrified start eating wo nt stop i ll keep eating eating never stop i m 300 pounds lost self control stopped restricting plus hearing many people go anorexia binge eating disorder does nt help either know it s like everyone me i m terrified literally nightmares it imaging happening makes feel sick anxiety hope i m one cus feel crazy
3959,anorexianervosa,alone,i m 16 year old male who s suffering anorexia two years now entering 3rd year fake recovered extent like 5 times every time going back path restricting i m approaching weight worst everyday deal constantly body shamed family friends idea i m going matter many attempts i ve given explain fully i m going through i m boy do nt get lot recognition struggles i m the skinny one does nt eat lot peoples eyes top live place there s virtually nowhere go eating disorders there s programs people help me feel like i m approaching end road point ready stop fighting give up want eat want recover feel like it s forever cycle i m stuck restricting hitting rock bottom binging restricting again
3960,anorexianervosa,chocolate ice cream,ok lot use scared liquid calories i count ice cream towards liquid calories now i m eating chocolate ice cream i m proud myself even feel sick because motion sickness i m still going eat entire scoop ice cream
3961,anorexianervosa,experience oddly specific challenges right recovering tempt back eating disorder,y all find soon get good place recovery something comes along wants push right back whether it s triggering comment break up new relationship someone close going diet triggering situation know challenges come it s crazy quickly specific seem personal weaknesses like soon getting better boyfriend asked out great also starting eating disorder first place years ago soooo hard relapse kinda bit i d love hear stories it s similar feel like i ve noticed theme


In [12]:
#This is to split the content of each title and post into their unigrams and bigrams

# Initialize the Porter stemmer
stemmer = PorterStemmer()

# Initialize the Porter stemmer
stemmer = PorterStemmer()

# Function to stem text
def stem_text(text):
    if isinstance(text, str):  # Check if text is a string
        words = text.split()
        stemmed_words = [stemmer.stem(word) for word in words]
        return " ".join(stemmed_words)
    else:
        return text  # Return the original value if it's not a string

# Initialize the WordNet lemmatizer
lemmatizer = WordNetLemmatizer()

# Function to lemmatize text
def lemmatize_text(text):
    if isinstance(text, str):  # Check if text is a string
        words = text.split()
        lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
        return " ".join(lemmatized_words)
    else:
        return text  # Return the original value if it's not a string

# Function to split text into unigrams and bigrams
def split_text(text):
    if isinstance(text, str):  # Check if text is a string
        # Tokenize the text into words
        words = text.split()
        
        # Create unigrams (single words)
        unigrams = words
        
        # Create bigrams (consecutive word pairs)
        bigrams = list(zip(words, words[1:]))
        
        return unigrams, bigrams
    else:
        return [], []  # Return empty lists for unigrams and bigrams if text is not a string

# Apply stemming, lemmatization, and generate unigrams and bigrams to your columns
dropped_filtered_columns_dataframe["Stemmed Title"] = dropped_filtered_columns_dataframe["Stopword Dropped Title"].apply(stem_text)
dropped_filtered_columns_dataframe["Stemmed Post Text"] = dropped_filtered_columns_dataframe["Stopword Dropped Post Text"].apply(stem_text)
dropped_filtered_columns_dataframe["Lemmatized Title"] = dropped_filtered_columns_dataframe["Stopword Dropped Title"].apply(lemmatize_text)
dropped_filtered_columns_dataframe["Lemmatized Post Text"] = dropped_filtered_columns_dataframe["Stopword Dropped Post Text"].apply(lemmatize_text)
dropped_filtered_columns_dataframe[["Unigrams Title", "Bigrams Title"]] = dropped_filtered_columns_dataframe["Stopword Dropped Title"].apply(split_text).apply(pd.Series)
dropped_filtered_columns_dataframe[["Unigrams Post Text", "Bigrams Post Text"]] = dropped_filtered_columns_dataframe["Stopword Dropped Post Text"].apply(split_text).apply(pd.Series)

# Drop the original columns
dropped_filtered_columns_dataframe = dropped_filtered_columns_dataframe.drop(columns=["Stopword Dropped Title", "Stopword Dropped Post Text"])

# Convert all text columns to lowercase
text_columns = ["Stemmed Title", "Stemmed Post Text", "Lemmatized Title", "Lemmatized Post Text"]
for column in text_columns:
    dropped_filtered_columns_dataframe[column] = dropped_filtered_columns_dataframe[column].str.lower()

# Display the updated DataFrame
dropped_filtered_columns_dataframe

Unnamed: 0,Subreddit,Stemmed Title,Stemmed Post Text,Lemmatized Title,Lemmatized Post Text,Unigrams Title,Bigrams Title,Unigrams Post Text,Bigrams Post Text
0,intermittentfasting,daili fast check in,type fast water juic smoke etc context fast start end day x y etc length fast 8 hour 3 day etc whi hope accomplish fast note go far concern insight share sure check back often comment get post throughout day sort comment new sure newer comment get love well,daily fasting check in,type fast water juice smoking etc context fast start end day x y etc length fast 8 hour 3 day etc why hope accomplish fast note going far concern insight share sure check back often comment get posted throughout day sort comment new sure newer comment get love well,"[daily, fasting, check, in]","[(daily, fasting), (fasting, check), (check, in)]","[type, fast, water, juice, smoking, etc, context, fast, start, end, day, x, y, etc, length, fast, 8, hours, 3, days, etc, why, hope, accomplish, fast, notes, going, far, concerns, insights, share, sure, check, back, often, comments, get, posted, throughout, day, sort, comments, new, sure, newer, comments, get, love, well]","[(type, fast), (fast, water), (water, juice), (juice, smoking), (smoking, etc), (etc, context), (context, fast), (fast, start), (start, end), (end, day), (day, x), (x, y), (y, etc), (etc, length), (length, fast), (fast, 8), (8, hours), (hours, 3), (3, days), (days, etc), (etc, why), (why, hope), (hope, accomplish), (accomplish, fast), (fast, notes), (notes, going), (going, far), (far, concerns), (concerns, insights), (insights, share), (share, sure), (sure, check), (check, back), (back, often), (often, comments), (comments, get), (get, posted), (posted, throughout), (throughout, day), (day, sort), (sort, comments), (comments, new), (new, sure), (sure, newer), (newer, comments), (comments, get), (get, love), (love, well)]"
1,intermittentfasting,f2958 246 134 112lb i ve maintain coupl month now ama weight loss,,f2958 246 134 112lbs i ve maintaining couple month now ama weight loss,,"[f2958, 246, 134, 112lbs, i, ve, maintaining, couple, months, now, ama, weight, loss]","[(f2958, 246), (246, 134), (134, 112lbs), (112lbs, i), (i, ve), (ve, maintaining), (maintaining, couple), (couple, months), (months, now), (now, ama), (ama, weight), (weight, loss)]",[],[]
2,intermittentfasting,50 lb 3 month,start june 19th pass 50 lb mark morn start 168 eat breakfast lunch onli slowli move 195 exercis 2 3 day week either row walk use eufi scale weigh morn manual measur everi sunday strict diet tri avoid much sugar possibl sw 305 cw 255 gw 220,50 lb 3 month,started june 19th passed 50 lb mark morning started 168 eating breakfast lunch only slowly moved 195 exercise 2 3 day week either rowing walking using eufy scale weigh morning manually measure every sunday strict diet trying avoid much sugar possible sw 305 cw 255 gw 220,"[50, lbs, 3, months]","[(50, lbs), (lbs, 3), (3, months)]","[started, june, 19th, passed, 50, lb, mark, morning, started, 168, eating, breakfast, lunch, only, slowly, moved, 195, exercise, 2, 3, days, week, either, rowing, walking, using, eufy, scale, weigh, morning, manually, measure, every, sunday, strict, diet, trying, avoid, much, sugar, possible, sw, 305, cw, 255, gw, 220]","[(started, june), (june, 19th), (19th, passed), (passed, 50), (50, lb), (lb, mark), (mark, morning), (morning, started), (started, 168), (168, eating), (eating, breakfast), (breakfast, lunch), (lunch, only), (only, slowly), (slowly, moved), (moved, 195), (195, exercise), (exercise, 2), (2, 3), (3, days), (days, week), (week, either), (either, rowing), (rowing, walking), (walking, using), (using, eufy), (eufy, scale), (scale, weigh), (weigh, morning), (morning, manually), (manually, measure), (measure, every), (every, sunday), (sunday, strict), (strict, diet), (diet, trying), (trying, avoid), (avoid, much), (much, sugar), (sugar, possible), (possible, sw), (sw, 305), (305, cw), (cw, 255), (255, gw), (gw, 220)]"
3,intermittentfasting,222 195 6 month,besid weight zero ach pain breath better medic blood work went 4 red flag 0,222 195 6 month,besides weight zero ache pain breathe better medical blood work went 4 red flag 0,"[222, 195, 6, months]","[(222, 195), (195, 6), (6, months)]","[besides, weight, zero, aches, pains, breathe, better, medical, blood, work, went, 4, red, flags, 0]","[(besides, weight), (weight, zero), (zero, aches), (aches, pains), (pains, breathe), (breathe, better), (better, medical), (medical, blood), (blood, work), (work, went), (went, 4), (4, red), (red, flags), (flags, 0)]"
4,intermittentfasting,far seem work,22f 410 sw 1434 cw 1382 gw 115 start 168 calori deficit 91323 last weigh 91923 went 1434lb 1382lb almost week i m probabl 138lb now i m go weigh end month height 5lb seem make big differ i m sure lot water weight bloat i m still happi though feel better give even motiv stick it,far seems working,22f 410 sw 1434 cw 1382 gw 115 started 168 calorie deficit 91323 last weighed 91923 went 1434lbs 1382lbs almost week i m probably 138lbs now i m going weigh end month height 5lbs seems make big difference i m sure lot water weight bloating i m still happy though feel better give even motivation stick it,"[far, seems, working]","[(far, seems), (seems, working)]","[22f, 410, sw, 1434, cw, 1382, gw, 115, started, 168, calorie, deficit, 91323, last, weighed, 91923, went, 1434lbs, 1382lbs, almost, week, i, m, probably, 138lbs, now, i, m, going, weigh, end, month, height, 5lbs, seems, make, big, difference, i, m, sure, lot, water, weight, bloating, i, m, still, happy, though, feel, better, gives, even, motivation, stick, it]","[(22f, 410), (410, sw), (sw, 1434), (1434, cw), (cw, 1382), (1382, gw), (gw, 115), (115, started), (started, 168), (168, calorie), (calorie, deficit), (deficit, 91323), (91323, last), (last, weighed), (weighed, 91923), (91923, went), (went, 1434lbs), (1434lbs, 1382lbs), (1382lbs, almost), (almost, week), (week, i), (i, m), (m, probably), (probably, 138lbs), (138lbs, now), (now, i), (i, m), (m, going), (going, weigh), (weigh, end), (end, month), (month, height), (height, 5lbs), (5lbs, seems), (seems, make), (make, big), (big, difference), (difference, i), (i, m), (m, sure), (sure, lot), (lot, water), (water, weight), (weight, bloating), (bloating, i), (i, m), (m, still), (still, happy), (happy, though), (though, feel), (feel, better), (better, gives), (gives, even), (even, motivation), (motivation, stick), (stick, it)]"
...,...,...,...,...,...,...,...,...,...
3958,anorexianervosa,anyon els fear becom obes,know fear gain weight pretti much univers thing anorexia anyon els fear actual obes like i m genuin terrifi start eat wo nt stop i ll keep eat eat never stop i m 300 pound lost self control stop restrict plu hear mani peopl go anorexia bing eat disord doe nt help either know it s like everyon me i m terrifi liter nightmar it imag happen make feel sick anxieti hope i m one cu feel crazi,anyone else fear becoming obese,know fear gaining weight pretty much universal thing anorexia anyone else fear actual obesity like i m genuinely terrified start eating wo nt stop i ll keep eating eating never stop i m 300 pound lost self control stopped restricting plus hearing many people go anorexia binge eating disorder doe nt help either know it s like everyone me i m terrified literally nightmare it imaging happening make feel sick anxiety hope i m one cu feel crazy,"[anyone, else, fear, becoming, obese]","[(anyone, else), (else, fear), (fear, becoming), (becoming, obese)]","[know, fear, gaining, weight, pretty, much, universal, thing, anorexia, anyone, else, fear, actual, obesity, like, i, m, genuinely, terrified, start, eating, wo, nt, stop, i, ll, keep, eating, eating, never, stop, i, m, 300, pounds, lost, self, control, stopped, restricting, plus, hearing, many, people, go, anorexia, binge, eating, disorder, does, nt, help, either, know, it, s, like, everyone, me, i, m, terrified, literally, nightmares, it, imaging, happening, makes, feel, sick, anxiety, hope, i, m, one, cus, feel, crazy]","[(know, fear), (fear, gaining), (gaining, weight), (weight, pretty), (pretty, much), (much, universal), (universal, thing), (thing, anorexia), (anorexia, anyone), (anyone, else), (else, fear), (fear, actual), (actual, obesity), (obesity, like), (like, i), (i, m), (m, genuinely), (genuinely, terrified), (terrified, start), (start, eating), (eating, wo), (wo, nt), (nt, stop), (stop, i), (i, ll), (ll, keep), (keep, eating), (eating, eating), (eating, never), (never, stop), (stop, i), (i, m), (m, 300), (300, pounds), (pounds, lost), (lost, self), (self, control), (control, stopped), (stopped, restricting), (restricting, plus), (plus, hearing), (hearing, many), (many, people), (people, go), (go, anorexia), (anorexia, binge), (binge, eating), (eating, disorder), (disorder, does), (does, nt), (nt, help), (help, either), (either, know), (know, it), (it, s), (s, like), (like, everyone), (everyone, me), (me, i), (i, m), (m, terrified), (terrified, literally), (literally, nightmares), (nightmares, it), (it, imaging), (imaging, happening), (happening, makes), (makes, feel), (feel, sick), (sick, anxiety), (anxiety, hope), (hope, i), (i, m), (m, one), (one, cus), (cus, feel), (feel, crazy)]"
3959,anorexianervosa,alon,i m 16 year old male who s suffer anorexia two year now enter 3rd year fake recov extent like 5 time everi time go back path restrict i m approach weight worst everyday deal constantli bodi shame famili friend idea i m go matter mani attempt i ve given explain fulli i m go through i m boy do nt get lot recognit struggl i m the skinni one doe nt eat lot peopl eye top live place there s virtual nowher go eat disord there s program peopl help me feel like i m approach end road point readi stop fight give up want eat want recov feel like it s forev cycl i m stuck restrict hit rock bottom bing restrict again,alone,i m 16 year old male who s suffering anorexia two year now entering 3rd year fake recovered extent like 5 time every time going back path restricting i m approaching weight worst everyday deal constantly body shamed family friend idea i m going matter many attempt i ve given explain fully i m going through i m boy do nt get lot recognition struggle i m the skinny one doe nt eat lot people eye top live place there s virtually nowhere go eating disorder there s program people help me feel like i m approaching end road point ready stop fighting give up want eat want recover feel like it s forever cycle i m stuck restricting hitting rock bottom binging restricting again,[alone],[],"[i, m, 16, year, old, male, who, s, suffering, anorexia, two, years, now, entering, 3rd, year, fake, recovered, extent, like, 5, times, every, time, going, back, path, restricting, i, m, approaching, weight, worst, everyday, deal, constantly, body, shamed, family, friends, idea, i, m, going, matter, many, attempts, i, ve, given, explain, fully, i, m, going, through, i, m, boy, do, nt, get, lot, recognition, struggles, i, m, the, skinny, one, does, nt, eat, lot, peoples, eyes, top, live, place, there, s, virtually, nowhere, go, eating, disorders, there, s, programs, people, help, me, feel, like, i, m, approaching, end, road, point, ...]","[(i, m), (m, 16), (16, year), (year, old), (old, male), (male, who), (who, s), (s, suffering), (suffering, anorexia), (anorexia, two), (two, years), (years, now), (now, entering), (entering, 3rd), (3rd, year), (year, fake), (fake, recovered), (recovered, extent), (extent, like), (like, 5), (5, times), (times, every), (every, time), (time, going), (going, back), (back, path), (path, restricting), (restricting, i), (i, m), (m, approaching), (approaching, weight), (weight, worst), (worst, everyday), (everyday, deal), (deal, constantly), (constantly, body), (body, shamed), (shamed, family), (family, friends), (friends, idea), (idea, i), (i, m), (m, going), (going, matter), (matter, many), (many, attempts), (attempts, i), (i, ve), (ve, given), (given, explain), (explain, fully), (fully, i), (i, m), (m, going), (going, through), (through, i), (i, m), (m, boy), (boy, do), (do, nt), (nt, get), (get, lot), (lot, recognition), (recognition, struggles), (struggles, i), (i, m), (m, the), (the, skinny), (skinny, one), (one, does), (does, nt), (nt, eat), (eat, lot), (lot, peoples), (peoples, eyes), (eyes, top), (top, live), (live, place), (place, there), (there, s), (s, virtually), (virtually, nowhere), (nowhere, go), (go, eating), (eating, disorders), (disorders, there), (there, s), (s, programs), (programs, people), (people, help), (help, me), (me, feel), (feel, like), (like, i), (i, m), (m, approaching), (approaching, end), (end, road), (road, point), (point, ready), ...]"
3960,anorexianervosa,chocol ice cream,ok lot use scare liquid calori i count ice cream toward liquid calori now i m eat chocol ice cream i m proud myself even feel sick becaus motion sick i m still go eat entir scoop ice cream,chocolate ice cream,ok lot use scared liquid calorie i count ice cream towards liquid calorie now i m eating chocolate ice cream i m proud myself even feel sick because motion sickness i m still going eat entire scoop ice cream,"[chocolate, ice, cream]","[(chocolate, ice), (ice, cream)]","[ok, lot, use, scared, liquid, calories, i, count, ice, cream, towards, liquid, calories, now, i, m, eating, chocolate, ice, cream, i, m, proud, myself, even, feel, sick, because, motion, sickness, i, m, still, going, eat, entire, scoop, ice, cream]","[(ok, lot), (lot, use), (use, scared), (scared, liquid), (liquid, calories), (calories, i), (i, count), (count, ice), (ice, cream), (cream, towards), (towards, liquid), (liquid, calories), (calories, now), (now, i), (i, m), (m, eating), (eating, chocolate), (chocolate, ice), (ice, cream), (cream, i), (i, m), (m, proud), (proud, myself), (myself, even), (even, feel), (feel, sick), (sick, because), (because, motion), (motion, sickness), (sickness, i), (i, m), (m, still), (still, going), (going, eat), (eat, entire), (entire, scoop), (scoop, ice), (ice, cream)]"
3961,anorexianervosa,experi oddli specif challeng right recov tempt back eat disord,y all find soon get good place recoveri someth come along want push right back whether it s trigger comment break up new relationship someon close go diet trigger situat know challeng come it s crazi quickli specif seem person weak like soon get better boyfriend ask out great also start eat disord first place year ago soooo hard relaps kinda bit i d love hear stori it s similar feel like i ve notic theme,experience oddly specific challenge right recovering tempt back eating disorder,y all find soon get good place recovery something come along want push right back whether it s triggering comment break up new relationship someone close going diet triggering situation know challenge come it s crazy quickly specific seem personal weakness like soon getting better boyfriend asked out great also starting eating disorder first place year ago soooo hard relapse kinda bit i d love hear story it s similar feel like i ve noticed theme,"[experience, oddly, specific, challenges, right, recovering, tempt, back, eating, disorder]","[(experience, oddly), (oddly, specific), (specific, challenges), (challenges, right), (right, recovering), (recovering, tempt), (tempt, back), (back, eating), (eating, disorder)]","[y, all, find, soon, get, good, place, recovery, something, comes, along, wants, push, right, back, whether, it, s, triggering, comment, break, up, new, relationship, someone, close, going, diet, triggering, situation, know, challenges, come, it, s, crazy, quickly, specific, seem, personal, weaknesses, like, soon, getting, better, boyfriend, asked, out, great, also, starting, eating, disorder, first, place, years, ago, soooo, hard, relapse, kinda, bit, i, d, love, hear, stories, it, s, similar, feel, like, i, ve, noticed, theme]","[(y, all), (all, find), (find, soon), (soon, get), (get, good), (good, place), (place, recovery), (recovery, something), (something, comes), (comes, along), (along, wants), (wants, push), (push, right), (right, back), (back, whether), (whether, it), (it, s), (s, triggering), (triggering, comment), (comment, break), (break, up), (up, new), (new, relationship), (relationship, someone), (someone, close), (close, going), (going, diet), (diet, triggering), (triggering, situation), (situation, know), (know, challenges), (challenges, come), (come, it), (it, s), (s, crazy), (crazy, quickly), (quickly, specific), (specific, seem), (seem, personal), (personal, weaknesses), (weaknesses, like), (like, soon), (soon, getting), (getting, better), (better, boyfriend), (boyfriend, asked), (asked, out), (out, great), (great, also), (also, starting), (starting, eating), (eating, disorder), (disorder, first), (first, place), (place, years), (years, ago), (ago, soooo), (soooo, hard), (hard, relapse), (relapse, kinda), (kinda, bit), (bit, i), (i, d), (d, love), (love, hear), (hear, stories), (stories, it), (it, s), (s, similar), (similar, feel), (feel, like), (like, i), (i, ve), (ve, noticed), (noticed, theme)]"


In [13]:
# Combine the text from both "Unigrams Title" and "Unigrams Post Text" into a single column
dropped_filtered_columns_dataframe['Combined Unigrams'] = dropped_filtered_columns_dataframe['Unigrams Title'] + dropped_filtered_columns_dataframe['Unigrams Post Text']

# Convert the lists of unigrams into strings
dropped_filtered_columns_dataframe['Combined Unigrams'] = dropped_filtered_columns_dataframe['Combined Unigrams'].apply(lambda x: ' '.join(x))

# Initialize the CountVectorizer
vectorizer = CountVectorizer()

# Fit and transform the combined unigrams
unigram_counts = vectorizer.fit_transform(dropped_filtered_columns_dataframe['Combined Unigrams'])

# Get the feature names (unigrams)
feature_names = vectorizer.get_feature_names_out()

# Create a DataFrame to store the counts
unigram_counts_df = pd.DataFrame(unigram_counts.toarray(), columns=feature_names)

# Add the subreddit column back to the DataFrame
unigram_counts_df['Subreddit'] = dropped_filtered_columns_dataframe['Subreddit']

# Group the DataFrame by "Subreddit" and sum the counts
grouped_unigram_counts = unigram_counts_df.groupby('Subreddit').sum()

# Get a list of unique subreddits
unique_subreddits = dropped_filtered_columns_dataframe['Subreddit'].unique()

# Dictionary to store unigram counts for each subreddit
subreddit_unigram_counts = {}

# Loop through each unique subreddit
for subreddit_name in unique_subreddits:
    # Get the specific unigram count for the subreddit
    specific_unigram_count = grouped_unigram_counts.loc[subreddit_name]
    
    # To get the top N most common unigrams for a subreddit, you can use:
    top_n = 10  # Replace with the desired number
    top_n_unigrams = specific_unigram_count.nlargest(top_n)
    
    # Store the top N unigrams in the dictionary
    subreddit_unigram_counts[subreddit_name] = top_n_unigrams

# Loop through each subreddit and its top N unigrams
for subreddit_name, top_n_unigrams in subreddit_unigram_counts.items():
    print(f"Subreddit: {subreddit_name}")
    print(top_n_unigrams)
    print()

Subreddit: intermittentfasting
weight     815
fasting    764
ve         653
fast       645
day        567
nt         531
it         472
eating     467
days       447
eat        411
Name: intermittentfasting, dtype: int64

Subreddit: anorexianervosa
nt        2334
like      1819
feel      1423
it        1262
weight    1152
do        1130
ve        1077
eating    1031
know      1030
eat       1003
Name: anorexianervosa, dtype: int64

