In [1]:
# Standard Library Imports
import pandas as pd
import re
import time
import datetime
from collections import defaultdict, Counter

# Third-party Library Imports
import requests
from bs4 import BeautifulSoup
import praw
import nltk
import numpy as np
import matplotlib.pyplot as plt
import concurrent.futures
from nltk.corpus import stopwords
from gensim.models import Word2Vec
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.util import bigrams
from sklearn.feature_extraction.text import CountVectorizer
import spacy
import string

In [2]:
# Set pandas display options to show the entire content of the "Post Text" column
pd.set_option('display.max_colwidth', None)

In [3]:
# Specify the path to your CSV file
csv_file_path = 'reddits.csv'

# Read the CSV file into a DataFrame
df_reddit_post = pd.read_csv(csv_file_path)

# Now, 'df' contains your data from the CSV file

len(df_reddit_post)

2489

In [4]:
df_reddit_post.head()

Unnamed: 0,title,post_text,id,score,total_comments,post_url,subreddit,post_type,title_&_text,title_text_stemmed,title_text_lemmatized,trending
0,Daily Fasting Check-in!,"* **Type** of fast (water, juice, smoking, etc.)\n* **Context** of fast (start, end, day x of y, etc.)\n* **Length** of fast (8 hours, 3 days, etc.)\n* **Why?** What you hope to accomplish with your fast\n* **Notes** How is it going so far? Any concerns? Insights to share?\n\nBe sure to check back often as comments get posted throughout the day. Sort comments by ""new"" to be sure the newer comments get some love as well.",16o7z6r,1,2,https://www.reddit.com/r/intermittentfasting/comments/16o7z6r/daily_fasting_checkin/,intermittentfasting,hot,"Daily Fasting Check-in! * **Type** of fast (water, juice, smoking, etc.)\n* **Context** of fast (start, end, day x of y, etc.)\n* **Length** of fast (8 hours, 3 days, etc.)\n* **Why?** What you hope to accomplish with your fast\n* **Notes** How is it going so far? Any concerns? Insights to share?\n\nBe sure to check back often as comments get posted throughout the day. Sort comments by ""new"" to be sure the newer comments get some love as well.","['daili', 'checkin', 'type', 'fast', 'water', 'juic', 'smoke', 'etc', 'context', 'fast', 'start', 'end', 'day', 'x', 'etc', 'length', 'fast', '8', 'hour', '3', 'day', 'etc', 'hope', 'accomplish', 'fast', 'note', 'go', 'far', 'concern', 'insight', 'share', 'sure', 'check', 'back', 'often', 'comment', 'get', 'post', 'throughout', 'day', 'sort', 'comment', 'new', 'sure', 'newer', 'comment', 'get', 'love', 'well']","['daily', 'checkin', 'type', 'fast', 'water', 'juice', 'smoking', 'etc', 'context', 'fast', 'start', 'end', 'day', 'x', 'etc', 'length', 'fast', '8', 'hour', '3', 'day', 'etc', 'hope', 'accomplish', 'fast', 'note', 'going', 'far', 'concern', 'insight', 'share', 'sure', 'check', 'back', 'often', 'comment', 'get', 'posted', 'throughout', 'day', 'sort', 'comment', 'new', 'sure', 'newer', 'comment', 'get', 'love', 'well']",2
1,I decided who I wanted to be and I became her 💅🏽,"So a little background: I’m 39, have birthed two children and at 5’2, used to weigh 220 lbs for about a decade. I’m sitting around 150 lbs right now. It’s been almost 3 years since I started this intermittent fasting lifestyle and it’s the first time I didn’t gain ALL OF the weight back plus more. But I did fall off a bit last year and gained back about 25 lbs, 20 of which I’ve lost again. That regain did teach me something though. About how to handle stress. About not giving up when there are setbacks. About being grateful for every stage of the journey you’re at. I’ve currently been doing mostly a 20/4 schedule but eating whatever I want to satiety within my window… and the rest of the time I’m just chillin’. 😎",16ntqoy,1176,36,https://i.redd.it/fclkjnwhmgpb1.jpg,intermittentfasting,hot,"I decided who I wanted to be and I became her 💅🏽 So a little background: I’m 39, have birthed two children and at 5’2, used to weigh 220 lbs for about a decade. I’m sitting around 150 lbs right now. It’s been almost 3 years since I started this intermittent fasting lifestyle and it’s the first time I didn’t gain ALL OF the weight back plus more. But I did fall off a bit last year and gained back about 25 lbs, 20 of which I’ve lost again. That regain did teach me something though. About how to handle stress. About not giving up when there are setbacks. About being grateful for every stage of the journey you’re at. I’ve currently been doing mostly a 20/4 schedule but eating whatever I want to satiety within my window… and the rest of the time I’m just chillin’. 😎","['decid', 'want', 'becam', 'littl', 'background', '39', 'birth', 'two', 'children', '5', '2', 'use', 'weigh', '220', 'lb', 'decad', 'sit', 'around', '150', 'lb', 'right', 'almost', '3', 'year', 'sinc', 'start', 'lifestyl', 'first', 'gain', 'weight', 'back', 'plu', 'fall', 'bit', 'last', 'year', 'gain', 'back', '25', 'lb', '20', 'lost', 'regain', 'teach', 'someth', 'though', 'handl', 'stress', 'give', 'setback', 'grate', 'stage', 'journey', 'current', 'mostli', '204', 'schedul', 'eat', 'whatev', 'satieti', 'within', 'window', 'rest', 'chillin', '']","['decided', 'wanted', 'became', 'little', 'background', '39', 'birthed', 'two', 'child', '5', '2', 'used', 'weigh', '220', 'lb', 'decade', 'sitting', 'around', '150', 'lb', 'right', 'almost', '3', 'year', 'since', 'started', 'lifestyle', 'first', 'gain', 'weight', 'back', 'plus', 'fall', 'bit', 'last', 'year', 'gained', 'back', '25', 'lb', '20', 'lost', 'regain', 'teach', 'something', 'though', 'handle', 'stress', 'giving', 'setback', 'grateful', 'stage', 'journey', 'currently', 'mostly', '204', 'schedule', 'eating', 'whatever', 'satiety', 'within', 'window', 'rest', 'chillin', '']",42336
2,Some photos from a past vacation came up as a memory today. Really emotional looking back at how far I’ve come. Started IF in 2020 & have kept up with it since then. 225lb to 125lb!,"I remember being miserable and insecure the entire time I was on this trip, it was really hard to enjoy myself. It was about more than just the weight- thankful to be in a better place physically and mentally these days!!",16ni914,1505,77,https://www.reddit.com/gallery/16ni914,intermittentfasting,hot,"Some photos from a past vacation came up as a memory today. Really emotional looking back at how far I’ve come. Started IF in 2020 & have kept up with it since then. 225lb to 125lb! I remember being miserable and insecure the entire time I was on this trip, it was really hard to enjoy myself. It was about more than just the weight- thankful to be in a better place physically and mentally these days!!","['photo', 'past', 'vacat', 'came', 'memori', 'today', 'realli', 'emot', 'look', 'back', 'far', 'come', 'start', '2020', 'kept', 'sinc', '225lb', '125lb', 'rememb', 'miser', 'insecur', 'entir', 'trip', 'realli', 'hard', 'enjoy', 'weight', 'thank', 'better', 'place', 'physic', 'mental', 'day']","['photo', 'past', 'vacation', 'came', 'memory', 'today', 'really', 'emotional', 'looking', 'back', 'far', 'come', 'started', '2020', 'kept', 'since', '225lb', '125lb', 'remember', 'miserable', 'insecure', 'entire', 'trip', 'really', 'hard', 'enjoy', 'weight', 'thankful', 'better', 'place', 'physically', 'mentally', 'day']",115885
3,"Anybody find IF, lose weight, and then lose motivation because you know that you can always lose weight, so you procrastinate about losing weight?",I know I am an idiot.,16nuqx9,198,78,https://www.reddit.com/r/intermittentfasting/comments/16nuqx9/anybody_find_if_lose_weight_and_then_lose/,intermittentfasting,hot,"Anybody find IF, lose weight, and then lose motivation because you know that you can always lose weight, so you procrastinate about losing weight? I know I am an idiot.","['anybodi', 'find', 'lose', 'weight', 'lose', 'motiv', 'alway', 'lose', 'weight', 'procrastin', 'lose', 'weight', 'idiot']","['anybody', 'find', 'lose', 'weight', 'lose', 'motivation', 'always', 'lose', 'weight', 'procrastinate', 'losing', 'weight', 'idiot']",15444
4,2 and a half months of IF,"From 234 to 211 in 2.5 months. It works! Once I got disciplined with fasting it became so easy to stay consistent. I had rapid weight gain due to a medication but now feeling much more confident. To compliment IF, I do cardio 3-4 times a week. 11 more pounds to go !! You can do this !",16nuxqs,180,12,https://i.redd.it/30yqmtsdvgpb1.jpg,intermittentfasting,hot,"2 and a half months of IF From 234 to 211 in 2.5 months. It works! Once I got disciplined with fasting it became so easy to stay consistent. I had rapid weight gain due to a medication but now feeling much more confident. To compliment IF, I do cardio 3-4 times a week. 11 more pounds to go !! You can do this !","['2', 'half', 'month', '234', '211', '25', 'month', 'work', 'got', 'disciplin', 'becam', 'easi', 'stay', 'consist', 'rapid', 'weight', 'gain', 'due', 'medic', 'feel', 'much', 'confid', 'compliment', 'cardio', '34', 'time', 'week', '11', 'pound', 'go', '']","['2', 'half', 'month', '234', '211', '25', 'month', 'work', 'got', 'disciplined', 'became', 'easy', 'stay', 'consistent', 'rapid', 'weight', 'gain', 'due', 'medication', 'feeling', 'much', 'confident', 'compliment', 'cardio', '34', 'time', 'week', '11', 'pound', 'go', '']",2160


In [5]:
# Define the regex pattern
pattern = r'^https?:\/\/.*[\r\n]*'

# Apply the regex substitution to the 'Text_Column' using apply and a lambda function
df_reddit_post['post_url'] = df_reddit_post['post_url'].apply(lambda text: re.sub(pattern, '', text, flags=re.MULTILINE))

# Display the updated DataFrame
df_reddit_post

Unnamed: 0,title,post_text,id,score,total_comments,post_url,subreddit,post_type,title_&_text,title_text_stemmed,title_text_lemmatized,trending
0,Daily Fasting Check-in!,"* **Type** of fast (water, juice, smoking, etc.)\n* **Context** of fast (start, end, day x of y, etc.)\n* **Length** of fast (8 hours, 3 days, etc.)\n* **Why?** What you hope to accomplish with your fast\n* **Notes** How is it going so far? Any concerns? Insights to share?\n\nBe sure to check back often as comments get posted throughout the day. Sort comments by ""new"" to be sure the newer comments get some love as well.",16o7z6r,1,2,,intermittentfasting,hot,"Daily Fasting Check-in! * **Type** of fast (water, juice, smoking, etc.)\n* **Context** of fast (start, end, day x of y, etc.)\n* **Length** of fast (8 hours, 3 days, etc.)\n* **Why?** What you hope to accomplish with your fast\n* **Notes** How is it going so far? Any concerns? Insights to share?\n\nBe sure to check back often as comments get posted throughout the day. Sort comments by ""new"" to be sure the newer comments get some love as well.","['daili', 'checkin', 'type', 'fast', 'water', 'juic', 'smoke', 'etc', 'context', 'fast', 'start', 'end', 'day', 'x', 'etc', 'length', 'fast', '8', 'hour', '3', 'day', 'etc', 'hope', 'accomplish', 'fast', 'note', 'go', 'far', 'concern', 'insight', 'share', 'sure', 'check', 'back', 'often', 'comment', 'get', 'post', 'throughout', 'day', 'sort', 'comment', 'new', 'sure', 'newer', 'comment', 'get', 'love', 'well']","['daily', 'checkin', 'type', 'fast', 'water', 'juice', 'smoking', 'etc', 'context', 'fast', 'start', 'end', 'day', 'x', 'etc', 'length', 'fast', '8', 'hour', '3', 'day', 'etc', 'hope', 'accomplish', 'fast', 'note', 'going', 'far', 'concern', 'insight', 'share', 'sure', 'check', 'back', 'often', 'comment', 'get', 'posted', 'throughout', 'day', 'sort', 'comment', 'new', 'sure', 'newer', 'comment', 'get', 'love', 'well']",2
1,I decided who I wanted to be and I became her 💅🏽,"So a little background: I’m 39, have birthed two children and at 5’2, used to weigh 220 lbs for about a decade. I’m sitting around 150 lbs right now. It’s been almost 3 years since I started this intermittent fasting lifestyle and it’s the first time I didn’t gain ALL OF the weight back plus more. But I did fall off a bit last year and gained back about 25 lbs, 20 of which I’ve lost again. That regain did teach me something though. About how to handle stress. About not giving up when there are setbacks. About being grateful for every stage of the journey you’re at. I’ve currently been doing mostly a 20/4 schedule but eating whatever I want to satiety within my window… and the rest of the time I’m just chillin’. 😎",16ntqoy,1176,36,,intermittentfasting,hot,"I decided who I wanted to be and I became her 💅🏽 So a little background: I’m 39, have birthed two children and at 5’2, used to weigh 220 lbs for about a decade. I’m sitting around 150 lbs right now. It’s been almost 3 years since I started this intermittent fasting lifestyle and it’s the first time I didn’t gain ALL OF the weight back plus more. But I did fall off a bit last year and gained back about 25 lbs, 20 of which I’ve lost again. That regain did teach me something though. About how to handle stress. About not giving up when there are setbacks. About being grateful for every stage of the journey you’re at. I’ve currently been doing mostly a 20/4 schedule but eating whatever I want to satiety within my window… and the rest of the time I’m just chillin’. 😎","['decid', 'want', 'becam', 'littl', 'background', '39', 'birth', 'two', 'children', '5', '2', 'use', 'weigh', '220', 'lb', 'decad', 'sit', 'around', '150', 'lb', 'right', 'almost', '3', 'year', 'sinc', 'start', 'lifestyl', 'first', 'gain', 'weight', 'back', 'plu', 'fall', 'bit', 'last', 'year', 'gain', 'back', '25', 'lb', '20', 'lost', 'regain', 'teach', 'someth', 'though', 'handl', 'stress', 'give', 'setback', 'grate', 'stage', 'journey', 'current', 'mostli', '204', 'schedul', 'eat', 'whatev', 'satieti', 'within', 'window', 'rest', 'chillin', '']","['decided', 'wanted', 'became', 'little', 'background', '39', 'birthed', 'two', 'child', '5', '2', 'used', 'weigh', '220', 'lb', 'decade', 'sitting', 'around', '150', 'lb', 'right', 'almost', '3', 'year', 'since', 'started', 'lifestyle', 'first', 'gain', 'weight', 'back', 'plus', 'fall', 'bit', 'last', 'year', 'gained', 'back', '25', 'lb', '20', 'lost', 'regain', 'teach', 'something', 'though', 'handle', 'stress', 'giving', 'setback', 'grateful', 'stage', 'journey', 'currently', 'mostly', '204', 'schedule', 'eating', 'whatever', 'satiety', 'within', 'window', 'rest', 'chillin', '']",42336
2,Some photos from a past vacation came up as a memory today. Really emotional looking back at how far I’ve come. Started IF in 2020 & have kept up with it since then. 225lb to 125lb!,"I remember being miserable and insecure the entire time I was on this trip, it was really hard to enjoy myself. It was about more than just the weight- thankful to be in a better place physically and mentally these days!!",16ni914,1505,77,,intermittentfasting,hot,"Some photos from a past vacation came up as a memory today. Really emotional looking back at how far I’ve come. Started IF in 2020 & have kept up with it since then. 225lb to 125lb! I remember being miserable and insecure the entire time I was on this trip, it was really hard to enjoy myself. It was about more than just the weight- thankful to be in a better place physically and mentally these days!!","['photo', 'past', 'vacat', 'came', 'memori', 'today', 'realli', 'emot', 'look', 'back', 'far', 'come', 'start', '2020', 'kept', 'sinc', '225lb', '125lb', 'rememb', 'miser', 'insecur', 'entir', 'trip', 'realli', 'hard', 'enjoy', 'weight', 'thank', 'better', 'place', 'physic', 'mental', 'day']","['photo', 'past', 'vacation', 'came', 'memory', 'today', 'really', 'emotional', 'looking', 'back', 'far', 'come', 'started', '2020', 'kept', 'since', '225lb', '125lb', 'remember', 'miserable', 'insecure', 'entire', 'trip', 'really', 'hard', 'enjoy', 'weight', 'thankful', 'better', 'place', 'physically', 'mentally', 'day']",115885
3,"Anybody find IF, lose weight, and then lose motivation because you know that you can always lose weight, so you procrastinate about losing weight?",I know I am an idiot.,16nuqx9,198,78,,intermittentfasting,hot,"Anybody find IF, lose weight, and then lose motivation because you know that you can always lose weight, so you procrastinate about losing weight? I know I am an idiot.","['anybodi', 'find', 'lose', 'weight', 'lose', 'motiv', 'alway', 'lose', 'weight', 'procrastin', 'lose', 'weight', 'idiot']","['anybody', 'find', 'lose', 'weight', 'lose', 'motivation', 'always', 'lose', 'weight', 'procrastinate', 'losing', 'weight', 'idiot']",15444
4,2 and a half months of IF,"From 234 to 211 in 2.5 months. It works! Once I got disciplined with fasting it became so easy to stay consistent. I had rapid weight gain due to a medication but now feeling much more confident. To compliment IF, I do cardio 3-4 times a week. 11 more pounds to go !! You can do this !",16nuxqs,180,12,,intermittentfasting,hot,"2 and a half months of IF From 234 to 211 in 2.5 months. It works! Once I got disciplined with fasting it became so easy to stay consistent. I had rapid weight gain due to a medication but now feeling much more confident. To compliment IF, I do cardio 3-4 times a week. 11 more pounds to go !! You can do this !","['2', 'half', 'month', '234', '211', '25', 'month', 'work', 'got', 'disciplin', 'becam', 'easi', 'stay', 'consist', 'rapid', 'weight', 'gain', 'due', 'medic', 'feel', 'much', 'confid', 'compliment', 'cardio', '34', 'time', 'week', '11', 'pound', 'go', '']","['2', 'half', 'month', '234', '211', '25', 'month', 'work', 'got', 'disciplined', 'became', 'easy', 'stay', 'consistent', 'rapid', 'weight', 'gain', 'due', 'medication', 'feeling', 'much', 'confident', 'compliment', 'cardio', '34', 'time', 'week', '11', 'pound', 'go', '']",2160
...,...,...,...,...,...,...,...,...,...,...,...,...
2484,anyone else have a fear of becoming obese?,"i know that fear of gaining weight is a pretty much universal thing for anorexia, but does anyone else have a fear of actual obesity? like i’m genuinely terrified that once i start eating more i just won’t stop. i’ll just keep eating and eating and never stop until i’m 300 pounds because i lost all my self control when i stopped restricting. plus hearing about how many people go from anorexia to binge eating disorder doesn’t help either. i know it’s not like that for everyone but what if it is for me? i’m so terrified of it that i literally have nightmares about it, and imaging it happening makes me feel sick with anxiety. i just hope i’m not the only one cus i feel crazy",kwdigq,89,24,,AnorexiaNervosa,top,"anyone else have a fear of becoming obese? i know that fear of gaining weight is a pretty much universal thing for anorexia, but does anyone else have a fear of actual obesity? like i’m genuinely terrified that once i start eating more i just won’t stop. i’ll just keep eating and eating and never stop until i’m 300 pounds because i lost all my self control when i stopped restricting. plus hearing about how many people go from anorexia to binge eating disorder doesn’t help either. i know it’s not like that for everyone but what if it is for me? i’m so terrified of it that i literally have nightmares about it, and imaging it happening makes me feel sick with anxiety. i just hope i’m not the only one cus i feel crazy","['anyon', 'els', 'fear', 'becom', 'obes', 'fear', 'gain', 'weight', 'pretti', 'much', 'univers', 'thing', 'anyon', 'els', 'fear', 'actual', 'obes', 'genuin', 'terrifi', 'start', 'eat', 'stop', 'keep', 'eat', 'eat', 'never', 'stop', '300', 'pound', 'lost', 'self', 'control', 'stop', 'restrict', 'plu', 'hear', 'mani', 'peopl', 'go', 'bing', 'eat', 'disord', 'help', 'either', 'everyon', 'terrifi', 'liter', 'nightmar', 'imag', 'happen', 'make', 'sick', 'anxieti', 'hope', 'one', 'cu', 'crazi']","['anyone', 'else', 'fear', 'becoming', 'obese', 'fear', 'gaining', 'weight', 'pretty', 'much', 'universal', 'thing', 'anyone', 'else', 'fear', 'actual', 'obesity', 'genuinely', 'terrified', 'start', 'eating', 'stop', 'keep', 'eating', 'eating', 'never', 'stop', '300', 'pound', 'lost', 'self', 'control', 'stopped', 'restricting', 'plus', 'hearing', 'many', 'people', 'go', 'binge', 'eating', 'disorder', 'help', 'either', 'everyone', 'terrified', 'literally', 'nightmare', 'imaging', 'happening', 'make', 'sick', 'anxiety', 'hope', 'one', 'cu', 'crazy']",2136
2485,I am so alone.,"I’m a 16 year old male who’s been suffering from anorexia for two years now, entering my 3rd year and have been fake recovered to some extent like 5 times now but every time has me going back down the same path I was on when restricting.\n\nI’m approaching the weight I was at when I was at my worst and everyday I have to deal with constantly being body shamed by my family/friends who have no idea what I’m going through no matter how many attempts I’ve given to explain fully about what I’m going through.\n\nBecause I’m a boy I don’t get a lot of recognition for these struggles, I’m just “the skinny one who doesn’t eat a lot” in peoples eyes.\n\nOn top of this I live in a place where there’s virtually nowhere to go for eating disorders, there’s no programs or people to help me. I feel like I’m approaching the end of my road and at this point I am ready to stop fighting and give up. I want to eat, I want to recover, but I feel like it’s a forever cycle I’m stuck on of restricting, hitting rock bottom, binging, and then restricting again.",kq3m1u,87,15,,AnorexiaNervosa,top,"I am so alone. I’m a 16 year old male who’s been suffering from anorexia for two years now, entering my 3rd year and have been fake recovered to some extent like 5 times now but every time has me going back down the same path I was on when restricting.\n\nI’m approaching the weight I was at when I was at my worst and everyday I have to deal with constantly being body shamed by my family/friends who have no idea what I’m going through no matter how many attempts I’ve given to explain fully about what I’m going through.\n\nBecause I’m a boy I don’t get a lot of recognition for these struggles, I’m just “the skinny one who doesn’t eat a lot” in peoples eyes.\n\nOn top of this I live in a place where there’s virtually nowhere to go for eating disorders, there’s no programs or people to help me. I feel like I’m approaching the end of my road and at this point I am ready to stop fighting and give up. I want to eat, I want to recover, but I feel like it’s a forever cycle I’m stuck on of restricting, hitting rock bottom, binging, and then restricting again.","['alon', '16', 'year', 'old', 'male', 'suffer', 'two', 'year', 'enter', '3rd', 'year', 'fake', 'recov', 'extent', '5', 'time', 'go', 'back', 'path', 'restrict', 'approach', 'weight', 'worst', 'everyday', 'deal', 'constantli', 'bodi', 'shame', 'familyfriend', 'idea', 'go', 'matter', 'mani', 'attempt', 'given', 'explain', 'fulli', 'go', 'boy', 'get', 'lot', 'recognit', 'struggl', 'skinni', 'one', 'eat', 'lot', 'peopl', 'eye', 'top', 'live', 'place', 'virtual', 'nowher', 'go', 'eat', 'disord', 'program', 'peopl', 'help', 'approach', 'end', 'road', 'point', 'readi', 'stop', 'fight', 'give', 'eat', 'recov', 'forev', 'cycl', 'stuck', 'restrict', 'hit', 'rock', 'bottom', 'bing', 'restrict']","['alone', '16', 'year', 'old', 'male', 'suffering', 'two', 'year', 'entering', '3rd', 'year', 'fake', 'recovered', 'extent', '5', 'time', 'going', 'back', 'path', 'restricting', 'approaching', 'weight', 'worst', 'everyday', 'deal', 'constantly', 'body', 'shamed', 'familyfriends', 'idea', 'going', 'matter', 'many', 'attempt', 'given', 'explain', 'fully', 'going', 'boy', 'get', 'lot', 'recognition', 'struggle', 'skinny', 'one', 'eat', 'lot', 'people', 'eye', 'top', 'live', 'place', 'virtually', 'nowhere', 'go', 'eating', 'disorder', 'program', 'people', 'help', 'approaching', 'end', 'road', 'point', 'ready', 'stop', 'fighting', 'give', 'eat', 'recover', 'forever', 'cycle', 'stuck', 'restricting', 'hitting', 'rock', 'bottom', 'binging', 'restricting']",1305
2486,Chocolate Ice Cream!,"Ok, so it's not a lot, and I didn't use to be as scared of liquid calories (I count ice cream towards liquid calories) as I am now. But I'm eating chocolate ice cream! And I'm so proud of myself! Even if I do feel sick (because of motion sickness), I'm still going to eat this entire scoop of ice cream!",kc3nau,87,16,,AnorexiaNervosa,top,"Chocolate Ice Cream! Ok, so it's not a lot, and I didn't use to be as scared of liquid calories (I count ice cream towards liquid calories) as I am now. But I'm eating chocolate ice cream! And I'm so proud of myself! Even if I do feel sick (because of motion sickness), I'm still going to eat this entire scoop of ice cream!","['chocol', 'ice', 'cream', 'ok', 'lot', 'didnt', 'use', 'scare', 'liquid', 'calori', 'count', 'ice', 'cream', 'toward', 'liquid', 'calori', 'eat', 'chocol', 'ice', 'cream', 'proud', 'even', 'sick', 'motion', 'sick', 'still', 'go', 'eat', 'entir', 'scoop', 'ice', 'cream']","['chocolate', 'ice', 'cream', 'ok', 'lot', 'didnt', 'use', 'scared', 'liquid', 'calorie', 'count', 'ice', 'cream', 'towards', 'liquid', 'calorie', 'eating', 'chocolate', 'ice', 'cream', 'proud', 'even', 'sick', 'motion', 'sickness', 'still', 'going', 'eat', 'entire', 'scoop', 'ice', 'cream']",1392
2487,do you experience oDdLy specific challenges RIGHT after recovering that tempt you back into your eating disorder?,"Do y’all find that as SOON as you get to a good place in recovery something comes along that wants to push you right back? Whether it’s a triggering comment, a break up, a new relationship, someone close to you going on a diet, or some other triggering situation? \n\nI know that challenges will come but it’s just crazy how quickly and specific they seem to be to your own personal weaknesses. Like for me as soon as I was getting better my now boyfriend asked me out. Which was great but also what starting my eating disorder in the first place years ago so it was soooo hard to not relapse (& I kinda did a bit). \n\nI’d love to hear your stories with this if it’s similar! I just feel like I’ve noticed this theme.",k4lpsn,88,17,,AnorexiaNervosa,top,"do you experience oDdLy specific challenges RIGHT after recovering that tempt you back into your eating disorder? Do y’all find that as SOON as you get to a good place in recovery something comes along that wants to push you right back? Whether it’s a triggering comment, a break up, a new relationship, someone close to you going on a diet, or some other triggering situation? \n\nI know that challenges will come but it’s just crazy how quickly and specific they seem to be to your own personal weaknesses. Like for me as soon as I was getting better my now boyfriend asked me out. Which was great but also what starting my eating disorder in the first place years ago so it was soooo hard to not relapse (& I kinda did a bit). \n\nI’d love to hear your stories with this if it’s similar! I just feel like I’ve noticed this theme.","['experi', 'oddli', 'specif', 'challeng', 'right', 'recov', 'tempt', 'back', 'eat', 'disord', 'find', 'soon', 'get', 'good', 'place', 'recoveri', 'someth', 'come', 'along', 'want', 'push', 'right', 'back', 'whether', 'trigger', 'comment', 'break', 'new', 'relationship', 'someon', 'close', 'go', 'diet', 'trigger', 'situat', 'challeng', 'come', 'crazi', 'quickli', 'specif', 'seem', 'person', 'weak', 'soon', 'get', 'better', 'boyfriend', 'ask', 'great', 'also', 'start', 'eat', 'disord', 'first', 'place', 'year', 'ago', 'soooo', 'hard', 'relaps', 'kinda', 'bit', 'love', 'hear', 'stori', 'similar', 'notic', 'theme']","['experience', 'oddly', 'specific', 'challenge', 'right', 'recovering', 'tempt', 'back', 'eating', 'disorder', 'find', 'soon', 'get', 'good', 'place', 'recovery', 'something', 'come', 'along', 'want', 'push', 'right', 'back', 'whether', 'triggering', 'comment', 'break', 'new', 'relationship', 'someone', 'close', 'going', 'diet', 'triggering', 'situation', 'challenge', 'come', 'crazy', 'quickly', 'specific', 'seem', 'personal', 'weakness', 'soon', 'getting', 'better', 'boyfriend', 'asked', 'great', 'also', 'starting', 'eating', 'disorder', 'first', 'place', 'year', 'ago', 'soooo', 'hard', 'relapse', 'kinda', 'bit', 'love', 'hear', 'story', 'similar', 'noticed', 'theme']",1496


In [6]:
# Define a regular expression pattern to identify image links
image_pattern = r'\b(?:https?://\S+\.(?:jpg|jpeg|png|gif|bmp|svg|webp))\b'

# Create a function to check if any column contains an image link
def contains_image_link(row):
    for column in df_reddit_post.columns:
        if isinstance(row[column], str) and re.search(image_pattern, row[column], re.IGNORECASE):
            return True
    return False

# Apply the function to filter rows with image links
non_image_posts = df_reddit_post[~df_reddit_post.apply(contains_image_link, axis=1)]

# Save the non-image posts to a new CSV file
non_image_posts.to_csv('reddit_posts_non_image.csv', index=False)

In [7]:
non_image_posts.head()

Unnamed: 0,title,post_text,id,score,total_comments,post_url,subreddit,post_type,title_&_text,title_text_stemmed,title_text_lemmatized,trending
0,Daily Fasting Check-in!,"* **Type** of fast (water, juice, smoking, etc.)\n* **Context** of fast (start, end, day x of y, etc.)\n* **Length** of fast (8 hours, 3 days, etc.)\n* **Why?** What you hope to accomplish with your fast\n* **Notes** How is it going so far? Any concerns? Insights to share?\n\nBe sure to check back often as comments get posted throughout the day. Sort comments by ""new"" to be sure the newer comments get some love as well.",16o7z6r,1,2,,intermittentfasting,hot,"Daily Fasting Check-in! * **Type** of fast (water, juice, smoking, etc.)\n* **Context** of fast (start, end, day x of y, etc.)\n* **Length** of fast (8 hours, 3 days, etc.)\n* **Why?** What you hope to accomplish with your fast\n* **Notes** How is it going so far? Any concerns? Insights to share?\n\nBe sure to check back often as comments get posted throughout the day. Sort comments by ""new"" to be sure the newer comments get some love as well.","['daili', 'checkin', 'type', 'fast', 'water', 'juic', 'smoke', 'etc', 'context', 'fast', 'start', 'end', 'day', 'x', 'etc', 'length', 'fast', '8', 'hour', '3', 'day', 'etc', 'hope', 'accomplish', 'fast', 'note', 'go', 'far', 'concern', 'insight', 'share', 'sure', 'check', 'back', 'often', 'comment', 'get', 'post', 'throughout', 'day', 'sort', 'comment', 'new', 'sure', 'newer', 'comment', 'get', 'love', 'well']","['daily', 'checkin', 'type', 'fast', 'water', 'juice', 'smoking', 'etc', 'context', 'fast', 'start', 'end', 'day', 'x', 'etc', 'length', 'fast', '8', 'hour', '3', 'day', 'etc', 'hope', 'accomplish', 'fast', 'note', 'going', 'far', 'concern', 'insight', 'share', 'sure', 'check', 'back', 'often', 'comment', 'get', 'posted', 'throughout', 'day', 'sort', 'comment', 'new', 'sure', 'newer', 'comment', 'get', 'love', 'well']",2
1,I decided who I wanted to be and I became her 💅🏽,"So a little background: I’m 39, have birthed two children and at 5’2, used to weigh 220 lbs for about a decade. I’m sitting around 150 lbs right now. It’s been almost 3 years since I started this intermittent fasting lifestyle and it’s the first time I didn’t gain ALL OF the weight back plus more. But I did fall off a bit last year and gained back about 25 lbs, 20 of which I’ve lost again. That regain did teach me something though. About how to handle stress. About not giving up when there are setbacks. About being grateful for every stage of the journey you’re at. I’ve currently been doing mostly a 20/4 schedule but eating whatever I want to satiety within my window… and the rest of the time I’m just chillin’. 😎",16ntqoy,1176,36,,intermittentfasting,hot,"I decided who I wanted to be and I became her 💅🏽 So a little background: I’m 39, have birthed two children and at 5’2, used to weigh 220 lbs for about a decade. I’m sitting around 150 lbs right now. It’s been almost 3 years since I started this intermittent fasting lifestyle and it’s the first time I didn’t gain ALL OF the weight back plus more. But I did fall off a bit last year and gained back about 25 lbs, 20 of which I’ve lost again. That regain did teach me something though. About how to handle stress. About not giving up when there are setbacks. About being grateful for every stage of the journey you’re at. I’ve currently been doing mostly a 20/4 schedule but eating whatever I want to satiety within my window… and the rest of the time I’m just chillin’. 😎","['decid', 'want', 'becam', 'littl', 'background', '39', 'birth', 'two', 'children', '5', '2', 'use', 'weigh', '220', 'lb', 'decad', 'sit', 'around', '150', 'lb', 'right', 'almost', '3', 'year', 'sinc', 'start', 'lifestyl', 'first', 'gain', 'weight', 'back', 'plu', 'fall', 'bit', 'last', 'year', 'gain', 'back', '25', 'lb', '20', 'lost', 'regain', 'teach', 'someth', 'though', 'handl', 'stress', 'give', 'setback', 'grate', 'stage', 'journey', 'current', 'mostli', '204', 'schedul', 'eat', 'whatev', 'satieti', 'within', 'window', 'rest', 'chillin', '']","['decided', 'wanted', 'became', 'little', 'background', '39', 'birthed', 'two', 'child', '5', '2', 'used', 'weigh', '220', 'lb', 'decade', 'sitting', 'around', '150', 'lb', 'right', 'almost', '3', 'year', 'since', 'started', 'lifestyle', 'first', 'gain', 'weight', 'back', 'plus', 'fall', 'bit', 'last', 'year', 'gained', 'back', '25', 'lb', '20', 'lost', 'regain', 'teach', 'something', 'though', 'handle', 'stress', 'giving', 'setback', 'grateful', 'stage', 'journey', 'currently', 'mostly', '204', 'schedule', 'eating', 'whatever', 'satiety', 'within', 'window', 'rest', 'chillin', '']",42336
2,Some photos from a past vacation came up as a memory today. Really emotional looking back at how far I’ve come. Started IF in 2020 & have kept up with it since then. 225lb to 125lb!,"I remember being miserable and insecure the entire time I was on this trip, it was really hard to enjoy myself. It was about more than just the weight- thankful to be in a better place physically and mentally these days!!",16ni914,1505,77,,intermittentfasting,hot,"Some photos from a past vacation came up as a memory today. Really emotional looking back at how far I’ve come. Started IF in 2020 & have kept up with it since then. 225lb to 125lb! I remember being miserable and insecure the entire time I was on this trip, it was really hard to enjoy myself. It was about more than just the weight- thankful to be in a better place physically and mentally these days!!","['photo', 'past', 'vacat', 'came', 'memori', 'today', 'realli', 'emot', 'look', 'back', 'far', 'come', 'start', '2020', 'kept', 'sinc', '225lb', '125lb', 'rememb', 'miser', 'insecur', 'entir', 'trip', 'realli', 'hard', 'enjoy', 'weight', 'thank', 'better', 'place', 'physic', 'mental', 'day']","['photo', 'past', 'vacation', 'came', 'memory', 'today', 'really', 'emotional', 'looking', 'back', 'far', 'come', 'started', '2020', 'kept', 'since', '225lb', '125lb', 'remember', 'miserable', 'insecure', 'entire', 'trip', 'really', 'hard', 'enjoy', 'weight', 'thankful', 'better', 'place', 'physically', 'mentally', 'day']",115885
3,"Anybody find IF, lose weight, and then lose motivation because you know that you can always lose weight, so you procrastinate about losing weight?",I know I am an idiot.,16nuqx9,198,78,,intermittentfasting,hot,"Anybody find IF, lose weight, and then lose motivation because you know that you can always lose weight, so you procrastinate about losing weight? I know I am an idiot.","['anybodi', 'find', 'lose', 'weight', 'lose', 'motiv', 'alway', 'lose', 'weight', 'procrastin', 'lose', 'weight', 'idiot']","['anybody', 'find', 'lose', 'weight', 'lose', 'motivation', 'always', 'lose', 'weight', 'procrastinate', 'losing', 'weight', 'idiot']",15444
4,2 and a half months of IF,"From 234 to 211 in 2.5 months. It works! Once I got disciplined with fasting it became so easy to stay consistent. I had rapid weight gain due to a medication but now feeling much more confident. To compliment IF, I do cardio 3-4 times a week. 11 more pounds to go !! You can do this !",16nuxqs,180,12,,intermittentfasting,hot,"2 and a half months of IF From 234 to 211 in 2.5 months. It works! Once I got disciplined with fasting it became so easy to stay consistent. I had rapid weight gain due to a medication but now feeling much more confident. To compliment IF, I do cardio 3-4 times a week. 11 more pounds to go !! You can do this !","['2', 'half', 'month', '234', '211', '25', 'month', 'work', 'got', 'disciplin', 'becam', 'easi', 'stay', 'consist', 'rapid', 'weight', 'gain', 'due', 'medic', 'feel', 'much', 'confid', 'compliment', 'cardio', '34', 'time', 'week', '11', 'pound', 'go', '']","['2', 'half', 'month', '234', '211', '25', 'month', 'work', 'got', 'disciplined', 'became', 'easy', 'stay', 'consistent', 'rapid', 'weight', 'gain', 'due', 'medication', 'feeling', 'much', 'confident', 'compliment', 'cardio', '34', 'time', 'week', '11', 'pound', 'go', '']",2160


In [8]:
# Iterate through all columns and convert text data to lowercase
for column in non_image_posts.columns:
    if non_image_posts[column].dtype == 'object':
        non_image_posts[column] = non_image_posts[column].str.lower()

# Print the resulting DataFrame
non_image_posts.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_image_posts[column] = non_image_posts[column].str.lower()


Unnamed: 0,title,post_text,id,score,total_comments,post_url,subreddit,post_type,title_&_text,title_text_stemmed,title_text_lemmatized,trending
0,daily fasting check-in!,"* **type** of fast (water, juice, smoking, etc.)\n* **context** of fast (start, end, day x of y, etc.)\n* **length** of fast (8 hours, 3 days, etc.)\n* **why?** what you hope to accomplish with your fast\n* **notes** how is it going so far? any concerns? insights to share?\n\nbe sure to check back often as comments get posted throughout the day. sort comments by ""new"" to be sure the newer comments get some love as well.",16o7z6r,1,2,,intermittentfasting,hot,"daily fasting check-in! * **type** of fast (water, juice, smoking, etc.)\n* **context** of fast (start, end, day x of y, etc.)\n* **length** of fast (8 hours, 3 days, etc.)\n* **why?** what you hope to accomplish with your fast\n* **notes** how is it going so far? any concerns? insights to share?\n\nbe sure to check back often as comments get posted throughout the day. sort comments by ""new"" to be sure the newer comments get some love as well.","['daili', 'checkin', 'type', 'fast', 'water', 'juic', 'smoke', 'etc', 'context', 'fast', 'start', 'end', 'day', 'x', 'etc', 'length', 'fast', '8', 'hour', '3', 'day', 'etc', 'hope', 'accomplish', 'fast', 'note', 'go', 'far', 'concern', 'insight', 'share', 'sure', 'check', 'back', 'often', 'comment', 'get', 'post', 'throughout', 'day', 'sort', 'comment', 'new', 'sure', 'newer', 'comment', 'get', 'love', 'well']","['daily', 'checkin', 'type', 'fast', 'water', 'juice', 'smoking', 'etc', 'context', 'fast', 'start', 'end', 'day', 'x', 'etc', 'length', 'fast', '8', 'hour', '3', 'day', 'etc', 'hope', 'accomplish', 'fast', 'note', 'going', 'far', 'concern', 'insight', 'share', 'sure', 'check', 'back', 'often', 'comment', 'get', 'posted', 'throughout', 'day', 'sort', 'comment', 'new', 'sure', 'newer', 'comment', 'get', 'love', 'well']",2
1,i decided who i wanted to be and i became her 💅🏽,"so a little background: i’m 39, have birthed two children and at 5’2, used to weigh 220 lbs for about a decade. i’m sitting around 150 lbs right now. it’s been almost 3 years since i started this intermittent fasting lifestyle and it’s the first time i didn’t gain all of the weight back plus more. but i did fall off a bit last year and gained back about 25 lbs, 20 of which i’ve lost again. that regain did teach me something though. about how to handle stress. about not giving up when there are setbacks. about being grateful for every stage of the journey you’re at. i’ve currently been doing mostly a 20/4 schedule but eating whatever i want to satiety within my window… and the rest of the time i’m just chillin’. 😎",16ntqoy,1176,36,,intermittentfasting,hot,"i decided who i wanted to be and i became her 💅🏽 so a little background: i’m 39, have birthed two children and at 5’2, used to weigh 220 lbs for about a decade. i’m sitting around 150 lbs right now. it’s been almost 3 years since i started this intermittent fasting lifestyle and it’s the first time i didn’t gain all of the weight back plus more. but i did fall off a bit last year and gained back about 25 lbs, 20 of which i’ve lost again. that regain did teach me something though. about how to handle stress. about not giving up when there are setbacks. about being grateful for every stage of the journey you’re at. i’ve currently been doing mostly a 20/4 schedule but eating whatever i want to satiety within my window… and the rest of the time i’m just chillin’. 😎","['decid', 'want', 'becam', 'littl', 'background', '39', 'birth', 'two', 'children', '5', '2', 'use', 'weigh', '220', 'lb', 'decad', 'sit', 'around', '150', 'lb', 'right', 'almost', '3', 'year', 'sinc', 'start', 'lifestyl', 'first', 'gain', 'weight', 'back', 'plu', 'fall', 'bit', 'last', 'year', 'gain', 'back', '25', 'lb', '20', 'lost', 'regain', 'teach', 'someth', 'though', 'handl', 'stress', 'give', 'setback', 'grate', 'stage', 'journey', 'current', 'mostli', '204', 'schedul', 'eat', 'whatev', 'satieti', 'within', 'window', 'rest', 'chillin', '']","['decided', 'wanted', 'became', 'little', 'background', '39', 'birthed', 'two', 'child', '5', '2', 'used', 'weigh', '220', 'lb', 'decade', 'sitting', 'around', '150', 'lb', 'right', 'almost', '3', 'year', 'since', 'started', 'lifestyle', 'first', 'gain', 'weight', 'back', 'plus', 'fall', 'bit', 'last', 'year', 'gained', 'back', '25', 'lb', '20', 'lost', 'regain', 'teach', 'something', 'though', 'handle', 'stress', 'giving', 'setback', 'grateful', 'stage', 'journey', 'currently', 'mostly', '204', 'schedule', 'eating', 'whatever', 'satiety', 'within', 'window', 'rest', 'chillin', '']",42336
2,some photos from a past vacation came up as a memory today. really emotional looking back at how far i’ve come. started if in 2020 & have kept up with it since then. 225lb to 125lb!,"i remember being miserable and insecure the entire time i was on this trip, it was really hard to enjoy myself. it was about more than just the weight- thankful to be in a better place physically and mentally these days!!",16ni914,1505,77,,intermittentfasting,hot,"some photos from a past vacation came up as a memory today. really emotional looking back at how far i’ve come. started if in 2020 & have kept up with it since then. 225lb to 125lb! i remember being miserable and insecure the entire time i was on this trip, it was really hard to enjoy myself. it was about more than just the weight- thankful to be in a better place physically and mentally these days!!","['photo', 'past', 'vacat', 'came', 'memori', 'today', 'realli', 'emot', 'look', 'back', 'far', 'come', 'start', '2020', 'kept', 'sinc', '225lb', '125lb', 'rememb', 'miser', 'insecur', 'entir', 'trip', 'realli', 'hard', 'enjoy', 'weight', 'thank', 'better', 'place', 'physic', 'mental', 'day']","['photo', 'past', 'vacation', 'came', 'memory', 'today', 'really', 'emotional', 'looking', 'back', 'far', 'come', 'started', '2020', 'kept', 'since', '225lb', '125lb', 'remember', 'miserable', 'insecure', 'entire', 'trip', 'really', 'hard', 'enjoy', 'weight', 'thankful', 'better', 'place', 'physically', 'mentally', 'day']",115885
3,"anybody find if, lose weight, and then lose motivation because you know that you can always lose weight, so you procrastinate about losing weight?",i know i am an idiot.,16nuqx9,198,78,,intermittentfasting,hot,"anybody find if, lose weight, and then lose motivation because you know that you can always lose weight, so you procrastinate about losing weight? i know i am an idiot.","['anybodi', 'find', 'lose', 'weight', 'lose', 'motiv', 'alway', 'lose', 'weight', 'procrastin', 'lose', 'weight', 'idiot']","['anybody', 'find', 'lose', 'weight', 'lose', 'motivation', 'always', 'lose', 'weight', 'procrastinate', 'losing', 'weight', 'idiot']",15444
4,2 and a half months of if,"from 234 to 211 in 2.5 months. it works! once i got disciplined with fasting it became so easy to stay consistent. i had rapid weight gain due to a medication but now feeling much more confident. to compliment if, i do cardio 3-4 times a week. 11 more pounds to go !! you can do this !",16nuxqs,180,12,,intermittentfasting,hot,"2 and a half months of if from 234 to 211 in 2.5 months. it works! once i got disciplined with fasting it became so easy to stay consistent. i had rapid weight gain due to a medication but now feeling much more confident. to compliment if, i do cardio 3-4 times a week. 11 more pounds to go !! you can do this !","['2', 'half', 'month', '234', '211', '25', 'month', 'work', 'got', 'disciplin', 'becam', 'easi', 'stay', 'consist', 'rapid', 'weight', 'gain', 'due', 'medic', 'feel', 'much', 'confid', 'compliment', 'cardio', '34', 'time', 'week', '11', 'pound', 'go', '']","['2', 'half', 'month', '234', '211', '25', 'month', 'work', 'got', 'disciplined', 'became', 'easy', 'stay', 'consistent', 'rapid', 'weight', 'gain', 'due', 'medication', 'feeling', 'much', 'confident', 'compliment', 'cardio', '34', 'time', 'week', '11', 'pound', 'go', '']",2160


In [9]:
nltk.download("stopwords")
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\weege\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\weege\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\weege\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\weege\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [10]:
df_kept_columns = pd.DataFrame(non_image_posts)

# Specify the columns to keep
columns_to_keep = ['title_&_text', "score", "total_comments", "trending", "subreddit"]

# Create an empty DataFrame with the desired columns
df_kept_columns = df_kept_columns[columns_to_keep]

# Display the resulting DataFrame
df_kept_columns.head()

Unnamed: 0,title_&_text,score,total_comments,trending,subreddit
0,"daily fasting check-in! * **type** of fast (water, juice, smoking, etc.)\n* **context** of fast (start, end, day x of y, etc.)\n* **length** of fast (8 hours, 3 days, etc.)\n* **why?** what you hope to accomplish with your fast\n* **notes** how is it going so far? any concerns? insights to share?\n\nbe sure to check back often as comments get posted throughout the day. sort comments by ""new"" to be sure the newer comments get some love as well.",1,2,2,intermittentfasting
1,"i decided who i wanted to be and i became her 💅🏽 so a little background: i’m 39, have birthed two children and at 5’2, used to weigh 220 lbs for about a decade. i’m sitting around 150 lbs right now. it’s been almost 3 years since i started this intermittent fasting lifestyle and it’s the first time i didn’t gain all of the weight back plus more. but i did fall off a bit last year and gained back about 25 lbs, 20 of which i’ve lost again. that regain did teach me something though. about how to handle stress. about not giving up when there are setbacks. about being grateful for every stage of the journey you’re at. i’ve currently been doing mostly a 20/4 schedule but eating whatever i want to satiety within my window… and the rest of the time i’m just chillin’. 😎",1176,36,42336,intermittentfasting
2,"some photos from a past vacation came up as a memory today. really emotional looking back at how far i’ve come. started if in 2020 & have kept up with it since then. 225lb to 125lb! i remember being miserable and insecure the entire time i was on this trip, it was really hard to enjoy myself. it was about more than just the weight- thankful to be in a better place physically and mentally these days!!",1505,77,115885,intermittentfasting
3,"anybody find if, lose weight, and then lose motivation because you know that you can always lose weight, so you procrastinate about losing weight? i know i am an idiot.",198,78,15444,intermittentfasting
4,"2 and a half months of if from 234 to 211 in 2.5 months. it works! once i got disciplined with fasting it became so easy to stay consistent. i had rapid weight gain due to a medication but now feeling much more confident. to compliment if, i do cardio 3-4 times a week. 11 more pounds to go !! you can do this !",180,12,2160,intermittentfasting


In [11]:
# Define a function to remove punctuation from a string
def remove_punctuation(text):
    if isinstance(text, str):  # Check if the input is a string
        return ''.join([char for char in text if char not in string.punctuation])
    else:
        return text  # Return the original value if it's not a string

# Apply the remove_punctuation function to all columns using applymap
df_cleaned = df_kept_columns.applymap(remove_punctuation)

# Display the cleaned DataFrame
df_cleaned.head()

Unnamed: 0,title_&_text,score,total_comments,trending,subreddit
0,daily fasting checkin type of fast water juice smoking etc\n context of fast start end day x of y etc\n length of fast 8 hours 3 days etc\n why what you hope to accomplish with your fast\n notes how is it going so far any concerns insights to share\n\nbe sure to check back often as comments get posted throughout the day sort comments by new to be sure the newer comments get some love as well,1,2,2,intermittentfasting
1,i decided who i wanted to be and i became her 💅🏽 so a little background i’m 39 have birthed two children and at 5’2 used to weigh 220 lbs for about a decade i’m sitting around 150 lbs right now it’s been almost 3 years since i started this intermittent fasting lifestyle and it’s the first time i didn’t gain all of the weight back plus more but i did fall off a bit last year and gained back about 25 lbs 20 of which i’ve lost again that regain did teach me something though about how to handle stress about not giving up when there are setbacks about being grateful for every stage of the journey you’re at i’ve currently been doing mostly a 204 schedule but eating whatever i want to satiety within my window… and the rest of the time i’m just chillin’ 😎,1176,36,42336,intermittentfasting
2,some photos from a past vacation came up as a memory today really emotional looking back at how far i’ve come started if in 2020 have kept up with it since then 225lb to 125lb i remember being miserable and insecure the entire time i was on this trip it was really hard to enjoy myself it was about more than just the weight thankful to be in a better place physically and mentally these days,1505,77,115885,intermittentfasting
3,anybody find if lose weight and then lose motivation because you know that you can always lose weight so you procrastinate about losing weight i know i am an idiot,198,78,15444,intermittentfasting
4,2 and a half months of if from 234 to 211 in 25 months it works once i got disciplined with fasting it became so easy to stay consistent i had rapid weight gain due to a medication but now feeling much more confident to compliment if i do cardio 34 times a week 11 more pounds to go you can do this,180,12,2160,intermittentfasting


In [12]:
# Load the spaCy English language model
nlp = spacy.load("en_core_web_sm")

# Create a function to process and modify text
def process_text(text):
    if isinstance(text, str):
        doc = nlp(text)
        modified_tokens = []
        for token in doc:
            if token.text.lower() == "i'm":
                modified_tokens.extend(["i", "am"])
            elif token.text.lower() == "emma's":
                modified_tokens.append("emma")
            else:
                modified_tokens.append(token.text)
        return " ".join(modified_tokens)
    else:
        return text

# Loop through all columns and apply the processing function
for column in df_cleaned.columns:
    if df_cleaned[column].dtype == 'object':
        df_cleaned[column] = df_cleaned[column].apply(process_text)

# Print the resulting DataFrame
df_cleaned

Unnamed: 0,title_&_text,score,total_comments,trending,subreddit
0,daily fasting checkin type of fast water juice smoking etc \n context of fast start end day x of y etc \n length of fast 8 hours 3 days etc \n why what you hope to accomplish with your fast \n notes how is it going so far any concerns insights to share \n\n be sure to check back often as comments get posted throughout the day sort comments by new to be sure the newer comments get some love as well,1,2,2,intermittentfasting
1,i decided who i wanted to be and i became her 💅 🏽 so a little background i ’m 39 have birthed two children and at 5’2 used to weigh 220 lbs for about a decade i ’m sitting around 150 lbs right now it ’s been almost 3 years since i started this intermittent fasting lifestyle and it ’s the first time i did n’t gain all of the weight back plus more but i did fall off a bit last year and gained back about 25 lbs 20 of which i ’ve lost again that regain did teach me something though about how to handle stress about not giving up when there are setbacks about being grateful for every stage of the journey you ’re at i ’ve currently been doing mostly a 204 schedule but eating whatever i want to satiety within my window … and the rest of the time i ’m just chillin ’ 😎,1176,36,42336,intermittentfasting
2,some photos from a past vacation came up as a memory today really emotional looking back at how far i ’ve come started if in 2020 have kept up with it since then 225 lb to 125 lb i remember being miserable and insecure the entire time i was on this trip it was really hard to enjoy myself it was about more than just the weight thankful to be in a better place physically and mentally these days,1505,77,115885,intermittentfasting
3,anybody find if lose weight and then lose motivation because you know that you can always lose weight so you procrastinate about losing weight i know i am an idiot,198,78,15444,intermittentfasting
4,2 and a half months of if from 234 to 211 in 25 months it works once i got disciplined with fasting it became so easy to stay consistent i had rapid weight gain due to a medication but now feeling much more confident to compliment if i do cardio 34 times a week 11 more pounds to go you can do this,180,12,2160,intermittentfasting
...,...,...,...,...,...
2484,anyone else have a fear of becoming obese i know that fear of gaining weight is a pretty much universal thing for anorexia but does anyone else have a fear of actual obesity like i ’m genuinely terrified that once i start eating more i just wo n’t stop i ’ll just keep eating and eating and never stop until i ’m 300 pounds because i lost all my self control when i stopped restricting plus hearing about how many people go from anorexia to binge eating disorder does n’t help either i know it ’s not like that for everyone but what if it is for me i ’m so terrified of it that i literally have nightmares about it and imaging it happening makes me feel sick with anxiety i just hope i ’m not the only one cus i feel crazy,89,24,2136,anorexianervosa
2485,i am so alone i ’m a 16 year old male who ’s been suffering from anorexia for two years now entering my 3rd year and have been fake recovered to some extent like 5 times now but every time has me going back down the same path i was on when restricting \n\n i ’m approaching the weight i was at when i was at my worst and everyday i have to deal with constantly being body shamed by my familyfriends who have no idea what i ’m going through no matter how many attempts i ’ve given to explain fully about what i ’m going through \n\n because i ’m a boy i do n’t get a lot of recognition for these struggles i ’m just “ the skinny one who does n’t eat a lot ” in peoples eyes \n\n on top of this i live in a place where there ’s virtually nowhere to go for eating disorders there ’s no programs or people to help me i feel like i ’m approaching the end of my road and at this point i am ready to stop fighting and give up i want to eat i want to recover but i feel like it ’s a forever cycle i ’m stuck on of restricting hitting rock bottom binging and then restricting again,87,15,1305,anorexianervosa
2486,chocolate ice cream ok so its not a lot and i did nt use to be as scared of liquid calories i count ice cream towards liquid calories as i am now but i m eating chocolate ice cream and i m so proud of myself even if i do feel sick because of motion sickness i m still going to eat this entire scoop of ice cream,87,16,1392,anorexianervosa
2487,do you experience oddly specific challenges right after recovering that tempt you back into your eating disorder do y’ all find that as soon as you get to a good place in recovery something comes along that wants to push you right back whether it ’s a triggering comment a break up a new relationship someone close to you going on a diet or some other triggering situation \n\n i know that challenges will come but it ’s just crazy how quickly and specific they seem to be to your own personal weaknesses like for me as soon as i was getting better my now boyfriend asked me out which was great but also what starting my eating disorder in the first place years ago so it was soooo hard to not relapse i kinda did a bit \n\n i ’d love to hear your stories with this if it ’s similar i just feel like i ’ve noticed this theme,88,17,1496,anorexianervosa


In [None]:
# Initialize the CountVectorizer for unigrams, bigrams, and trigrams
ngram_range = (1, 3)  # Change to (1, 1) for unigrams, (2, 2) for bigrams, or (3, 3) for trigrams
vectorizer = CountVectorizer(ngram_range=ngram_range)

# Fit and transform the text into n-grams
ngram_counts = vectorizer.fit_transform(df_cleaned['title_&_text'])
ngram_feature_names = vectorizer.get_feature_names_out()

# Create a DataFrame to store the n-gram counts
ngram_counts_df = pd.DataFrame(ngram_counts.toarray(), columns=ngram_feature_names)

# Add the 'Subreddit' column back to the DataFrame
ngram_counts_df['Subreddit'] = df_cleaned['subreddit']

# Group the DataFrame by 'Subreddit' and sum the counts
grouped_ngram_counts = ngram_counts_df.groupby('Subreddit').sum()

# Get a list of unique subreddits
unique_subreddits = df_cleaned['subreddit'].unique()

# Dictionary to store n-gram counts for each subreddit
subreddit_ngram_counts = {}

# Loop through each unique subreddit
for subreddit_name in unique_subreddits:
    # Get the specific n-gram count for the subreddit
    specific_ngram_count = grouped_ngram_counts.loc[subreddit_name]
    
    # To get the top N most common n-grams for a subreddit, you can use:
    top_n = 10  # Replace with the desired number
    
    # Store unigrams, bigrams, and trigrams in a dictionary for the subreddit
    ngram_data = {}
    
    # Iterate through the columns and identify n-grams
    for col_name in specific_ngram_count.index:
        if len(col_name.split()) == 1:  # Unigrams
            ngram_data['Unigrams'] = specific_ngram_count.nlargest(top_n)
        elif len(col_name.split()) == 2:  # Bigrams
            ngram_data['Bigrams'] = specific_ngram_count.nlargest(top_n)
        elif len(col_name.split()) == 3:  # Trigrams
            ngram_data['Trigrams'] = specific_ngram_count.nlargest(top_n)
    
    # Store the n-gram data in the dictionary
    subreddit_ngram_counts[subreddit_name] = ngram_data

# Now, subreddit_ngram_counts contains the n-gram data for creating a bar chart or further analysis.

In [None]:
# Loop through each subreddit and its n-grams
for subreddit_name, ngram_data in subreddit_ngram_counts.items():
    print(f"Subreddit: {subreddit_name}")
    
    # Check if unigram data is available
    if '1' in ngram_data:
        print("Unigrams:")
        print(ngram_data['1'])
    
    # Check if bigram data is available
    if '2' in ngram_data:
        print("Bigrams:")
        print(ngram_data['2'])
    
    # Check if trigram data is available
    if '3' in ngram_data:
        print("Trigrams:")
        print(ngram_data['3'])
    
    print()  # Add a blank line between subreddits for readability

In [None]:
# Replace 'desired_subreddit_name' with the subreddit you want to extract n-gram data for
desired_subreddit_name = 'subreddit_ngram_counts'

# Access the n-gram data for the desired subreddit
ngram_data_for_desired_subreddit = subreddit_ngram_counts.get(desired_subreddit_name)

# Check if the subreddit exists in the dictionary
if ngram_data_for_desired_subreddit is not None:
    print(f"N-gram data for '{desired_subreddit_name}':")
    print(ngram_data_for_desired_subreddit)
else:
    print(f"No data found for subreddit '{desired_subreddit_name}'.")

In [None]:
# Convert the dictionary to a DataFrame
df_unigrams = pd.DataFrame(subreddit_unigram_counts)

# Plot the data as a bar chart
ax = df_unigrams.plot(kind='bar', figsize=(10, 6))
plt.title('Top Unigrams in Subreddits')
plt.xlabel('Unigrams')
plt.ylabel('Frequency')
plt.xticks(rotation=45, ha='right')

# Show the plot
plt.tight_layout()
plt.show()

In [None]:
# Convert the dictionary to a DataFrame
df_bigram = pd.DataFrame(subreddit_bigram_counts)

# Plot the data as a bar chart
ax = df_bigram.plot(kind='bar', figsize=(10, 6))
plt.title('Top Bigrams in Subreddits')
plt.xlabel('Bigrams')
plt.ylabel('Frequency')
plt.xticks(rotation=45, ha='right')

# Show the plot
plt.tight_layout()
plt.show()

In [None]:
# Convert the dictionary to a DataFrame
df_trigram = pd.DataFrame(subreddit_trigram_counts)

# Plot the data as a bar chart
ax = df_trigram.plot(kind='bar', figsize=(10, 6))
plt.title('Top Trigrams in Subreddits')
plt.xlabel('Trigrams')
plt.ylabel('Frequency')
plt.xticks(ha='right')

# Show the plot
plt.tight_layout()
plt.show()