In [2]:
import praw
import pandas as pd
import os
from dotenv import load_dotenv

In [3]:
# Get the credentials from the environment
load_dotenv()

reddit_client_id = os.environ['REDDIT_CLIENT_ID']
reddit_client_secret = os.environ['REDDIT_CLIENT_SECRET']
reddit_user_agent = os.environ['REDDIT_USER_AGENT']

In [4]:
# Log in the Reddit App with the credentials from the environment
reddit = praw.Reddit(client_id=reddit_client_id,
                     client_secret=reddit_client_secret,
                     user_agent=reddit_user_agent)

In [66]:
# Get the top 1000 posts from the subreddit r/AskReddit from the last year, filer out nsfw posts and deleted or removed posts
def get_top_1000_posts():
    posts = []
    for submission in reddit.subreddit('AskReddit').top('year', limit=1000):
        if not submission.over_18 and not submission.title == '[deleted]' and not submission.title == '[removed]':
            posts.append({
                'post_id': submission.id,
                'post_title': submission.title,
                'selftext': submission.selftext,
                'post_url': submission.url,
                'score': submission.score,
                'num_comments': submission.num_comments,
            })                
    return posts

In [5]:
# Profanity filter
def profanity_filter():
    profanity = pd.read_csv('profanity_en.csv', header=None, skiprows=1)
    profanity = profanity[0].apply(lambda x: x.split(',')[0])
    return profanity

In [9]:
# Filter the posts that have profanity in the title with the profanity filter
# profanity_en.csv and return the filtered posts
# The first row is the header, so we skip it
# The profanities are in the first column

def filter_profanity(posts):
    profanity = profanity_filter()
    filtered_posts = []
    for post in posts:
        if not any(prof in post['post_title'].lower() for prof in profanity):
            filtered_posts.append(post)
    return filtered_posts


In [131]:
# Get the top 1000 posts from the subreddit r/AskReddit from the last year, filter out nsfw posts and deleted or removed posts
posts = get_top_1000_posts()

# Filter the posts that have profanity in the title
filtered_posts = filter_profanity(posts)

# Create a DataFrame from the filtered posts
df = pd.DataFrame(filtered_posts)

# Save the DataFrame to a csv file
df.to_csv('askreddit.csv', header=True, index=False)

Call this function with 'time_filter' as a keyword argument.
  for submission in reddit.subreddit('AskReddit').top('year', limit=1000):


In [139]:
# Get the top 50 comments from the posts from the askreddit.csv file, filter out deleted or removed comments, and filter out comments that have profanity or have links
# The first row is the header, so we skip it
# Only take the first 50 top level comments from each post

def get_top_50_comments():
    comments = []
    for post_id in df['post_id']:
        submission = reddit.submission(id=post_id)
        submission.comments.replace_more(limit=50)
        top_level_comments = submission.comments
        top_50_comments = top_level_comments[:50]
        for comment in top_50_comments:
            if not comment.body == '[deleted]' and not comment.body == '[removed]' and not any(link in comment.body for link in ['http', 'www']):
                comments.append({
                    'post_id': post_id,
                    'comment_id': comment.id,
                    'comment_body': comment.body,
                    'comment_score': comment.score,
                })    
    return comments

In [7]:
# Filter the comments that have profanity in the body

def filter_comments_profanity(comments):
    profanity = profanity_filter()
    filtered_comments = []
    for comment in comments:
        if not any(prof in comment['comment_body'].lower() for prof in profanity):
            filtered_comments.append(comment)
    return filtered_comments


In [None]:
# Get the top 50 comments from the posts from the askreddit.csv file
comments = get_top_50_comments()

# Filter the comments that have profanity in the body
filtered_comments = filter_comments_profanity(comments)

# Create a DataFrame from the comments
df = pd.DataFrame(filtered_comments)

# Save the DataFrame to a csv file
df.to_csv('askreddit_comments.csv', header=True, index=False)

In [7]:
# Filter out the comments that have less than 200 upvotes from the askreddit_comments.csv file
df = pd.read_csv('askreddit_comments.csv')
df = df[df['comment_score'] >= 200]

# Save the DataFrame to a csv file
df.to_csv('askreddit_comments.csv', header=True, index=False)

In [10]:
# Get the top 1000 posts from the subreddit r/AskReddit of all time, filer out nsfw posts and deleted or removed posts
# This is used as validation data for post model
def get_top_1000_posts_all_time():
    posts = []
    for submission in reddit.subreddit('AskReddit').top('all', limit=1000):
        if not submission.over_18 and not submission.title == '[deleted]' and not submission.title == '[removed]' and not any(link in submission.title for link in ['http', 'www']) and not any(link in submission.selftext for link in ['http', 'www']):
            posts.append({
                'post_id': submission.id,
                'post_title': submission.title,
                'selftext': submission.selftext,
                'post_url': submission.url,
                'score': submission.score,
                'num_comments': submission.num_comments,
            })
    return posts

# Get the top 1000 posts from the subreddit r/AskReddit of all time, filter out nsfw posts and deleted or removed posts
posts_all_time = get_top_1000_posts_all_time()

# Filter the posts that have profanity in the title
filtered_posts_all_time = filter_profanity(posts_all_time)

# Create a DataFrame from the filtered posts
df = pd.DataFrame(filtered_posts_all_time)

# Save the DataFrame to a csv file
df.to_csv('askreddit_post_validation.csv', header=True, index=False)

Call this function with 'time_filter' as a keyword argument.
  for submission in reddit.subreddit('AskReddit').top('all', limit=1000):


In [36]:
# Get the top 10 comments from the posts from the askreddit_post_validation.csv file, filter out deleted or removed comments, and filter out comments that have profanity or have links
# The first row is the header, so we skip it
# Only take the first 5 top level comments from each post

# Get df from the askreddit_post_validation.csv file
df = pd.read_csv('askreddit_post_validation.csv')

# Get the top 10 comments from the first 100 posts from the askreddit_post_validation.csv file
def get_top_10_comments():
    comments = []
    iteration = 0
    for post_id in df['post_id']:
        iteration += 1
        submission = reddit.submission(id=post_id)
        submission.comments.replace_more(limit=1)
        top_level_comments = submission.comments
        top_10_comments = top_level_comments[:20]
        print(str(iteration) + ". " + submission.id + " :")
        for comment in top_10_comments:
            if not comment.body == '[deleted]' and not comment.body == '[removed]' and not any(link in comment.body for link in ['http', 'www']) and not any(edit in comment.body for edit in ['edit', 'EDIT']):
                comments.append({
                    'post_id': post_id,
                    'comment_id': comment.id,
                    'comment_body': comment.body,
                    'comment_score': comment.score,
                })
                print(" " + comment.body)
        if iteration == 100:
            break
    return comments


In [37]:
# Get the top 50 comments from the posts from the askreddit.csv file
comments = get_top_10_comments()

# Filter the comments that have profanity in the body
filtered_comments = filter_comments_profanity(comments)

# Create a DataFrame from the comments
df = pd.DataFrame(filtered_comments)

# Save the DataFrame to a csv file
df.to_csv('askreddit_comments_validation.csv', header=True, index=False)

1. f08dxb :
 They'd be fine.

The problem with poverty is not usually day-to-day costs. People can adjust to that.  It's unexpected expenses that are crippling.

~~*Edit: Why are people still responding two weeks later! Nobody is reading this thread any more! At least not the responses at the bottom*~~ (Edit2: rant was too whiney. If you have something to say, who am I to say otherwise?)
 I doubt a month is enough to really feel the effect
 It would be better if they had to live *with* their lowest salaried employee for a month or two, imo.   
In their house, dinner with the family, travel to and from work together, same lunch, kids with homework, the works.

Building that relation would imo do much more for this problem than just "experiencing below your normal standard of living before returning to it"..   
You're more likely to feel empathy for a family you know, and much less likely to forget them.

Anything else would just be another show about *wealthy people having a fun adventu