In [7]:
# imports
from decouple import config
import pandas as pd
import praw
import psycopg2
import schedule
from sqlalchemy import create_engine
import time



current_day = time.strftime("%m/%d/%Y")
print(f"Performing job on {current_day}")

# connecting to reddit API
reddit = praw.Reddit(
    client_id=config("CLIENT_ID"),
    client_secret=config("SECRET"),
    user_agent=config("USER"),
    username=config("USERNAME"),
    password=config("PASSWORD")
)

subreddit = reddit.subreddit("wallstreetbets")

hot_wsb = subreddit.hot(limit=1000)

# storing submission data in a dictionary
submissions = {
    "title": [],
    "subreddit": [],
    "author": [],
    "score": [],
    "id": [],
    "url": [],
    "num_comments": [],
    "created": [],
    "body": []
}

# iterate over each submission and store data in the submissions dictionary 
for submission in hot_wsb:
    submissions["title"].append(submission.title)
    submissions["subreddit"].append(submission.subreddit)
    submissions["author"].append(submission.author)
    submissions["score"].append(submission.score)
    submissions["id"].append(submission.id)
    submissions["url"].append(submission.url)
    submissions["num_comments"].append(submission.num_comments)
    submissions["created"].append(submission.created)
    submissions["body"].append(submission.selftext)

# transform the submissions dictionary into a pandas dataframe
df = pd.DataFrame(submissions)

# convert created to date 
df['created'] = pd.to_datetime(df['created'], unit='s')

# convert subreddit column to string
df['subreddit'] = df['subreddit'].astype(str)

# convert author column to string
df['author'] = df['author'].astype(str)

# connect to postgresql database
db_pass = config("PASSWORD")
engine = create_engine(
    f'postgresql://postgres:{db_pass}@localhost:5432/postgres')

# store pandas dataframe in sql database
# df.to_sql('submissions', engine, if_exists='append')

# create dictionary to store comments
comments = {
    "submission_id": [],
    "comment_id": [],
    "score": [],
    "author": [],
    "created": [],
    "comment": []
}

# iterating over each submission and collecting relevent comment data

submission = reddit.submission(id='n4oegm')
submission.comments.replace_more(limit=None)
for comment in submission.comments.list():
    comments["submission_id"].append(id)
    comments["comment_id"].append(comment.id)
    comments["score"].append(comment.score)
    comments["author"].append(comment.author)
    comments["created"].append(comment.created)
    comments["comment"].append(comment.body)

# converting comments dictionary to a pandas dataframe
comments_df = pd.DataFrame(comments)

# store comments_df in sql table
# comments_df.to_sql('comments', engine, if_exists='append', index=False)

Performing job on 05/06/2021


In [8]:
df.head()

Unnamed: 0,title,subreddit,author,score,id,url,num_comments,created,body
0,"Daily Discussion Thread for May 06, 2021",wallstreetbets,OPINION_IS_UNPOPULAR,331,n63s48,https://www.reddit.com/r/wallstreetbets/commen...,18566,2021-05-06 18:00:11,
1,We’ve been compromised!,wallstreetbets,joeygallinal,10949,n69ajb,https://v.redd.it/5zfzinkrlix61,798,2021-05-06 23:01:47,
2,The Insurrection of the Apes,wallstreetbets,No_Chocolate_6259,35058,n63q7n,https://v.redd.it/xl5x239t2hx61,687,2021-05-06 17:56:30,
3,"100% concentration, all in GME! Final purchase...",wallstreetbets,SlothGoBrrr,1792,n6am1n,https://i.redd.it/egjg4b68wix61.jpg,167,2021-05-07 00:00:00,
4,Mods: SIKE!,wallstreetbets,Im_Blind_And_Deaf,2020,n67a3w,https://v.redd.it/4xmplrdy4ix61,103,2021-05-06 21:28:30,


In [9]:
comments_df.head()

Unnamed: 0,submission_id,comment_id,score,author,created,comment
0,<built-in function id>,gwwopgp,559,monsterbangster,1620166000.0,May THE stock be with you!
1,<built-in function id>,gwwo9hk,142,Wonderful_Court1076,1620166000.0,And also with you.
2,<built-in function id>,gwwr9o5,276,Low-Hovercraft-9849,1620167000.0,Game force 400 gang getting a better average e...
3,<built-in function id>,gwwn1u7,262,BlazinWarrior,1620165000.0,Now this is the kinda content I like to see in...
4,<built-in function id>,gwwppxl,32,fredkarlsson,1620166000.0,Never give me the odds!
