In [1]:
from datetime import datetime, timezone

import pandas as pd
import praw

In [2]:
# Build the Reddit client from keys.txt, whose first three lines are used
# (in this order) as client id, client secret and user agent.
with open('keys.txt') as f:
    keys = [line.strip() for line in f]

reddit = praw.Reddit(
    client_id=keys[0],
    client_secret=keys[1],
    user_agent=keys[2],
)


In [6]:
# General stock / trading subreddits; every ticker in `queries` searches these.
# NOTE(review): 'vanturetrading' looks like it may be a typo - confirm the subreddit name.
stock_subreddits = [
    'wallstreetbets',
    'stocks',
    'stockmarket',
    'investing',
    'valueinvesting',
    'investing_discussion',
    'pennystocks',
    'pennystockswatch',
    'robinhood',
    'vanturetrading',
    'squeezeplays',
    'stocknewsimpact',
    'stocksandtrading',
    'wallstreetbetselite',
    'short_selling',
    'burryedge',
    'shortsqueeze',
    'stockinvest',
    'stockbreakouts',
    'millennialbets',
    'stocktitan',
    'wallstreet',
    'superstonk',
    'stonks',
]

In [13]:
# Biotech / pharma / healthcare subreddits, added on top of the stock
# subreddits for the KTTA, ONCO and TNXP tickers.
biotech_subreddits = [
    'biotech',
    'biotechplays',
    'biotechnology',
    'pharmaindustry',
    'pharmacy',
    'healthcare',
    'medicine',
]

In [36]:
# Search plan keyed by ticker. Each entry lists the search terms ('queries')
# and the subreddits to run them in ('subreddits'): the shared stock list,
# plus the biotech list where relevant, plus ticker-specific communities.
queries = {
    'APLD': {
        'queries': ['APLD', 'applied digital'],
        'subreddits': [
            *stock_subreddits,
            'technology', 'cryptocurrency', 'cryptostocks',
        ],
    },
    'CNEY': {
        'queries': ['CNEY', 'CN energy'],
        'subreddits': [
            *stock_subreddits,
            'energy', 'renewableenergy',
        ],
    },
    'KTTA': {
        'queries': ['KTTA', 'pasithea therapeutics'],
        'subreddits': [
            *stock_subreddits,
            *biotech_subreddits,
            'multiplesclerosis', 'neurofibromatosis', 'als', 'schizophrenia', 'ketaminetherapy',
        ],
    },
    'ONCO': {
        'queries': ['ONCO', 'onconetix'],
        'subreddits': [
            *stock_subreddits,
            *biotech_subreddits,
            'oncology', 'menshealth', 'askmen', 'iama', 'prostatecancer',
        ],
    },
    'TNXP': {
        'queries': ['TNXP', 'Tonix'],
        'subreddits': [
            *stock_subreddits,
            *biotech_subreddits,
            'coronavirus', 'covid19', 'covidiots', 'covid19positive', 'covidvaccinated',
            'vaccine', 'zerocovidcommunity', 'migraine', 'fibromyalgia', 'tnxp',
        ],
    },
}

In [38]:
# Search window as Unix epoch seconds, used to filter on each submission's
# `created_utc`. Both bounds are pinned to midnight UTC: a naive datetime's
# .timestamp() is interpreted in the machine's local timezone, which would
# make the window (and therefore the scraped rows) depend on where the
# notebook is run.
# Note: the upper bound is 00:00 on 2024-09-25, so posts made later that day
# fall outside the window.
start_date = int(datetime.strptime('2022-09-26', "%Y-%m-%d").replace(tzinfo=timezone.utc).timestamp())
end_date = int(datetime.strptime('2024-09-25', "%Y-%m-%d").replace(tzinfo=timezone.utc).timestamp())


In [37]:
# Column order of the frame returned by search_reddit (also used when it is empty).
_RESULT_COLUMNS = [
    'stock', 'subreddit', 'query', 'title', 'author', 'url',
    'post_date', 'selftext', 'num_comments', 'comments',
]


def _submission_to_record(submission, stock, subreddit, query):
    """Flatten one submission and all of its comments into a single result-row dict."""
    record = {
        'stock': stock,
        'subreddit': subreddit,
        'query': query,
        'title': submission.title,
        # author can be falsy (presumably a deleted account), so guard the .name lookup.
        'author': submission.author.name if submission.author else None,
        'url': submission.url,
        # NOTE(review): fromtimestamp() without tz yields a naive local-time
        # datetime even though the source value is created_utc - confirm
        # this is what downstream analysis expects.
        'post_date': datetime.fromtimestamp(submission.created_utc),
        'selftext': submission.selftext,
        'num_comments': submission.num_comments,
    }
    # limit=0 strips the "load more comments" placeholders instead of
    # fetching them, so .list() below only yields real comments.
    submission.comments.replace_more(limit=0)
    record['comments'] = [
        {
            'comment': comment.body,
            'comment_author': comment.author.name if comment.author else None,
            'comment_date': datetime.fromtimestamp(comment.created_utc),
        }
        for comment in submission.comments.list()
    ]
    return record


def search_reddit(queries, start_date, end_date, reddit_client=None):
    """Search Reddit for every (ticker, query, subreddit) combination.

    Parameters
    ----------
    queries : dict
        Maps a ticker to ``{'queries': [search terms], 'subreddits': [names]}``.
    start_date, end_date : int or float
        Inclusive bounds, compared against each submission's ``created_utc``.
    reddit_client : optional
        Object exposing ``subreddit(name).search(...)`` like ``praw.Reddit``.
        Defaults to the notebook-level ``reddit`` client.

    Returns
    -------
    pandas.DataFrame
        One row per matching submission with the columns in
        ``_RESULT_COLUMNS``; ``comments`` holds a list of dicts
        (``comment``, ``comment_author``, ``comment_date``). The same
        submission can appear more than once if several queries match it.

    Notes
    -----
    A failure in one subreddit (for example a name that does not exist) is
    printed and skipped; rows collected before the failure are kept.
    """
    client = reddit if reddit_client is None else reddit_client
    results = []

    for key, spec in queries.items():
        for query in spec['queries']:
            for subreddit in spec['subreddits']:
                print(f"Searching for '{query}' in '{subreddit}' subreddit...")
                try:
                    submissions = client.subreddit(subreddit).search(
                        query, time_filter='all', sort='relevance', limit=None
                    )
                    # The listing is lazy: requests are only issued while
                    # iterating, so the loop has to sit inside the try for
                    # API errors to be caught at all.
                    for submission in submissions:
                        if start_date <= submission.created_utc <= end_date:
                            results.append(
                                _submission_to_record(submission, key, subreddit, query)
                            )
                except Exception as e:
                    print(f"An error occurred: {e}")
                    continue

    # Passing the columns explicitly keeps the schema stable even when nothing matched.
    return pd.DataFrame(results, columns=_RESULT_COLUMNS)

In [39]:
# Run the search for every (ticker, query, subreddit) combination in `queries`.
# This goes through the `reddit` client and prints one progress line per combination.
df=search_reddit(queries, start_date, end_date)

Searching for 'APLD' in 'wallstreetbets' subreddit...
Searching for 'APLD' in 'stocks' subreddit...
Searching for 'APLD' in 'stockmarket' subreddit...
Searching for 'APLD' in 'investing' subreddit...
Searching for 'APLD' in 'valueinvesting' subreddit...
Searching for 'APLD' in 'pennystocks' subreddit...
Searching for 'APLD' in 'robinhood' subreddit...
Searching for 'APLD' in 'vanturetrading' subreddit...
Searching for 'APLD' in 'renewableenergy' subreddit...
Searching for 'APLD' in 'biotech' subreddit...
Searching for 'APLD' in 'biotech_stocks' subreddit...
Searching for 'APLD' in 'healthcare' subreddit...
Searching for 'APLD' in 'pharma' subreddit...
Searching for 'APLD' in 'technology' subreddit...
Searching for 'APLD' in 'cryptocurrency' subreddit...
Searching for 'APLD' in 'cryptostocks' subreddit...
Searching for 'applied digital' in 'wallstreetbets' subreddit...
Searching for 'applied digital' in 'stocks' subreddit...
Searching for 'applied digital' in 'stockmarket' subreddit...


In [40]:
# Preview the first rows of the scraped results.
df.head()

Unnamed: 0,stock,subreddit,query,title,author,url,post_date,selftext,num_comments,comments
0,APLD,wallstreetbets,APLD,Found a newspaper from 2011. Imagine all the g...,nywarpath,https://i.redd.it/s7q97fka9vic1.jpeg,2024-02-16 03:26:31,,688,[{'comment': ' **User Report**| | | | :--|:--|...
1,APLD,wallstreetbets,APLD,Micro Trading Options on QQQ!,Vivo__,https://i.redd.it/vd1e4xdqugnd1.jpeg,2024-09-07 22:48:32,The good side to options is its worst side! Yo...,153,[{'comment': ' **User Report**| | | | :--|:--|...
2,APLD,wallstreetbets,APLD,Most Anticipated Earnings Releases for the wee...,ItsNotYourFault,https://i.redd.it/d2nn39ignwbc1.png,2024-01-12 00:37:59,,244,[{'comment': 'Not open on MLK Day? He’d want p...
3,APLD,wallstreetbets,APLD,Dow drops more than 400 points as Wall Street ...,mediterranean2,https://www.reddit.com/r/wallstreetbets/commen...,2024-04-02 19:07:01,The Dow Jones Industrial Average\n fell for a ...,54,[{'comment': ' **User Report**| | | | :--|:--|...
4,APLD,stocks,APLD,(9/20) Friday's Pre-Market News & Stock Movers,bigbear0083,https://www.reddit.com/r/stocks/comments/1flat...,2024-09-20 12:30:23,#Good Friday morning traders and investors of ...,0,[]


In [41]:
# (rows, columns): one row per matching submission.
df.shape

(460, 10)

In [42]:
# Inspect the nested comments column (one list of comment dicts per submission).
df['comments']

0      [{'comment': '
**User Report**| | | |
:--|:--|...
1      [{'comment': '
**User Report**| | | |
:--|:--|...
2      [{'comment': 'Not open on MLK Day? He’d want p...
3      [{'comment': '
**User Report**| | | |
:--|:--|...
4                                                     []
                             ...                        
455    [{'comment': 'Zoom out - worth a look', 'comme...
456    [{'comment': 'If they do another reverse split...
457    [{'comment': 'Tonix cost me 10k with that last...
458    [{'comment': 'Like that's gonna work lol, have...
459    [{'comment': 'Scam', 'comment_author': 'BledOr...
Name: comments, Length: 460, dtype: object

In [43]:
# Save the results to reddit.csv without the index column.
# NOTE(review): each `comments` cell is a list of dicts (including datetime
# objects), which CSV can only store as its string representation - confirm
# that whatever reads this file back can parse that text.
df.to_csv('reddit.csv', index=False)