In [13]:
import os
from datetime import datetime as dt
from datetime import timezone

import pandas as pd
import praw

In [14]:
# Reddit API client used by every cell below.
# Credentials are read from the environment when present so that real secrets
# never need to be saved inside the notebook. The redacted strings are kept only
# as fallbacks: set REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET / REDDIT_USER_AGENT
# (or replace the placeholders) before running.
reddit = praw.Reddit(
    client_id=os.environ.get("REDDIT_CLIENT_ID", "**********************"),
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET", "******************************"),
    user_agent=os.environ.get("REDDIT_USER_AGENT", "**********"),
)

In [15]:
# Name of the subreddit that all searches in this notebook run against.
SUBREDDIT_NAME = "fantasyfootball"
subreddit = reddit.subreddit(SUBREDDIT_NAME)

In [16]:
# Top submissions of the subreddit over all time, requested without an explicit cap (limit=None).
# NOTE(review): no later cell visible here reads this global; `query` builds its own
# local `posts` list, so this assignment may be a leftover - confirm before removing.
posts = subreddit.top(time_filter="all", limit=None)

# Only keep posts from during the 2023 NFL regular season.
# These bounds are compared against each submission's `created_utc` in `query`,
# so they are built from timezone-aware UTC datetimes. A naive `dt(...)` would be
# interpreted in the local timezone of whatever machine runs the notebook, which
# silently shifts the window by the host's UTC offset.
start_date = dt(2023, 9, 6, tzinfo=timezone.utc).timestamp()
end_date = dt(2024, 1, 8, tzinfo=timezone.utc).timestamp()

## Overperformers
Top 5 players at each position who overperformed their projections.
- Each list contains multiple variations of how a player may be mentioned or referred to in Reddit posts

In [17]:
# Search aliases for the overperformers, one list per position.
# Each row holds the aliases that appear to belong to one player
# (grouping inferred from the names themselves; list order is unchanged).
t5_qb = [
    'Minshew',
    'Baker', 'Mayfield',
    'Jordan Love',
    'Stroud',
    'Purdy',
]
t5_rb = [
    'Kyren',
    'Justice Hill',
    'Jerome Ford', 'Ford',
    'Keaton Mitchell', 'Keaton', 'Mitchell',
    'Royce Freeman',
]
t5_wr = [
    'Puka', 'Nacua',
    'CeeDee', 'Ceedee', 'Lamb',
    'Rashee',
    'Nico', 'Nico Collins',
    'Jayden Reed', 'Reed',
]
t5_te = [
    'LaPorta', 'Laporta',
    'McBride', 'Mcbride',
    'Engram',
    'Ferguson',
    'Jonnu',
]

## Underperformers
Bottom 5 players at each position who underperformed their projections.
- Each list contains multiple variations of how a player may be mentioned or referred to in Reddit posts

In [18]:
# Search aliases for the underperformers, one list per position.
# Each row holds the aliases that appear to belong to one player
# (grouping inferred from the names themselves; list order is unchanged).
b5_qb = [
    'Mahomes', 'Pat',
    'Bryce', 'Bryce Young',
    'Geno',
    'Trevor', 'Lawrence', 'TLaw', 'Tlaw', 'T Law',
    'Russ', 'Russell Wilson',
]
b5_rb = [
    'Dameon', 'Pierce',
    'Miles Sanders',
    'Derrick Henry', 'King Henry', 'Henry',
    'Dalvin Cook', 'Dalvin',
    'Mattison',
]
b5_wr = [
    'Skyy Moore', 'Sky Moore',
    'Peoples-Jones', 'Peoples Jones', 'DPJ',
    'Renfrow',
    'Lazard',
    'Van Jefferson',
]
b5_te = [
    'Kelce', 'Travis Kelce',
    'Fant',
    'Brenton Strange',
    'Higbee',
    'Gesicki',
]

In [19]:
# get reddit posts mentioning each player
def query(query_list, source=None, start=None, end=None):
    """Search a subreddit for every term and collect the in-window posts.

    Parameters
    ----------
    query_list : iterable of str
        Search terms (player aliases); one search request is issued per term.
    source : object, optional
        Anything exposing ``search(term, **kwargs)`` that yields submissions with
        ``id``, ``title``, ``selftext``, ``created_utc`` and ``author``.
        Defaults to the notebook-level ``subreddit``.
    start, end : float, optional
        Inclusive bounds compared against ``created_utc``. Default to the
        notebook-level ``start_date`` and ``end_date``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``body`` (title and selftext joined by a space),
        ``created`` and ``author``; one row per distinct post id, in the order
        the posts were first encountered. Empty (with the same columns) when
        nothing matches.
    """
    # Fall back to the notebook globals only when the caller did not override them.
    source = subreddit if source is None else source
    start = start_date if start is None else start
    end = end_date if end is None else end

    rows = []
    # The same post is usually returned for several aliases of one player;
    # tracking ids in a set keeps the duplicate check O(1) per result and avoids
    # comparing whole rows (which include the author object).
    seen_ids = set()

    for term in query_list:
        # NOTE(review): time_filter='year' is presumably relative to the moment of
        # the request, so re-running long after the season could return nothing
        # inside [start, end] - confirm against the PRAW documentation.
        search_results = source.search(term,
                                       sort='top',
                                       time_filter='year',
                                       syntax='cloudsearch',
                                       limit=1000)  # request at most 1000 results per term

        for result in search_results:
            created = result.created_utc
            if not (start <= created <= end):
                continue  # outside the requested window
            if result.id in seen_ids:
                continue  # already collected via another alias
            seen_ids.add(result.id)
            rows.append([result.id,
                         f'{result.title} {result.selftext}',
                         created,
                         result.author])

    return pd.DataFrame(rows, columns=['id', 'body', 'created', 'author'])

In [20]:
# overperformer posts
# One DataFrame per position group; `query` runs one search per alias in the list,
# so each line below triggers several requests. The calls are kept as separate
# statements so frames already fetched survive if a later call fails.
t5_qb_posts = query(t5_qb)
t5_rb_posts = query(t5_rb)
t5_wr_posts = query(t5_wr)
t5_te_posts = query(t5_te)

# underperformer posts
# Same procedure for the underperformer alias lists.
b5_qb_posts = query(b5_qb)
b5_rb_posts = query(b5_rb)
b5_wr_posts = query(b5_wr)
b5_te_posts = query(b5_te)

## Writing each dataframe of positional groups to individual Excel sheets
The Overperformers and Underperformers were written in separate code blocks to bypass the "Too Many Requests" error

In [24]:
# Sheet name -> DataFrame for the overperformer position groups, in write order.
overperformer_sheets = (
    ('QB Overperformers', t5_qb_posts),
    ('RB Overperformers', t5_rb_posts),
    ('WR Overperformers', t5_wr_posts),
    ('TE Overperformers', t5_te_posts),
)

# Opens the workbook without a `mode` argument and writes each position
# group to its own sheet, dropping the DataFrame index.
with pd.ExcelWriter('player_reddit_raw.xlsx') as writer:
    for sheet_name, frame in overperformer_sheets:
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

In [27]:
# Sheet name -> DataFrame for the underperformer position groups, in write order.
underperformer_sheets = (
    ('QB Underperformers', b5_qb_posts),
    ('RB Underperformers', b5_rb_posts),
    ('WR Underperformers', b5_wr_posts),
    ('TE Underperformers', b5_te_posts),
)

# Append to the existing workbook (mode='a') so the sheets already in the file are kept.
# if_sheet_exists='replace' makes this cell re-runnable: without it, a second run
# raises because the underperformer sheets are already present in the file.
with pd.ExcelWriter('player_reddit_raw.xlsx', mode='a', if_sheet_exists='replace') as writer:
    # writes the dataframes of each position group to their own sheet in the excel file
    for sheet_name, frame in underperformer_sheets:
        frame.to_excel(writer, sheet_name=sheet_name, index=False)