In [1]:
import praw
import pandas as pd

In [17]:
# Reddit API credentials.
# Secrets are read from environment variables so they never have to be typed
# into (and saved with) the notebook. When a variable is unset the value falls
# back to the original empty placeholder.
import os

# Client id of the Reddit app (environment variable REDDIT_CLIENT_ID).
client_id = os.environ.get("REDDIT_CLIENT_ID", "")
# Client secret / API key of the Reddit app (REDDIT_CLIENT_SECRET).
client_secret = os.environ.get("REDDIT_CLIENT_SECRET", "")
# The Reddit username goes after the "/u/" (REDDIT_USERNAME).
user_agent = "NashvilleSC_Scraper by /u/" + os.environ.get("REDDIT_USERNAME", "")

# Build the PRAW client; scrape_comments reads this module-level `reddit` object.
reddit = praw.Reddit(client_id=client_id, client_secret=client_secret, user_agent=user_agent)

# Subreddit to scrape; change the name here to target a different community.
subreddit_name = "NashvilleSC"
subreddit = reddit.subreddit(subreddit_name)

### Functions to scrape posts ###
def scrape_reddit_posts(subreddit, limit=100):
    """Collect the newest submissions of a subreddit into a DataFrame.

    Parameters
    ----------
    subreddit
        Object exposing ``new(limit=...)`` that yields submissions
        (the notebook passes a PRAW subreddit).
    limit : int, default 100
        Forwarded unchanged to ``subreddit.new``.

    Returns
    -------
    pandas.DataFrame
        One row per submission with the columns "Post Title", "Post ID",
        "Author", "Score", "Comments", "Created", "Post URL" and "Text".
        When nothing is yielded the frame is empty and has no columns.
    """

    def _as_record(submission):
        # A falsy author is recorded under the placeholder name "Deleted".
        return {
            "Post Title": submission.title,
            "Post ID": submission.id,
            "Author": "Deleted" if not submission.author else submission.author.name,
            "Score": submission.score,
            "Comments": submission.num_comments,
            # created_utc is interpreted as seconds since the Unix epoch.
            "Created": pd.to_datetime(submission.created_utc, unit="s"),
            "Post URL": submission.url,
            "Text": submission.selftext,
        }

    # Swap .new() for .top() or .hot() to pull a different listing.
    newest = subreddit.new(limit=limit)
    return pd.DataFrame([_as_record(submission) for submission in newest])

### Functions to scrape comments ###
def scrape_comments(post_id):
    """Return every comment of one submission as a list of dicts.

    The submission is looked up through the module-level ``reddit`` client.

    Parameters
    ----------
    post_id
        Submission id, passed to ``reddit.submission(id=...)`` and copied
        into each record.

    Returns
    -------
    list of dict
        One dict per comment with the keys "Post ID", "Comment Author",
        "Comment Body", "Comment Score" and "Created".
    """
    thread = reddit.submission(id=post_id)

    # limit=None asks for every "more comments" placeholder to be expanded
    # before the comment tree is flattened with .list().
    thread.comments.replace_more(limit=None)

    return [
        {
            "Post ID": post_id,
            # A falsy author is recorded under the placeholder name "Deleted".
            "Comment Author": "Deleted" if not reply.author else reply.author.name,
            "Comment Body": reply.body,
            "Comment Score": reply.score,
            # created_utc is interpreted as seconds since the Unix epoch.
            "Created": pd.to_datetime(reply.created_utc, unit="s"),
        }
        for reply in thread.comments.list()
    ]

# Main function to scrape posts and comments
def scrape_posts_and_comments(subreddit, post_limit=100):
    """Scrape a subreddit's posts and then the comments of each post.

    Parameters
    ----------
    subreddit
        Forwarded to ``scrape_reddit_posts``.
    post_limit : int, default 100
        Forwarded to ``scrape_reddit_posts`` as ``limit``.

    Returns
    -------
    (posts_df, comments_df) : tuple of pandas.DataFrame
        ``posts_df`` is the result of ``scrape_reddit_posts``;
        ``comments_df`` holds the records returned by ``scrape_comments``
        for every post whose comments could be fetched.

    Notes
    -----
    A failure while fetching one post's comments is reported with a
    ``RuntimeWarning`` and that post is skipped. Previously the first failure
    silently stopped the loop, dropping the comments of all remaining posts.
    """
    import warnings  # local import so this cell does not depend on another cell

    # Scrape posts
    posts_df = scrape_reddit_posts(subreddit, limit=post_limit)

    # Scrape comments for each post
    all_comments_data = []
    # .get() tolerates the column-less frame that results when no posts came back.
    for post_id in posts_df.get("Post ID", []):
        try:
            all_comments_data.extend(scrape_comments(post_id))
        except Exception as e:
            # Best effort: report the problem and keep going with the next post.
            warnings.warn(
                f"Skipping comments for post {post_id}: {e!r}",
                RuntimeWarning,
                stacklevel=2,
            )

    # Combine into a DataFrame
    comments_df = pd.DataFrame(all_comments_data)

    return posts_df, comments_df

# Run the full scrape for the configured subreddit, asking for up to 1000 posts.
# NOTE(review): the saved output of this cell shows "Scraping ..." progress
# lines that the functions above do not print; presumably it comes from an
# earlier version of the code. Re-run to refresh it.
nashville_posts, nashville_comments = scrape_posts_and_comments(
    subreddit=subreddit,
    post_limit=1000,
)

Scraping posts...
Scraping comments for each post...
Scraping comments for Post ID: 1i8fygd
Scraping comments for Post ID: 1i87o7p
Scraping comments for Post ID: 1i7i4dk
Scraping comments for Post ID: 1i70tfd
Scraping comments for Post ID: 1i5u4f4
Scraping comments for Post ID: 1i3wiud
Scraping comments for Post ID: 1i369f4
Scraping comments for Post ID: 1i33n67
Scraping comments for Post ID: 1i2rdfn
Scraping comments for Post ID: 1i2r1z4
Scraping comments for Post ID: 1i1is5s
Scraping comments for Post ID: 1i1iivl
Scraping comments for Post ID: 1i18jco
Scraping comments for Post ID: 1i0tw28
Scraping comments for Post ID: 1i0orsp
Scraping comments for Post ID: 1i0njum
Scraping comments for Post ID: 1i0g0ke
Scraping comments for Post ID: 1i043pj
Scraping comments for Post ID: 1i023os
Scraping comments for Post ID: 1hzon5z
Scraping comments for Post ID: 1hzo27n
Scraping comments for Post ID: 1hznpd6
Scraping comments for Post ID: 1hza3mv
Scraping comments for Post ID: 1hz43qg
Scraping co

In [34]:
# Preview the first five rows of the scraped Reddit comments.
# NOTE(review): the saved output includes an "Unnamed: 0" column that the
# scraping code never creates; presumably the frame was reloaded from a CSV
# in a cell that is not shown. Confirm before relying on this output.
nashville_comments.head(n=5)

Unnamed: 0,Post ID,Comment Author,Comment Body,Comment Score,Created
0,1i87o7p,Whiskey615,Didn’t really play much last season in Spain’s...,11,2025-01-23 16:59:16
1,1i87o7p,Swaggron,"Ok, but what will we chant when he blocks a shot?",3,2025-01-23 18:04:38
2,1i87o7p,Jindro41,I come in peace as someone who's known Brian f...,3,2025-01-24 02:36:33
3,1i87o7p,DrEvilEngineer,I know it's exiciting to sign a player with ex...,1,2025-01-23 21:51:01
4,1i87o7p,DrEvilEngineer,[Greenock Morton](https://en.wikipedia.org/wik...,2,2025-01-23 21:55:31


In [61]:
# Use OpenAI for sentiment analysis; the author's rationale is that it should
# handle sarcasm better than a tool like VADER.
import os

from openai import OpenAI
import pandas as pd

# The API key is read from the OPENAI_API_KEY environment variable so it is
# never stored in the notebook. When the variable is unset the value falls
# back to the original empty placeholder.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))


In [68]:
### Function that prompts OpenAI for a sentiment label ###
# The system message tells the gpt-4 model that it is a helpful assistant and
# must answer with a single sentiment word.
def analyze_sentiment(comment_text):
    """Classify one comment as Positive, Neutral, or Negative via the chat API.

    Uses the module-level ``client``.

    Parameters
    ----------
    comment_text
        Text of the comment; it is interpolated into the user prompt.

    Returns
    -------
    str
        "Positive", "Neutral" or "Negative" when the reply matches one of the
        three labels after ignoring case, surrounding whitespace, quotes and
        trailing punctuation. Any other reply is returned as-is (whitespace
        stripped). If the request raises, the string ``"Error: <message>"``
        is returned instead of propagating the exception.
    """
    labels = {"positive": "Positive", "neutral": "Neutral", "negative": "Negative"}
    try:
        completion = client.chat.completions.create(
            model="gpt-4",
            # Sampling temperature 0 makes repeated runs give more consistent labels.
            temperature=0,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that analyzes sentiment as Positive, Neutral, or Negative. Just respond with either Positive, Neutral, or Negative. No other words in your response."},
                {
                    "role": "user",
                    "content": f"Analyze the sentiment of this comment: '{comment_text}'."
                }
            ]
        )
        # Extract the reply; a missing (None) reply is treated as empty text.
        reply = (completion.choices[0].message.content or "").strip()
        # Fold variants such as "positive." or "'Negative'" into the canonical
        # label so they do not show up as separate categories later.
        return labels.get(reply.strip(" .!'\"").lower(), reply)
    except Exception as e:
        # Best effort: keep a bulk run going and surface the failure in the data.
        return f"Error: {e}"

In [75]:
# Add a "Sentiment" column to the comments table.
# analyze_sentiment is called once per "Comment Body" value
# (about 16,000 comments), so this cell is slow to run.

nashville_comments["Sentiment"] = nashville_comments["Comment Body"].map(analyze_sentiment)

In [76]:
# Tally how many comments received each sentiment label.
nashville_comments["Sentiment"].value_counts()

Sentiment
Neutral     8959
Negative    4677
Positive    3350
Name: count, dtype: int64

In [79]:
# Write both tables to CSV files.
import os
from pathlib import Path

# Destination folder: NASHVILLESC_OUTPUT_DIR when set, otherwise the current
# working directory, so the notebook does not depend on one user's profile path.
output_dir = Path(os.environ.get("NASHVILLESC_OUTPUT_DIR", "."))
output_dir.mkdir(parents=True, exist_ok=True)

# index=False keeps the positional row index out of the files; otherwise it
# comes back as an "Unnamed: 0" column when the CSV is read again.
nashville_comments.to_csv(output_dir / "nashvillesc_comments.csv", index=False)
nashville_posts.to_csv(output_dir / "nashvillesc_posts.csv", index=False)