# Netflix Show Cancellations: Reddit API Analysis TEST


## Step 1: Install Required Libraries + Set Up the Reddit API Connection
Before using the Reddit API, we need to install the `praw` library. It simplifies authentication and API interactions.

In [2]:
# Install the required library
#!pip install praw

In [None]:
import praw
# Reaching this line means the import above succeeded; if praw were not
# installed, the import would have raised before this message is printed.
print("praw library imported successfully!")

In [None]:
#!pip install python-dotenv
# python-dotenv loads your API credentials from a local .env file, so the keys never have to be typed into the notebook itself. We don't want random people having access to our keys!

### Create Reddit app
1. Go to Reddit Apps (https://www.reddit.com/prefs/apps) to create a new Reddit app
2. Select app type 'script'
3. Enter name (e.g., DW64) and description (e.g., Data Wrangling 64)
4. Enter redirect uri http://localhost:8080 and click 'create app'
5. Create a file '.env' in your git project folder (and add `.env` to your `.gitignore` so the credentials are never committed) in which you define:\
CLIENT_ID=[whatever's under 'personal use script' on the Reddit app page]\
CLIENT_SECRET=[whatever's next to 'secret' on the Reddit app page]\
USER_AGENT=your_reddit_username

In [None]:
import praw
from dotenv import load_dotenv
import os

# Load environment variables from the .env file
load_dotenv()

# Retrieve credentials from environment variables
CLIENT_ID = os.getenv('CLIENT_ID')
CLIENT_SECRET = os.getenv('CLIENT_SECRET')
USER_AGENT = os.getenv('USER_AGENT')

# Fail fast if any credential is missing or empty. os.getenv returns None for
# an unset variable, and handing None to praw.Reddit would otherwise surface
# later as a less specific error that does not point back to the .env file.
missing_credentials = [
    name
    for name, value in (
        ('CLIENT_ID', CLIENT_ID),
        ('CLIENT_SECRET', CLIENT_SECRET),
        ('USER_AGENT', USER_AGENT),
    )
    if not value
]
if missing_credentials:
    raise RuntimeError(
        "Missing Reddit API credentials: "
        + ", ".join(missing_credentials)
        + ". Define them in the .env file in your project folder."
    )

# Build the Reddit API client from the loaded credentials
reddit = praw.Reddit(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    user_agent=USER_AGENT
)

# Report the client's mode. No username/password is supplied above, so a
# read-only client is expected here.
# NOTE(review): this only reports the mode; the first real API call is what
# actually exercises the credentials against Reddit.
print("Reddit API is read-only:", reddit.read_only)

## Step 2: Test Access to the Subreddits
Verify that we can access the two subreddits (`r/NetflixBestOf` and `r/television`) through the Reddit API.

In [None]:
# Test access to the subreddits
subreddits_to_test = ['NetflixBestOf', 'television']

for subreddit_name in subreddits_to_test:
    try:
        subreddit = reddit.subreddit(subreddit_name)  # Lazy handle: no request is sent yet
        # PRAW objects are lazy, and display_name is simply the name passed in
        # above, so printing it alone would report success without ever
        # contacting Reddit. Reading an attribute that has to be fetched
        # forces the request, so a bad name or bad credentials raise here.
        subreddit_title = subreddit.title
        print(f"Successfully accessed subreddit: {subreddit.display_name}")
    except Exception as e:
        print(f"Error accessing subreddit {subreddit_name}: {e}")

### Step 2.1: Search for Posts with "Mindhunter" in the Title (Scenario 2)
We will search the two subreddits for posts that include the title of the show ("Mindhunter") in their post titles.


In [11]:
# install pandas if you haven't already
#!pip install pandas

In [None]:
import pandas as pd

# Search for posts with "Mindhunter" in the title
show_title = "Mindhunter"  # The show we're testing
subreddits_to_test = ['NetflixBestOf', 'television']
posts_with_title = []  # List to store results

# A plain search term also matches post bodies. The `title:` field prefix
# asks Reddit's search to match against the post title only, which is what
# this step (and the messages printed below) promise.
title_query = f'title:"{show_title}"'
show_title_lower = show_title.lower()

for subreddit_name in subreddits_to_test:
    print(f"Searching in subreddit: {subreddit_name}")
    try:
        subreddit = reddit.subreddit(subreddit_name)
        for post in subreddit.search(title_query, limit=10):  # Limit to 10 posts
            # Defensive check: keep a result only if the show name really
            # appears in its title, regardless of how the search matched it.
            if show_title_lower not in post.title.lower():
                continue
            posts_with_title.append({
                'Subreddit': subreddit_name,
                'Title': post.title,
                'Num_Comments': post.num_comments,
                'Upvotes': post.score,
                'Post_ID': post.id,
                'Created_UTC': post.created_utc
            })
    except Exception as e:
        # Best-effort: report the failure and continue with the next subreddit.
        print(f"Error searching subreddit {subreddit_name}: {e}")

# Convert results to DataFrame for easier analysis
if posts_with_title:
    posts_df = pd.DataFrame(posts_with_title)
    print(f"Found {len(posts_df)} posts with '{show_title}' in the title:")
    display(posts_df)
else:
    print(f"No posts with '{show_title}' in the title found.")


## Step 3.4.3: Search for Comments Mentioning "Mindhunter" (Scenario 1: assume no posts with "Mindhunter" in the title were found)
We will scan recent comments for mentions of "Mindhunter," testing on one of the two subreddits (`r/NetflixBestOf`) first.


In [None]:
# Scan a subreddit's recent comments for mentions of "Mindhunter"
show_title = "Mindhunter"  # The show we're testing
subreddit_name = "NetflixBestOf"  # Choose one subreddit to test
comments_with_show = []  # List to store matching comments

# Upper bound on how many recent comments to request.
# NOTE(review): Reddit listings are generally capped at roughly 1000 items, so
# a larger value most likely behaves as "as many as Reddit returns" - confirm.
# Lower this (e.g. to 50) for a quick test run.
max_comments = 5000

# Lowercase the search term once instead of on every comment.
show_title_lower = show_title.lower()

try:
    subreddit = reddit.subreddit(subreddit_name)
    print(f"Searching the most recent comments in subreddit: {subreddit_name}")

    # subreddit.comments() yields the subreddit's recent comments; it is not
    # a full historical archive, so older mentions will not be found this way.
    for comment in subreddit.comments(limit=max_comments):
        if show_title_lower in comment.body.lower():  # Case-insensitive mention check
            comments_with_show.append({
                'Subreddit': subreddit_name,
                'Comment': comment.body,
                'Upvotes': comment.score,
                'Comment_ID': comment.id,
                'Created_UTC': comment.created_utc
            })
except Exception as e:
    # Best-effort: matches collected before the failure are kept and shown below.
    print(f"Error accessing comments in subreddit {subreddit_name}: {e}")

# Convert results to a DataFrame for better readability
if comments_with_show:
    comments_df = pd.DataFrame(comments_with_show)
    print(f"Found {len(comments_df)} comments mentioning '{show_title}':")
    display(comments_df)
else:
    print(f"No comments mentioning '{show_title}' found in subreddit {subreddit_name}.")
