### Imports and setup for PRAW

In [None]:
import os
import re
import time

import pandas as pd
import praw
from nltk.corpus import stopwords

# SECURITY: API credentials should not live in source control. Each value is
# read from the environment when the variable is set; the literals are kept
# only as fallbacks so existing runs behave as before. Rotate these
# credentials and remove the fallbacks once the environment is configured.
client_id = os.environ.get('REDDIT_CLIENT_ID', '0X3RFkechF94pb0jhrBaBA')
client_secret = os.environ.get('REDDIT_CLIENT_SECRET', '_A0RRGRXo4w_rWKm6mGYeoqqvW2NnA')
user_agent = os.environ.get('REDDIT_USER_AGENT', 'MyRedditBot:v1.0 (by u/Healthy-Pollution929)')

### Get the data

In [None]:
# Create the PRAW Reddit client used by the functions below, built from the
# credentials and user agent defined earlier in the notebook.
reddit = praw.Reddit(
    client_id=client_id,
    client_secret=client_secret,
    user_agent=user_agent,
)

def fetch_comments(subreddit_name: str, search_query: str, time_filter: str = 'year',
                   post_limit: int = 200, comment_limit: int = 10, sleep_time: float = 1,
                   reddit_client=None) -> pd.DataFrame:
    """
    Collect the first ``comment_limit`` comments (sorted by 'top') of every post
    that matches a search query in one subreddit.

    For each matching post the comment sort order is set to 'top', the first
    ``comment_limit`` comments are taken, and those whose body is not blank are
    recorded. Collection is best-effort: if an exception is raised part-way
    through, it is printed, the function pauses (60 s for
    ``praw.exceptions.RedditAPIException``, 10 s for anything else) and then
    returns whatever was collected up to that point.

    Parameters:
        subreddit_name (str): Name of the subreddit to search.
        search_query (str): The search term to filter posts.
        time_filter (str): Time range for the search ('day', 'week', 'month', 'year', 'all').
        post_limit (int): Maximum number of posts to retrieve.
        comment_limit (int): Maximum number of comments to retrieve per post.
        sleep_time (int | float): Seconds to wait after processing each post.
        reddit_client: Object exposing ``subreddit(name)`` the way the PRAW
            client does. Defaults to the module-level ``reddit`` client.

    Returns:
        pd.DataFrame: One row per collected comment with the columns
        created_date, subreddit_id, search_query, post_id, comment_id,
        post_title, comment_text, upvotes, post_url, comment_url. When nothing
        was collected the frame is empty but still has these columns.
    """
    column_order = ["created_date", "subreddit_id", "search_query", "post_id", "comment_id",
                    "post_title", "comment_text", "upvotes", "post_url", "comment_url"]

    # Fall back to the notebook-level client when the caller does not supply one.
    client = reddit if reddit_client is None else reddit_client

    comments_data = []
    subreddit = client.subreddit(subreddit_name)
    print(f"Collecting comments from posts in r/{subreddit_name} related to '{search_query}'...")

    try:
        # Search for relevant posts
        for post in subreddit.search(search_query, time_filter=time_filter, limit=post_limit):
            # Must be set before the comments are first accessed so they are
            # returned highest-upvoted first.
            post.comment_sort = 'top'

            # Absolute URL of the post; comment URLs are built on top of it
            post_url = f"https://www.reddit.com{post.permalink}"

            # limit=0 discards the unresolved "load more comments" placeholders
            # instead of fetching them (see PRAW's replace_more documentation).
            post.comments.replace_more(limit=0)
            for comment in post.comments[:comment_limit]:
                # Skip comments whose body is empty or whitespace only
                if not comment.body.strip():
                    continue

                comments_data.append({
                    "created_date": pd.to_datetime(comment.created_utc, unit='s'),
                    # NOTE: despite the column name this holds the subreddit *name*
                    "subreddit_id": subreddit_name,
                    "search_query": search_query,
                    "post_id": post.id,
                    "comment_id": comment.id,
                    "post_title": post.title,
                    "comment_text": comment.body,
                    "upvotes": comment.ups,
                    "post_url": post_url,
                    "comment_url": f"{post_url}{comment.id}",
                })

            # Sleep to prevent hitting Reddit's rate limit
            time.sleep(sleep_time)

    # Handle API exceptions; collection stops here and partial data is returned
    except praw.exceptions.RedditAPIException as api_error:
        print(f"Rate limit or other API error: {api_error}")
        time.sleep(60)

    # Handle other exceptions the same best-effort way, with a shorter pause
    except Exception as e:
        print(f"Unexpected error occurred: {e}")
        time.sleep(10)

    # Passing columns= fixes the column order and, unlike indexing afterwards,
    # also works when no comments were collected (an empty frame has no
    # columns to select from, which previously raised KeyError).
    return pd.DataFrame(comments_data, columns=column_order)

# Define parameters
subreddit_name = 'Israel'  # Specify one subreddit
search_query = 'Palestine'  # Single topic

# Fetch comments for one subreddit and topic
df_comments = fetch_comments(subreddit_name=subreddit_name, search_query=search_query)

# Optional: Save the data to a CSV file.
# to_csv does not create missing directories, so make sure 'data/' exists first.
os.makedirs('data', exist_ok=True)
df_comments.to_csv(f'data/{subreddit_name}_{search_query}_comments.csv', index=False)

print(f"Collected {len(df_comments)} comments on '{search_query}' from r/{subreddit_name}.")