In [None]:
import os
import time
import csv
from datetime import datetime, timezone

import pandas as pd
import praw
from prawcore.exceptions import RequestException, ResponseException

In [None]:
# Load the list of match-thread URLs. The file has no header row, so the single
# column is named explicitly.
threads_df = pd.read_csv('2024.csv', names=['thread'], header=None)

# Key each thread by its 1-based position in the file. Every value is
# [match number as text, "Match N" label, thread URL].
thread_dict = {
    match_no: [f"{match_no}", f"Match {match_no}", thread_url]
    for match_no, thread_url in zip(threads_df.index + 1, threads_df['thread'])
}

In [None]:
# Build the shared Reddit client. Credentials are read from the environment so
# that no secret is stored in the notebook; a setting is None when its
# environment variable is unset.
reddit = praw.Reddit(
    user_agent=os.environ.get('USER_AGENT'),
    client_secret=os.environ.get('CLIENT_SECRET'),
    client_id=os.environ.get('CLIENT_ID'),
)

In [None]:

# Default destination for the per-match CSV files. Kept identical to the
# previously hard-coded location so existing callers are unaffected.
_DEFAULT_OUTPUT_DIR = '/Users/darshan/Documents/GitHub/ipl-sentiment-betting/reddit/2024'


def _http_status(exc):
    """Return the integer HTTP status attached to ``exc``, or None if there is none."""
    status = getattr(getattr(exc, 'response', None), 'status_code', None)
    return status if isinstance(status, int) else None


def _fetch_top_level_rows(submission, submission_url, retries, initial_wait):
    """Expand the comment tree and return one CSV row per top-level comment.

    Rate-limit (429) and server (5xx) failures are retried up to ``retries``
    times; any other request/response error is re-raised immediately.
    """
    wait_time = initial_wait
    last_error = None

    for attempt in range(retries):
        try:
            submission.comments.replace_more(limit=None)
            return [
                [
                    datetime.fromtimestamp(comment.created_utc, timezone.utc).isoformat(),
                    comment.body,
                    comment.score,
                ]
                for comment in submission.comments
            ]
        except (RequestException, ResponseException) as e:
            last_error = e
            status = _http_status(e)
            text = str(e)
            # Prefer the real status code; fall back to the message text only
            # when the exception carries no response object.
            if status is not None:
                rate_limited = status == 429
                server_error = 500 <= status < 600
            else:
                rate_limited = '429' in text
                server_error = '500' in text

            if not (rate_limited or server_error):
                raise

            # Nothing left to retry: do not sleep before giving up.
            if attempt + 1 >= retries:
                break

            reason = "Rate limited" if rate_limited else "Server error"
            print(f"{reason}. Retrying in {wait_time} seconds... (Attempt {attempt + 1}/{retries})")
            time.sleep(wait_time)
            if rate_limited:
                # Exponential backoff applies to rate limiting only.
                wait_time *= 2

    raise Exception(
        f"Failed to fetch comments from {submission_url} after {retries} attempts."
    ) from last_error


def get_comments(submission_url, filename, retries=10, initial_wait=5, output_dir=None):
    """Save the top-level comments of a Reddit thread to a CSV file.

    The CSV has the columns ``Timestamp`` (UTC, ISO 8601), ``Comment`` and
    ``Upvotes``. Comments are requested with the ``old`` sort order.

    The file is only created once every comment has been fetched: rows are
    written to ``<filename>.part`` and then moved into place, so a failed or
    interrupted run never leaves a partial CSV that looks complete.

    Args:
        submission_url: URL of the Reddit thread.
        filename: Name of the CSV file to create inside ``output_dir``.
        retries: Maximum number of fetch attempts for 429 / 5xx failures.
        initial_wait: Seconds to wait before the first retry; doubled after
            each rate-limit response.
        output_dir: Directory to write into. Defaults to the project's
            ``reddit/2024`` folder.

    Raises:
        Exception: if the comments could not be fetched within ``retries``
            attempts.
        RequestException, ResponseException: for non-retryable API errors.
    """
    if output_dir is None:
        output_dir = _DEFAULT_OUTPUT_DIR
    filepath = os.path.join(output_dir, filename)

    submission = reddit.submission(url=submission_url)
    submission.comment_sort = 'old'

    # Fetch everything before touching the filesystem.
    rows = _fetch_top_level_rows(submission, submission_url, retries, initial_wait)

    tmp_path = filepath + '.part'
    try:
        with open(tmp_path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['Timestamp', 'Comment', 'Upvotes'])
            writer.writerows(rows)
        os.replace(tmp_path, filepath)
    except BaseException:
        # Clean up the temporary file on any failure, including interrupts.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise


In [None]:
# Directory that get_comments writes into; used here to detect matches that
# have already been downloaded.
OUTPUT_DIR = '/Users/darshan/Documents/GitHub/ipl-sentiment-betting/reddit/2024'

# Download every thread that does not yet have a CSV. Failures are reported
# and the loop moves on to the next match.
for match, (match_number, match_teams, thread_url) in thread_dict.items():
    filename = f'{match_number}.csv'
    filepath = os.path.join(OUTPUT_DIR, filename)

    print(f"Processing Match {match_number}")

    if os.path.exists(filepath):
        print(f"Skipping - file already exists for Match {match_number}\n")
        continue

    try:
        get_comments(thread_url, filename)
    except BaseException as e:
        # The file did not exist before this attempt (checked above), so
        # anything at filepath now is incomplete output. Remove it so that a
        # later run retries this match instead of skipping it.
        if os.path.exists(filepath):
            os.remove(filepath)
        # Kernel interrupts / SystemExit must still stop the loop.
        if not isinstance(e, Exception):
            raise
        print(f"Error {match_number}")
        print(e)
        continue

    # Only reached when the download succeeded.
    print(f"Finished processing Match {match_number}\n")
