In [4]:
import os
from datetime import datetime, timezone

import pandas as pd
import praw

def create_reddit_instance():
    """
    Create a Reddit client using PRAW, with credentials taken from the environment.

    API credentials must never be embedded in a notebook: anyone who receives
    the file (or its git history) can use them. They are read from:

    - ``REDDIT_CLIENT_ID``      (required)
    - ``REDDIT_CLIENT_SECRET``  (required)
    - ``REDDIT_USER_AGENT``     (optional, defaults to "Comment Scraper v1.0")

    Returns:
        praw.Reddit: client built from the values above, with
        ``read_only=True`` passed through to PRAW.

    Raises:
        RuntimeError: if a required environment variable is unset or empty.
    """
    required = ("REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET")
    values = {name: os.environ.get(name) for name in required}

    # Fail early with an actionable message instead of letting PRAW fail later
    # with a less obvious authentication error.
    missing = [name for name in required if not values[name]]
    if missing:
        raise RuntimeError(
            "Missing Reddit API credentials; set environment variable(s): "
            + ", ".join(missing)
        )

    # NOTE(review): any credentials that were previously committed in this
    # notebook should be treated as compromised and rotated.
    reddit = praw.Reddit(
        client_id=values["REDDIT_CLIENT_ID"],
        client_secret=values["REDDIT_CLIENT_SECRET"],
        user_agent=os.environ.get("REDDIT_USER_AGENT", "Comment Scraper v1.0"),
        read_only=True
    )
    return reddit

def _submission_id_from_url(post_url):
    """Return the submission id that follows '/comments/' in a Reddit post URL."""
    _, marker, tail = post_url.partition('/comments/')
    # Keep only the id segment: drop the title slug, query string and fragment.
    submission_id = tail.split('/')[0].split('?')[0].split('#')[0]
    if not marker or not submission_id:
        raise ValueError("Invalid Reddit URL format")
    return submission_id


def _utc_date(epoch_seconds):
    """Format a Unix timestamp as a YYYY-MM-DD date in UTC."""
    # Passing tz explicitly keeps the result independent of the machine's
    # local timezone; a naive fromtimestamp() would shift dates near midnight.
    return datetime.fromtimestamp(epoch_seconds, tz=timezone.utc).strftime('%Y-%m-%d')


def scrape_comments_from_post(post_url, comment_limit=None):
    """
    Scrape comments from a Reddit post and combine each with the post's info.

    Args:
        post_url: URL of a Reddit post; must contain '/comments/<id>'.
        comment_limit: maximum number of comments to return. ``None``
            (the default) returns every comment that was loaded; ``0``
            returns no comments.

    Returns:
        list[dict]: one dict per comment with the keys 'post_title',
        'post_date', 'post_score', 'post_author', 'post_num_comments',
        'comment_id', 'comment_author', 'comment_body', 'comment_score',
        'comment_date' and 'is_submitter'. Dates are 'YYYY-MM-DD' strings in
        UTC. Missing authors are reported as '[deleted]'. An empty list is
        returned if the URL is invalid or any unexpected error occurs (the
        error is printed, not raised).
    """
    try:
        # Validate the URL before building a client so bad input fails fast.
        submission_id = _submission_id_from_url(post_url)

        reddit = create_reddit_instance()
        submission = reddit.submission(id=submission_id)

        # Get post information
        post_info = {
            'post_title': submission.title,
            'post_date': _utc_date(submission.created_utc),
            'post_score': submission.score,
            # Same placeholder as for comments, instead of the string 'None'.
            'post_author': str(submission.author) if submission.author else '[deleted]',
            'post_num_comments': submission.num_comments
        }

        try:
            # NOTE(review): per PRAW's documentation, limit=0 removes the
            # "load more comments" placeholders without fetching them, so the
            # result can hold fewer rows than post_num_comments - confirm this
            # is the intended trade-off.
            submission.comments.replace_more(limit=0)
        except Exception as e:
            print(f"Warning: Could not expand all comments for post {submission.title}: {str(e)}")

        comments_to_process = submission.comments.list()
        # Compare against None so that an explicit limit of 0 is honoured.
        if comment_limit is not None:
            comments_to_process = comments_to_process[:comment_limit]

        comments_data = []
        for comment in comments_to_process:
            try:
                # Include post info in each row
                comments_data.append({
                    **post_info,
                    'comment_id': comment.id,
                    'comment_author': str(comment.author) if comment.author else '[deleted]',
                    'comment_body': comment.body,
                    'comment_score': comment.score,
                    'comment_date': _utc_date(comment.created_utc),
                    'is_submitter': comment.is_submitter
                })
            except Exception as e:
                # Best effort: one malformed entry must not discard the rest.
                print(f"Warning: Skipping comment in post {submission.title} due to error: {str(e)}")

        return comments_data

    except Exception as e:
        print(f"An error occurred while processing {post_url}: {str(e)}")
        return []

def main(post_urls=None, output_dir=None):
    """
    Scrape comments for several Reddit posts and save them to a timestamped CSV.

    Args:
        post_urls: iterable of Reddit post URLs to scrape. ``None`` (the
            default) uses the built-in list of threads below.
        output_dir: directory in which to write the CSV. ``None`` (the
            default) writes into the current working directory.

    Returns:
        pandas.DataFrame: one row per scraped comment, with the columns
        listed in ``columns`` below. The frame is returned even if writing
        the CSV fails, so later cells can use it directly instead of
        re-reading the file from disk.
    """
    if post_urls is None:
        post_urls = [
            'https://www.reddit.com/r/pakistan/comments/1eqbu7z/knorr_has_been_reducing_the_quantity_and/',
            'https://www.reddit.com/r/chutyapa/comments/147j0ub/k_lets_try_this/',
            'https://www.reddit.com/r/pakistan/comments/y6f7xh/how_many_of_you_have_developed_a_craze_for_korean/',
            'https://www.reddit.com/r/chutyapa/comments/1clynsr/whatever_happened_to_maggi_noodles_they_were/',
            'https://www.reddit.com/r/chutyapa/comments/15lrcef/dont_forget_what_they_took_from_us_even_though_i/',
            'https://www.reddit.com/r/PakistaniiConfessions/comments/1hh8k7p/shan_chatpatta_shoop_noodles_are_100_times_better/',
            'https://www.reddit.com/r/pakistan/comments/1dyab7z/local_indomie_noodles_are_cheap_but_flavors_are/',
            'https://www.reddit.com/r/chutyapa/comments/13oojjz/how_people_who_love_indomie_noodles_look_at/'
        ]

    all_comments = []

    # Process each URL; a post that fails contributes an empty list.
    for url in post_urls:
        print(f"\nProcessing URL: {url}")
        comments_data = scrape_comments_from_post(url)
        all_comments.extend(comments_data)
        print(f"Successfully scraped {len(comments_data)} comments from this post")

    # Fix the column order explicitly so the CSV layout is stable, and so an
    # empty result still produces a frame with the expected header.
    columns = [
        'post_title', 'post_date', 'post_score', 'post_author', 'post_num_comments',
        'comment_id', 'comment_author', 'comment_body', 'comment_score',
        'comment_date', 'is_submitter'
    ]
    df = pd.DataFrame(all_comments, columns=columns)

    # Create filename with timestamp to avoid overwrites
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = os.path.join(output_dir or '', f'reddit_comments_{timestamp}.csv')

    print(f"\nTotal comments scraped: {len(df)}")
    try:
        df.to_csv(filename, index=False, encoding='utf-8')
    except OSError as e:
        # A failed write must not throw away the scraped data: report it and
        # still hand the frame back to the caller.
        print(f"Failed to save comments to {filename}: {str(e)}")
    else:
        print(f"Data saved to: {filename}")

    print("\nFirst few rows:")
    print(df.head())

    return df

# Entry point: run the scraper when this code is executed directly. The
# captured output below this cell shows that executing the cell triggers it.
if __name__ == "__main__":
    main()


Processing URL: https://www.reddit.com/r/pakistan/comments/1eqbu7z/knorr_has_been_reducing_the_quantity_and/
Successfully scraped 129 comments from this post

Processing URL: https://www.reddit.com/r/chutyapa/comments/147j0ub/k_lets_try_this/
Successfully scraped 108 comments from this post

Processing URL: https://www.reddit.com/r/pakistan/comments/y6f7xh/how_many_of_you_have_developed_a_craze_for_korean/
Successfully scraped 41 comments from this post

Processing URL: https://www.reddit.com/r/chutyapa/comments/1clynsr/whatever_happened_to_maggi_noodles_they_were/
Successfully scraped 84 comments from this post

Processing URL: https://www.reddit.com/r/chutyapa/comments/15lrcef/dont_forget_what_they_took_from_us_even_though_i/
Successfully scraped 134 comments from this post

Processing URL: https://www.reddit.com/r/PakistaniiConfessions/comments/1hh8k7p/shan_chatpatta_shoop_noodles_are_100_times_better/
Successfully scraped 27 comments from this post

Processing URL: https://www.red

In [6]:
# Load a previously saved scrape from disk.
# NOTE(review): this is a machine-specific absolute path, so the cell only
# runs on the machine that holds this exact file.
df = pd.read_csv(
    filepath_or_buffer=r"E:\UTH\daraz bot\reddit_comments_20250130_003525.csv"
)

In [7]:
# Display the loaded comments frame as this cell's output.
df

Unnamed: 0,post_title,post_date,post_score,post_author,post_num_comments,comment_id,comment_author,comment_body,comment_score,comment_date,is_submitter
0,Knorr has been reducing the quantity and incre...,2024-08-12,225,Middle_child496,138,lhqgv0j,honest_jamal,"Indomie taste better, give more noodles and ar...",91,2024-08-12,False
1,Knorr has been reducing the quantity and incre...,2024-08-12,225,Middle_child496,138,lhqgu8p,Poodina,I dont understand why people aren't talking ab...,64,2024-08-12,False
2,Knorr has been reducing the quantity and incre...,2024-08-12,225,Middle_child496,138,lhqi6r7,Stock-Respond5598,Shrinkflation at its finest.,31,2024-08-12,False
3,Knorr has been reducing the quantity and incre...,2024-08-12,225,Middle_child496,138,lhqrh0r,kingshuk3,Bro discovered shrinkflation,20,2024-08-12,False
4,Knorr has been reducing the quantity and incre...,2024-08-12,225,Middle_child496,138,lhqsbra,Zacnocap,"Indomie are faaar better then these , indomie ...",15,2024-08-12,False
...,...,...,...,...,...,...,...,...,...,...,...
675,How people who love Indomie noodles look at pe...,2023-05-22,144,_NineZero_,74,jl95ypw,PrinceSam321,From where are you getting it ?,1,2023-05-23,False
676,How people who love Indomie noodles look at pe...,2023-05-22,144,_NineZero_,74,jl5lyvs,mkbilli,Mujha yad hai aik zamanay mein 100 USD (6000 p...,3,2023-05-22,False
677,How people who love Indomie noodles look at pe...,2023-05-22,144,_NineZero_,74,jl9rgz4,Ancient-Astronaut-98,Yess I imagine\n\nTry with a little sugar\n\nA...,1,2023-05-23,False
678,How people who love Indomie noodles look at pe...,2023-05-22,144,_NineZero_,74,jlahh6y,Connect-Effort5979,I am not in Pakistan rn.,2,2023-05-23,False


In [9]:
# Number of rows (one per comment) in the loaded frame.
df.shape[0]

680