# r/NTU Subreddit Scraper

In [24]:
import pandas as pd
import praw
import os
from datetime import datetime, timezone
from dotenv import load_dotenv

In [19]:
load_dotenv()

True

In [20]:
reddit_read_only = praw.Reddit(client_id = os.getenv('REDDIT_CLIENT_ID'),
                               client_secret = os.getenv('REDDIT_CLIENT_SECRET'),
                               user_agent = os.getenv('REDDIT_USER_AGENT'))

In [22]:
subreddit = reddit_read_only.subreddit("NTU")
 
# Display the name of the Subreddit
print("Display Name:", subreddit.display_name)
 
# Display the title of the Subreddit
print("Title:", subreddit.title)
 
# Display the description of the Subreddit
print("Description:", subreddit.description)

Display Name: NTU
Title: Nanyang Technological University Singapore
Description: **Nanyang Technological University, Singapore** 

The official subreddit for **NTU**

* Student? 
* Professor?
* Procrastinating PhD?
* RA?
* Camp Sec?
* Cleaning Auntie? 

Everyone is welcome here!
___________________________________

**Related Subreddits:**

[/r/Singapore](http://www.reddit.com/r/singapore/)
[/r/NUS](http://www.reddit.com/r/NUS)



In [32]:
target_year = 2024
target_month = 7

In [34]:
posts = subreddit.top(time_filter="all", limit=1000)
data = []

for post in posts:
    post_timestamp = post.created_utc
    post_datetime = datetime.fromtimestamp(post_timestamp, tz=timezone.utc)
    if post_datetime.year == target_year and post_datetime.month == target_month:
        data.append({
            'Type': 'Post',
            'Post_id': post.id,
            'Title': post.title,
            'Author': post.author.name if post.author else 'Unknown',
            'Timestamp': post.created_utc,
            'Text': post.selftext,
            'Score': post.score,
            'Total_comments': post.num_comments,
            'Post_URL': post.url
        })

        if post.num_comments > 0:
            post.comments.replace_more(limit=None)
            for comment in post.comments.list():
                data.append({
                    'Type': 'Comment',
                    'Post_id': post.id,
                    'Title': post.title,
                    'Author': comment.author.name if comment.author else 'Unknown',
                    'Timestamp': pd.to_datetime(comment.created_utc, unit='s'),
                    'Text': comment.body,
                    'Score': comment.score,
                    'Total_comments': 0, #Comments don't have this attribute
                    'Post_URL': None  #Comments don't have this attribute
                })


jul_df = pd.DataFrame(data)

In [35]:
jul_df.info()
jul_df.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 277 entries, 0 to 276
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Type            277 non-null    object
 1   Post_id         277 non-null    object
 2   Title           277 non-null    object
 3   Author          277 non-null    object
 4   Timestamp       277 non-null    object
 5   Text            277 non-null    object
 6   Score           277 non-null    int64 
 7   Total_comments  277 non-null    int64 
 8   Post_URL        11 non-null     object
dtypes: int64(2), object(7)
memory usage: 19.6+ KB


Unnamed: 0,Type,Post_id,Title,Author,Timestamp,Text,Score,Total_comments,Post_URL
0,Post,1dvuzoj,BFFR,suspenz,1720176039.0,I cannot believe people like this get accepted...,364,35,https://i.redd.it/sumhycyiioad1.jpeg
1,Comment,1dvuzoj,BFFR,Relative-Parfait-385,2024-07-05 10:50:42,Bru speed run icebreaker,212,0,
2,Comment,1dvuzoj,BFFR,thesgtrends,2024-07-05 12:35:29,https://preview.redd.it/19x0mln03pad1.jpeg?wid...,124,0,
3,Comment,1dvuzoj,BFFR,Competitive_Pair7874,2024-07-05 11:16:03,I left the grp. It has no value add,88,0,
4,Comment,1dvuzoj,BFFR,Sharp_Appearance7212,2024-07-05 11:05:01,school haven’t start yet wtf,76,0,
5,Comment,1dvuzoj,BFFR,vajraadhvan,2024-07-05 11:33:44,Garry Tan biting the curb 4k,77,0,
6,Comment,1dvuzoj,BFFR,ramenrami22,2024-07-05 11:34:17,Rookie mistake. That one go ntu chatbot ask. T...,65,0,
7,Comment,1dvuzoj,BFFR,TOFU-area,2024-07-05 11:46:06,iq != eq,54,0,
8,Comment,1dvuzoj,BFFR,stateofbrave,2024-07-06 01:40:47,How is someone 20+ years old and still a dumb ...,33,0,
9,Comment,1dvuzoj,BFFR,Heblehblehbleh,2024-07-05 12:03:09,NTU does not accept people based on character.,53,0,
