In [1]:
import json
import praw
from datetime import datetime
from redditClient import redditClient
from youtubeClient import youtubeClient
from googleapiclient.errors import HttpError

In [2]:
# Initialise clients
reddit_client = redditClient()
youtube_client = youtubeClient()

In [3]:
# Define the keywords and time period (2024)
keywords = ['AI in games', 'mental health in gaming', 'cyberbullying in gaming', 'stress relief in gaming']
year_filter = 2024

In [4]:
reddit_data = []
youtube_data = []

In [5]:
# --- Reddit Data Collection ---
ID = 0  
for keyword in keywords:
    for submission in reddit_client.subreddit('all').search(keyword, time_filter='year', limit=1000):
        # Extract data from Reddit submissions and ensure it's from 2024
        if datetime.utcfromtimestamp(submission.created_utc).year == year_filter:
            subData = {
                'title': submission.title,
                'ID': ID,
                'date': datetime.fromtimestamp(submission.created_utc).strftime('%Y-%m-%d %H:%M:%S'),
                'keyword': keyword,
                'score': submission.score,
                'comments': [comment.body for comment in submission.comments[:50] if isinstance(comment, praw.models.Comment)],
            }
            ID += 1
            reddit_data.append(subData)

In [6]:
# Print the number of posts
print(f"Total number of posts: {len(reddit_data)}")

# Calculate the total number of comments
print(f"Total number of comments: {sum(len(submission['comments']) for submission in reddit_data)}")

Total number of posts: 833
Total number of comments: 25333


In [7]:
# --- YouTube Data Collection ---
ID = 0  
for keyword in keywords:
    videos = youtube_client.search().list(q=keyword, part='snippet', type='video', maxResults=1000).execute()

    for video in videos['items']:
        video_id = video['id']['videoId']
        title = video['snippet']['title']
        published_at = video['snippet']['publishedAt'][:10]  # Get date in YYYY-MM-DD format
        year = int(published_at.split('-')[0])
        
        # Filter by the current year (2024)
        if year == year_filter:
            try:
                comments = youtube_client.commentThreads().list(part='snippet', videoId=video_id, maxResults=100).execute()
                video_data = {
                    'title': title,
                    'ID': ID,
                    'date': published_at,
                    'keyword': keyword,
                    'comments': [comment['snippet']['topLevelComment']['snippet']['textOriginal'] for comment in comments['items']],
                }
            except HttpError as e:
                # Handle the case where comments are disabled
                print(f"Comments are disabled for video {video_id}. Skipping...")
                continue
            ID += 1
            youtube_data.append(video_data)

Comments are disabled for video 0dEm2lF2dH4. Skipping...


In [8]:
# Print the number of posts
print(f"Total number of posts: {len(youtube_data)}")

# Calculate the total number of comments
print(f"Total number of comments: {sum(len(videos['comments']) for videos in youtube_data)}")

Total number of posts: 70
Total number of comments: 3064


In [9]:
# Save Reddit data to JSON
with open('redditGamingData.json', 'w') as jsonFile:
    json.dump(reddit_data, jsonFile, indent=4)

# Save YouTube data to JSON
with open('youtubeGamingData.json', 'w') as jsonFile:
    json.dump(youtube_data, jsonFile, indent=4)

print(f"Reddit and YouTube data saved!!!")

Reddit and YouTube data saved!!!
