In [1]:
import json
from datetime import datetime, timezone

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [7]:
# Function to make the API request
def _meets_min_score(submission, min_score):
    """Return True unless the submission has a numeric score below min_score."""
    score = submission.get('score') if isinstance(submission, dict) else None
    # Entries without a comparable score are kept: there is nothing to judge them by.
    if isinstance(score, bool) or not isinstance(score, (int, float)):
        return True
    return score >= min_score


def fetch_reddit_data(subreddit, min_score, after, before, size, sort_type, headers, timeout=30):
    """Query the PullPush submission-search endpoint and return the decoded JSON.

    Parameters
    ----------
    subreddit : str
        Subreddit name, sent as the ``subreddit`` query parameter.
    min_score : int or None
        Lowest score a submission may have and still be kept. The threshold
        is applied to the ``data`` list of the decoded response; pass
        ``None`` to disable the filter.
    after, before : int
        Bounds of the search window, sent as ``after`` / ``before``.
    size : int
        Sent as the ``size`` query parameter.
    sort_type : str
        Sent as the ``sort_type`` query parameter.
    headers : dict
        HTTP headers for the request.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    dict or None
        The decoded JSON payload, or ``None`` when the request fails or the
        body is not valid JSON (the error is printed).
    """
    url = "https://api.pullpush.io/reddit/search/submission/"
    # Let requests build the query string so every value is URL-encoded.
    params = {
        'subreddit': subreddit,
        'size': size,
        'after': after,
        'before': before,
        'sort_type': sort_type,
    }
    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
        payload = response.json()  # Parse JSON data
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on requests versions where the
        # decode error is not a RequestException subclass.
        print(f"Request failed: {e}")
        return None

    submissions = payload.get('data') if isinstance(payload, dict) else None
    if min_score is not None and isinstance(submissions, list):
        payload['data'] = [
            submission for submission in submissions
            if _meets_min_score(submission, min_score)
        ]
    return payload

In [8]:
# Function to process the API data
def process_reddit_data(data, category, subreddit):
    """Turn a submission-search payload into a list of flat post records.

    Only submissions that are self (text) posts and are not flagged
    ``over_18`` are kept. The title of each kept submission is printed.

    Parameters
    ----------
    data : dict or None
        Decoded API response; submissions are read from its ``'data'`` key.
        ``None``, a non-dict, or a missing/null ``'data'`` yields ``[]``.
    category : str
        Label copied into every record's ``'category'`` field.
    subreddit : str
        Name copied into every record's ``'subreddit'`` field.

    Returns
    -------
    list of dict
        One record per kept submission with the keys ``title``, ``content``,
        ``time``, ``category``, ``subreddit``, ``score`` and ``upvote_ratio``.

    Raises
    ------
    KeyError
        If a kept submission lacks ``title``, ``selftext`` or ``score``.
    """
    submissions = (data.get('data') if isinstance(data, dict) else None) or []
    posts = []
    for submission in submissions:
        if submission.get('over_18', False) or not submission.get('is_self', False):
            continue
        post = {
            'title': submission['title'],
            'content': submission['selftext'],
            # Prefer an explicit 'time' field; otherwise fall back to the
            # 'created_utc' epoch value so the column is not left empty.
            'time': submission.get('time', submission.get('created_utc', '')),
            'category': category,
            'subreddit': subreddit,
            'score': submission['score'],
            'upvote_ratio': submission.get('upvote_ratio', '')
        }
        print(f"{submission['title']}")
        posts.append(post)
    return posts

# Main configuration
# -- what to collect --
subreddit = "subsimulatorgpt2"
category = "imposter_content"  # label written into every exported record

# -- query settings handed to fetch_reddit_data --
size = 100
sort_type = "score"
min_score = 1

# -- HTTP headers sent with the request --
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
}

In [None]:
# Date range configuration
# Both bounds are pinned to UTC. A naive datetime's .timestamp() is interpreted
# in the machine's local timezone, which would move the query window depending
# on where the notebook is run.
after = int(datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc).timestamp())
before = int(datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc).timestamp())

# Fetch and process data
data = fetch_reddit_data(subreddit, min_score, after, before, size, sort_type, headers)
if data:  # None (failed request) or an empty payload: nothing to save
    processed_data = process_reddit_data(data, category, subreddit)

    # Save to DataFrame and CSV
    df = pd.DataFrame(processed_data)
    # 1-based running number as the first column; the pandas index itself is not written.
    df.insert(0, 'index', range(1, len(df) + 1))
    df.to_csv(f'./imposter_content_{subreddit}_6.csv', index=False)