In [26]:
import praw
import pandas as pd
from datetime import datetime

# Reddit API credentials (add your own)
reddit = praw.Reddit(client_id='',
                     client_secret='',
                     user_agent='')

def fetch_reddit_data(query, time_filter):
    posts = []
    
    # Search for posts with the query in 'all' subreddits using the provided time_filter
    for submission in reddit.subreddit('all').search(query, sort='new', time_filter=time_filter):
        posts.append({
            'title': submission.title,
            'score': submission.score,
            'id': submission.id,
            'url': submission.url,
            'num_comments': submission.num_comments,
            'created': datetime.fromtimestamp(submission.created_utc),
            'body': submission.selftext
        })
        
        # Fetch comments for each post
        submission.comments.replace_more(limit=0)
        for comment in submission.comments.list():
            comment_time = datetime.fromtimestamp(comment.created_utc)
            posts.append({
                'title': f"Comment on: {submission.title}",
                'score': comment.score,
                'id': comment.id,
                'url': submission.url,
                'num_comments': '',
                'created': comment_time,
                'body': comment.body
            })
    
    return pd.DataFrame(posts)

query = 'Trump'
time_filter = 'year'

df = fetch_reddit_data(query, time_filter)

df

Unnamed: 0,title,score,id,url,num_comments,created,body
0,"Americans of Reddit, what are the reasons your...",2,1faa08i,https://www.reddit.com/r/AskReddit/comments/1f...,4,2024-09-06 11:16:33,
1,"Comment on: Americans of Reddit, what are the ...",1,llrjdzu,https://www.reddit.com/r/AskReddit/comments/1f...,,2024-09-06 11:19:05,Not an American but why would it matter? The r...
2,"Comment on: Americans of Reddit, what are the ...",1,llrjcxv,https://www.reddit.com/r/AskReddit/comments/1f...,,2024-09-06 11:18:45,"You know how they say, ""if it bleeds it leads?..."
3,"Comment on: Americans of Reddit, what are the ...",1,llrk0up,https://www.reddit.com/r/AskReddit/comments/1f...,,2024-09-06 11:26:35,"As shitty as Trump is, he gets ratings. I wis..."
4,"Comment on: Americans of Reddit, what are the ...",1,llrjwq2,https://www.reddit.com/r/AskReddit/comments/1f...,,2024-09-06 11:25:14,"He literally said this. ""I could stand in the ..."
...,...,...,...,...,...,...,...
296,Comment on: Trump Lawyer Corrects Self After S...,1,llrd2qg,https://www.newsweek.com/trump-lawyer-jack-smi...,,2024-09-06 10:06:18,TRUMP LAWYER SAYS SUPREME COURT JUSTICE DIRECT...
297,Trump Isn't Anti-War. It’s Time to Debunk This...,0,1fa8ajw,https://www.youtube.com/watch?v=RNnjqsAbYoU,0,2024-09-06 09:12:15,
298,Trump’s New Efficiency Plan: A Commission Led ...,1,1fa89iu,https://www.reddit.com/r/millenials/comments/1...,1,2024-09-06 09:10:23,"Trump's proposal to have Elon Musk lead a ""gov..."
299,Comment on: Trump’s New Efficiency Plan: A Com...,1,llrdqwg,https://www.reddit.com/r/millenials/comments/1...,,2024-09-06 10:13:42,"Not rewrite, Trump wants to replace the gov wi..."


In [27]:
from nltk.stem import PorterStemmer
import string

df = df.dropna(subset=['body'])
df['body'] = df['body'].str.lower()

with open('stopwords-en.txt', 'r') as f:
    stop_words = set(f.read().split())

def preprocess_text(text):
    withoutpunct = str(text).translate(str.maketrans('', '', string.punctuation))
    words = withoutpunct.split()
    filtered_words = [word for word in words if word not in stop_words]
    return ' '.join(filtered_words)

stemmer = PorterStemmer()

def apply_stemming(text):
    words = text.split()
    stemmed_words = [stemmer.stem(word) for word in words]
    return ' '.join(stemmed_words)


body_cleaned = df['body'].apply(preprocess_text)
body_cleaned = body_cleaned.apply(lambda x: apply_stemming(x))
body_cleaned = body_cleaned[body_cleaned != '']

body_cleaned

1      american matter right american polit religi cu...
2      know say bleed lead trump train wreck keep air...
3      shitti trump get rate wish media portray accur...
4      liter said stand middl fifth avenu shoot someb...
10     httpswwwyoutubecomwatchvvw4qxft8vb8httpswwwyou...
                             ...                        
293    submiss sourc like soft paywal articl paywal r...
295    advoc violenc brigad bigotri troll dick peopl ...
296    trump lawyer say suprem court justic direct de...
298    trump propos elon musk lead govern effici comm...
299                  rewrit trump want replac gov ye men
Name: body, Length: 222, dtype: object