In [None]:
!pip install praw

In [9]:
import csv
import functools
import os
import re

import matplotlib.pyplot as plt
import nltk
import praw
from nltk.corpus import stopwords
from textblob import TextBlob
from wordcloud import WordCloud

In [4]:
@functools.lru_cache(maxsize=None)
def _englishStopWords():
    """Return NLTK's English stop words as a frozenset.

    The result is cached so the word list is fetched and converted to a set
    only once per kernel session instead of once per post.
    """
    return frozenset(stopwords.words('english'))


def preprocessPosts(original_post):
    """Clean a raw post so it is ready for sentiment analysis.

    Steps, in order:
      1. drop whitespace-separated tokens that are English stop words
         (compared case-insensitively),
      2. remove anything that looks like a URL (``http`` followed by
         non-space characters),
      3. remove every character that is not a word character, whitespace,
         or one of ``# @ / : % . , _ -`` (this strips emojis and most symbols),
      4. collapse the whitespace gaps left behind by steps 2 and 3.

    Args:
        original_post: The raw post text.

    Returns:
        The cleaned text with tokens separated by single spaces and no
        leading or trailing whitespace. An empty input yields ``""``.
    """
    stop_words = _englishStopWords()

    # Remove the stop words from the post
    post = " ".join(word for word in original_post.split() if word.lower() not in stop_words)

    # Remove URLs
    post = re.sub(r"http\S+", "", post)

    # Remove emojis and other unwanted characters
    post = re.sub(r'[^\w\s#@/:%.,_-]', '', post)

    # Deleting URLs/symbols leaves double or trailing spaces; normalise them
    return " ".join(post.split())

In [5]:
def findPosts(topic, limit=100, subreddit="all"):
    """Search Reddit for submissions matching ``topic`` and return their text.

    Credentials are read from the ``REDDIT_CLIENT_ID``,
    ``REDDIT_CLIENT_SECRET`` and ``REDDIT_USER_AGENT`` environment variables
    so that no secrets are stored in the notebook.

    Args:
        topic: Search query passed to the subreddit search.
        limit: Maximum number of submissions to request (default 100).
        subreddit: Name of the subreddit to search (default ``"all"``).

    Returns:
        A list of strings, one per submission, each being the submission
        title, a space, and the submission self-text.

    Raises:
        RuntimeError: If any of the three environment variables is unset
            or blank.
    """
    # Read the Reddit API credentials from the environment
    required = ("REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET", "REDDIT_USER_AGENT")
    credentials = {name: os.environ.get(name, "").strip() for name in required}
    missing = [name for name in required if not credentials[name]]
    if missing:
        # Fail early with an actionable message instead of an opaque API error
        raise RuntimeError(
            "Missing Reddit API credentials; set the environment variable(s): "
            + ", ".join(missing)
        )

    # Authenticate with the Reddit API using PRAW
    reddit = praw.Reddit(
        client_id=credentials["REDDIT_CLIENT_ID"],
        client_secret=credentials["REDDIT_CLIENT_SECRET"],
        user_agent=credentials["REDDIT_USER_AGENT"],
    )

    # Grab up to `limit` posts with the requested topic
    posts = [
        submission.title + " " + submission.selftext
        for submission in reddit.subreddit(subreddit).search(topic, limit=limit)
    ]
    print("Collected %d posts about %s." % (len(posts), topic))
    return posts

In [16]:
def sentimentAnalysis(text):
    """Return the polarity component of TextBlob's sentiment for ``text``."""
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    return polarity

def plotSentimentScores(sentiments):
    """Scatter-plot each sentiment score against its position in the list.

    Points with a score of zero or above are drawn in green, negative scores
    in red, and a dashed grey line marks the zero level.

    Args:
        sentiments: Sequence of numeric sentiment scores, one per post.
    """
    point_colors = ['green' if s >= 0 else 'red' for s in sentiments]
    plt.scatter(range(len(sentiments)), sentiments, c=point_colors, alpha=0.5)
    # x is the post's position in the list and y is its score, so label the
    # axes accordingly (this is a per-post scatter, not a frequency histogram)
    plt.title('Sentiment Score per Post')
    plt.xlabel('Post Index')
    plt.ylabel('Sentiment Score')
    plt.axhline(0, color='grey', linestyle='--', linewidth=0.7)
    plt.grid(True)
    plt.show()

def labelSentiment(score):
    """Map a numeric sentiment score to a text label.

    Args:
        score: Numeric sentiment score.

    Returns:
        ``'Positive'`` for scores above zero, ``'Negative'`` for scores below
        zero, and ``'Neutral'`` for a score of exactly zero, so that posts
        with no detected sentiment are not counted as positive.
    """
    if score > 0:
        return 'Positive'
    if score < 0:
        return 'Negative'
    return 'Neutral'

def plotWordCloud(text):
    """Render a word cloud of ``text`` on an 800x400 white canvas.

    Args:
        text: The text whose words should be drawn. If it is empty or
            contains only whitespace, a message is printed and nothing is
            plotted, because a word cloud cannot be built from zero words.
    """
    if not text or not text.strip():
        print("No text available; skipping word cloud.")
        return

    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    # The axes carry no information for an image, so hide them
    plt.axis('off')
    plt.show()

In [None]:
if __name__ == "__main__":
    # Download the stopwords if necessary
    nltk.download('stopwords')

    # Search for posts about the chosen topic
    posts = findPosts("Computational Biology")

    print("Performing Sentiment Analysis on the posts.")
    # Clean each post, then score the cleaned text with the sentimentAnalysis
    # function defined earlier in the notebook (no local redefinition needed)
    clean_posts = [preprocessPosts(post) for post in posts]
    sentiments = [sentimentAnalysis(clean_post) for clean_post in clean_posts]

    # Aggregate text for word cloud (joined once rather than grown with +=)
    all_text = " ".join(clean_posts)

    # Write the cleaned post, sentiment score, and label to a CSV file
    with open('posts_sentiment.csv', mode='w', newline='', encoding="utf-8") as file:
        writer = csv.writer(file)
        # Every data row has three fields, so the header names all three
        writer.writerow(['Post', 'Sentiment Score', 'Label'])
        for clean_post, sentiment in zip(clean_posts, sentiments):
            writer.writerow([clean_post, sentiment, labelSentiment(sentiment)])

    print("CSV file with posts and sentiment scores saved.")

In [None]:
# Preview the first few collected posts instead of dumping the whole list
posts[:5]

In [None]:
# Scatter-plot the per-post sentiment scores collected in the analysis cell above
plotSentimentScores(sentiments)

In [None]:
# Draw a word cloud from the concatenated cleaned text of all posts
plotWordCloud(all_text)