In [23]:
import os
import praw
import pandas as pd
from dotenv import load_dotenv
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Populate the process environment (via python-dotenv) before anything calls
# os.getenv(); fetch_reddit_sentiment below reads its Reddit credentials that way.
load_dotenv()

def fetch_reddit_sentiment(subreddits=("renewableenergy", "energy", "Texas"),
                           keywords=("Texas wind", "Texas solar"),
                           limit=100):
    """Search subreddits for keyword matches and score each post with VADER.

    Parameters
    ----------
    subreddits : str or iterable of str
        Subreddit names to search. A single name may be given as a plain string.
    keywords : str or iterable of str
        Search terms. They are combined into one query with ``OR``; each term is
        parenthesized so a multi-word keyword stays grouped as one alternative.
    limit : int
        Value forwarded as ``limit`` to each per-subreddit search call.

    Returns
    -------
    pandas.DataFrame
        One row per returned submission with the columns ``subreddit``, ``date``,
        ``text``, ``sentiment_score``, ``title`` and ``url``. If nothing matched
        or any error occurred, an empty frame with those same columns is
        returned, so downstream column access keeps working.

    Notes
    -----
    Credentials are read from the environment variables ``reddit_client_id``,
    ``reddit_client_secret``, ``reddit_user_agent``, ``username`` and
    ``password``. Errors are reported with ``print`` and never raised
    (best-effort behaviour, kept from the original implementation).
    """
    columns = ['subreddit', 'date', 'text', 'sentiment_score', 'title', 'url']

    try:
        # A bare string would otherwise be iterated character by character.
        if isinstance(subreddits, str):
            subreddits = [subreddits]
        if isinstance(keywords, str):
            keywords = [keywords]

        # NOTE(review): the generic names "username"/"password" may collide with
        # OS-level environment variables (e.g. USERNAME on Windows) — consider
        # reddit_-prefixed names; left unchanged here to stay compatible with
        # existing .env files.
        required = {
            name: os.getenv(name)
            for name in ("reddit_client_id", "reddit_client_secret",
                         "reddit_user_agent", "username")
        }
        missing = [name for name, value in required.items() if value is None]
        if missing:
            # Fail with a readable message instead of an AttributeError from
            # calling .strip() on None further down.
            raise ValueError(
                "missing environment variable(s): " + ", ".join(missing)
            )

        # Authenticate with Reddit
        reddit = praw.Reddit(
            client_id=required["reddit_client_id"],
            client_secret=required["reddit_client_secret"],
            user_agent=required["reddit_user_agent"],
            # Drop stray quotes, commas and spaces left over from the .env value.
            username=required["username"].strip('",\' '),
            password=os.getenv("password"),
        )

        # Sentiment analyzer
        analyzer = SentimentIntensityAnalyzer()

        # Build the query once; it does not depend on the subreddit.
        query = " OR ".join(f"({keyword})" for keyword in keywords)

        rows = []
        # Loop through subreddits and fetch matching posts
        for subreddit in subreddits:
            for submission in reddit.subreddit(subreddit).search(query, limit=limit):
                combined_text = (submission.title or "") + " " + (submission.selftext or "")
                sentiment = analyzer.polarity_scores(combined_text)
                rows.append({
                    'subreddit': subreddit,
                    # created_utc is converted as epoch seconds; the resulting
                    # timestamp is timezone-naive.
                    'date': pd.to_datetime(submission.created_utc, unit='s'),
                    'text': combined_text,
                    'sentiment_score': sentiment['compound'],
                    'title': submission.title,
                    'url': submission.url
                })

        return pd.DataFrame(rows, columns=columns)

    except Exception as e:
        # Best-effort: report the problem and hand back an empty, correctly
        # shaped frame rather than propagating the error into the notebook.
        print(f"Error fetching Reddit data: {e}")
        return pd.DataFrame(columns=columns)


In [24]:
# Run the fetch with its default subreddits, keywords and limit.
# NOTE(review): this queries Reddit search at run time, so the rows will
# presumably differ between runs — cache the result if reproducibility matters.
df = fetch_reddit_sentiment()
# Preview the first rows (bare last expression -> rich display).
df.head()

Unnamed: 0,subreddit,date,text,sentiment_score,title,url
0,renewableenergy,2025-01-13 15:02:26,"Texas leads U.S. in wind, solar, No. 2 in batt...",0.2732,"Texas leads U.S. in wind, solar, No. 2 in batt...",https://www.chron.com/news/houston-texas/artic...
1,renewableenergy,2025-05-02 04:12:44,Texas House passes bill to require recycling o...,0.0,Texas House passes bill to require recycling o...,https://pv-magazine-usa.com/2025/05/01/texas-h...
2,renewableenergy,2025-04-17 15:56:10,"Wind, solar, and battery storage projects are ...",0.0,"Wind, solar, and battery storage projects are ...",https://yaleclimateconnections.org/2025/03/cle...
3,renewableenergy,2025-03-10 20:17:45,"Texas broke its solar, wind, and battery recor...",-0.4215,"Texas broke its solar, wind, and battery recor...",https://www.canarymedia.com/articles/clean-ene...
4,renewableenergy,2022-03-22 20:14:11,Texas has enough solar and wind planned to per...,0.0,Texas has enough solar and wind planned to per...,https://pv-magazine-usa.com/2022/03/22/solar-a...


In [25]:
# Take a copy of the fetched frame — presumably so later edits leave `df` untouched.
df_copy = df.copy()
# Print column dtypes, non-null counts and memory usage.
df_copy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   subreddit        300 non-null    object        
 1   date             300 non-null    datetime64[ns]
 2   text             300 non-null    object        
 3   sentiment_score  300 non-null    float64       
 4   title            300 non-null    object        
 5   url              300 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(4)
memory usage: 14.2+ KB
