In [None]:
# Imports
import pandas as pd
import numpy as np
import sys
sys.path.append('..')

from load_data import load_dataset
import google.generativeai as genai
from insights.prompt_templates import *
import json
from datetime import datetime
import os
from dotenv import load_dotenv
load_dotenv()

In [None]:
# Configure Gemini
# The key is read from the environment, which load_dotenv() populated in the imports cell.
if not os.environ.get("GEMINI_API_KEY"):
    # Surface a missing key here rather than as an opaque error on the first API call.
    # This is only a warning: the SDK may still obtain credentials from another source.
    print("Warning: GEMINI_API_KEY is not set; Gemini API calls are likely to fail")
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
model = genai.GenerativeModel('gemini-pro')

# NOTE(review): this only confirms the client objects were created; presumably no
# request is sent (and the key is not validated) until generate_content is called.
print("Gemini model loaded successfully")

In [None]:
# Load Reddit Data
reddit_df = load_dataset('reddit')

# Report how many rows came back and which columns are available.
post_total = len(reddit_df)
column_names = list(reddit_df.columns)
print(f"Loaded {post_total} Reddit posts")
print(f"Columns: {column_names}")

# Show the first row's key fields as a quick look at the text content.
preview_columns = ['title', 'text', 'subreddit', 'score']
first_post = reddit_df[preview_columns].iloc[0]
print("\nSample post:")
print(first_post)

In [None]:
def prepare_reddit_sample(df, max_posts=20, max_length=200, max_title_length=100, random_state=None):
    """Prepare a sample of Reddit posts for LLM analysis.

    Randomly samples up to ``max_posts`` rows and renders each one as a short
    text record terminated by a ``---`` separator line.

    Args:
        df: DataFrame with 'title', 'text' and 'subreddit' columns.
        max_posts: Upper bound on the number of posts included in the sample.
        max_length: Maximum number of characters kept from each post's text.
        max_title_length: Maximum number of characters kept from each title.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            same posts are drawn on every run. ``None`` keeps sampling random.

    Returns:
        A single string with one record per sampled post, joined by newlines.
        An empty string when there is nothing to sample.
    """
    # Sample posts to stay within API limits
    n_posts = min(len(df), max_posts)
    if n_posts == 0:
        return ""
    sample_df = df.sample(n=n_posts, random_state=random_state)

    def _clip(value, limit):
        # Missing values (NaN/None) become an empty string instead of raising
        # or leaking the literal "nan" into the prompt.
        return str(value)[:limit] if pd.notna(value) else ""

    # Truncate long titles/posts to avoid token limits
    posts_text = [
        f"Subreddit: {subreddit}\n"
        f"Title: {_clip(title, max_title_length)}\n"
        f"Text: {_clip(text, max_length)}\n---"
        for title, text, subreddit in zip(
            sample_df['title'], sample_df['text'], sample_df['subreddit']
        )
    ]

    return "\n".join(posts_text)

# Prepare sample
reddit_sample = prepare_reddit_sample(reddit_df)

# Each post record ends with a line that is exactly '---'. Count those lines
# instead of every '---' substring, so dashes inside a post body (for example a
# markdown table row) do not inflate the number.
# NOTE(review): a post whose text contains a line that is exactly '---' is still counted.
reddit_sample_size = sum(line == '---' for line in reddit_sample.split('\n'))
print(f"Prepared sample with {reddit_sample_size} posts")
print(f"Sample length: {len(reddit_sample)} characters")

In [None]:
def analyze_reddit_sentiment(posts_text):
    """Analyze Reddit posts using Gemini.

    Fills REDDIT_SENTIMENT_PROMPT with ``posts_text``, sends it to the
    module-level ``model`` and returns the text of the response.

    Args:
        posts_text: The prepared post records to insert into the prompt.

    Returns:
        The response text, or ``None`` when there is nothing to analyze or
        when building the prompt / calling the model fails. Failures are
        printed rather than raised.
    """
    # Nothing to analyze: skip the API call rather than send an empty prompt body.
    if posts_text is None or (isinstance(posts_text, str) and not posts_text.strip()):
        print("No Reddit posts to analyze; skipping Gemini call")
        return None

    try:
        prompt = REDDIT_SENTIMENT_PROMPT.format(posts_text=posts_text)
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Deliberately best-effort: any failure is reported and turned into None
        # so the calling cell can print "Analysis failed" and the notebook keeps running.
        print(f"Error calling Gemini API: {e}")
        return None

# Run analysis
print("Analyzing Reddit posts with Gemini...")
sentiment_analysis = analyze_reddit_sentiment(reddit_sample)

# A falsy result (None or empty text) means there is nothing to display.
if not sentiment_analysis:
    print("Analysis failed")
else:
    print("Analysis Results:")
    print(sentiment_analysis)

In [None]:
if sentiment_analysis:
    # Save analysis to outputs
    os.makedirs('../outputs/reports', exist_ok=True)

    # Read the clock once so the report timestamp and the filename date always agree,
    # even when the cell runs across midnight.
    generated_at = datetime.now()

    report = {
        'timestamp': generated_at.isoformat(),
        # Each post record ends with a line that is exactly '---'. Counting those lines
        # (not every '---' substring) keeps dashes inside post bodies from inflating
        # the size. NOTE(review): a post containing a bare '---' line is still counted.
        'sample_size': sum(line == '---' for line in reddit_sample.split('\n')),
        'analysis': sentiment_analysis
    }

    # The name carries only the date, so a second run on the same day overwrites
    # the earlier report.
    filename = f"../outputs/reports/reddit_sentiment_{generated_at.strftime('%Y%m%d')}.json"

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)

    print(f"Analysis saved to: {filename}")