# In [None]:
"""
GenAI-Driven Product Feedback Miner
Author: [Your Name / Dr. Neha Sardana]

Pipeline:
 - Load 80K+ app-store reviews
 - Clean & preprocess text
 - Apply LLM-based embeddings (e.g. Hugging Face / OpenAI)
 - Cluster into topics (BERTopic)
 - Extract sentiment trends
 - Export summaries for Power BI dashboards
"""

import pandas as pd
import matplotlib.pyplot as plt
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# ---------------- Data Loading ----------------
def load_data(file="data/reviews.csv", text_col="review"):
    """Read the reviews CSV and discard rows that have no review text.

    Args:
        file: Path of the CSV file to read.
        text_col: Name of the column holding the review text; rows where
            this column is missing (NaN) are dropped.

    Returns:
        DataFrame of the remaining rows. The original row index is kept,
        so it may contain gaps where rows were dropped.
    """
    return pd.read_csv(file).dropna(subset=[text_col])

# ---------------- Topic Modeling ----------------
def topic_modeling(reviews):
    """Fit a BERTopic model on the reviews using MiniLM sentence embeddings.

    Args:
        reviews: The review texts to cluster (the caller in this file passes
            a list of strings).

    Returns:
        A 4-tuple ``(topic_model, topics, probs, topic_info)``: the fitted
        BERTopic model, the two values returned by ``fit_transform``, and the
        result of ``get_topic_info()``.
    """
    # Embeddings come from the "all-MiniLM-L6-v2" SentenceTransformer
    encoder = SentenceTransformer("all-MiniLM-L6-v2")
    model = BERTopic(embedding_model=encoder, verbose=True)

    assignments, probabilities = model.fit_transform(reviews)
    # The summary table is only requested once the model has been fitted
    summary = model.get_topic_info()
    return model, assignments, probabilities, summary

# ---------------- Sentiment Analysis ----------------
def sentiment_analysis(reviews):
    """Score each review with a pretrained sentiment-analysis pipeline.

    Uses the ``distilbert-base-uncased-finetuned-sst-2-english`` model via
    the ``transformers`` ``pipeline`` helper.

    Args:
        reviews: The review texts to score (the caller in this file passes a
            list of strings).

    Returns:
        DataFrame built from the pipeline output, one row per review. Code
        in this file reads its ``label`` and ``score`` columns. For an empty
        list or tuple an empty DataFrame with exactly those two columns is
        returned, so downstream column lookups still work.
    """
    # Nothing to score: skip loading the model and still hand back the
    # columns that downstream code indexes.
    if isinstance(reviews, (list, tuple)) and not reviews:
        return pd.DataFrame(columns=["label", "score"])

    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )
    # truncation=True asks the tokenizer to cut reviews that exceed the
    # model's input limit instead of failing on them.
    sentiments = sentiment_pipeline(reviews, truncation=True)
    return pd.DataFrame(sentiments)

# ---------------- Export Results ----------------
def export_reports(df, topics, topic_info, sentiment_df, output_dir="reports"):
    """Attach topics and sentiment to the reviews and write CSV summaries.

    Files written into ``output_dir``:
      - ``topics_summary.csv``: ``topic_info`` as given.
      - ``sentiment_summary.csv``: number of reviews per sentiment label.
      - ``feedback_trends.csv``: reviews per month and sentiment label
        (only when ``df`` has a ``date`` column).

    Args:
        df: Reviews DataFrame. It is modified in place: ``topic``,
            ``sentiment`` and ``sentiment_score`` columns are added, and an
            existing ``date`` column is converted with ``pd.to_datetime``.
        topics: One topic id per row of ``df``, in row order.
        topic_info: DataFrame written unchanged to ``topics_summary.csv``.
        sentiment_df: DataFrame with ``label`` and ``score`` columns whose
            rows correspond, by position, to the leading rows of ``df``. It
            may be shorter than ``df``; rows without a score get NaN.
        output_dir: Directory that receives the CSV files; it is created
            if it does not exist. Defaults to ``"reports"``.

    Returns:
        The same ``df`` object, with the added columns.
    """
    from pathlib import Path

    out_dir = Path(output_dir)
    # to_csv does not create missing directories, so make sure it exists.
    out_dir.mkdir(parents=True, exist_ok=True)

    # Add topics + sentiment back to dataframe
    df["topic"] = topics

    # Align sentiment by POSITION, not by index label: df may carry a gapped
    # index (e.g. after dropna) while sentiment_df is numbered 0..n-1, and a
    # label-based assignment would attach scores to the wrong reviews.
    positions = range(len(df))
    for target, source in (("sentiment", "label"), ("sentiment_score", "score")):
        aligned = sentiment_df[source].reset_index(drop=True).reindex(positions)
        df[target] = aligned.to_numpy()

    # Topic summary
    topic_info.to_csv(out_dir / "topics_summary.csv", index=False)

    # Sentiment distribution (rows without a sentiment label are not counted)
    sentiment_summary = df.groupby("sentiment").size().reset_index(name="count")
    sentiment_summary.to_csv(out_dir / "sentiment_summary.csv", index=False)

    # Trend export (example: by month)
    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"])
        month = df["date"].dt.to_period("M")
        trend = df.groupby([month, "sentiment"]).size().reset_index(name="count")
        trend.to_csv(out_dir / "feedback_trends.csv", index=False)

    return df

# ---------------- Main ----------------
def main():
    """Run the feedback-mining pipeline end to end.

    Loads ``data/reviews.csv``, fits topics on every review, scores sentiment
    for the first 5000 reviews only (demo limit), exports the CSV reports and
    shows the two topic visualizations.
    """
    reviews_df = load_data("data/reviews.csv", text_col="review")
    texts = reviews_df["review"].tolist()

    print("Running Topic Modeling...")
    model, topic_ids, _probs, info = topic_modeling(texts)

    print("Running Sentiment Analysis...")
    # batch (limit for demo): only the leading 5000 reviews are scored
    scored = sentiment_analysis(texts[:5000])

    print("Exporting reports...")
    export_reports(reviews_df, topic_ids, info, scored)

    # Quick visualization of top topics
    bar_chart = model.visualize_barchart(top_n_topics=10)
    bar_chart.show()
    topic_map = model.visualize_topics()
    topic_map.show()

    print("✅ Pipeline complete! Reports saved in /reports/")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
