### **Setup environment & Gemini API**

In [None]:
!pip install google-generativeai pandas matplotlib wordcloud --quiet

In [None]:
import google.generativeai as genai
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from google.colab import userdata, files  # <- tambah files untuk download
import re, csv

# Read the Gemini API key from Colab Secrets. Secrets are created in the Colab
# UI (key icon in the left sidebar); the `userdata` module only reads them.
try:
    API_KEY = userdata.get('api_token2')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    # userdata.get() raises instead of returning None when the secret does not
    # exist or this notebook has not been granted access to it. Map both cases
    # onto the same friendly error as an empty value.
    API_KEY = None

if not API_KEY:
    raise ValueError(
        "❌ API Key tidak ditemukan di Colab userdata. Simpan dulu secret 'api_token2' "
        "lewat panel Secrets (ikon kunci) di Colab dan aktifkan akses notebook."
    )

# Configure the Gemini client and create the model handle used by later cells.
genai.configure(api_key=API_KEY)
# NOTE(review): confirm this model name is still served by the API before running.
model = genai.GenerativeModel("gemini-1.5-flash")
print("✅ Environment siap digunakan (Google Gemini API)")

### **Load dataset**

In [None]:
dataset_path = "/content/Amazon_Unlocked_Mobile2.csv"  # the file must already be uploaded to the runtime

try:
    # NOTE(review): quoting=csv.QUOTE_NONE makes '"' an ordinary character, so a
    # quoted review containing commas is seen as extra fields and the row is then
    # dropped by on_bad_lines='skip'. Presumably chosen to get past malformed
    # rows in this particular file; confirm the resulting row count is acceptable.
    df = pd.read_csv(dataset_path, on_bad_lines='skip', quoting=csv.QUOTE_NONE, encoding='utf-8')
    print(f"✅ Dataset berhasil dimuat. Jumlah data: {len(df)}")
except Exception as e:
    # Chain explicitly so the original parser/IO traceback stays attached.
    raise RuntimeError(f"❌ Gagal membaca CSV: {e}") from e

print("Kolom dataset:", df.columns.tolist())

# Every later cell reads the 'Reviews' column, so fail early when it is missing.
if 'Reviews' not in df.columns:
    raise ValueError("❌ Kolom 'Reviews' tidak ditemukan di dataset.")

# Cleaning: drop missing reviews, trim whitespace, then drop reviews that were
# whitespace-only (they are "" after trimming and carry nothing to classify).
df = df.dropna(subset=['Reviews'])
df = df.assign(Reviews=df['Reviews'].astype(str).str.strip())
df = df[df['Reviews'] != ""]

if df.empty:
    raise ValueError("❌ Tidak ada review yang tersisa setelah cleaning.")

# Reproducible sample of at most 20 reviews, re-indexed from 0.
sample_df = df.sample(min(20, len(df)), random_state=42).reset_index(drop=True)
print("\nContoh Reviews:")
print(sample_df['Reviews'].head())

### **Initial Sentiment Check**

In [None]:
# Number each sampled review ("Review 1: ...") and put one review per line.
reviews_text = "\n".join(
    f"Review {number}: {text}"
    for number, text in enumerate(sample_df['Reviews'], start=1)
)

# Free-form classification request: instruction line, blank line, then the reviews.
prompt_initial = (
    "\nClassify these reviews as Positive, Negative, or Mixed:\n"
    "\n"
    f"{reviews_text}\n"
)
response_initial = model.generate_content(prompt_initial)
print(f"\n>> Initial Response:\n {response_initial.text}")

### **Request Markdown Table**

In [None]:
# Same classification task, but ask for a Markdown table so the answer can be
# parsed. The prompt is assembled line by line; the empty first and last
# entries produce the leading and trailing newline.
prompt_table = "\n".join([
    "",
    "Classify these reviews as Positive, Negative, or Mixed, and tag relevant categories.",
    "Return the output as a Markdown table with the following columns:",
    "| Review # | Sentiment | Categories |",
    "",
    "Example:",
    "| Review # | Sentiment | Categories |",
    "| 1 | Positive | Battery Life, Performance |",
    "",
    "Here are the reviews:",
    reviews_text,
    "",
])
response_table = model.generate_content(prompt_table)
print(f"\n>> Gemini Model Table Output:\n {response_table.text}")

### **Parse Table (robust)**

In [None]:
def parse_table_response(response_text, expected_rows):
    """Parse the Markdown table in a Gemini response into a DataFrame.

    Only lines that start with ``|`` are read as table rows, so prose or code
    fences around the table are ignored. Separator rows (``|---|`` as well as
    aligned forms such as ``| :--- | :---: |``) are skipped, a missing trailing
    pipe is tolerated, and rows with fewer than three cells are discarded. The
    first remaining row is the header; data rows are truncated or padded with
    empty strings to the header width.

    Args:
        response_text: Raw text returned by the model.
        expected_rows: Number of data rows the caller expects. It only triggers
            a warning when the parsed row count differs; it never changes the
            returned frame. Pass ``None`` to skip the check.

    Returns:
        A DataFrame of strings whose columns are the header cells (Markdown
        emphasis markers removed), or ``None`` when the text does not contain a
        header row plus at least one data row.
    """
    import warnings  # local import: not provided by the notebook's import cell

    if not response_text:
        return None

    rows = []
    for raw_line in response_text.splitlines():
        line = raw_line.strip()
        if not line.startswith("|"):
            continue

        # Remove the outer pipes before splitting so a row that lacks the
        # closing pipe keeps its last cell.
        inner = line[1:]
        if inner.endswith("|"):
            inner = inner[:-1]
        cells = [cell.strip() for cell in inner.split("|")]

        # Header separator row: every cell is dashes with optional alignment colons.
        if all(re.fullmatch(r":?-+:?", cell) for cell in cells):
            continue

        if len(cells) >= 3:
            rows.append(cells)

    if len(rows) < 2:
        return None

    # Models sometimes emit bold or code-styled headers; strip that markup so
    # callers can rely on the plain column names they asked for.
    header = [cell.strip("*`").strip() for cell in rows[0]]
    data = rows[1:]
    width = len(header)
    data_fixed = [
        row[:width] if len(row) >= width else row + [""] * (width - len(row))
        for row in data
    ]

    if expected_rows is not None and len(data_fixed) != expected_rows:
        warnings.warn(
            f"Jumlah baris tabel ({len(data_fixed)}) tidak sama dengan jumlah "
            f"review yang diharapkan ({expected_rows}).",
            stacklevel=2,
        )

    return pd.DataFrame(data_fixed, columns=header)


# First attempt: parse the table text produced by the previous cell.
df_table = parse_table_response(response_table.text, len(sample_df))

if df_table is None:
    # No usable table: ask once more with a stricter, table-only prompt.
    print("⚠️ Model tidak mengembalikan tabel. Meminta ulang...")
    retry_prompt = f"""
    STRICT INSTRUCTION: Only return a Markdown table.
    Classify the following reviews as Positive, Negative, or Mixed, and include Categories.
    Do not add extra explanation, only the table.
    | Review # | Sentiment | Categories |

    {reviews_text}
    """
    retry_response = model.generate_content(retry_prompt)
    df_table = parse_table_response(retry_response.text, len(sample_df))

# Still nothing after the single retry (this can only be true on the retry path).
if df_table is None:
    raise ValueError("❌ Gagal parsing tabel dari Gemini meskipun sudah retry.")

print("\n✅ DataFrame hasil parsing tabel:", df_table.head(), sep="\n")

# Persist the parsed classification so it can be downloaded later.
df_table.to_csv("classification_results_table.csv", index=False)
print("💾 Disimpan ke classification_results_table.csv")

### **Visualisasi Sentiment**

In [None]:
# Tidy header names and sentiment labels so that equal labels are counted together.
df_table.columns = df_table.columns.str.strip()
df_table['Sentiment'] = df_table['Sentiment'].str.strip().str.capitalize()

sentiment_counts = df_table['Sentiment'].value_counts()

# Pie chart: share of each sentiment label; saved to disk for the download cell.
pie_fig, pie_ax = plt.subplots(figsize=(6, 6))
pie_ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=140)
pie_ax.set_title("Distribusi Sentiment Review")
pie_fig.savefig("sentiment_pie.png")
plt.show()

# Bar chart: absolute number of reviews per sentiment label; also saved to disk.
bar_fig, bar_ax = plt.subplots(figsize=(6, 4))
sentiment_counts.plot(kind='bar', ax=bar_ax)
bar_ax.set_ylabel("Jumlah Review")
bar_ax.set_title("Jumlah Review per Sentiment")
plt.setp(bar_ax.get_xticklabels(), rotation=0)  # keep the category labels horizontal
bar_fig.savefig("sentiment_bar.png")
plt.show()

print("\n📊 Visualisasi selesai.")

### **Merge & Insight**

In [None]:
min_len = min(len(sample_df), len(df_table))

# Clean the header names and sentiment labels here as well, so the filters below
# give the same result whether or not the visualisation cell has been run.
table_df = df_table.rename(columns=lambda name: str(name).strip())
sentiment_labels = table_df['Sentiment'].astype(str).str.strip().str.capitalize()
category_labels = table_df['Categories']

# The prompt asks for the review number in the first column. Use it to match
# each label to its review, because the model may skip or reorder rows and a
# purely positional join would then attach labels to the wrong reviews.
review_numbers = pd.to_numeric(
    table_df.iloc[:, 0].astype(str).str.extract(r'(\d+)', expand=False),
    errors='coerce',
)
can_align_by_number = bool(
    review_numbers.notna().all()
    and review_numbers.is_unique
    and review_numbers.between(1, len(sample_df)).all()
)

if can_align_by_number:
    # "Review N" was built from row N-1 of sample_df (0-based, freshly re-indexed).
    row_positions = review_numbers.astype(int).to_numpy() - 1
    merged_df = sample_df.iloc[row_positions].copy()
    merged_df['Sentiment'] = sentiment_labels.to_numpy()
    merged_df['Categories'] = category_labels.to_numpy()
    merged_df = merged_df.sort_index()
    if len(merged_df) < len(sample_df):
        print(f"⚠️ Hanya {len(merged_df)} dari {len(sample_df)} review yang punya hasil klasifikasi.")
else:
    # Review numbers are missing, duplicated or out of range: fall back to
    # matching by row order over the rows both frames have.
    print("⚠️ Kolom nomor review tidak bisa dipakai; data digabung berdasarkan urutan baris.")
    merged_df = sample_df.iloc[:min_len].copy()
    merged_df['Sentiment'] = sentiment_labels.iloc[:min_len].to_numpy()
    merged_df['Categories'] = category_labels.iloc[:min_len].to_numpy()

print("\n✅ merged_df:")
print(merged_df.head())

# Review texts grouped by label; the word cloud cell reuses the first two lists.
positive_reviews = merged_df.loc[merged_df['Sentiment'] == 'Positive', 'Reviews'].tolist()
negative_reviews = merged_df.loc[merged_df['Sentiment'] == 'Negative', 'Reviews'].tolist()
mixed_reviews    = merged_df.loc[merged_df['Sentiment'] == 'Mixed', 'Reviews'].tolist()

# The three lists are embedded in the prompt as Python list literals.
insight_prompt = f"""
You are a data analyst. I have classified customer reviews into Positive, Negative, and Mixed.
Now provide a summary with the following sections:
1. Overall Sentiment Summary (brief percentage breakdown)
2. Key Positive Themes
3. Key Negative Themes
4. Recommendations for improvement

Positive reviews:
{positive_reviews}

Negative reviews:
{negative_reviews}

Mixed reviews:
{mixed_reviews}
"""
response_insight = model.generate_content(insight_prompt)
print("\n>> Insight & Recommendations:\n", response_insight.text)

# Save the generated summary so the download cell can offer it.
with open("sentiment_insight.txt", "w", encoding="utf-8") as f:
    f.write(response_insight.text)
print("💾 Insight disimpan ke sentiment_insight.txt")

### **WordCloud**

In [None]:
def _render_wordcloud(cloud, title, out_path):
    """Draw an already generated word cloud, save the figure to `out_path`, then show it."""
    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)
    plt.savefig(out_path)  # written to disk for the download cell
    plt.show()


# One text blob per sentiment; joining an empty list yields "".
positive_text = " ".join(positive_reviews)
negative_text = " ".join(negative_reviews)

# NOTE(review): the emoji in the figure titles may not render with matplotlib's
# default font; confirm on the target runtime.
if not positive_text.strip():
    print("⚠️ Tidak ada review positif untuk WordCloud.")
else:
    wc_pos = WordCloud(width=800, height=400, background_color="white", colormap="Greens").generate(positive_text)
    _render_wordcloud(wc_pos, "🌟 WordCloud - Positive Reviews", "wordcloud_positive.png")

if not negative_text.strip():
    print("⚠️ Tidak ada review negatif untuk WordCloud.")
else:
    wc_neg = WordCloud(width=800, height=400, background_color="white", colormap="Reds").generate(negative_text)
    _render_wordcloud(wc_neg, "⚠️ WordCloud - Negative Reviews", "wordcloud_negative.png")

### **Download Files**

In [None]:
print("\n📂 Menyiapkan file untuk download...")

# Files written by the earlier cells on every run, in download order.
download_queue = [
    "classification_results_table.csv",
    "sentiment_insight.txt",
    "sentiment_pie.png",
    "sentiment_bar.png",
]

# A word cloud image is only written when its sentiment had review text.
if positive_text.strip():
    download_queue.append("wordcloud_positive.png")
if negative_text.strip():
    download_queue.append("wordcloud_negative.png")

for artifact in download_queue:
    files.download(artifact)