In [26]:
!pip install langchain_community replicate pandas newsapi-python

import os
import re
import json
import pandas as pd
from newsapi import NewsApiClient
from langchain_community.llms import Replicate

# =========================================
# API CONFIGURATION
# =========================================
# Credentials are taken from the environment when present so that real
# secrets never have to be written into the notebook. The placeholder strings
# are kept as fallbacks and must be replaced (or the variables exported)
# before the cell can talk to either service.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "masukan key dari newsapi,org")
REPLICATE_API_TOKEN = os.environ.get(
    "REPLICATE_API_TOKEN", "masukan token dari replicate"
)

# Export the token as well; presumably for libraries that look it up in the
# environment rather than receiving it as an argument (verify if removing).
os.environ["REPLICATE_API_TOKEN"] = REPLICATE_API_TOKEN

# Initialize the News API client.
newsapi = NewsApiClient(api_key=NEWS_API_KEY)

# Initialize the Granite model hosted on Replicate.
model_id = "ibm-granite/granite-3.3-8b-instruct"
llm = Replicate(model=model_id, replicate_api_token=REPLICATE_API_TOKEN)

# =========================================
# PARSING JSON
# =========================================
def safe_json_parse(text: str):
    """Return the first valid JSON object embedded in model output.

    The text is scanned left to right; at every ``{`` a JSON decode is
    attempted, and the first one that succeeds is returned. Unlike a greedy
    ``{...}`` regex, this copes with trailing text, several objects in one
    response, and stray brace pairs that appear before the real JSON.

    Args:
        text: Raw model output that may contain a JSON object surrounded by
            other text.

    Returns:
        The decoded object as a ``dict``, or an empty ``dict`` when ``text``
        is not a string or contains no decodable JSON object.

    Note:
        If the outermost object is truncated or malformed, a complete nested
        object further right in the text may be returned instead.
    """
    if not isinstance(text, str):
        # Preserve the best-effort contract: report and fall back to {}.
        print("Parsing error:", f"expected str, got {type(text).__name__}")
        return {}

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON value starting at `start` and
            # ignores whatever follows it.
            parsed, _ = decoder.raw_decode(text, start)
        except (ValueError, RecursionError):
            # Not valid JSON from this brace; try the next candidate.
            start = text.find("{", start + 1)
        else:
            return parsed
    return {}

# =========================================
# FUNGSI ANALISIS
# =========================================
def analyze_article(title, description, url, source):
    """Ask the LLM for a summary and political-bias assessment of an article.

    The article fields are interpolated into an Indonesian-language prompt
    that requests a JSON answer; the prompt is sent through the module-level
    ``llm`` and the reply is decoded with ``safe_json_parse``.

    Args:
        title: Article headline inserted into the prompt.
        description: Article description inserted into the prompt.
        url: Article URL inserted into the prompt.
        source: Name of the publishing source inserted into the prompt.

    Returns:
        A ``(summary, bias, scores)`` tuple read from the decoded reply's
        ``"summary"``, ``"bias"`` and ``"scores"`` keys. Missing keys fall
        back to ``""``, ``""`` and ``{}``; the same triple is returned when
        any exception occurs during the call or decoding.
    """
    instruction = f"""
Kamu adalah seorang analis media independen dengan keahlian dalam jurnalistik, ilmu politik, dan komunikasi massa.
Tugas Kamu adalah membaca teks berita secara kritis, menilai sudut pandang penulis, serta mengidentifikasi indikasi
bias politik berdasarkan bahasa, framing, dan pemilihan fakta yang digunakan.

Instruksi:

1. Buat ringkasan singkat dari berita berikut (maksimal 3 kalimat).
2. Analisis isi berita dan tentukan bias politik dominan:
   - "Left" → cenderung progresif, liberal, pro-kebijakan sosial, atau kritis terhadap institusi konservatif.
   - "Center" → cenderung netral, berbasis fakta, berimbang tanpa bahasa emosional yang kuat.
   - "Right" → cenderung konservatif, tradisional, pro-pemerintah/otoritas, atau kritis terhadap kebijakan progresif.
3. Berikan probabilitas realistis (0.0–1.0) untuk setiap kategori bias,
   dengan total jumlah = 1.0. Probabilitas harus mencerminkan analisis Anda terhadap teks berita.

Berita:
Judul: {title}
Deskripsi: {description}
Sumber: {source}
URL: {url}

Output HARUS berupa JSON valid, tanpa penjelasan tambahan.
Contoh struktur (isi harus sesuai hasil analisis Anda, bukan tiruan semata):

{{
  "summary": "...",
  "bias": "...",
  "scores": {{
    "Left": ...,
    "Center": ...,
    "Right": ...
  }}
}}
"""
    try:
        # Model call, decoding and field extraction all stay inside the try
        # so that any failure degrades to the empty triple below.
        parsed = safe_json_parse(llm.invoke(instruction))
        summary = parsed.get("summary", "")
        bias = parsed.get("bias", "")
        scores = parsed.get("scores", {})
    except Exception as err:
        print("Error:", err)
        return "", "", {}
    return summary, bias, scores

# =========================================
# AMBIL DATA BERITA DARI NEWSAPI
# =========================================

# Change to the desired topic.
topic = "health"   # e.g. business, entertainment, general, health, science, sports, technology

articles = newsapi.get_top_headlines(language="en", category=topic, page_size=10)

if not articles.get("articles"):
    print("Tidak ada artikel ditemukan untuk kategori:", topic)
    # Raise SystemExit directly instead of calling exit(): exit() is an
    # interactive helper injected by `site`, is not guaranteed to exist, and
    # a notebook kernel may override it so the rest of the cell keeps running.
    raise SystemExit

data = []
for art in articles["articles"]:
    # dict.get only applies its default when the key is absent; a key that is
    # present with a None value would otherwise leak None into the prompt and
    # the result table, hence the `or` fallbacks.
    source = (art.get("source") or {}).get("name") or ""
    title = art.get("title") or ""
    desc = art.get("description") or ""
    url = art.get("url") or ""

    summary, bias, scores = analyze_article(title, desc, url, source)
    data.append({
        "source": source,
        "title": title,
        "url": url,
        "summary": summary,
        "bias": bias,
        "scores": scores,
    })

# =========================================
# RESULTS TO DATAFRAME
# =========================================
# Widen column display before anything is printed so long titles and
# summaries are not cut off too early.
pd.set_option("max_colwidth", 120)
df = pd.DataFrame(data)

# Banner and preview of the first rows, emitted as two newline-separated items.
print(f"\n=== HASIL ANALISIS BERITA ({topic.upper()}) ===\n", df.head(), sep="\n")

# Persist the full table to a CSV file named after the topic.
filename = f"news_bias_analysis_{topic}.csv"
df.to_csv(path_or_buf=filename, index=False)
print(f"\nFile disimpan sebagai {filename}")



=== HASIL ANALISIS BERITA (HEALTH) ===

                source  \
0  The Washington Post   
1             CBS News   
2              PsyPost   
3          Google News   
4  The Washington Post   

                                                                                                     title  \
0                Column | It’s rude to ask if someone is taking Ozempic. Here’s why. - The Washington Post   
1  Hundreds may have been exposed to rabies at bat-infested cabins in Grand Teton National Park - CBS News   
2                          These fascinating new studies show ADHD extends into unexpected areas - PsyPost   
3                             Study Reveals The Biggest Risk Factors For Getting Early Dementia - HuffPost   
4           Another covid summer wave is here, but the future of vaccines is unclear - The Washington Post   

                                                                                                                       url  \
0              