### 한일 기후변화 비교 연구
#### Step 3. 레이블 생성

In [5]:
import os
import glob
import ollama
import pandas as pd

# Directory that is searched for input CSVs and that also receives the outputs.
FOLDER = "E:/Data_for_Practice/JapMedia/data/kor_data/"
# Glob pattern (relative to FOLDER) selecting the topic-summary files to label.
PATTERN = "*topic_summary.csv"

# Ollama model tag used for every chat request ("llama3:8b" was the noted alternative).
MODEL_NAME = "llama4"
# Generation options forwarded unchanged to ollama.chat(options=...).
OLLAMA_OPTIONS = dict(
    temperature=1.15,
    num_predict=50,
)

def build_prompt(keywords: str) -> str:
    """Compose the user prompt asking the model for a frame and a topic label.

    The prompt is the keyword section followed by a fixed instruction text
    (corpus context, the four frame definitions, the task and the required
    ``Frame:`` / ``Label:`` output format).

    Args:
        keywords: Topic keywords, inserted verbatim on the line after
            ``Keywords:``.

    Returns:
        The full prompt text.
    """
    keyword_section = f"Keywords:\n{keywords}\n\n"

    # Fixed part of the prompt; it contains no placeholders.
    instructions = """Context:
The keywords come from topic modeling on ENGLISH-TRANSLATED articles from KOREAN news outlets about climate and environmental issues (climate policy, decarbonization, energy transition, biodiversity, pollution, ESG).

You must classify the topic into ONE of the following four frames:

1. Economic Costs/Benefits Frame
   - Costs/benefits of climate policy
   - Subsidies, tax credits, discrimination complaints
   - Energy prices, fuel costs, employment effects
   - Carbon pricing, ETS, industrial relocation

2. Technological Transition / Industrial Competition Frame
   - Energy/industrial transition; new technologies
   - EV/battery supply chains and rules of origin
   - Renewable expansion, hydrogen, CCS, smart grid
   - Infrastructure, siting strategies, green investments

3. Political Imbalance / Institutions & Geopolitics Frame
   - Diplomacy, trade conflict, governance issues
   - U.S.–Korea/Japan–Korea disputes, norms, protectionism
   - Domestic political conflict, legislative battles
   - International negotiations (COP, Loss & Damage)

4. Climate Crisis Response & Justice Frame
   - Carbon neutrality/transition, mitigation and adaptation
   - Citizen/youth action, justice, corporate leadership
   - Scientific evidence, physical impacts, disasters
   - Lifestyle, behavior, community transition

Task:
Based on the keywords, produce:
1) The most relevant frame (choose exactly one from the four above)
2) A concise topic label (≤ 6 words), specific and concrete to the theme

Constraints:
- Output format MUST be:
   Frame: <one of the four frame names>
   Label: <your concise topic label>
- No explanations
- No quotation marks or punctuation at the end
- The label must NOT be generic (e.g., “Climate Policy”, “Environmental Issues”)
"""
    return keyword_section + instructions

def parse_frame_and_label(raw: str):
    """Extract the ``Frame:`` and ``Label:`` values from a model reply.

    Defined in this cell so that it can run on a fresh kernel: the labelling
    loop below calls this parser, and without a definition here the call
    fails with NameError.

    Args:
        raw: Reply text from the model; may be empty.

    Returns:
        ``(frame, label)``. Either value is ``""`` when the corresponding
        line is absent. If a key occurs on several lines, the last one wins.
    """
    frame = ""
    label = ""
    if not raw:
        return frame, label

    for line in raw.splitlines():
        line = line.strip()
        lower = line.lower()
        if lower.startswith("frame:"):
            frame = line.split(":", 1)[1].strip()
        elif lower.startswith("label:"):
            label = line.split(":", 1)[1].strip()
    return frame, label


def label_one_file(csv_path: str) -> pd.DataFrame:
    """Label every topic row of one topic-summary CSV via the Ollama model.

    For each row, the ``Representation`` keywords are turned into a prompt,
    sent to ``ollama.chat`` and the reply is parsed into a frame and a label.
    A failed request or an unparsable reply does not abort the file: the row
    is kept with empty ``best_frame`` / ``label`` and a message is printed.

    Args:
        csv_path: Path of the CSV to read (decoded as ``utf-8-sig``).

    Returns:
        A DataFrame with the columns ``Topic``, ``Count``, ``Representation``,
        ``best_frame`` and ``label``, one row per input row. The columns are
        present even when the input has no rows.

    Raises:
        ValueError: If ``Topic``, ``Count`` or ``Representation`` is missing
            from the CSV.
    """
    print(f"\n[FILE] {os.path.basename(csv_path)} 로드 중...")
    df = pd.read_csv(csv_path, encoding="utf-8-sig")

    # Verify the required columns before doing any model calls.
    required_cols = ["Topic", "Count", "Representation"]
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        raise ValueError(f"필수 컬럼 누락: {missing} | 사용 가능한 컬럼: {df.columns.tolist()}")

    output_cols = required_cols + ["best_frame", "label"]
    rows = []
    print("Ollama로 라벨 생성 시작...")
    # Number rows by position; the index label is not guaranteed to be a
    # zero-based integer.
    for row_no, (_, row) in enumerate(df.iterrows(), start=1):
        prompt = build_prompt(str(row["Representation"]))
        raw_resp = ""
        try:
            response = ollama.chat(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": "You generate precise topic labels from keyword lists."},
                    {"role": "user", "content": prompt},
                ],
                options=OLLAMA_OPTIONS,
            )
            raw_resp = response["message"]["content"].strip()
        except Exception as e:
            # Best effort per row: report the failure and keep going so one
            # bad request does not lose the rest of the file.
            print(f"  ! {row_no}행 오류: {e}")

        best_frame, label = parse_frame_and_label(raw_resp)

        rows.append({
            "Topic": row["Topic"],
            "Count": row["Count"],
            "Representation": row["Representation"],
            "best_frame": best_frame,
            "label": label,
        })

        if best_frame or label:
            print(f"  - {row_no}행 완료: [{best_frame}] {label}")
        elif raw_resp:
            # The model answered, but not in the requested Frame:/Label:
            # format; surface it instead of silently storing blanks.
            print(f"  ! {row_no}행 응답 파싱 실패: {raw_resp!r}")

    # Pin the column set so an input without rows still yields the schema.
    return pd.DataFrame(rows, columns=output_cols)

def main():
    """Label every matching topic-summary CSV and write the results.

    Each file matched by ``PATTERN`` inside ``FOLDER`` is labelled and saved
    next to the input as ``<stem>_ollama.csv``. The per-file results, with a
    leading ``source_file`` column, are then concatenated into
    ``__all_topic_labels_ollama.csv``. A file that fails is reported and
    skipped.

    Raises:
        FileNotFoundError: If no file matches the pattern.
    """
    search_glob = os.path.join(FOLDER, PATTERN)
    files = sorted(glob.glob(search_glob))
    if not files:
        raise FileNotFoundError(f"No files matching '{PATTERN}' in: {FOLDER}")

    print(f"[INFO] 발견된 topic_summary 파일 수: {len(files)}")

    all_results = []
    for fp in files:
        base_name = os.path.basename(fp)
        try:
            labelled = label_one_file(fp)

            stem, _ext = os.path.splitext(base_name)
            out_path = os.path.join(FOLDER, f"{stem}_ollama.csv")
            labelled.to_csv(out_path, index=False, encoding="utf-8-sig")
            print(f"[SAVED] {out_path}")

            # The provenance column is added only after the per-file CSV has
            # been written, so it appears in the combined file alone.
            labelled.insert(0, "source_file", base_name)
            all_results.append(labelled)
        except Exception as e:
            print(f"[ERROR] {base_name} 실패: {e}")

    if not all_results:
        return

    combined_path = os.path.join(FOLDER, "__all_topic_labels_ollama.csv")
    combined = pd.concat(all_results, ignore_index=True)
    combined.to_csv(combined_path, index=False, encoding="utf-8-sig")
    print(f"\n[SUMMARY SAVED] {combined_path}")


if __name__ == "__main__":
    main()

[INFO] 발견된 topic_summary 파일 수: 6

[FILE] 조선일보_2022년도 데이터_translated_gpt_topic_summary.csv 로드 중...
Ollama로 라벨 생성 시작...
  - 1행 완료: [Climate Crisis Response & Justice Frame] Carbon Neutrality and Crop Resilience
  - 2행 완료: [Technological Transition / Industrial Competition Frame] EV Battery Industry Developments
  - 3행 완료: [Climate Crisis Response & Justice Frame] Extreme Weather Events Response
  - 4행 완료: [Political Imbalance / Institutions & Geopolitics Frame] Indo-Pacific Diplomatic Relations
  - 5행 완료: [Technological Transition / Industrial Competition Frame] Nuclear Energy Technology Development
[SAVED] E:/Data_for_Practice/JapMedia/data/kor_data/조선일보_2022년도 데이터_translated_gpt_topic_summary_ollama.csv

[FILE] 조선일보_2023년도 데이터_translated_gpt_topic_summary.csv 로드 중...
Ollama로 라벨 생성 시작...
  - 1행 완료: [Economic Costs/Benefits Frame] Carbon Pricing and Energy Costs
  - 2행 완료: [Climate Crisis Response & Justice Frame] Extreme Weather Event Response
  - 3행 완료: [Political Imbalance / Instituti

### 일본

In [3]:
import os
import glob
import ollama
import pandas as pd

# Input/output directory for this cell's topic-summary CSVs.
FOLDER = "E:/Data_for_Practice/JapMedia/data/jap_data/translated/"
# Files in FOLDER whose names match this glob are labelled.
PATTERN = "*topic_summary.csv"

# Model tag handed to ollama.chat; "llama3:8b" is the alternative noted earlier.
MODEL_NAME = "llama4"
# Passed as-is through the `options` argument of ollama.chat.
OLLAMA_OPTIONS = dict(temperature=1.15, num_predict=50)

def build_prompt(keywords: str) -> str:
    """Build the user prompt asking the model for a frame and a topic label.

    This cell labels the topic summaries stored under the ``jap_data`` folder,
    so the context paragraph identifies the source as JAPANESE news outlets.
    The text was previously copied from the Korean cell and still said KOREAN,
    which gave the model the wrong corpus context.

    Args:
        keywords: Topic keywords, inserted verbatim after ``Keywords:``.

    Returns:
        The full prompt text: keywords, corpus context, the four frame
        definitions, the task and the required ``Frame:`` / ``Label:`` output
        format.
    """
    return f"""
Keywords: {keywords}

Context:
The keywords come from topic modeling on ENGLISH-TRANSLATED articles from JAPANESE news outlets about climate and environmental issues (climate policy, decarbonization, energy transition, biodiversity, pollution, ESG).

You must classify the topic into ONE of the following four frames:

1. Economic Costs/Benefits Frame
   - Costs/benefits of climate policy
   - Subsidies, tax credits, discrimination complaints
   - Energy prices, fuel costs, employment effects
   - Carbon pricing, ETS, industrial relocation

2. Technological Transition / Industrial Competition Frame
   - Energy/industrial transition; new technologies
   - EV/battery supply chains and rules of origin
   - Renewable expansion, hydrogen, CCS, smart grid
   - Infrastructure, siting strategies, green investments

3. Political Imbalance / Institutions & Geopolitics Frame
   - Diplomacy, trade conflict, governance issues
   - U.S.–Korea/Japan–Korea disputes, norms, protectionism
   - Domestic political conflict, legislative battles
   - International negotiations (COP, Loss & Damage)

4. Climate Crisis Response & Justice Frame
   - Carbon neutrality/transition, mitigation and adaptation
   - Citizen/youth action, justice, corporate leadership
   - Scientific evidence, physical impacts, disasters
   - Lifestyle, behavior, community transition

Task:
Based on the keywords, produce:
1) The most relevant frame (choose exactly one from the four above)
2) A concise topic label (≤ 6 words), specific and concrete to the theme

Constraints:
- Output format MUST be:
   Frame: <one of the four frame names>
   Label: <your concise topic label>
- No explanations
- No quotation marks or punctuation at the end
- The label must NOT be generic (e.g., “Climate Policy”, “Environmental Issues”)
"""

def parse_frame_and_label(raw: str):
    """Pull the ``Frame:`` and ``Label:`` values out of an Ollama reply.

    Lines are matched case-insensitively after trimming surrounding
    whitespace; the key must be followed directly by a colon. When a key
    appears on more than one line, the last occurrence is used.

    Args:
        raw: Reply text; may be empty.

    Returns:
        ``(frame, label)``, each ``""`` when no matching line exists.
    """
    found = {"frame": "", "label": ""}
    if not raw:
        return found["frame"], found["label"]

    for text in raw.splitlines():
        key, colon, value = text.strip().partition(":")
        key = key.lower()
        # Only an exact "frame" / "label" key immediately before the first
        # colon counts as a match.
        if colon and key in found:
            found[key] = value.strip()

    return found["frame"], found["label"]

def label_one_file(csv_path: str) -> pd.DataFrame:
    """Label every topic row of one topic-summary CSV via the Ollama model.

    For each row, the ``Representation`` keywords are turned into a prompt,
    sent to ``ollama.chat`` and the reply is parsed into a frame and a label.
    A failed request or an unparsable reply does not abort the file: the row
    is kept with empty ``best_frame`` / ``label`` and a message is printed.

    Args:
        csv_path: Path of the CSV to read (decoded as ``utf-8-sig``).

    Returns:
        A DataFrame with the columns ``Topic``, ``Count``, ``Representation``,
        ``best_frame`` and ``label``, one row per input row. The columns are
        present even when the input has no rows.

    Raises:
        ValueError: If ``Topic``, ``Count`` or ``Representation`` is missing
            from the CSV.
    """
    print(f"\n[FILE] {os.path.basename(csv_path)} 로드 중...")
    df = pd.read_csv(csv_path, encoding="utf-8-sig")

    # Verify the required columns before doing any model calls.
    required_cols = ["Topic", "Count", "Representation"]
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        raise ValueError(f"필수 컬럼 누락: {missing} | 사용 가능한 컬럼: {df.columns.tolist()}")

    output_cols = required_cols + ["best_frame", "label"]
    rows = []
    print("Ollama로 라벨 생성 시작...")
    # Number rows by position; the index label is not guaranteed to be a
    # zero-based integer.
    for row_no, (_, row) in enumerate(df.iterrows(), start=1):
        prompt = build_prompt(str(row["Representation"]))
        raw_resp = ""
        try:
            response = ollama.chat(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": "You generate precise topic labels from keyword lists."},
                    {"role": "user", "content": prompt},
                ],
                options=OLLAMA_OPTIONS,
            )
            raw_resp = response["message"]["content"].strip()
        except Exception as e:
            # Best effort per row: report the failure and keep going so one
            # bad request does not lose the rest of the file.
            print(f"  ! {row_no}행 오류: {e}")

        best_frame, label = parse_frame_and_label(raw_resp)

        rows.append({
            "Topic": row["Topic"],
            "Count": row["Count"],
            "Representation": row["Representation"],
            "best_frame": best_frame,
            "label": label,
        })

        if best_frame or label:
            print(f"  - {row_no}행 완료: [{best_frame}] {label}")
        elif raw_resp:
            # The model answered, but not in the requested Frame:/Label:
            # format; surface it instead of silently storing blanks.
            print(f"  ! {row_no}행 응답 파싱 실패: {raw_resp!r}")

    # Pin the column set so an input without rows still yields the schema.
    return pd.DataFrame(rows, columns=output_cols)

def main():
    """Run the labelling over all topic-summary CSVs found in ``FOLDER``.

    For every file matching ``PATTERN`` the labelled table is written to
    ``<stem>_ollama.csv`` in the same folder. All successful tables, each
    prefixed with a ``source_file`` column, are merged into
    ``__all_topic_labels_ollama.csv``. Errors in a single file are printed
    and do not stop the remaining files.

    Raises:
        FileNotFoundError: If the pattern matches no file.
    """
    files = glob.glob(os.path.join(FOLDER, PATTERN))
    files.sort()
    if len(files) == 0:
        raise FileNotFoundError(f"No files matching '{PATTERN}' in: {FOLDER}")

    all_results = []
    print(f"[INFO] 발견된 topic_summary 파일 수: {len(files)}")

    for fp in files:
        file_name = os.path.basename(fp)
        try:
            result_df = label_one_file(fp)

            # Per-file output: the input name with "_ollama" appended to the stem.
            stem = os.path.splitext(file_name)[0]
            out_path = os.path.join(FOLDER, f"{stem}_ollama.csv")
            result_df.to_csv(out_path, index=False, encoding="utf-8-sig")
            print(f"[SAVED] {out_path}")

            # Tag rows with their origin for the combined table only.
            result_df.insert(0, "source_file", file_name)
            all_results.append(result_df)
        except Exception as e:
            print(f"[ERROR] {file_name} 실패: {e}")

    if all_results:
        combined_path = os.path.join(FOLDER, "__all_topic_labels_ollama.csv")
        pd.concat(all_results, ignore_index=True).to_csv(
            combined_path, index=False, encoding="utf-8-sig"
        )
        print(f"\n[SUMMARY SAVED] {combined_path}")


if __name__ == "__main__":
    main()


[INFO] 발견된 topic_summary 파일 수: 6

[FILE] (영문번역 추가)_아사히신문_2022년도 데이터_topic_summary.csv 로드 중...
Ollama로 라벨 생성 시작...
  - 1행 완료: [Climate Crisis Response & Justice Frame] Royal Tourist Alpine Skiing Impacts
  - 2행 완료: [Political Imbalance / Institutions & Geopolitics Frame] Taiwan China US Tensions Escalate
  - 3행 완료: [Climate Crisis Response & Justice Frame] Soil Methane Emission Reduction
[SAVED] E:/Data_for_Practice/JapMedia/data/jap_data/translated/(영문번역 추가)_아사히신문_2022년도 데이터_topic_summary_ollama.csv

[FILE] (영문번역 추가)_아사히신문_2023년도 데이터_topic_summary.csv 로드 중...
Ollama로 라벨 생성 시작...
  - 1행 완료: [Climate Crisis Response & Justice Frame] Flood Risk and Climate Impact
  - 2행 완료: [Political Imbalance / Institutions & Geopolitics Frame] US-China Climate Diplomacy Efforts
  - 3행 완료: [Political Imbalance / Institutions & Geopolitics Frame] Disarmament and Diplomacy Efforts
  - 4행 완료: [Climate Crisis Response & Justice Frame] Global Warming Temperature Rise
[SAVED] E:/Data_for_Practice/JapMedia/dat