In [1]:
import pandas as pd
import os

def merge_descriptions(csv_path, reasoning_folder, output_csv_path, output_folder_combined):
    """Merge each CSV row's description with its per-id reasoning text file.

    For every row of the CSV at ``csv_path``, the stripped ``description``
    value is joined (with a single space) to the stripped contents of
    ``<reasoning_folder>/<id>.txt`` when that file exists. The merged text is
    written to ``<output_folder_combined>/<id>.txt`` and stored in a new
    ``description_gabungan`` column; the augmented table is then saved to
    ``output_csv_path``.

    Args:
        csv_path: Path of the input CSV; must contain 'id' and 'description'
            columns.
        reasoning_folder: Folder holding the optional ``<id>.txt`` files.
        output_csv_path: Destination of the CSV with the extra column.
        output_folder_combined: Folder receiving one merged ``<id>.txt`` per
            row; created if it does not exist.

    Returns:
        None. Results are written to disk and two status lines are printed.

    Raises:
        ValueError: If the CSV lacks the 'id' or 'description' column.
    """
    # Read the main CSV file.
    df = pd.read_csv(csv_path)

    # Make sure the 'id' and 'description' columns are present.
    if 'id' not in df.columns or 'description' not in df.columns:
        raise ValueError("CSV harus memiliki kolom 'id' dan 'description'.")

    # Create the output folder if it does not exist yet.
    os.makedirs(output_folder_combined, exist_ok=True)

    # Merged descriptions, in row order.
    combined_descriptions = []

    # Iterate the two columns directly rather than via df.iterrows():
    # iterrows() builds a Series per row and does not preserve dtypes, so an
    # integer id could turn into a float and yield a "<id>.0.txt" file name.
    for id_, raw_description in zip(df['id'], df['description']):
        # A missing description is read as NaN; str(NaN) would inject the
        # literal text "nan" into the merged output, so treat it as empty.
        desc_a = "" if pd.isna(raw_description) else str(raw_description).strip()

        # Path of the reasoning file (e.g. reasoning_folder/123.txt).
        reasoning_path = os.path.join(reasoning_folder, f"{id_}.txt")

        if os.path.exists(reasoning_path):
            with open(reasoning_path, 'r', encoding='utf-8') as f:
                desc_b = f.read().strip()
        else:
            desc_b = ""

        # Join only the non-empty parts so that an empty description does not
        # leave a stray leading space in front of the reasoning text.
        combined = " ".join(part for part in (desc_a, desc_b) if part)

        # Save the merged text as a .txt file in the output folder.
        output_path = os.path.join(output_folder_combined, f"{id_}.txt")
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(combined)

        combined_descriptions.append(combined)

    # Add the merged text as a new column of the DataFrame.
    df['description_gabungan'] = combined_descriptions

    # Save the augmented table to the output CSV.
    df.to_csv(output_csv_path, index=False, encoding='utf-8')

    print(f"Sukses! File CSV gabungan disimpan di: {output_csv_path}")
    print(f"Deskripsi gabungan disimpan di folder: {output_folder_combined}")

In [3]:
# Input and output locations for the merge run, keyed by the
# merge_descriptions() parameter each one is passed as.
merge_paths = {
    'csv_path': 'BPS Dataset Version 2/bps_descriptions.csv',
    'reasoning_folder': 'BPS Dataset Version 2/descriptions',
    'output_csv_path': 'BPS Dataset Version 2/combined_descriptions.csv',
    'output_folder_combined': 'BPS Dataset Version 2/combined_descriptions',
}
merge_descriptions(**merge_paths)


Sukses! File CSV gabungan disimpan di: BPS Dataset Version 2/combined_descriptions.csv
Deskripsi gabungan disimpan di folder: BPS Dataset Version 2/combined_descriptions
