In [1]:
import os
import pandas as pd
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

In [2]:
# Instantiate the Ollama LLM wrapper, selecting the "gemma2" model.
# NOTE(review): the saved cell output echoes this line, which looks like a
# warning (possibly a deprecation notice for this class) - confirm.
llm = Ollama(model="gemma2")

  llm = Ollama(model="gemma2")


In [3]:
# Prompt template for the summarization request.
# The instruction text is written in Indonesian and asks for a summary laid out
# in four numbered sections: key points, main summary, mentioned entities (if
# any) and next steps (if any). `{transcript}` is the only input variable and is
# substituted between the literal triple-quote delimiters inside the template.
# The template text is sent to the model verbatim, so its whitespace matters.
prompt = PromptTemplate(
    input_variables=["transcript"],
    template = """
        Berikan ringkasan dari transkrip percakapan berikut. Ringkasan harus mencakup informasi penting dan disusun dalam format sebagai berikut:

        1. **Poin-Poin Penting:**
        2. **Ringkasan Utama:**
        3. **Entitas yang Disebutkan (Jika ada):**
        4. **Langkah Berikutnya Berdasarkan Bahasan (Jika ada): **

        Transkrip:
        \"\"\"{transcript}\"\"\"

        Gunakan bahasa Indonesia yang ringkas dan jelas.
    """
)

In [4]:
# Bind the LLM and the summarization prompt together into one LLMChain, which
# the later cells call with a `transcript` keyword argument.
# NOTE(review): the saved cell output echoes this line, which looks like a
# warning (possibly a deprecation notice for LLMChain) - confirm.
chain = LLMChain(llm=llm, prompt=prompt)

  chain = LLMChain(llm=llm, prompt=prompt)


In [5]:
# Location of the input CSV and of the folder that receives the summaries.
input_csv = r"experiment_results_with_confidence.csv"

# The "Ringkasan" folder is placed in the same directory as the CSV file.
csv_dir = os.path.dirname(input_csv)
output_folder = os.path.join(csv_dir, "Ringkasan")

# Create the folder if needed; an already existing folder is not an error.
os.makedirs(output_folder, exist_ok=True)


In [6]:
# Load the experiment results CSV into a DataFrame.
df = pd.read_csv(input_csv)
# Keep only the rows whose "SPL" value, trimmed and lower-cased, equals "ideal".
# NOTE(review): the previous comment described this as a filter on "Scenario",
# but the code reads the "SPL" column - confirm which column is intended.
ideal_rows = df[df["SPL"].str.strip().str.lower() == "ideal"]

In [7]:
# ==== Summary for the ground-truth reference transcript ====
groundtruth_path = os.path.join("Ref", "groundTruth.txt")

if not os.path.exists(groundtruth_path):
    # No reference file: report it and do nothing else.
    print("File groundTruth.txt tidak ditemukan.")
else:
    # Read the whole reference transcript, dropping surrounding whitespace.
    with open(groundtruth_path, "r", encoding="utf-8") as reader:
        ground_truth_transcript = reader.read().strip()

    if not ground_truth_transcript:
        print("File ground truth kosong.")
    else:
        print("\n📄 Memproses transkrip ground truth...")
        try:
            # Run the summarization chain on the reference transcript.
            ground_summary = chain.run(transcript=ground_truth_transcript)

            # Store the summary as UTF-8 text inside the output folder.
            output_path = os.path.join(output_folder, "ringkasan_groundtruth.txt")
            with open(output_path, "w", encoding="utf-8") as writer:
                writer.write(ground_summary)

            print("Ringkasan ground truth disimpan: ringkasan_groundtruth.txt")
        except Exception as err:
            # Best effort: report the failure and let the notebook continue.
            print(f"Gagal merangkum ground truth: {err}")



📄 Memproses transkrip ground truth...


  ground_summary = chain.run(transcript=ground_truth_transcript)


Ringkasan ground truth disimpan: ringkasan_groundtruth.txt


In [8]:
# Summarize the transcript of every CSV row and write one text file per row
# into `output_folder`. Rows without a transcript are reported and skipped.
for idx, row in df.iterrows():
    filename = str(row["Filename"]).strip()

    # For ideal.wav the transcript is read from the 'Scenario' column and the
    # SPL label is fixed to "ideal"; every other row uses 'Transcription'/'SPL'.
    if filename == "ideal.wav":
        raw_transcript = row["Scenario"]
        spl_value = "ideal"
    else:
        raw_transcript = row["Transcription"]
        spl_value = str(row["SPL"]).strip().lower()

    # Test for a missing value BEFORE converting to str: str(NaN) is the
    # non-empty string "nan", which would otherwise pass the emptiness check
    # and be sent to the model as if it were a real transcript.
    transcript = "" if pd.isna(raw_transcript) else str(raw_transcript).strip()

    # Process the row only when there is an actual transcript.
    if not transcript:
        print(f"Transkrip kosong pada baris {idx}, dilewati.")
        continue

    print(f"\n📄 Memproses baris {idx} - {filename} | SPL: {spl_value}")
    try:
        # Run the summarization chain on this row's transcript.
        summary = chain.run(transcript=transcript)

        # Save the summary; the row index keeps names unique when the same
        # audio filename appears in more than one row.
        output_filename = f"ringkasan_{filename.replace('.wav', '')}_{idx}.txt"
        output_path = os.path.join(output_folder, output_filename)

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(summary)

        print(f"Ringkasan disimpan: {output_filename}")
    except Exception as e:
        # Best effort: report the failing row and carry on with the next one.
        print(f"Gagal memproses baris ke-{idx}: {e}")


📄 Memproses baris 0 - ideal.wav | SPL: ideal
Ringkasan disimpan: ringkasan_ideal_0.txt

📄 Memproses baris 1 - MaleVoice1_whisper_-55.0dBFS.wav | SPL: 30
Ringkasan disimpan: ringkasan_MaleVoice1_whisper_-55.0dBFS_1.txt

📄 Memproses baris 2 - MaleVoice1_whisper_-50.0dBFS.wav | SPL: 35
Ringkasan disimpan: ringkasan_MaleVoice1_whisper_-50.0dBFS_2.txt

📄 Memproses baris 3 - MaleVoice1_whisper_-45.0dBFS.wav | SPL: 40
Ringkasan disimpan: ringkasan_MaleVoice1_whisper_-45.0dBFS_3.txt

📄 Memproses baris 4 - MaleVoice1_whisper_-40.0dBFS.wav | SPL: 45
Ringkasan disimpan: ringkasan_MaleVoice1_whisper_-40.0dBFS_4.txt

📄 Memproses baris 5 - MaleVoice1_equal.wav | SPL: 45
Ringkasan disimpan: ringkasan_MaleVoice1_equal_5.txt

📄 Memproses baris 6 - MaleVoice1_overpower_-14dBFS.wav | SPL: 45
Ringkasan disimpan: ringkasan_MaleVoice1_overpower_-14dBFS_6.txt

📄 Memproses baris 7 - ideal.wav | SPL: ideal
Ringkasan disimpan: ringkasan_ideal_7.txt

📄 Memproses baris 8 - MaleVoice2_whisper_-55.0dBFS.wav | SPL: