# Proyek Capstone: AI-Powered Interview Assessment System
**Tim A25-CS358**

- **Muhammad Rayhan**, M262D5Y1357, sebagai PIC Model & Training (Streamlit/Interface)
- **Hafiz Putra Mahesta**, M262D5Y0714, sebagai PIC Integrasi, Model STT, & Fitur (Confidence Score)
- **Fahri Rasyidin**, M262D5Y0566, sebagai PIC Data & Evaluasi (Dataset, Kunci Jawaban, WER)

# Environment Setup & Dependencies

In [None]:
!pip install git+https://github.com/openai/whisper.git
!pip install transformers accelerate
!pip install jiwer moviepy librosa soundfile

In [None]:
import os
import json
import time
import shutil
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import moviepy.editor as mp
import librosa
import soundfile as sf
import torch
import whisper
import jiwer
from datetime import datetime
from tqdm.notebook import tqdm
from transformers import pipeline
from google.colab import drive

In [None]:
# Mount Google Drive (Colab) so the dataset under /content/drive/MyDrive is reachable.
drive.mount('/content/drive')

# System Configuration & Model Initialization

In [None]:
BASE_DIR = "/content/drive/MyDrive/Dataset"
VIDEO_INPUT_DIR = os.path.join(BASE_DIR, "Video")
AUDIO_OUTPUT_DIR = os.path.join(BASE_DIR, "Audio")
# NOTE: despite the name, this path is the *folder* holding the manual
# transcripts (the answer key), not a single file.
GROUND_TRUTH_FILE = os.path.join(BASE_DIR, "Transkrip_Manual")

# Check the video folder. isdir() (rather than exists()) keeps os.listdir()
# from being called on a stray file that happens to have the same name.
if os.path.isdir(VIDEO_INPUT_DIR):
    video_files = [f for f in os.listdir(VIDEO_INPUT_DIR) if f.lower().endswith(('.mp4', '.webm', '.avi', '.mov', '.mkv'))]
    print("[INFO] Folder Video ditemukan.")
    print(f"[INFO] Jumlah video yang siap diproses: {len(video_files)} file")
else:
    # Keep the name defined so later code sees "no videos" instead of a NameError.
    video_files = []
    print(f" Folder Video TIDAK ditemukan di: {VIDEO_INPUT_DIR}")

if os.path.exists(GROUND_TRUTH_FILE):
    print("File Transkrip Manual (Kunci Jawaban) ditemukan.")
else:
    print(f"File Transkrip Manual tidak ditemukan di: {GROUND_TRUTH_FILE}")

try:
    # Other options: 'tiny', 'small', 'medium' (larger is slower but more accurate)
    model = whisper.load_model("small")
    print("Model Whisper berhasil dimuat ke dalam sistem.")
except Exception as e:
    # Best effort: report the failure and leave an explicit "no model" marker
    # rather than an undefined name.
    model = None
    print(f" Gagal memuat model. Detail error: {e}")

## Core Utility Functions

In [None]:
def convert_video_to_audio(video_path, audio_path):
    """Extract the audio track of a video into a 16-bit PCM audio file.

    Args:
        video_path: Path of the source video.
        audio_path: Destination path of the audio file to write.

    Returns:
        True when the audio file was written, False on any failure
        (unreadable video, video without an audio track, write error).
        The failure reason is printed; no exception is propagated.
    """
    video_clip = None
    try:
        video_clip = mp.VideoFileClip(video_path)

        # A clip without an audio track exposes `audio` as None; report that
        # explicitly instead of failing on the attribute access below.
        if video_clip.audio is None:
            print(f" Konversi Error ({video_path}): video tidak memiliki audio.")
            return False

        video_clip.audio.write_audiofile(audio_path, codec='pcm_s16le', verbose=False, logger=None)
        return True
    except Exception as e:
        print(f" Konversi Error ({video_path}): {e}")
        return False
    finally:
        # Release the reader even when the export fails midway.
        if video_clip is not None:
            video_clip.close()

def transcribe_audio(audio_path):
    """Transcribe one audio file with the loaded Whisper model.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The transcript with surrounding whitespace removed, or an empty
        string when transcription fails (the error is printed).
    """
    # Domain-specific context passed as the initial prompt to improve the
    # recognition of technical terms.
    domain_hint = (
        "Transcribe strictly in English. Context: Machine Learning interview. "
        "Keywords: TensorFlow, Scikit-learn, CNN, Dropout, Overfitting, Transfer Learning. "
        "Do not include filler words like umm, uh, ah."
    )
    whisper_options = {
        "fp16": False,
        "language": "en",
        "initial_prompt": domain_hint,
    }

    try:
        transcription = model.transcribe(audio_path, **whisper_options)
        return transcription["text"].strip()
    except Exception as e:
        print(f" Transkripsi Error: {e}")
        return ""

# Filler words/phrases to drop (extend as needed).
# "uhhuh" is the form "uh-huh" takes once punctuation has been stripped.
_FILLER_PHRASES = (
    "um", "uh", "uhh", "ah", "er", "hmm",
    "mhm", "uh-huh", "uhhuh", "okay",
    "you know", "i mean", "kind of", "sort of",
    "so", "like", "yeah", "right",
)

# One alternation, longest phrase first, so a compound filler such as
# "uh-huh" is consumed as a whole instead of being split by its "uh" prefix.
_FILLER_RE = re.compile(
    r"\b(?:"
    + "|".join(re.escape(p) for p in sorted(_FILLER_PHRASES, key=len, reverse=True))
    + r")\b"
)


def remove_fillers(text):
    """Lower-case *text* and strip filler words from it.

    Args:
        text: Text to clean; None or an empty string yields "".

    Returns:
        The lower-cased text without filler words, with runs of whitespace
        collapsed to single spaces and no leading/trailing whitespace.
    """
    if not text:
        return ""

    without_fillers = _FILLER_RE.sub("", text.lower())

    # Collapse the gaps left behind by the removed words.
    return re.sub(r'\s+', ' ', without_fillers).strip()

def calculate_metrics(reference_text, hypothesis_text):
    """Compute word error rate and accuracy of a transcript against its reference.

    Both texts are lower-cased, stripped of punctuation and filler words
    before scoring.

    Args:
        reference_text: Ground-truth transcript.
        hypothesis_text: Transcript produced by the speech model.

    Returns:
        A dict with "wer" (float) and "accuracy" (percentage, rounded to two
        decimals, never below 0). When either text is missing, blank, or
        empty after cleaning, the worst case {"wer": 1.0, "accuracy": 0.0}
        is returned.
    """
    def _worst_case():
        return {"wer": 1.0, "accuracy": 0.0}

    if not reference_text or not hypothesis_text:
        return _worst_case()
    # Whitespace-only text is as good as missing.
    if not reference_text.strip() or not hypothesis_text.strip():
        return _worst_case()

    # Basic normalisation: case, punctuation, spacing.
    transformation = jiwer.Compose([
        jiwer.ToLowerCase(),
        jiwer.RemovePunctuation(),
        jiwer.RemoveMultipleSpaces(),
        jiwer.Strip(),
    ])

    ref_basic = transformation(reference_text)
    hyp_basic = transformation(hypothesis_text)

    # Drop filler words.
    ref_clean = remove_fillers(ref_basic)
    hyp_clean = remove_fillers(hyp_basic)

    # Cleaning can leave nothing behind (text made only of punctuation or
    # fillers); an empty reference cannot be scored, so treat it like the
    # missing-text case instead of letting the scorer raise.
    if not ref_clean or not hyp_clean:
        return _worst_case()

    wer_score = jiwer.wer(ref_clean, hyp_clean)
    # WER can exceed 1 when the hypothesis has many insertions; clamp at 0 %.
    accuracy = max(0, 1 - wer_score) * 100

    return {"wer": wer_score, "accuracy": round(accuracy, 2)}

## Data Processing Pipeline (ETL)

In [None]:
TRANSCRIPT_DIR = os.path.join(BASE_DIR, "Transkrip_Manual")

# --- Audio extraction (video -> audio) ---
# Every container format the project accepts; a shorter list here would
# silently leave .mov/.mkv recordings unconverted.
SUPPORTED_VIDEO_EXTENSIONS = ('.mp4', '.webm', '.avi', '.mov', '.mkv')

# The audio writer and the os.listdir() call below both need this folder.
os.makedirs(AUDIO_OUTPUT_DIR, exist_ok=True)

if os.path.isdir(VIDEO_INPUT_DIR):
    video_files = sorted(
        f for f in os.listdir(VIDEO_INPUT_DIR)
        if f.lower().endswith(SUPPORTED_VIDEO_EXTENSIONS)
    )
else:
    # Audio files that already exist can still be transcribed.
    video_files = []
    print(f" Folder Video TIDAK ditemukan di: {VIDEO_INPUT_DIR}")
print(f"Memulai pemrosesan untuk {len(video_files)} file video...")

failed_conversions = []
for video in tqdm(video_files, desc="Converting Videos"):
    v_path = os.path.join(VIDEO_INPUT_DIR, video)
    a_path = os.path.join(AUDIO_OUTPUT_DIR, os.path.splitext(video)[0] + ".wav")

    # Skip videos whose audio was already extracted in an earlier run.
    if not os.path.exists(a_path):
        if not convert_video_to_audio(v_path, a_path):
            failed_conversions.append(video)

if failed_conversions:
    print(f"Gagal mengonversi {len(failed_conversions)} video: {failed_conversions}")

# Collect every audio file in the folder: fresh conversions as well as files
# that were already there. Sorted so runs are reproducible.
all_audio_files = sorted(
    os.path.join(AUDIO_OUTPUT_DIR, f)
    for f in os.listdir(AUDIO_OUTPUT_DIR)
    if f.lower().endswith('.wav')
)
print(f"Total file audio yang siap diproses: {len(all_audio_files)} file")

# --- Transcription & evaluation ---
processing_results = []
total_accuracy = 0
count_evaluated = 0

for audio_path in tqdm(all_audio_files, desc="AI Transcribing"):
    filename = os.path.basename(audio_path)
    base_name = os.path.splitext(filename)[0]

    pred_text = transcribe_audio(audio_path)

    # Defaults for recordings that have no answer key.
    metrics = {"accuracy": 0.0}
    truth_text = "N/A"

    txt_path = os.path.join(TRANSCRIPT_DIR, base_name + ".txt")

    if os.path.exists(txt_path):
        try:
            # utf-8-sig also reads plain UTF-8 and drops a leading BOM, which
            # would otherwise stick to the first word and count as an error.
            with open(txt_path, 'r', encoding='utf-8-sig') as f:
                truth_text = f.read().strip()
            metrics = calculate_metrics(truth_text, pred_text)
        except Exception as e:
            # Keep going with the other files, but say why this one was skipped.
            print(f" Evaluasi dilewati untuk {filename}: {e}")
        else:
            total_accuracy += metrics["accuracy"]
            count_evaluated += 1

    # Store the result.
    processing_results.append({
        "filename": filename,
        "prediction": pred_text,
        "ground_truth": truth_text[:100],
        "accuracy": metrics["accuracy"]
    })

# --- Summary report ---
print("-" * 50)
# Explicit columns keep the column selection at the end valid even when no
# audio file was processed.
df_results = pd.DataFrame(
    processing_results,
    columns=["filename", "prediction", "ground_truth", "accuracy"],
)

if count_evaluated > 0:
    avg_accuracy = total_accuracy / count_evaluated
else:
    avg_accuracy = 0.0

# Final statistics.
print(f"Total Data Diproses       : {len(processing_results)}")
print(f"Data dengan Kunci Jawaban : {count_evaluated}")
print(f"Rata-rata Akurasi         : {avg_accuracy:.2f}%")

# Check against the project's success target.
if avg_accuracy >= 90:
    print("STATUS: TERCAPAI")
else:
    print("STATUS: BELUM TERCAPAI")

df_results[["filename", "accuracy"]]

# AI-Powered Assessment Engine (LLM Reasoning) dan Final Reporting & Export

In [None]:
# Scoring model (LLM) configuration.
# Loads Google Flan-T5 Large through a transformers text2text-generation
# pipeline; it is used to produce a score and a reason for each answer.
# NOTE(review): by the transformers pipeline convention device=0 should select
# the first CUDA GPU and device=-1 the CPU — confirm for the installed version.
device = 0 if torch.cuda.is_available() else -1
assessor_llm = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
    device=device,
    # Pipeline-level default; the scoring call passes its own max_length.
    max_length=512
)
print(f"Model LLM berhasil dimuat pada device: {device}")

# Question knowledge base: question id -> question text.
# Gives the LLM the question as context when it scores an answer.
QUESTION_DB = {
    1: "Share specific challenges you faced in certification and how you overcame them.",
    2: "Describe your experience with transfer learning in TensorFlow.",
    3: "Describe a complex TensorFlow model you built and how you ensured accuracy.",
    4: "Explain how to implement dropout and its effect on training.",
    5: "Describe the process of building a CNN for image classification.",
    6: "Tell us about yourself and why you want to become an AI Engineer.",
    7: "What AI/ML tools or skills have you learned so far?",
    8: "Explain what Machine Learning is in simple words.",
    9: "How do you solve problems when your model does not work well?",
    10: "What personal qualities do you have to support you as an AI Engineer?",
    11: "Why did you choose architecture as your field?",
    12: "What architectural software are you familiar with?",
    13: "How do you approach designing a building or space?",
    14: "How do you handle design criticism or multiple revisions?",
    15: "Why should we select you for this position?",
    16: "Walk me through your resume and tell me about yourself.",
    17: "Tell me about the most challenging technical problem you've faced.",
    18: "Why are you specifically interested in our company (Traveloka)?",
    19: "Tell me about a time you had a significant disagreement with a colleague.",
    20: "Explain Transfer Learning to a non-technical person."
}

# Main scoring logic
def _parse_llm_verdict(raw_output):
    """Split raw model output into an (unclamped score, reason) pair."""
    if "|" in raw_output:
        score_part, reason = raw_output.split("|", 1)
        found = re.search(r'\d+', score_part)
        # Fall back to 2 when the part before "|" carries no number.
        return (int(found.group()) if found else 2), reason.strip()

    # No separator: still honour a score the model put at the start
    # ("3", "3. Good answer", "Score: 3 ...") instead of discarding it.
    leading = re.match(r'\s*(?:score\s*[:=]?\s*)?([1-4])\b', raw_output, re.IGNORECASE)
    if leading:
        return int(leading.group(1)), raw_output

    # Default when the output matches no expected format.
    return 3, raw_output


def assess_with_llm(answer_text, question_id):
    """Score one interview answer with the assessor LLM.

    Args:
        answer_text: Transcript of the candidate's answer.
        question_id: Key into QUESTION_DB; unknown ids fall back to a
            generic question text.

    Returns:
        A (score, reason) tuple. The score is 0 when the answer is missing or
        shorter than 10 characters (ignoring surrounding whitespace),
        otherwise an integer clamped to 1-4. When inference fails, the error
        is printed and (2, fallback message) is returned.
    """
    # Reject answers that are too short to evaluate.
    if not answer_text or len(answer_text.strip()) < 10:
        return 0, "Candidate did not provide a meaningful answer."

    question_text = QUESTION_DB.get(question_id, "General interview question")

    # Prompt instructing the LLM to act as a recruiter.
    prompt = f"""
    You are a Senior Technical Recruiter. Evaluate this interview answer.

    Question: "{question_text}"
    Candidate Answer: "{answer_text}"

    Task:
    1. Give a Score (1-4) based on depth, clarity, and technical correctness.
    2. Write a short Reason (max 1 sentence).

    Rubric:
    1: Poor/Irrelevant.
    2: Basic/General.
    3: Good/Specific.
    4: Excellent/Detailed.

    Output Format: Score | Reason
    """

    try:
        # Run model inference (greedy decoding).
        output = assessor_llm(prompt, max_length=128, do_sample=False)[0]['generated_text']

        score, reason = _parse_llm_verdict(output)

        # Keep the score inside the rubric range.
        return max(1, min(4, score)), reason

    except Exception as e:
        print(f"Gagal menilai pertanyaan ID {question_id}: {e}")
        return 2, "AI could not process this specific answer."

def extract_id_from_filename(filename):
    """Return the first run of digits in *filename* as an int (used for sorting).

    Falls back to 999 when the name contains no digits.
    """
    first_number = re.search(r'\d+', filename)
    if first_number is None:
        return 999
    return int(first_number.group())

# Final report generation
def generate_final_report(results_data, *, project_score=100, candidate=None):
    """Score every transcribed answer and assemble the final report payload.

    Args:
        results_data: List of dicts with at least "filename" and
            "prediction" keys (one per transcribed answer).
        project_score: Project score (0-100) combined with the interview
            score; defaults to 100.
        candidate: Optional dict describing the candidate ("name", "email",
            "photoUrl"). When omitted, the built-in sample profile is used.

    Returns:
        A JSON-serialisable dict shaped like the backend payload, holding a
        single review in data.pastReviews with per-question scores, the
        interview/total scores and the PASSED / NEED REVISION decision.
    """
    # Single source of truth for the rubric ceiling: used both in the payload
    # and when normalising the interview score.
    max_interview_score = 4

    if candidate is None:
        candidate_profile = {
            "name": "Hafiz Putra Mahesta",
            "email": "hafiz123@gmail.com",
            "photoUrl": "https://path/to/photo.png"
        }
    else:
        # Copy so the caller's dict is not shared with the report.
        candidate_profile = dict(candidate)

    # JSON structure follows the backend payload.
    final_output = {
        "success": True,
        "data": {
            "id": 131,
            "candidate": candidate_profile,
            "certification": {
                "normalType": "DEV_CERTIFICATION_MACHINE_LEARNING",
                "status": "FINISHED"
            },
            "pastReviews": []
        }
    }

    # Holds the review produced by this run.
    current_review = {
        "assessorProfile": {
            "id": 47,
            "name": "XXX",
            "photoUrl": "XXX"
        },
        "decision": "PENDING",
        "reviewedAt": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "scoresOverview": {"project": project_score, "interview": 0, "total": 0},
        "reviewChecklistResult": {
            "project": [],
            "interviews": {"minScore": 0, "maxScore": max_interview_score, "scores": []}
        },
        "notes": "Automated assessment utilizing OpenAI Whisper for transcription and Google Flan-T5 for reasoning."
    }

    total_score = 0
    count = 0

    # Resolve each question id once, then order the answers by it.
    # sorted() is stable, so answers sharing an id keep their input order.
    identified_results = sorted(
        ((extract_id_from_filename(item['filename']), item) for item in results_data),
        key=lambda pair: pair[0],
    )

    print(f"Memulai penilaian otomatis untuk {len(identified_results)} jawaban...")

    interview_scores = current_review["reviewChecklistResult"]["interviews"]["scores"]

    for q_id, item in tqdm(identified_results, desc="Penilaian AI"):
        # Score the answer with the LLM.
        score, reason = assess_with_llm(item['prediction'], q_id)

        interview_scores.append({
            "id": q_id,
            "score": score,
            "reason": reason,
            "transcript_preview": item['prediction']
        })

        # Answers whose id is outside 1-20 (including the 999 "no id"
        # fallback) are listed in the report but excluded from the average.
        if 1 <= q_id <= 20:
            total_score += score
            count += 1

    # Interview score as a percentage of the maximum achievable points.
    if count > 0:
        interview_score = (total_score / (count * max_interview_score)) * 100
    else:
        interview_score = 0

    current_review["scoresOverview"]["interview"] = round(interview_score, 2)

    # Combined score: plain average of project and interview scores.
    total_final = (project_score + interview_score) / 2
    current_review["scoresOverview"]["total"] = round(total_final, 2)

    # Final decision.
    current_review["decision"] = "PASSED" if total_final >= 75 else "NEED REVISION"

    # Attach the review to the main structure.
    final_output["data"]["pastReviews"].append(current_review)

    return final_output

# Run the assessment and export the result.
# Proceed only when the transcription results exist and are non-empty.
if not locals().get('processing_results'):
    print("Data transkripsi tidak ditemukan. Pastikan proses ETL sudah dijalankan.")
else:
    json_report = generate_final_report(processing_results)

    # Write the report next to the dataset as JSON.
    output_path = os.path.join(BASE_DIR, "final_assessment_result.json")
    with open(output_path, "w") as f:
        json.dump(json_report, f, indent=2)

    print(f"Laporan berhasil dibuat dan disimpan di: {output_path}")