# FastAPI upload server (payload_video.ipynb)

Notebook ini menyediakan server FastAPI yang menerima upload video (multipart) di `/upload` dan menerima JSON payload di `/upload`.

Langkah eksekusi:
1. Jalankan cell instalasi dependensi
2. Jalankan cell setup direktori
3. Jalankan cell definisi server
4. Jalankan cell start server (ngrok akan dicoba jika tersedia)

Hasil: file yang diupload akan disimpan di folder `uploads/` dan payload JSON yang dikirim ke `/upload` akan disimpan di `received_payloads/`. Video akan diproses dengan Whisper untuk speech-to-text.

In [141]:
#kalo pake colab jangan lupa install dulu di terminal
#!pip install --quiet numpy==1.26.4
#!pip install --quiet --upgrade torch torchaudio faster-whisper


#kalo lokal download ffmpeg nya
#https://github.com/GyanD/codexffmpeg/releases/download/2025-11-27-git-61b034a47c/ffmpeg-2025-11-27-git-61b034a47c-full_build.zip
#simpen di c

In [142]:
import os
os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\bin"

# ============================================================================
# üîß CELL 1: INSTALL SAFE DEPENDENCIES (FIXED - NO CONFLICTS!)
# ============================================================================

# ‚úÖ TIER 0: JUPYTER WIDGETS (fixes tqdm warning)
!pip install --quiet ipywidgets jupyter
# ‚úÖ TIER 1: AMAN (Tidak touch numpy)
!pip install --quiet fastapi uvicorn nest-asyncio pyngrok python-multipart
!pip install --quiet tqdm
!pip install --quiet imageio-ffmpeg
!pip install --quiet deepl

# ‚úÖ TIER 2: AMAN (Pure torch-based, no numpy dependency)
#!pip install --quiet torch torchaudio
!pip install --quiet silero-vad

# ‚úÖ TIER 3: AMAN (Minimal numpy, tidak upgrade)
!pip install --quiet pydub
!pip install --quiet soundfile
!pip install --quiet scipy
!pip install --quiet scikit-learn

# ‚úÖ TIER 4: AMAN (Cloud-based, no local deps)
#!pip install --quiet faster-whisper
!pip install --quiet huggingface-hub

# ‚úÖ TIER 5: MEDIAPIPE (sudah include opencv internally!)
!pip install --quiet mediapipe
# ‚úÖ TIER 6: TORCHCODEC (video codec support)
!pip install --quiet torchcodec
!pip install --quiet librosa

print('\n‚úÖ All safe packages installed')
print('   ‚úÖ No numpy version conflicts')
print('   ‚úÖ Jupyter widgets installed (fixes tqdm warning)')
print('   ‚úÖ FFmpeg required for audio - verify with next cell')


[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new 


‚úÖ All safe packages installed
   ‚úÖ No numpy version conflicts
   ‚úÖ FFmpeg required for audio - verify with next cell



[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


<b><h2> Import Library

In [143]:
# ==========================
# Standard Library
# ==========================
import asyncio
import gc
import getpass
import hashlib
import json
import json as json_module
import os
import random
import re
import shutil
import subprocess
import sys
import tempfile
import threading
import threading as th
import time
import traceback
import uuid
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone
from typing import List
from urllib.parse import urlparse
import urllib.request
import torch
import torchaudio
from silero_vad import load_silero_vad
import numpy as np
from pydub import AudioSegment
from pydub.silence import detect_nonsilent

# ==========================
# Third-Party Libraries
# ==========================
import deepl
import nest_asyncio
import torch
import uvicorn
from faster_whisper import WhisperModel
from huggingface_hub import InferenceClient
from pyngrok import conf, ngrok
from tqdm import tqdm
import cv2
import mediapipe as mp

# ==========================
# FastAPI & Middleware
# ==========================
from fastapi import (
    BackgroundTasks,
    FastAPI,
    File,
    Form,
    HTTPException,
    Request,
    UploadFile
)
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles

<b><h2> Siapkan direktori untuk upload dan transcription

In [144]:
# Siapkan direktori untuk upload dan transcription
ROOT_DIR = os.getcwd()
UPLOAD_DIR = os.path.join(ROOT_DIR, 'uploads')
TRANSCRIPTION_DIR = os.path.join(ROOT_DIR, 'transcriptions')
AUDIO_DIR = os.path.join(ROOT_DIR, 'audio')
RESULTS_DIR = os.path.join(ROOT_DIR, 'results')  # NEW: hasil assessment
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(TRANSCRIPTION_DIR, exist_ok=True)
os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

print('üìÅ Directories:')
print(f'   Upload: {UPLOAD_DIR}')
print(f'   Transcription: {TRANSCRIPTION_DIR}')
print(f'   AUDIO: {AUDIO_DIR}')
print(f'   Results: {RESULTS_DIR}')

# Check for GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8"

print(f'\nüéØ Device Configuration:')
print(f'   Device: {device.upper()}')
print(f'   Compute Type: {compute_type}')
if device == "cuda":
    print(f'   GPU: {torch.cuda.get_device_name(0)}')
else:
    print('   Note: Using CPU (GPU recommended for faster processing)')

# DeepL Configuration
DEEPL_API_KEY = "02a88edf-4fcb-4786-ba3d-a137fb143760:fx"

print('\nüåê Translation Configuration:')
print(f'   DeepL API: {"Configured" if DEEPL_API_KEY != "YOUR_DEEPL_API_KEY_HERE" else "‚ö†Ô∏è  NOT CONFIGURED - Set DEEPL_API_KEY"}')

üìÅ Directories:
   Upload: d:\Interview_Assesment_System-ngrok-raifal\uploads
   Transcription: d:\Interview_Assesment_System-ngrok-raifal\transcriptions
   AUDIO: d:\Interview_Assesment_System-ngrok-raifal\audio
   Results: d:\Interview_Assesment_System-ngrok-raifal\results

üéØ Device Configuration:
   Device: CPU
   Compute Type: int8
   Note: Using CPU (GPU recommended for faster processing)

üåê Translation Configuration:
   DeepL API: Configured


In [145]:
app = FastAPI(title='AI Interview Assessment System')

app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
    expose_headers=['*'],
    max_age=3600,
)

# Mount static folders
app.mount('/uploads', StaticFiles(directory=UPLOAD_DIR), name='uploads')
app.mount('/transcriptions', StaticFiles(directory=TRANSCRIPTION_DIR), name='transcriptions')
app.mount('/results', StaticFiles(directory=RESULTS_DIR), name='results')

In [146]:
# Background processing
executor = ThreadPoolExecutor(max_workers=2)
processing_status = {}
processing_lock = th.Lock()

# HELPER FUNCTIONS - ONLY ONE INSTANCE EACH

def get_local_file_path(url):
    """Extract local file path from URL if it's a local upload"""
    try:
        parsed = urlparse(url)
        if '/uploads/' in parsed.path:
            filename = parsed.path.split('/uploads/')[-1]
            local_path = os.path.join(UPLOAD_DIR, filename)
            if os.path.exists(local_path):
                return local_path
    except Exception as e:
        print(f'Error parsing URL: {e}')
    return None

<b><h2> **Initialize** Whisper Model

In [147]:
# Load faster-whisper model with BEST ACCURACY settings
print('\nüì• Loading Whisper model...')
print('‚ÑπÔ∏è  Using faster-whisper "large-v3" model')
print('   This is the MOST ACCURATE model available')
print('   Speed: 4-5x faster than openai-whisper')
print('   Accuracy: ~98% for clear English speech')
print('   First run will download ~3GB model...\n')

# Detect device
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8"

print(f'üéØ Configuration:')
print(f'   Device: {device.upper()}')
print(f'   Compute Type: {compute_type}')

# Load model with best accuracy settings
whisper_model = WhisperModel(
    "large-v3",
    device=device,
    compute_type=compute_type,
    cpu_threads=4,
    num_workers=1
)

print('‚úÖ Whisper model loaded successfully\n')


üì• Loading Whisper model...
‚ÑπÔ∏è  Using faster-whisper "large-v3" model
   This is the MOST ACCURATE model available
   Speed: 4-5x faster than openai-whisper
   Accuracy: ~98% for clear English speech
   First run will download ~3GB model...

üéØ Configuration:
   Device: CPU
   Compute Type: int8
‚úÖ Whisper model loaded successfully



<b><h2> Initialize DeepL translator

In [148]:
# Initialize DeepL translator
translator = None
if DEEPL_API_KEY and DEEPL_API_KEY != "YOUR_DEEPL_API_KEY_HERE":
    try:
        translator = deepl.Translator(DEEPL_API_KEY)
        print('‚úÖ DeepL translator initialized successfully\n')
    except Exception as e:
        print(f'‚ö†Ô∏è  DeepL initialization failed: {e}')
        print('   Translation to Indonesian will be skipped\n')
else:
    print('‚ö†Ô∏è  DeepL API key not configured')
    print('   Translation to Indonesian will be skipped\n')

‚úÖ DeepL translator initialized successfully



<b><h2> Fungsi Cheating Detector

In [149]:
def perform_speaker_diarization_silero(video_path):
    """
    Detect multiple speakers using Silero VAD (Voice Activity Detection)
    FIXED: Better algorithm to distinguish between natural pauses vs multiple speakers
    """
    try:
        print('   üé§ Performing speaker diarization (Silero VAD)...')
        # Load Silero VAD model
        try:
            model = load_silero_vad()
            print('   ‚îÇ ‚úÖ Silero VAD model loaded')
        except Exception as e:
            print(f'   ‚îÇ ‚ö†Ô∏è  Could not load Silero VAD: {str(e)[:50]}')
            return {
                'is_single_speaker': True,
                'speaker_count': 1,
                'duration': 0,
                'method': 'silero_vad_unavailable',
                'error': str(e)
            }

        # Try to load audio
        try:
            print('   ‚îÇ Attempting to load audio...')
            waveform, sample_rate = torchaudio.load(video_path)

            if waveform.shape[0] > 1:
                waveform = waveform.mean(dim=0, keepdim=True)

            if sample_rate != 16000:
                resampler = torchaudio.transforms.Resample(sample_rate, 16000)
                waveform = resampler(waveform)
                sample_rate = 16000

            print(f'   ‚îÇ ‚úÖ Audio loaded: {waveform.shape[0]} channels @ {sample_rate}Hz')

        except Exception as e:
            print(f'   ‚îÇ ‚ö†Ô∏è  torchaudio load failed: {str(e)[:50]}')

            # Fallback: Use pydub + ffmpeg
            try:
                print('   ‚îÇ Fallback: Using pydub to extract audio...')

                audio = AudioSegment.from_file(video_path)
                samples = np.array(audio.get_array_of_samples(), dtype=np.float32)

                if audio.channels == 2:
                    samples = samples.reshape((-1, 2))
                    samples = samples.mean(axis=1)

                samples = samples / 32768.0
                waveform = torch.from_numpy(samples).unsqueeze(0)
                sample_rate = audio.frame_rate

                if sample_rate != 16000:
                    resampler = torchaudio.transforms.Resample(sample_rate, 16000)
                    waveform = resampler(waveform)
                    sample_rate = 16000

                print(f'   ‚îÇ ‚úÖ Audio extracted via pydub: {waveform.shape[0]} channels @ {sample_rate}Hz')

            except Exception as e2:
                print(f'   ‚îÇ ‚ö†Ô∏è  All audio loading methods failed')
                return {
                    'is_single_speaker': True,
                    'speaker_count': 1,
                    'duration': 0,
                    'method': 'audio_loading_failed',
                    'error': f'{str(e)[:30]} | {str(e2)[:30]}'
                }

        duration_seconds = waveform.shape[1] / sample_rate
        print(f'   ‚îÇ ‚ÑπÔ∏è  Audio duration: {duration_seconds:.1f}s')

        # Apply Silero VAD
        print('   ‚îÇ Analyzing speech patterns...')

        CHUNK_SIZE = int(sample_rate * 0.032)  # 32ms chunks
        chunks = waveform.squeeze(0).split(CHUNK_SIZE)

        speech_segments = []  # List of (start_idx, end_idx) tuples
        current_speech_start = None

        for i, chunk in enumerate(chunks):
            if len(chunk) < CHUNK_SIZE:
                chunk = torch.nn.functional.pad(chunk, (0, CHUNK_SIZE - len(chunk)))

            try:
                speech_prob = model(chunk.unsqueeze(0), sample_rate)
                is_speech = speech_prob > 0.5

                if is_speech and current_speech_start is None:
                    # Start of speech segment
                    current_speech_start = i
                elif not is_speech and current_speech_start is not None:
                    # End of speech segment
                    speech_segments.append((current_speech_start, i))
                    current_speech_start = None
            except:
                pass

        # Close last segment if still open
        if current_speech_start is not None:
            speech_segments.append((current_speech_start, len(chunks)))

        print(f'   ‚îÇ ‚ÑπÔ∏è  Detected {len(speech_segments)} speech segments')

        # ‚úÖ FIXED: Better multiple speaker detection logic
        # Key indicators:
        # 1. Number of distinct speech segments (pauses > 2s indicate speaker change)
        # 2. Average segment length (short segments = conversation, long = monologue)
        # 3. Variance in segment lengths (varied = conversation, uniform = single speaker)

        if len(speech_segments) == 0:
            speaker_count = 1
            confidence = 'low'
            print(f'   ‚îÇ    ‚ö†Ô∏è  No speech segments detected')
        else:
            # Calculate segment statistics
            segment_lengths = [(end - start) * 0.032 for start, end in speech_segments]  # in seconds
            avg_segment_length = np.mean(segment_lengths)
            segment_variance = np.var(segment_lengths)

            # Calculate silence gaps between segments
            silence_gaps = []
            for i in range(len(speech_segments) - 1):
                gap = (speech_segments[i+1][0] - speech_segments[i][1]) * 0.032
                silence_gaps.append(gap)

            long_pauses = sum(1 for gap in silence_gaps if gap > 2.0)  # Pauses > 2s

            print(f'   ‚îÇ ‚ÑπÔ∏è  Avg segment: {avg_segment_length:.1f}s | Long pauses: {long_pauses}')

            # ‚úÖ DECISION LOGIC (FIXED)
            # Single speaker indicators:
            # - Few long pauses (natural thinking/breathing)
            # - Relatively uniform segment lengths
            # - Average segment length > 3 seconds

            # Multiple speaker indicators:
            # - Many long pauses (turn-taking)
            # - High variance in segment lengths
            # - Many short segments (back-and-forth conversation)

            if duration_seconds < 30:
                # Short videos: likely single speaker
                speaker_count = 1
                confidence = 'medium'
            elif long_pauses < 5 and avg_segment_length > 3:
                # Few long pauses + long segments = single speaker monologue
                speaker_count = 1
                confidence = 'high'
            elif long_pauses > 15 and avg_segment_length < 2:
                # Many pauses + short segments = conversation
                speaker_count = 2
                confidence = 'high'
            elif len(speech_segments) > 30 and segment_variance > 5:
                # Many varied segments = possible conversation
                speaker_count = 2
                confidence = 'medium'
            else:
                # Default: assume single speaker
                speaker_count = 1
                confidence = 'medium'

            is_single_speaker = (speaker_count == 1)

            print(f'   ‚îÇ ‚úÖ Analysis complete: {speaker_count} speaker(s)')
            print(f'   ‚îÇ    Confidence: {confidence.upper()}')
            print(f'   ‚îÇ    Reasoning: {"Monologue pattern" if speaker_count == 1 else "Conversation pattern"}')

        return {
            'is_single_speaker': is_single_speaker,
            'speaker_count': speaker_count,
            'duration': round(duration_seconds, 2),
            'speech_segments': len(speech_segments),
            'avg_segment_length': round(avg_segment_length, 2) if len(speech_segments) > 0 else 0,
            'long_pauses': long_pauses if len(speech_segments) > 0 else 0,
            'method': 'silero_vad_fixed',
            'confidence': confidence
        }

    except Exception as e:
        print(f'   ‚ö†Ô∏è  Silero VAD error: {str(e)}')
        import traceback
        traceback.print_exc()

        return {
            'is_single_speaker': True,
            'speaker_count': 1,
            'error': str(e),
            'method': 'silero_vad_exception'
        }

In [150]:
def detect_eyes_in_video(video_path, sample_rate=5):
    """Detect eyes using MediaPipe - FIXED for compatibility"""
    try:
        # ‚úÖ OpenCV dari MediaPipe (sudah compatible)
        print('   üëÅÔ∏è  Eye detection analysis...')

        # MediaPipe solutions
        mp_face_detection = mp.solutions.face_detection

        # Open video dengan OpenCV
        cap = cv2.VideoCapture(video_path)

        if not cap.isOpened():
            print('   ‚ö†Ô∏è  Could not open video file')
            return {
                'is_suspicious': False,
                'error': 'Video could not be opened',
                'message': 'Eye detection failed - video read error'
            }

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        if fps == 0 or fps > 120:
            fps = 30  # Fallback

        if total_frames == 0:
            cap.release()
            print('   ‚ö†Ô∏è  Could not determine total frames')
            return {
                'is_suspicious': False,
                'error': 'Could not determine frame count',
                'message': 'Eye detection skipped - frame count unknown'
            }

        frame_count = 0
        eye_detected_frames = 0
        eyes_open_frames = 0
        eyes_closed_frames = 0
        suspicious_frames = 0

        sample_interval = max(1, int(fps / sample_rate))

        print(f'   ‚îÇ FPS: {fps:.1f} | Total Frames: {total_frames} | Interval: {sample_interval}')

        try:
            # ‚úÖ MediaPipe FaceDetection
            with mp_face_detection.FaceDetection(
                model_selection=0,
                min_detection_confidence=0.5
            ) as face_detection:

                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break

                    if frame_count % sample_interval != 0:
                        frame_count += 1
                        continue

                    try:
                        # Convert BGR to RGB
                        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                        # Detect faces
                        results = face_detection.process(rgb_frame)

                        if results.detections:
                            eye_detected_frames += 1

                            for detection in results.detections:
                                bbox = detection.location_data.relative_bounding_box
                                h, w, c = frame.shape

                                # Face position
                                face_center_y = (bbox.ymin + bbox.height) * h

                                # Check if looking down (suspicious)
                                if face_center_y > h * 0.6:
                                    suspicious_frames += 1

                                # Check eyes visibility
                                # MediaPipe detects 6 keypoints (left eye, right eye, nose, mouth, etc)
                                if len(detection.location_data.relative_keypoints) >= 2:
                                    eyes_open_frames += 1
                                else:
                                    eyes_closed_frames += 1

                    except Exception as e:
                        print(f'   ‚îÇ ‚ö†Ô∏è  Frame {frame_count} error: {str(e)[:40]}')
                        continue

                    frame_count += 1

                    # Progress update
                    if frame_count % (sample_interval * 30) == 0 and total_frames > 0:
                        progress = (frame_count / total_frames) * 100
                        print(f'   ‚îÇ ‚è≥ Processing: {progress:.1f}%', end='\r')

        except Exception as e:
            print(f'   ‚ö†Ô∏è  Face detection error: {str(e)}')

        finally:
            cap.release()

        # Calculate statistics
        total_sampled_frames = frame_count
        face_detection_rate = (eye_detected_frames / total_sampled_frames * 100) if total_sampled_frames > 0 else 0
        suspicious_rate = (suspicious_frames / eye_detected_frames * 100) if eye_detected_frames > 0 else 0

        print(f'\n   ‚úÖ Eye Detection Complete:')
        print(f'      Face: {face_detection_rate:.1f}% | Eyes open: {eyes_open_frames} | Eyes closed: {eyes_closed_frames}')

        # Determine if suspicious
        is_suspicious = False
        suspicious_reasons = []

        if face_detection_rate < 50:
            is_suspicious = True
            suspicious_reasons.append("Face not consistently visible")

        if suspicious_rate > 30:
            is_suspicious = True
            suspicious_reasons.append("Frequent downward gaze (reading)")

        if eyes_closed_frames > eyes_open_frames and eyes_open_frames > 0:
            is_suspicious = True
            suspicious_reasons.append("Eyes frequently closed")

        return {
            'face_detection_rate': round(face_detection_rate, 2),
            'eyes_open_frames': eyes_open_frames,
            'eyes_closed_frames': eyes_closed_frames,
            'suspicious_gaze_rate': round(suspicious_rate, 2),
            'is_suspicious': is_suspicious,
            'suspicious_reasons': suspicious_reasons,
            'total_frames_analyzed': total_sampled_frames
        }

    except Exception as e:
        print(f'   ‚ö†Ô∏è  Eye detection error: {str(e)}')
        import traceback
        traceback.print_exc()

        return {
            'is_suspicious': False,
            'error': str(e),
            'message': 'Eye detection failed - using conservative assessment'
        }

print('‚úÖ Eye detection function loaded (Fixed)')

‚úÖ Eye detection function loaded (Fixed)


In [151]:
def advanced_cheating_detection(video_path, transcription_text):
    """‚úÖ FIXED: Proper cheating score calculation with baseline"""
    try:
        print('   üö® Advanced Cheating Detection:')

        cheating_indicators = []
        cheating_score = 100  # ‚úÖ START at 100 (assume clean), DEDUCT for suspicious behavior
        
        confidence_components = {
            'diarization_confidence': 0,
            'diarization_data_quality': 0,
            'eye_detection_confidence': 0,
            'eye_detection_coverage': 0,
            'text_pattern_confidence': 0,
            'text_pattern_diversity': 0,
            'audio_quality_confidence': 0,
            'audio_snr': 0
        }
        
        total_checks = 4

        # ============================================================
        # 1Ô∏è‚É£ DIARIZATION CHECK
        # ============================================================
        print('   ‚îÇ 1Ô∏è‚É£  Speaker Diarization Check')
        diar_result = perform_speaker_diarization_silero(video_path)
        
        if 'confidence' in diar_result:
            conf_map = {'high': 90, 'medium': 70, 'low': 50}
            base_conf = conf_map.get(diar_result['confidence'], 50)
            
            duration = diar_result.get('duration', 0)
            speech_segments = diar_result.get('speech_segments', 0)
            avg_segment_length = diar_result.get('avg_segment_length', 0)
            
            data_quality = 50
            if duration > 10:
                data_quality += 20
            if speech_segments > 5:
                data_quality += 15
            if avg_segment_length > 2:
                data_quality += 15
            
            confidence_components['diarization_confidence'] = int(
                (base_conf * 0.7) + (data_quality * 0.3)
            )
            confidence_components['diarization_data_quality'] = data_quality
            
            print(f'   ‚îÇ    üìä Diarization: {confidence_components["diarization_confidence"]}% (base: {base_conf}, quality: {data_quality})')
        else:
            confidence_components['diarization_confidence'] = 50
            confidence_components['diarization_data_quality'] = 30

        # ‚úÖ DEDUCT score if multiple speakers detected
        if not diar_result.get('is_single_speaker', True):
            cheating_indicators.append(
                f"Multiple speakers detected ({diar_result.get('speaker_count', 2)} speakers)"
            )
            cheating_score -= 40  # ‚úÖ DEDUCT from 100
            print(f'   ‚îÇ    ‚ö†Ô∏è  Multiple speakers: {diar_result.get("speaker_count", 2)} (-40 points)')
        else:
            print(f'   ‚îÇ    ‚úÖ Single speaker confirmed')

        # ============================================================
        # 2Ô∏è‚É£ EYE DETECTION CHECK
        # ============================================================
        print('   ‚îÇ 2Ô∏è‚É£  Eye Detection & Gaze Analysis')
        eye_result = detect_eyes_in_video(video_path, sample_rate=5)
        
        if 'face_detection_rate' in eye_result:
            face_rate = eye_result['face_detection_rate']
            frames_analyzed = eye_result.get('total_frames_analyzed', 0)
            eyes_open = eye_result.get('eyes_open_frames', 0)
            
            if face_rate > 90:
                base_eye_conf = 95
            elif face_rate > 75:
                base_eye_conf = 85
            elif face_rate > 60:
                base_eye_conf = 75
            elif face_rate > 45:
                base_eye_conf = 65
            elif face_rate > 30:
                base_eye_conf = 55
            else:
                base_eye_conf = 40
            
            coverage_quality = min(100, (frames_analyzed / 300) * 100)
            
            visibility_quality = 50
            if eyes_open > 100:
                visibility_quality = 90
            elif eyes_open > 50:
                visibility_quality = 75
            elif eyes_open > 20:
                visibility_quality = 60
            
            confidence_components['eye_detection_confidence'] = int(
                (base_eye_conf * 0.5) + (coverage_quality * 0.25) + (visibility_quality * 0.25)
            )
            confidence_components['eye_detection_coverage'] = int(coverage_quality)
            
            print(f'   ‚îÇ    üìä Eye Detection: {confidence_components["eye_detection_confidence"]}% (base: {base_eye_conf}, coverage: {coverage_quality:.0f}, visibility: {visibility_quality})')
        else:
            confidence_components['eye_detection_confidence'] = 50
            confidence_components['eye_detection_coverage'] = 30

        # ‚úÖ DEDUCT score for suspicious eye behavior
        if eye_result.get('is_suspicious'):
            suspicious_count = 0
            
            if eye_result.get('face_detection_rate', 100) < 30:
                cheating_indicators.append("Eye detection: Very low face visibility")
                suspicious_count += 1
                cheating_score -= 15  # ‚úÖ DEDUCT
                print(f'   ‚îÇ    ‚ö†Ô∏è  Low face visibility (-15 points)')
            
            if eye_result.get('suspicious_gaze_rate', 0) > 50:
                cheating_indicators.append("Eye detection: Frequent downward gaze")
                suspicious_count += 1
                cheating_score -= 15  # ‚úÖ DEDUCT
                print(f'   ‚îÇ    ‚ö†Ô∏è  Downward gaze (-15 points)')
            
            if suspicious_count == 0:
                print(f'   ‚îÇ    ‚úÖ Eye gaze analysis normal')
        else:
            print(f'   ‚îÇ    ‚úÖ Eye gaze analysis normal')

        # ============================================================
        # 3Ô∏è‚É£ TEXT PATTERN CHECK
        # ============================================================
        print('   ‚îÇ 3Ô∏è‚É£  Text Pattern Analysis')
        words = transcription_text.split()
        word_count = len(words)
        
        unique_words = len(set(word.lower() for word in words))
        repetition_ratio = (len(words) - unique_words) / len(words) if words else 1
        
        if word_count >= 100:
            base_text_conf = 95
        elif word_count >= 50:
            base_text_conf = 85
        elif word_count >= 30:
            base_text_conf = 75
        elif word_count >= 20:
            base_text_conf = 65
        elif word_count >= 10:
            base_text_conf = 55
        elif word_count >= 5:
            base_text_conf = 45
        else:
            base_text_conf = 30
        
        diversity_score = int((1 - repetition_ratio) * 100)
        
        confidence_components['text_pattern_confidence'] = int(
            (base_text_conf * 0.6) + (diversity_score * 0.4)
        )
        confidence_components['text_pattern_diversity'] = diversity_score
        
        print(f'   ‚îÇ    üìä Text Pattern: {confidence_components["text_pattern_confidence"]}% (base: {base_text_conf}, diversity: {diversity_score})')

        # ‚úÖ DEDUCT score for suspicious text patterns
        if len(words) < 3:
            cheating_indicators.append("Answer extremely short (possible AI generation)")
            cheating_score -= 20  # ‚úÖ DEDUCT
            print(f'   ‚îÇ    ‚ö†Ô∏è  Extremely short answer: {len(words)} words (-20 points)')

        if repetition_ratio > 0.65:
            cheating_indicators.append(f"Very high word repetition ({repetition_ratio*100:.1f}%)")
            cheating_score -= 15  # ‚úÖ DEDUCT
            print(f'   ‚îÇ    ‚ö†Ô∏è  High repetition rate: {repetition_ratio*100:.1f}% (-15 points)')
        else:
            print(f'   ‚îÇ    ‚úÖ Text pattern normal')

        # ============================================================
        # 4Ô∏è‚É£ AUDIO QUALITY CHECK
        # ============================================================
        print('   ‚îÇ 4Ô∏è‚É£  Audio Quality Check')
        try:
            import librosa
            import numpy as np
            y, sr = librosa.load(video_path, sr=16000, duration=30)

            S = librosa.feature.melspectrogram(y=y, sr=sr)
            noise_level = np.mean(S)
            signal_level = np.max(S)
            snr = signal_level / (noise_level + 1e-10)
            
            if snr > 50:
                base_audio_conf = 95
            elif snr > 30:
                base_audio_conf = 85
            elif snr > 20:
                base_audio_conf = 75
            elif snr > 10:
                base_audio_conf = 65
            else:
                base_audio_conf = 50
            
            if noise_level < 20:
                noise_penalty = 0
            elif noise_level < 40:
                noise_penalty = 10
            elif noise_level < 60:
                noise_penalty = 20
            else:
                noise_penalty = 30
            
            final_audio_conf = max(30, base_audio_conf - noise_penalty)
            
            confidence_components['audio_quality_confidence'] = int(final_audio_conf)
            confidence_components['audio_snr'] = int(min(100, snr))
            
            print(f'   ‚îÇ    üìä Audio Quality: {final_audio_conf}% (SNR: {snr:.1f}, noise: {noise_level:.1f})')

            # ‚úÖ DEDUCT score for high noise
            if noise_level > 80:
                cheating_indicators.append(f"Very high background noise detected")
                cheating_score -= 10  # ‚úÖ DEDUCT
                print(f'   ‚îÇ    ‚ö†Ô∏è  High noise level: {noise_level:.1f} (-10 points)')
            else:
                print(f'   ‚îÇ    ‚úÖ Audio quality normal (noise: {noise_level:.1f})')
                
        except Exception as e:
            print(f'   ‚îÇ    ‚ÑπÔ∏è  Audio analysis skipped: {str(e)}')
            fallback_audio = 50 + min(20, word_count // 5)
            confidence_components['audio_quality_confidence'] = fallback_audio
            confidence_components['audio_snr'] = 30

        # ============================================================
        # ‚úÖ FINALIZE CHEATING SCORE (ensure 0-100 range)
        # ============================================================
        cheating_score = max(0, min(100, cheating_score))
        
        # ‚úÖ INVERT score: High score = High cheating risk
        # Current: 100 (clean) ‚Üí Want: 0 (clean)
        cheating_score = 100 - cheating_score  # ‚úÖ INVERT!
        
        # ============================================================
        # CALCULATE OVERALL CONFIDENCE SCORE
        # ============================================================
        weighted_confidence = (
            confidence_components['diarization_confidence'] * 0.25 +
            confidence_components['eye_detection_confidence'] * 0.25 +
            confidence_components['text_pattern_confidence'] * 0.25 +
            confidence_components['audio_quality_confidence'] * 0.25
        )
        
        quality_adjustment = (
            confidence_components['diarization_data_quality'] * 0.1 +
            confidence_components['eye_detection_coverage'] * 0.1 +
            confidence_components['text_pattern_diversity'] * 0.1 +
            confidence_components['audio_snr'] * 0.1
        ) / 4
        
        overall_confidence = min(100, weighted_confidence + quality_adjustment)
        
        if overall_confidence >= 85:
            confidence_level = "Very High"
        elif overall_confidence >= 75:
            confidence_level = "High"
        elif overall_confidence >= 60:
            confidence_level = "Medium"
        elif overall_confidence >= 45:
            confidence_level = "Low"
        else:
            confidence_level = "Very Low"

        # ‚úÖ Determine cheating status (FIXED thresholds)
        is_cheating = cheating_score > 40  # ‚úÖ Lower threshold (was 60)
        cheating_status = "Ya" if is_cheating else "Tidak"

        print(f'   ‚îÇ üìä Final Cheating Score: {cheating_score}/100')
        print(f'   ‚îÇ üéØ Overall Confidence: {overall_confidence:.1f}% ({confidence_level})')
        print(f'   ‚îÇ üö® Cheating Detection: {cheating_status}')

        if cheating_indicators:
            print(f'   ‚îÇ ‚ö†Ô∏è  Indicators ({len(cheating_indicators)}):')
            for indicator in cheating_indicators:
                print(f'   ‚îÇ    - {indicator}')
        else:
            print(f'   ‚îÇ ‚úÖ No suspicious indicators found')

        return {
            'is_cheating': is_cheating,
            'cheating_status': cheating_status,
            'cheating_score': cheating_score,
            'indicators': cheating_indicators,
            'confidence_score': round(overall_confidence, 2),
            'confidence_level': confidence_level,
            'confidence_components': confidence_components,
            'details': {
                'diarization': diar_result,
                'eye_detection': eye_result,
                'word_count': len(words),
                'repetition_ratio': round(repetition_ratio, 3),
                'unique_words': unique_words,
                'diversity_score': diversity_score
            }
        }

    except Exception as e:
        print(f'   ‚ö†Ô∏è  Cheating detection error: {str(e)}')
        return {
            'is_cheating': False,
            'cheating_status': 'Tidak',
            'cheating_score': 0,
            'indicators': [],
            'confidence_score': 0,
            'confidence_level': 'N/A',
            'confidence_components': {},
            'error': str(e)
        }

In [152]:
def calculate_aggregate_cheating_analysis(assessment_results):
    """Enhanced aggregate analysis with MORE LENIENT thresholds"""
    if not assessment_results:
        return {
            "overall_cheating_status": "Tidak",
            "overall_cheating_score": 0,
            "total_videos": 0,
            "videos_flagged": 0,
            "confidence_level": "N/A",
            "average_confidence_score": 0
        }

    total_videos = len(assessment_results)
    cheating_scores = []
    confidence_scores = []
    videos_flagged = 0
    flagged_video_ids = []
    cheating_indicators_summary = {}

    for video in assessment_results:
        result = video.get("result", {})

        cheating_score = result.get("cheating_score", 0)
        cheating_scores.append(cheating_score)
        
        confidence_score = result.get("cheating_confidence_score", 0)
        confidence_scores.append(confidence_score)

        if result.get("cheating_detection") == "Ya":
            videos_flagged += 1
            flagged_video_ids.append(video.get("id"))

            indicators = result.get("cheating_details", {}).get("diarization", {})
            if not indicators.get("is_single_speaker", True):
                cheating_indicators_summary["multiple_speakers"] = \
                    cheating_indicators_summary.get("multiple_speakers", 0) + 1

            eye_data = result.get("cheating_details", {}).get("eye_detection", {})
            if eye_data.get("is_suspicious", False):
                cheating_indicators_summary["suspicious_eye_behavior"] = \
                    cheating_indicators_summary.get("suspicious_eye_behavior", 0) + 1

    avg_cheating_score = sum(cheating_scores) / total_videos if total_videos > 0 else 0
    avg_confidence_score = sum(confidence_scores) / total_videos if total_videos > 0 else 0
    max_cheating_score = max(cheating_scores) if cheating_scores else 0
    flagged_percentage = (videos_flagged / total_videos * 100) if total_videos > 0 else 0

    if avg_confidence_score >= 85:
        overall_confidence_level = "Very High"
    elif avg_confidence_score >= 75:
        overall_confidence_level = "High"
    elif avg_confidence_score >= 60:
        overall_confidence_level = "Medium"
    elif avg_confidence_score >= 45:
        overall_confidence_level = "Low"
    else:
        overall_confidence_level = "Very Low"

    # ‚úÖ FIXED: More lenient decision thresholds
    if flagged_percentage >= 70 or avg_cheating_score > 65 or max_cheating_score > 80:  # Changed from 50/50/70
        overall_status = "Ya"
        confidence = "High"
        risk_level = "HIGH RISK"
        recommendation = "TIDAK LULUS - Strong evidence of cheating"
    elif flagged_percentage >= 50 or avg_cheating_score >= 50:  # Changed from 30/30
        overall_status = "Ya"
        confidence = "Medium"
        risk_level = "MEDIUM RISK"
        recommendation = "PERTIMBANGAN - Suspicious patterns detected"
    else:
        overall_status = "Tidak"
        confidence = "High" if flagged_percentage == 0 else "Medium"
        risk_level = "LOW RISK"
        recommendation = "LULUS - No significant cheating indicators"

    # ‚úÖ FIXED: Don't show specific video IDs in summary (for web display)
    if videos_flagged > 0:
        summary_text = f"{videos_flagged}/{total_videos} video(s) menunjukkan indikasi kecurangan ({flagged_percentage:.1f}%). "
        
        if "multiple_speakers" in cheating_indicators_summary:
            count = cheating_indicators_summary["multiple_speakers"]
            summary_text += f"Terdeteksi multiple speakers di {count} video. "

        if "suspicious_eye_behavior" in cheating_indicators_summary:
            count = cheating_indicators_summary["suspicious_eye_behavior"]
            summary_text += f"Perilaku mata mencurigakan di {count} video. "
    else:
        summary_text = "Tidak ditemukan indikasi kecurangan yang signifikan di semua video."

    return {
        "overall_cheating_status": overall_status,
        "overall_cheating_score": round(avg_cheating_score, 2),
        "average_confidence_score": round(avg_confidence_score, 2),
        "overall_confidence_level": overall_confidence_level,
        "max_cheating_score": max_cheating_score,
        "total_videos": total_videos,
        "videos_flagged": videos_flagged,
        # ‚úÖ REMOVED: Don't include flagged_video_ids for web display
        # "flagged_video_ids": flagged_video_ids,  
        "flagged_percentage": round(flagged_percentage, 2),
        "confidence_level": confidence,
        "risk_level": risk_level,
        "recommendation": recommendation,
        "summary": summary_text,
        "pattern_analysis": {
            "multiple_speakers_count": cheating_indicators_summary.get("multiple_speakers", 0),
            "suspicious_eye_behavior_count": cheating_indicators_summary.get("suspicious_eye_behavior", 0),
            "avg_score_per_video": round(avg_cheating_score, 2)
        }
    }

In [153]:
def get_confidence_improvement_tips(confidence_components):
    """
    Provides actionable tips to improve confidence score
    """
    tips = []
    
    diar = confidence_components.get('diarization_confidence', 0)
    eye = confidence_components.get('eye_detection_confidence', 0)
    text = confidence_components.get('text_pattern_confidence', 0)
    audio = confidence_components.get('audio_quality_confidence', 0)
    
    if diar < 80:
        tips.append({
            'component': 'Speaker Detection',
            'current': f'{diar:.1f}%',
            'tips': [
                '‚úÖ Record in quiet environment',
                '‚úÖ Ensure only one person speaks',
                '‚úÖ Avoid background conversations'
            ]
        })
    
    if eye < 80:
        tips.append({
            'component': 'Eye Detection',
            'current': f'{eye:.1f}%',
            'tips': [
                '‚úÖ Position camera at eye level',
                '‚úÖ Good lighting on face',
                '‚úÖ Look at camera frequently',
                '‚úÖ Avoid reading from notes'
            ]
        })
    
    if text < 80:
        tips.append({
            'component': 'Text Pattern',
            'current': f'{text:.1f}%',
            'tips': [
                '‚úÖ Speak more (aim for 50+ words)',
                '‚úÖ Use varied vocabulary',
                '‚úÖ Avoid repeating same words',
                '‚úÖ Speak clearly and naturally'
            ]
        })
    
    if audio < 80:
        tips.append({
            'component': 'Audio Quality',
            'current': f'{audio:.1f}%',
            'tips': [
                '‚úÖ Use good microphone',
                '‚úÖ Record in quiet room',
                '‚úÖ Reduce background noise',
                '‚úÖ Maintain consistent volume'
            ]
        })
    
    return tips

<b><h2> Fungsi Analisis Non Verbal

In [154]:
def extract_audio_fixed(video_path, audio_output_path="temp_audio.wav"):
    """
    Ekstrak audio menggunakan FFmpeg langsung untuk menghindari masalah MoviePy
    dengan file WebM yang memiliki Duration: N/A
    """
    try:
        print(f"   ‚è≥ Mengekstrak audio dari {video_path}...")

        # Gunakan FFmpeg langsung via subprocess
        command = [
            'ffmpeg',
            '-i', video_path,
            '-vn',  # No video
            '-acodec', 'pcm_s16le',  # Audio codec
            '-ar', '44100',  # Sample rate
            '-ac', '2',  # Audio channels
            '-y',  # Overwrite output
            audio_output_path
        ]

        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )

        os.makedirs(AUDIO_DIR, exist_ok=True)

        if os.path.exists(audio_output_path):
            print(f"   ‚úÖ Audio berhasil diekstrak: {audio_output_path}")
            return audio_output_path
        else:
            raise Exception("Audio extraction failed")

    except Exception as e:
        print(f"   ‚ùå Error ekstraksi audio: {str(e)}")
        return None

In [155]:
def analyze_speech_tempo(audio_path):
    # Load audio
    audio = AudioSegment.from_file(audio_path)

    # Deteksi segmen non-silent (ketika berbicara)
    nonsilent_ranges = detect_nonsilent(
        audio,
        min_silence_len=500,  # Jeda minimal 500ms dianggap pause
        silence_thresh=-40     # Threshold volume untuk silence
    )

    # Hitung durasi total bicara
    total_speaking_time = sum([(end - start) for start, end in nonsilent_ranges]) / 1000
    total_duration = len(audio) / 1000

    # Hitung jumlah jeda
    num_pauses = len(nonsilent_ranges) - 1

    # Hitung speech rate (kata per menit - estimasi)
    # Asumsi: 1 detik bicara ‚âà 2-3 kata
    estimated_words = total_speaking_time * 2.5
    speech_rate = (estimated_words / total_speaking_time) * 60 if total_speaking_time > 0 else 0

    return {
        "total_duration_seconds": round(total_duration, 2),
        "speaking_time_seconds": round(total_speaking_time, 2),
        "silence_time_seconds": round(total_duration - total_speaking_time, 2),
        "number_of_pauses": num_pauses,
        "speech_rate_wpm": round(speech_rate, 2),
        "speaking_ratio": round(total_speaking_time / total_duration, 2)
    }

In [156]:
def analyze_facial_expressions(video_path):
    # Initialize MediaPipe Face Mesh
    mp_face_mesh = mp.solutions.face_mesh

    face_mesh = mp_face_mesh.FaceMesh(
        static_image_mode=False,
        max_num_faces=1,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    )

    cap = cv2.VideoCapture(video_path)

    expression_data = {
        "smile_intensity": [],
        "eyebrow_movement": [],
        "head_pose": []
    }

    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # Convert BGR to RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(rgb_frame)

        if results.multi_face_landmarks:
            landmarks = results.multi_face_landmarks[0]

            # Ekstrak landmark penting
            # Mouth corners (senyum): 61, 291
            # Lips: 13, 14
            left_mouth = landmarks.landmark[61]
            right_mouth = landmarks.landmark[291]
            upper_lip = landmarks.landmark[13]
            lower_lip = landmarks.landmark[14]

            # Hitung intensitas senyum (jarak horizontal mouth corners)
            smile_width = abs(right_mouth.x - left_mouth.x)
            expression_data["smile_intensity"].append(smile_width)

            # Eyebrow position (landmark 70, 300 untuk alis)
            left_eyebrow = landmarks.landmark[70]
            right_eyebrow = landmarks.landmark[300]
            eyebrow_height = (left_eyebrow.y + right_eyebrow.y) / 2
            expression_data["eyebrow_movement"].append(eyebrow_height)

            # Head pose (estimasi dari nose tip: 1)
            nose_tip = landmarks.landmark[1]
            expression_data["head_pose"].append({
                "x": nose_tip.x,
                "y": nose_tip.y,
                "z": nose_tip.z
            })

    cap.release()

    # Analisis statistik
    return {
        "average_smile_intensity": round(np.mean(expression_data["smile_intensity"]), 4),
        "smile_variation": round(np.std(expression_data["smile_intensity"]), 4),
        "eyebrow_movement_range": round(np.std(expression_data["eyebrow_movement"]), 4),
        "total_frames_analyzed": frame_count,
        "face_detected_percentage": round(len(expression_data["smile_intensity"]) / frame_count * 100, 2)
    }

In [157]:
def analyze_eye_movement(video_path):
    mp_face_mesh = mp.solutions.face_mesh
    face_mesh = mp_face_mesh.FaceMesh(
        static_image_mode=False,
        max_num_faces=1,
        refine_landmarks=True  # Penting untuk deteksi iris
    )

    cap = cv2.VideoCapture(video_path)

    eye_data = {
        "gaze_positions": [],
        "blink_count": 0,
        "eye_contact_percentage": 0
    }

    prev_eye_closed = False
    frame_count = 0
    direct_gaze_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(rgb_frame)

        if results.multi_face_landmarks:
            landmarks = results.multi_face_landmarks[0]

            # Eye landmarks (mata kiri: 33, 133; mata kanan: 362, 263)
            left_eye_top = landmarks.landmark[159]
            left_eye_bottom = landmarks.landmark[145]
            right_eye_top = landmarks.landmark[386]
            right_eye_bottom = landmarks.landmark[374]

            # Deteksi kedipan (Eye Aspect Ratio)
            left_eye_height = abs(left_eye_top.y - left_eye_bottom.y)
            right_eye_height = abs(right_eye_top.y - right_eye_bottom.y)
            avg_eye_height = (left_eye_height + right_eye_height) / 2

            # Threshold untuk mata tertutup
            eye_closed = avg_eye_height < 0.01

            if eye_closed and not prev_eye_closed:
                eye_data["blink_count"] += 1

            prev_eye_closed = eye_closed

            # Iris tracking untuk gaze direction
            # Iris center landmarks: 468-473
            if len(landmarks.landmark) > 473:
                left_iris = landmarks.landmark[468]
                right_iris = landmarks.landmark[473]

                # Simpan posisi gaze
                gaze_x = (left_iris.x + right_iris.x) / 2
                gaze_y = (left_iris.y + right_iris.y) / 2
                eye_data["gaze_positions"].append({"x": gaze_x, "y": gaze_y})

                # Deteksi eye contact (gaze ke tengah frame)
                if 0.4 < gaze_x < 0.6 and 0.3 < gaze_y < 0.7:
                    direct_gaze_count += 1

    cap.release()

    if frame_count > 0:
        eye_data["eye_contact_percentage"] = round((direct_gaze_count / frame_count) * 100, 2)
        eye_data["blink_rate_per_minute"] = round((eye_data["blink_count"] / frame_count) * (30 * 60), 2)

    return {
        "total_blinks": eye_data["blink_count"],
        "blink_rate_per_minute": eye_data.get("blink_rate_per_minute", 0),
        "eye_contact_percentage": eye_data["eye_contact_percentage"],
        "gaze_stability": round(np.std([g["x"] for g in eye_data["gaze_positions"]]), 4) if eye_data["gaze_positions"] else 0
    }

In [158]:
def interpret_non_verbal_analysis(analysis_json):
    interpretations = {}

    # --- Speech Analysis ---
    speech = analysis_json.get("speech_analysis", {})
    if speech:
        speaking_ratio = speech.get("speaking_ratio", 0)
        pauses = speech.get("number_of_pauses", 0)
        rate = speech.get("speech_rate_wpm", 0)

        if speaking_ratio > 0.6:
            speech_summary = "Kandidat cukup aktif berbicara."
        else:
            speech_summary = "Kandidat lebih banyak diam dibanding berbicara."

        if pauses > 30:
            speech_summary += " Namun sering berhenti sejenak, mungkin sedang berpikir."
        else:
            speech_summary += " Jeda bicara relatif sedikit."

        if 120 <= rate <= 160:
            speech_summary += " Kecepatan bicara normal."
        elif rate > 160:
            speech_summary += " Bicara agak cepat."
        else:
            speech_summary += " Bicara agak lambat."

        interpretations["speech_analysis"] = speech_summary

    # --- Facial Expression Analysis ---
    facial = analysis_json.get("facial_expression_analysis", {})
    if facial:
        smile_intensity = facial.get("average_smile_intensity", 0)
        eyebrow_range = facial.get("eyebrow_movement_range", 0)

        if smile_intensity < 0.1:
            facial_summary = "Ekspresi wajah minim senyum, terlihat serius."
        else:
            facial_summary = "Sering tersenyum, terlihat ramah."

        if eyebrow_range < 0.01:
            facial_summary += " Gerakan alis minim, ekspresi emosional kurang."
        else:
            facial_summary += " Gerakan alis cukup bervariasi."

        interpretations["facial_expression_analysis"] = facial_summary

    # --- Eye Movement Analysis ---
    eye = analysis_json.get("eye_movement_analysis", {})
    if eye:
        blink_rate = eye.get("blink_rate_per_minute", 0)
        eye_contact = eye.get("eye_contact_percentage", 0)

        if eye_contact > 80:
            eye_summary = "Kontak mata sangat baik."
        else:
            eye_summary = "Kontak mata kurang konsisten."

        if blink_rate > 60:
            eye_summary += " Kedipan cukup sering, mungkin gugup."
        else:
            eye_summary += " Kedipan normal."

        interpretations["eye_movement_analysis"] = eye_summary

    return interpretations


def analyze_interview_video(video_path, audio_path=None):
    print("üé¨ Memulai analisis interview...")

    # Ekstrak audio jika belum ada
    if audio_path is None:
        print("üì§ Mengekstrak audio dari video...")
        filename = os.path.splitext(os.path.basename(video_path))[0]
        audio_path = os.path.join(AUDIO_DIR, f"{filename}.wav")
        extract_audio_fixed(video_path, audio_path)

    # Analisis non verbal
    print("üé§ Analisis tempo bicara...")
    speech_analysis = analyze_speech_tempo(audio_path)

    print("üòä Analisis ekspresi wajah...")
    facial_analysis = analyze_facial_expressions(video_path)

    print("üëÅÔ∏è Analisis gerakan mata...")
    eye_analysis = analyze_eye_movement(video_path)

    # Gabungan hasil
    final_result = {
        "speech_analysis": speech_analysis,
        "facial_expression_analysis": facial_analysis,
        "eye_movement_analysis": eye_analysis,
    }

    # Tambahkan interpretasi
    final_result["interpretation"] = interpret_non_verbal_analysis(final_result)

    return final_result

In [159]:
def analyze_videos_in_batch(video_paths):
    """
    Melakukan analisis non-verbal untuk banyak video.
    Mengembalikan list hasil (setiap item adalah hasil analisis 1 video).
    """

    batch_results = []  # <--- LIST penyimpanan hasil

    for video_path in video_paths:  # <--- LOOP semua video
        print(f"üîç Memproses video: {video_path}")

        result = analyze_interview_video(video_path)

        batch_results.append({
            "video_path": video_path,
            "result": result
        })

    return batch_results

In [160]:
def summarize_non_verbal_batch(assessment_results):
    import numpy as np

    speaking_ratios = []
    pauses = []
    speech_rates = []
    smiles = []
    eyebrows = []
    eye_contacts = []
    blink_rates = []
    
    # ‚úÖ NEW: Collect confidence scores
    confidence_scores = []

    for item in assessment_results:
        nv = item["result"]["non_verbal_analysis"]

        sp = nv["speech_analysis"]
        speaking_ratios.append(sp["speaking_ratio"])
        pauses.append(sp["number_of_pauses"])
        speech_rates.append(sp["speech_rate_wpm"])

        fc = nv["facial_expression_analysis"]
        smiles.append(fc["average_smile_intensity"])
        eyebrows.append(fc["eyebrow_movement_range"])

        ey = nv["eye_movement_analysis"]
        eye_contacts.append(ey["eye_contact_percentage"])
        blink_rates.append(ey["blink_rate_per_minute"])
        
        # ‚úÖ NEW: Collect confidence scores
        conf_score = item["result"].get("non_verbal_confidence_score", 0)
        confidence_scores.append(conf_score)

    # ‚úÖ Calculate average confidence
    avg_confidence = round(np.mean(confidence_scores), 2) if confidence_scores else 0
    
    # ‚úÖ Determine confidence level
    if avg_confidence >= 85:
        confidence_level = "Very High"
    elif avg_confidence >= 75:
        confidence_level = "High"
    elif avg_confidence >= 60:
        confidence_level = "Medium"
    elif avg_confidence >= 45:
        confidence_level = "Low"
    else:
        confidence_level = "Very Low"

    aggregated = {
        "speech_analysis": {
            "avg_speaking_ratio": round(np.mean(speaking_ratios), 3),
            "avg_pauses": round(np.mean(pauses), 2),
            "avg_speech_rate": round(np.mean(speech_rates), 2)
        },
        "facial_expression_analysis": {
            "avg_smile_intensity": round(np.mean(smiles), 4),
            "avg_eyebrow_movement_range": round(np.mean(eyebrows), 4)
        },
        "eye_movement_analysis": {
            "avg_eye_contact": round(np.mean(eye_contacts), 2),
            "avg_blink_rate": round(np.mean(blink_rates), 2)
        },
    }

    # Build summary text (existing code)
    summary_parts = []

    ratio = aggregated["speech_analysis"]["avg_speaking_ratio"]
    pauses_avg = aggregated["speech_analysis"]["avg_pauses"]
    speed = aggregated["speech_analysis"]["avg_speech_rate"]

    if ratio > 0.6:
        summary_parts.append("Kandidat cukup aktif berbicara")
    else:
        summary_parts.append("Kandidat cenderung pasif dalam berbicara")

    if pauses_avg > 30:
        summary_parts.append("dengan jeda bicara yang sering")
    else:
        summary_parts.append("dengan jeda bicara yang jarang")

    if speed > 160:
        summary_parts.append("dan berbicara dengan kecepatan cukup cepat.")
    elif speed < 120:
        summary_parts.append("dan berbicara dengan kecepatan cenderung lambat.")
    else:
        summary_parts.append("dan kecepatan bicara normal.")

    smile = aggregated["facial_expression_analysis"]["avg_smile_intensity"]
    eyebrow = aggregated["facial_expression_analysis"]["avg_eyebrow_movement_range"]

    if smile < 0.1:
        summary_parts.append("Ekspresi wajah terlihat serius dan minim senyum.")
    else:
        summary_parts.append("Ekspresi wajah cukup positif dan sering tersenyum.")

    if eyebrow < 0.01:
        summary_parts.append("Gerakan alis minim, menunjukkan ekspresi emosional yang rendah.")
    else:
        summary_parts.append("Gerakan alis cukup variatif.")

    eye_contact = aggregated["eye_movement_analysis"]["avg_eye_contact"]
    blink = aggregated["eye_movement_analysis"]["avg_blink_rate"]

    if eye_contact > 80:
        summary_parts.append("Kontak mata sangat baik.")
    else:
        summary_parts.append("Kontak mata kurang konsisten.")

    if blink > 60:
        summary_parts.append("Tingkat kedipan cukup tinggi yang dapat menandakan ketegangan.")
    else:
        summary_parts.append("Tingkat kedipan normal.")

    summary_text = " ".join(summary_parts)
    aggregated["summary"] = summary_text

    # ‚úÖ NEW: Return with confidence scores
    return {
        "total_videos": len(assessment_results),
        "aggregated_non_verbal": aggregated,
        "average_confidence_score": avg_confidence,  # ‚úÖ ADDED
        "overall_confidence_level": confidence_level  # ‚úÖ ADDED
    }

In [161]:
def analyze_interview_video_with_confidence(video_path, audio_path=None):
    """
    Analyze interview video with confidence scoring
    Returns analysis + confidence score (0-100)
    """
    print("üé¨ Memulai analisis interview...")

    # Ekstrak audio jika belum ada
    if audio_path is None:
        print("üì§ Mengekstrak audio dari video...")
        filename = os.path.splitext(os.path.basename(video_path))[0]
        audio_path = os.path.join(AUDIO_DIR, f"{filename}.wav")
        audio_extracted = extract_audio_fixed(video_path, audio_path)
        
        if not audio_extracted:
            return {
                'analysis': {
                    'speech_analysis': {},
                    'facial_expression_analysis': {},
                    'eye_movement_analysis': {},
                    'interpretation': {}
                },
                'confidence_score': 0,
                'confidence_level': 'Failed',
                'confidence_components': {}
            }

    # Analisis non verbal
    print("üé§ Analisis tempo bicara...")
    speech_analysis = analyze_speech_tempo(audio_path)

    print("üòä Analisis ekspresi wajah...")
    facial_analysis = analyze_facial_expressions(video_path)

    print("üëÅÔ∏è Analisis gerakan mata...")
    eye_analysis = analyze_eye_movement(video_path)

    # Gabungan hasil
    analysis_result = {
        "speech_analysis": speech_analysis,
        "facial_expression_analysis": facial_analysis,
        "eye_movement_analysis": eye_analysis,
    }

    # Tambahkan interpretasi
    analysis_result["interpretation"] = interpret_non_verbal_analysis(analysis_result)

    # ‚úÖ CALCULATE CONFIDENCE SCORE
    confidence_components = {}

    # 1Ô∏è‚É£ Speech Analysis Confidence
    speaking_ratio = speech_analysis.get('speaking_ratio', 0)
    speech_rate = speech_analysis.get('speech_rate_wpm', 0)
    
    if speaking_ratio > 0.6 and 100 < speech_rate < 180:
        speech_conf = 95
    elif speaking_ratio > 0.4 and 80 < speech_rate < 200:
        speech_conf = 80
    elif speaking_ratio > 0.2:
        speech_conf = 60
    else:
        speech_conf = 40
    
    confidence_components['speech_confidence'] = speech_conf

    # 2Ô∏è‚É£ Facial Analysis Confidence
    face_detected_pct = facial_analysis.get('face_detected_percentage', 0)
    frames_analyzed = facial_analysis.get('total_frames_analyzed', 0)
    
    if face_detected_pct > 80 and frames_analyzed > 100:
        facial_conf = 95
    elif face_detected_pct > 60 and frames_analyzed > 50:
        facial_conf = 80
    elif face_detected_pct > 40 and frames_analyzed > 20:
        facial_conf = 65
    else:
        facial_conf = 50
    
    confidence_components['facial_confidence'] = facial_conf

    # 3Ô∏è‚É£ Eye Movement Confidence
    eye_contact_pct = eye_analysis.get('eye_contact_percentage', 0)
    blink_rate = eye_analysis.get('blink_rate_per_minute', 0)
    
    if eye_contact_pct > 50 and 15 < blink_rate < 40:
        eye_conf = 90
    elif eye_contact_pct > 30 and 10 < blink_rate < 50:
        eye_conf = 75
    elif eye_contact_pct > 10:
        eye_conf = 60
    else:
        eye_conf = 45
    
    confidence_components['eye_confidence'] = eye_conf

    # 4Ô∏è‚É£ Overall Data Quality
    total_duration = speech_analysis.get('total_duration_seconds', 0)
    
    if total_duration > 30:
        duration_conf = 100
    elif total_duration > 15:
        duration_conf = 85
    elif total_duration > 5:
        duration_conf = 70
    else:
        duration_conf = 50
    
    confidence_components['duration_confidence'] = duration_conf

    # ‚úÖ CALCULATE OVERALL CONFIDENCE
    overall_confidence = int(
        (speech_conf * 0.3) +
        (facial_conf * 0.3) +
        (eye_conf * 0.25) +
        (duration_conf * 0.15)
    )

    # Determine confidence level
    if overall_confidence >= 85:
        confidence_level = "Very High"
    elif overall_confidence >= 75:
        confidence_level = "High"
    elif overall_confidence >= 60:
        confidence_level = "Medium"
    elif overall_confidence >= 45:
        confidence_level = "Low"
    else:
        confidence_level = "Very Low"

    print(f'\n‚úÖ Non-Verbal Analysis Complete')
    print(f'   Confidence: {overall_confidence}% ({confidence_level})')
    print(f'   Components: Speech={speech_conf}%, Face={facial_conf}%, Eye={eye_conf}%, Duration={duration_conf}%\n')

    return {
        'analysis': analysis_result,
        'confidence_score': overall_confidence,
        'confidence_level': confidence_level,
        'confidence_components': confidence_components
    }

<b><h2> Pengecekan model analisis non verbal

In [162]:
!find / -type f -name "*.tflite" 2>/dev/null

The system cannot find the path specified.


<b><h2> Fungsi Transkrip Video

In [163]:
def clean_repetitive_text(text, max_repetitions=3):
    """Remove repetitive patterns at the end of transcription"""
    # Remove excessive repetitions (more than max_repetitions)
    words = text.split()
    if len(words) < 10:
        return text

    # Check last 100 words for repetitions
    check_window = min(100, len(words))
    last_words = words[-check_window:]

    # Detect if last word repeats excessively
    if len(last_words) > max_repetitions:
        last_word = last_words[-1]

        # Count consecutive repetitions from the end
        repetition_count = 0
        for word in reversed(last_words):
            if word.lower() == last_word.lower():
                repetition_count += 1
            else:
                break

        # If repetition exceeds threshold, remove them
        if repetition_count > max_repetitions:
            # Keep only max_repetitions of the repeated word
            words = words[:-repetition_count] + [last_word] * max_repetitions
            print(f'   üßπ Cleaned {repetition_count - max_repetitions} repetitive words')

    # Remove common hallucination patterns
    cleaned_text = ' '.join(words)

    # Pattern: word repeated 5+ times in a row
    cleaned_text = re.sub(r'\b(\w+)(?:\s+\1){4,}\b', r'\1', cleaned_text)

    return cleaned_text.strip()

In [164]:
def transcribe_video(video_path):
    """Transcribe video using faster-whisper with MAXIMUM ACCURACY settings"""
    try:
        if not os.path.exists(video_path):
            raise Exception(f"Video file not found: {video_path}")

        if not os.access(video_path, os.R_OK):
            raise Exception(f"Video file is not readable: {video_path}")

        file_size = os.path.getsize(video_path) / (1024 * 1024)
        print(f'üìÅ Video: {os.path.basename(video_path)} ({file_size:.2f} MB)')

        print('üîÑ Starting transcription...')
        start_time = time.time()

        # Dynamic parameters based on file size
        if file_size > 30:
            print('   ‚ö° Large file - using balanced mode')
            beam_size = 3
            best_of = 3
        else:
            beam_size = 5
            best_of = 5

        # Transcribe with improved hallucination prevention
        segments, info = whisper_model.transcribe(
            video_path,
            language="en",
            task="transcribe",
            beam_size=beam_size,
            best_of=best_of,
            patience=2.0,
            length_penalty=1.0,
            repetition_penalty=1.2,  # INCREASED from 1.0 to 1.2
            temperature=0.0,
            compression_ratio_threshold=2.4,
            log_prob_threshold=-1.0,
            no_speech_threshold=0.6,
            condition_on_previous_text=False,  # CHANGED to False to prevent repetition
            initial_prompt="This is a professional interview conversation in clear English. The speaker is answering interview questions.",
            vad_filter=True,
            vad_parameters=dict(
                threshold=0.5,
                min_speech_duration_ms=250,
                max_speech_duration_s=float('inf'),
                min_silence_duration_ms=2000,
                speech_pad_ms=400
            ),
            word_timestamps=False,
            hallucination_silence_threshold=2.0  # CHANGED from None to 2.0
        )

        # Collect segments with progress bar
        print('   üìù Collecting segments...')
        transcription_text = ""
        segments_list = list(segments)

        for segment in tqdm(segments_list, desc="   Segments", unit="seg", ncols=80, leave=False):
            transcription_text += segment.text + " "

        transcription_text = transcription_text.strip()

        if not transcription_text:
            print('   ‚ö†Ô∏è  No speech detected')
            return "[No speech detected in video]"

        # CLEAN REPETITIVE TEXT
        original_length = len(transcription_text)
        transcription_text = clean_repetitive_text(transcription_text, max_repetitions=3)

        if len(transcription_text) < original_length:
            print(f'   üßπ Cleaned: {original_length} ‚Üí {len(transcription_text)} chars')

        total_time = time.time() - start_time
        words = transcription_text.split()

        print(f'   ‚úÖ Completed in {total_time:.1f}s | {len(segments_list)} segments | {len(words)} words')

        # Cleanup

        gc.collect()

        return transcription_text

    except Exception as e:
        print(f'   ‚ùå Error: {str(e)}')
        gc.collect()
        raise Exception(f"Transcription failed: {str(e)}")

<b><h2> Fungsi Translate to Indonesia

In [165]:
def translate_to_indonesian(text):
    """Translate English text to Indonesian using DeepL"""
    if not translator:
        print('   ‚ö†Ô∏è  Translation skipped (no API key)')
        return "[Translation not available]"

    try:
        max_chunk_size = 5000

        if len(text) <= max_chunk_size:
            result = translator.translate_text(text, source_lang="EN", target_lang="ID")
            translated_text = result.text
        else:
            sentences = text.split('. ')
            translated_sentences = []
            current_chunk = ""

            # Progress bar for translation chunks
            for sentence in tqdm(sentences, desc="   Translation", unit="sent", ncols=80, leave=False):
                if len(current_chunk) + len(sentence) < max_chunk_size:
                    current_chunk += sentence + ". "
                else:
                    if current_chunk:
                        result = translator.translate_text(current_chunk.strip(), source_lang="EN", target_lang="ID")
                        translated_sentences.append(result.text)
                    current_chunk = sentence + ". "

            if current_chunk:
                result = translator.translate_text(current_chunk.strip(), source_lang="EN", target_lang="ID")
                translated_sentences.append(result.text)

            translated_text = " ".join(translated_sentences)

        print(f'   ‚úÖ Translation: {len(text)} ‚Üí {len(translated_text)} chars')
        return translated_text

    except Exception as e:
        print(f'   ‚ùå Translation failed: {str(e)}')
        return f"[Translation failed: {str(e)}]"

In [166]:
def translate_to_indonesian_with_confidence(text):
    """Translate English text to Indonesian using DeepL with confidence scoring"""
    if not translator:
        print('   ‚ö†Ô∏è  Translation skipped (no API key)')
        return {
            'translated_text': "[Translation not available]",
            'confidence_score': 0,
            'confidence_level': 'N/A',
            'quality_metrics': {}
        }

    try:
        max_chunk_size = 5000
        translation_start = time.time()

        if len(text) <= max_chunk_size:
            result = translator.translate_text(text, source_lang="EN", target_lang="ID")
            translated_text = result.text
            chunks_processed = 1
        else:
            sentences = text.split('. ')
            translated_sentences = []
            current_chunk = ""
            chunks_processed = 0

            for sentence in tqdm(sentences, desc="   Translation", unit="sent", ncols=80, leave=False):
                if len(current_chunk) + len(sentence) < max_chunk_size:
                    current_chunk += sentence + ". "
                else:
                    if current_chunk:
                        result = translator.translate_text(current_chunk.strip(), source_lang="EN", target_lang="ID")
                        translated_sentences.append(result.text)
                        chunks_processed += 1
                    current_chunk = sentence + ". "

            if current_chunk:
                result = translator.translate_text(current_chunk.strip(), source_lang="EN", target_lang="ID")
                translated_sentences.append(result.text)
                chunks_processed += 1

            translated_text = " ".join(translated_sentences)

        translation_time = time.time() - translation_start

        # ‚úÖ Calculate confidence score based on:
        # 1. Length similarity (source vs target)
        # 2. Processing time (faster = more confident API response)
        # 3. Character count coverage
        
        source_len = len(text)
        target_len = len(translated_text)
        
        # Length ratio (ideal: 0.8-1.2 for EN‚ÜíID)
        length_ratio = target_len / source_len if source_len > 0 else 0
        if 0.8 <= length_ratio <= 1.2:
            length_confidence = 100
        elif 0.6 <= length_ratio <= 1.4:
            length_confidence = 80
        elif 0.4 <= length_ratio <= 1.6:
            length_confidence = 60
        else:
            length_confidence = 40

        # Processing speed confidence
        chars_per_second = source_len / translation_time if translation_time > 0 else 0
        if chars_per_second > 1000:
            speed_confidence = 100
        elif chars_per_second > 500:
            speed_confidence = 90
        elif chars_per_second > 200:
            speed_confidence = 80
        else:
            speed_confidence = 70

        # API reliability (based on successful chunks)
        if chunks_processed == 1:
            api_confidence = 100  # Single chunk = direct API call
        else:
            api_confidence = 95  # Multiple chunks still reliable

        # Coverage (how much of source was translated)
        if target_len > 0:
            coverage_confidence = min(100, (target_len / source_len) * 100)
        else:
            coverage_confidence = 0

        # ‚úÖ Weighted average
        overall_confidence = int(
            (length_confidence * 0.3) +
            (speed_confidence * 0.2) +
            (api_confidence * 0.3) +
            (coverage_confidence * 0.2)
        )

        # Determine confidence level
        if overall_confidence >= 90:
            confidence_level = "Very High"
        elif overall_confidence >= 80:
            confidence_level = "High"
        elif overall_confidence >= 70:
            confidence_level = "Medium"
        elif overall_confidence >= 50:
            confidence_level = "Low"
        else:
            confidence_level = "Very Low"

        print(f'   ‚úÖ Translation: {source_len} ‚Üí {target_len} chars')
        print(f'   üìä Confidence: {overall_confidence}% ({confidence_level})')
        print(f'      Length: {length_confidence}% | Speed: {speed_confidence}% | API: {api_confidence}% | Coverage: {coverage_confidence:.0f}%')

        return {
            'translated_text': translated_text,
            'confidence_score': overall_confidence,
            'confidence_level': confidence_level,
            'quality_metrics': {
                'length_confidence': length_confidence,
                'speed_confidence': speed_confidence,
                'api_confidence': api_confidence,
                'coverage_confidence': int(coverage_confidence),
                'length_ratio': round(length_ratio, 2),
                'chars_per_second': int(chars_per_second),
                'chunks_processed': chunks_processed,
                'translation_time': round(translation_time, 2)
            }
        }

    except Exception as e:
        print(f'   ‚ùå Translation failed: {str(e)}')
        return {
            'translated_text': f"[Translation failed: {str(e)}]",
            'confidence_score': 0,
            'confidence_level': 'Failed',
            'quality_metrics': {'error': str(e)}
        }

<b><h2> Fungsi Pembuatan Dummy data ( sementara )

In [167]:
def generate_dummy_assessment(transcription_text, position_id, transcription_id=None, question=""):
    """Generate dummy assessment data untuk testing - DEPRECATED, use LLM evaluation instead"""
    words = transcription_text.split()
    word_count = len(words)
    char_count = len(transcription_text)

    confidence_score = random.randint(85, 98)
    kualitas_jawaban = random.randint(80, 100)
    relevansi = random.randint(75, 95)
    koherensi = random.randint(70, 90)
    tempo_bicara = random.randint(80, 100)

    total = round((confidence_score + kualitas_jawaban + relevansi + koherensi + tempo_bicara) / 5)

    if total >= 90:
        penilaian_akhir = 5
    elif total >= 80:
        penilaian_akhir = 4
    elif total >= 70:
        penilaian_akhir = 3
    elif total >= 60:
        penilaian_akhir = 2
    else:
        penilaian_akhir = 1

    has_cheating = random.choice([True, False, False, False])

    if has_cheating:
        cheating_detection = "Ya"
        alasan_cheating = random.choice([
            "Terdeteksi adanya manipulasi suara",
            "Terdeteksi multiple speakers",
            "Pola jawaban tidak konsisten",
            "Kecepatan bicara tidak natural"
        ])
    else:
        cheating_detection = "Tidak"
        alasan_cheating = "Tidak ada indikasi kecurangan"

    analisis_options = [
        "Lancar dan tidak mencurigakan",
        "Sedikit gugup namun natural",
        "Sangat percaya diri",
        "Tempo bicara konsisten",
        "Artikulasi jelas"
    ]
    analisis_non_verbal = random.choice(analisis_options)

    if penilaian_akhir >= 4 and not has_cheating:
        keputusan_akhir = "Lulus"
    elif penilaian_akhir >= 3 and not has_cheating:
        keputusan_akhir = "Pertimbangan"
    else:
        keputusan_akhir = "Tidak Lulus"

    return {
        "penilaian": {
            "confidence_score": confidence_score,
            "kualitas_jawaban": kualitas_jawaban,
            "relevansi": relevansi,
            "koherensi": koherensi,
            "tempo_bicara": tempo_bicara,
            "total": total
        },
        "penilaian_akhir": penilaian_akhir,
        "cheating_detection": cheating_detection,
        "alasan_cheating": alasan_cheating,
        "analisis_non_verbal": analisis_non_verbal,
        "keputusan_akhir": keputusan_akhir,
        "transkripsi_en": transcription_text,
        "transkripsi_id": transcription_id,
        "metadata": {
            "word_count": word_count,
            "char_count": char_count,
            "processed_at": datetime.now(timezone.utc).isoformat(),
            "translation_available": transcription_id is not None  # NEW
        }
    }

<b><h2> Initialize HuggingFace

In [None]:
# ‚úÖ HuggingFace API Token
HF_TOKEN = "TOKENTOKENTOKEN"  # Replace with your actual token
os.environ["HF_TOKEN"] = HF_TOKEN


# Initialize Inference Client
print('üì• Initializing HuggingFace Inference API...')
print('‚ÑπÔ∏è  Using meta-llama/Llama-3.1-8B-Instruct via Inference API')
print('   No model download required - uses cloud API')

client = InferenceClient(api_key=HF_TOKEN)

print('‚úÖ Inference API initialized successfully\n')

def evaluate_with_llm(transcription_text: str, question: str, position_id: int):
    """Evaluate interview answer using Llama-3.1-8B-Instruct via Inference API"""
    try:
        # Construct evaluation prompt
        user_message = f"""You are an expert interview evaluator. Analyze the candidate's answer objectively and provide scores.

Question: "{question}"

Candidate's Answer: "{transcription_text}"

Evaluate the answer on these 3 criteria (score 1-100 for each):
1. Quality of answer (clarity, completeness, depth of knowledge)
2. Coherence (logical flow, consistency, structure)
3. Relevance (alignment with the question, staying on topic)

Return ONLY valid JSON in this exact format:
{{
  "kualitas_jawaban": <score 1-100>,
  "koherensi": <score 1-100>,
  "relevansi": <score 1-100>,
  "analysis": "<brief explanation of the 3 scores>"
}}"""

        print(f'‚îÇ ü§ñ Llama-3.1 Inference API Evaluation (3 criteria)...')

        # Call Inference API
        completion = client.chat.completions.create(
            model="meta-llama/Llama-3.1-8B-Instruct",
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert interview evaluator. Always respond with valid JSON only."
                },
                {
                    "role": "user",
                    "content": user_message
                }
            ],
            max_tokens=500,
            temperature=0.7,
        )

        # Extract response
        response = completion.choices[0].message.content.strip()
        print(f'‚îÇ üì® API Response received ({len(response)} chars)')

        # Extract JSON from response
        json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response, re.DOTALL)

        if json_match:
            json_str = json_match.group(0)
            evaluation = json_module.loads(json_str)
        else:
            raise ValueError("No valid JSON found in API response")

        # Validate LLM scores (only 3 criteria)
        required_keys = ['kualitas_jawaban', 'koherensi', 'relevansi']
        for key in required_keys:
            if key not in evaluation:
                raise ValueError(f"Missing required key: {key}")
            # Ensure scores are in valid range
            evaluation[key] = max(1, min(100, int(evaluation[key])))

        # STATIC DUMMY VALUES for tempo_bicara and confidence_score
        evaluation['tempo_bicara'] = 85
        evaluation['confidence_score'] = 82

        print(f'‚îÇ üìä LLM Scores: Quality={evaluation["kualitas_jawaban"]}, Coherence={evaluation["koherensi"]}, Relevance={evaluation["relevansi"]}')
        print(f'‚îÇ üìå Static: Tempo={evaluation["tempo_bicara"]}, Confidence={evaluation["confidence_score"]}')

        # Calculate total from all 5 scores
        total = round((
            evaluation['confidence_score'] +
            evaluation['kualitas_jawaban'] +
            evaluation['relevansi'] +
            evaluation['koherensi'] +
            evaluation['tempo_bicara']
        ) / 5)

        if total >= 90:
            penilaian_akhir = 5
        elif total >= 80:
            penilaian_akhir = 4
        elif total >= 70:
            penilaian_akhir = 3
        elif total >= 60:
            penilaian_akhir = 2
        else:
            penilaian_akhir = 1

        cheating_detected = False
        cheating_reason = "Tidak ada indikasi kecurangan"
        if penilaian_akhir >= 4 and not cheating_detected:
            keputusan_akhir = "Lulus"
        elif penilaian_akhir >= 3 and not cheating_detected:
            keputusan_akhir = "Pertimbangan"
        else:
            keputusan_akhir = "Tidak Lulus"

        print(f'‚îÇ ‚úÖ Total Score: {total}/100 | Rating: {penilaian_akhir}/5 | Decision: {keputusan_akhir}')

        return {
            "scores": evaluation,
            "total": total,
            "penilaian_akhir": penilaian_akhir,
            "cheating_detected": "Ya" if cheating_detected else "Tidak",
            "cheating_reason": cheating_reason,
            "analysis": evaluation.get('analysis', 'No analysis provided'),
            "keputusan_akhir": keputusan_akhir,
            "scoring_method": {
                "llm_evaluated": ["kualitas_jawaban", "koherensi", "relevansi"],
                "static_dummy": ["tempo_bicara", "confidence_score"]
            }
        }

    except Exception as e:
        print(f'‚îÇ ‚ö†Ô∏è  Inference API evaluation failed: {str(e)}')
        print(f'‚îÇ üîÑ Falling back to rule-based assessment...')

        # Fallback
        word_count = len(transcription_text.split())

        if word_count < 10:
            quality_score = 30
            coherence_score = 25
            relevance_score = 20
        elif word_count < 30:
            quality_score = 50
            coherence_score = 48
            relevance_score = 45
        elif word_count < 50:
            quality_score = 70
            coherence_score = 68
            relevance_score = 65
        else:
            quality_score = 85
            coherence_score = 83
            relevance_score = 80

        tempo_bicara = 85
        confidence_score = 82

        total = round((quality_score + coherence_score + relevance_score + tempo_bicara + confidence_score) / 5)

        return {
            "scores": {
                "kualitas_jawaban": quality_score,
                "koherensi": coherence_score,
                "relevansi": relevance_score,
                "tempo_bicara": tempo_bicara,
                "confidence_score": confidence_score
            },
            "total": total,
            "penilaian_akhir": 3 if total >= 70 else 2,
            "cheating_detected": "Tidak",
            "cheating_reason": "Tidak ada indikasi kecurangan",
            "analysis": f"Fallback assessment based on word count ({word_count} words). Inference API evaluation failed.",
            "keputusan_akhir": "Pertimbangan" if total >= 70 else "Tidak Lulus",
            "scoring_method": {
                "llm_evaluated": [],
                "static_dummy": ["kualitas_jawaban", "koherensi", "relevansi", "tempo_bicara", "confidence_score"],
                "fallback": True
            }
        }

üì• Initializing HuggingFace Inference API...
‚ÑπÔ∏è  Using meta-llama/Llama-3.1-8B-Instruct via Inference API
   No model download required - uses cloud API
‚úÖ Inference API initialized successfully



<b><h2> Pembuatan Json Final

In [169]:
def process_transcriptions_sync(session_id: str, candidate_name: str, uploaded_videos: list, base_url: str):
    """Background transcription processing"""
    try:
        print(f'\n{"="*70}')
        print(f'üéôÔ∏è  SESSION: {session_id}')
        print(f'üë§ CANDIDATE: {candidate_name}')
        print(f'üìπ VIDEOS: {len(uploaded_videos)}')
        print(f'{"="*70}\n')

        transcriptions = []
        assessment_results = []

        with processing_lock:
            processing_status[session_id] = {'status': 'processing', 'progress': '0/0'}

        # Process each video with overall progress bar
        for idx, interview in enumerate(tqdm(uploaded_videos, desc="üé¨ Overall Progress", unit="video", ncols=80), 1):
            if not interview.get('isVideoExist') or not interview.get('recordedVideoUrl'):
                transcriptions.append({
                    'positionId': interview['positionId'],
                    'error': interview.get('error', 'Video upload failed')
                })
                continue

            position_id = interview['positionId']
            video_url = interview['recordedVideoUrl']
            question = interview.get('question', '')

            try:
                print(f'\n‚îå‚îÄ Video {position_id}/{len(uploaded_videos)} ‚îÄ{"‚îÄ"*50}‚îê')
                if question:
                    print(f'‚îÇ ‚ùì Question: {question[:60]}{"..." if len(question) > 60 else ""}')

                local_file = get_local_file_path(video_url)
                if not local_file:
                    raise Exception(f"Local file not found")

                file_size_mb = os.path.getsize(local_file) / (1024 * 1024)

                with processing_lock:
                    processing_status[session_id] = {
                        'status': 'processing',
                        'progress': f'{position_id}/{len(uploaded_videos)}',
                        'current_video': position_id,
                        'message': f'Processing video {position_id}/{len(uploaded_videos)}...'
                    }

                video_start = time.time()

                # Step 1: Transcribe
                print(f'‚îÇ 1Ô∏è‚É£  TRANSCRIPTION ({file_size_mb:.1f} MB)')
                transcription_text = transcribe_video(local_file)
                transcribe_time = time.time() - video_start

                # Step 2: Translate WITH CONFIDENCE SCORE
                print(f'‚îÇ 2Ô∏è‚É£  TRANSLATION')
                translate_start = time.time()
                with processing_lock:
                    processing_status[session_id]['message'] = f'Translating video {position_id}...'

                # ‚úÖ NEW: Get translation with confidence
                translation_result = translate_to_indonesian_with_confidence(transcription_text)
                transcription_id = translation_result['translated_text']
                translation_confidence = translation_result['confidence_score']
                translation_confidence_level = translation_result['confidence_level']
                
                translate_time = time.time() - translate_start
                print(f'‚îÇ    üìä Translation Confidence: {translation_confidence}% ({translation_confidence_level})')

                # Step 3: CHEATING DETECTION
                print(f'‚îÇ 2Ô∏è‚É£¬Ω CHEATING DETECTION')
                cheating_start = time.time()
                with processing_lock:
                    processing_status[session_id]['message'] = f'Analyzing cheating patterns in video {position_id}...'

                cheating_result = advanced_cheating_detection(local_file, transcription_text)
                cheating_time = time.time() - cheating_start

                # Step 4: NON-VERBAL ANALYSIS WITH CONFIDENCE SCORE
                print(f'‚îÇ 2Ô∏è‚É£¬æ NON-VERBAL ANALYSIS')
                non_verbal_start = time.time()
                with processing_lock:
                    processing_status[session_id]['message'] = f'Analyzing non verbal in video {position_id}...'

                # ‚úÖ NEW: Get non-verbal with confidence
                non_verbal_result = analyze_interview_video_with_confidence(
                    video_path=local_file,
                    audio_path=None
                )

                non_verbal_time = time.time() - non_verbal_start
                print(f'‚îÇ    üìä Non-Verbal Confidence: {non_verbal_result["confidence_score"]}% ({non_verbal_result["confidence_level"]})')

                # Step 5: LLM Evaluation
                print(f'‚îÇ 3Ô∏è‚É£  AI ASSESSMENT')
                llm_start = time.time()
                with processing_lock:
                    processing_status[session_id]['message'] = f'Evaluating video {position_id} with AI...'

                llm_evaluation = evaluate_with_llm(transcription_text, question, position_id)
                llm_time = time.time() - llm_start

                # Step 6: Save
                print(f'‚îÇ 4Ô∏è‚É£  SAVING FILES')
                trans_fname = f"transcription_pos{position_id}_{datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex}.txt"
                trans_path = os.path.join(TRANSCRIPTION_DIR, trans_fname)

                with open(trans_path, 'w', encoding='utf-8') as f:
                    f.write(f"Candidate: {candidate_name}\n")
                    f.write(f"Position ID: {position_id}\n")
                    f.write(f"Question: {question}\n")
                    f.write(f"Video URL: {video_url}\n")
                    f.write(f"Transcribed at: {datetime.now(timezone.utc).isoformat()}\n")
                    f.write(f"Model: faster-whisper large-v3\n")
                    f.write(f"Processing time: {transcribe_time:.1f}s\n")
                    f.write(f"\n{'='*50}\n")
                    f.write(f"ENGLISH TRANSCRIPTION:\n")
                    f.write(f"{'='*50}\n\n")
                    f.write(transcription_text)
                    f.write(f"\n\n{'='*50}\n")
                    f.write(f"INDONESIAN TRANSLATION (DeepL):\n")
                    f.write(f"Confidence: {translation_confidence}% ({translation_confidence_level})\n")
                    f.write(f"{'='*50}\n\n")
                    f.write(transcription_id)
                    f.write(f"\n\n{'='*50}\n")
                    f.write(f"CHEATING DETECTION RESULTS:\n")
                    f.write(f"{'='*50}\n\n")
                    f.write(json.dumps(cheating_result, indent=2, ensure_ascii=False))
                    f.write(f"\n\n{'='*50}\n")
                    f.write(f"NON-VERBAL ANALYSIS:\n")
                    f.write(f"Confidence: {non_verbal_result['confidence_score']}% ({non_verbal_result['confidence_level']})\n")
                    f.write(f"{'='*50}\n\n")
                    f.write(json.dumps(non_verbal_result, indent=2, ensure_ascii=False))
                    f.write(f"\n\n{'='*50}\n")
                    f.write(f"AI ASSESSMENT:\n")
                    f.write(f"{'='*50}\n\n")
                    f.write(json.dumps(llm_evaluation, indent=2, ensure_ascii=False))

                transcription_url = f"{base_url}/transcriptions/{trans_fname}"

                # ‚úÖ Build final assessment
                words = transcription_text.split()

                assessment = {
                    "penilaian": {
                        "confidence_score": llm_evaluation['scores']['confidence_score'],
                        "kualitas_jawaban": llm_evaluation['scores']['kualitas_jawaban'],
                        "relevansi": llm_evaluation['scores']['relevansi'],
                        "koherensi": llm_evaluation['scores']['koherensi'],
                        "tempo_bicara": llm_evaluation['scores']['tempo_bicara'],
                        "analisis_llm": llm_evaluation['analysis'],
                        "total": llm_evaluation['total']
                    },
                    "penilaian_akhir": llm_evaluation['penilaian_akhir'],

                    # Cheating Detection
                    "cheating_detection": cheating_result.get('cheating_status', 'Tidak'),
                    "cheating_score": cheating_result.get('cheating_score', 0),
                    "cheating_confidence_score": cheating_result.get('confidence_score', 0),
                    "cheating_confidence_level": cheating_result.get('confidence_level', 'N/A'),
                    "alasan_cheating": ', '.join(cheating_result.get('indicators', [])) if cheating_result.get('indicators') else 'Tidak ada indikasi kecurangan',
                    "cheating_details": {
                        **cheating_result.get('details', {}),
                        "confidence_components": cheating_result.get('confidence_components', {})
                    },

                    # ‚úÖ Non-Verbal Analysis (with confidence)
                    "non_verbal_analysis": non_verbal_result['analysis'],
                    "non_verbal_confidence_score": non_verbal_result['confidence_score'],
                    "non_verbal_confidence_level": non_verbal_result['confidence_level'],
                    "non_verbal_confidence_components": non_verbal_result['confidence_components'],

                    "keputusan_akhir": llm_evaluation['keputusan_akhir'],
                    "transkripsi_en": transcription_text,
                    "transkripsi_id": transcription_id,
                    
                    # ‚úÖ Translation Confidence
                    "translation_confidence_score": translation_confidence,
                    "translation_confidence_level": translation_confidence_level,
                    
                    "metadata": {
                        "word_count": len(words),
                        "char_count": len(transcription_text),
                        "processed_at": datetime.now(timezone.utc).isoformat(),
                        "translation_available": True,
                        "llm_evaluation_time": round(llm_time, 2),
                        "cheating_detection_time": round(cheating_time, 2),
                        "non_verbal_analysis_time": round(non_verbal_time, 2),
                        "assessment_method": "Hybrid (LLM + Non-Verbal + Cheating Detection)",
                        "llm_evaluated_criteria": llm_evaluation.get('scoring_method', {}).get('llm_evaluated', []),
                        "static_criteria": llm_evaluation.get('scoring_method', {}).get('static_dummy', []),
                        "non_verbal_features": ["Speech Tempo", "Facial Expression", "Eye Movement"],
                        "cheating_methods": ["Diarization", "Eye Detection", "Text Pattern", "Audio Quality"],
                        "cheating_indicators_count": len(cheating_result.get('indicators', [])),
                        "cheating_confidence_breakdown": cheating_result.get('confidence_components', {}),
                        "non_verbal_confidence_breakdown": non_verbal_result['confidence_components'],
                        "translation_quality_metrics": translation_result.get('quality_metrics', {})
                    }
                }

                assessment_results.append({
                    "id": position_id,
                    "question": question,
                    "result": assessment
                })

                transcriptions.append({
                    'positionId': position_id,
                    'question': question,
                    'videoUrl': video_url,
                    'transcription': transcription_text,
                    'transcription_id': transcription_id,
                    'transcriptionUrl': transcription_url,
                    'transcriptionFile': trans_fname,
                    'assessment': assessment
                })

                # Delete video
                if os.path.exists(local_file):
                    os.remove(local_file)
                    print(f'‚îÇ üóëÔ∏è  Video deleted ({file_size_mb:.1f} MB freed)')

                total_time = time.time() - video_start
                print(f'‚îÇ ‚è±Ô∏è  Total: {total_time:.1f}s')
                print(f'‚îÇ üìä Confidence Scores:')
                print(f'‚îÇ    Translation: {translation_confidence}%')
                print(f'‚îÇ    Non-Verbal: {non_verbal_result["confidence_score"]}%')
                print(f'‚îÇ    Cheating: {cheating_result.get("confidence_score", 0)}%')
                print(f'‚îî‚îÄ{"‚îÄ"*68}‚îò')

                gc.collect()

            except Exception as e:
                print(f'‚îÇ ‚ùå ERROR: {str(e)}')
                print(f'‚îî‚îÄ{"‚îÄ"*68}‚îò')

                transcriptions.append({
                    'positionId': position_id,
                    'question': question,
                    'videoUrl': video_url,
                    'error': str(e)
                })


        # ============================================================================
        # ‚úÖ NEW: Calculate aggregate cheating analysis
        # ============================================================================
        aggregate_cheating = calculate_aggregate_cheating_analysis(assessment_results)

        print(f'\n{"="*70}')
        print(f'üö® AGGREGATE CHEATING ANALYSIS')
        print(f'{"="*70}')
        print(f'Overall Status: {aggregate_cheating["overall_cheating_status"]} ({aggregate_cheating["risk_level"]})')
        print(f'Confidence: {aggregate_cheating["confidence_level"]}')
        print(f'Videos Flagged: {aggregate_cheating["videos_flagged"]}/{aggregate_cheating["total_videos"]} ({aggregate_cheating["flagged_percentage"]}%)')
        print(f'Average Score: {aggregate_cheating["overall_cheating_score"]}/100')
        print(f'Recommendation: {aggregate_cheating["recommendation"]}')
        print(f'Summary: {aggregate_cheating["summary"]}')
        print(f'{"="*70}\n')


        aggregate_non_verbal = summarize_non_verbal_batch(assessment_results)

        # Save final results
        if assessment_results:
            results_json = {
               "success": True,
                "name": candidate_name,
                "session": session_id,
                "content": assessment_results,
                "aggregate_cheating_analysis": aggregate_cheating,
                "aggregate_non_verbal_analysis": aggregate_non_verbal,
                "metadata": {
                    "total_videos": len(uploaded_videos),
                    "successful_videos": len(assessment_results),
                    "processed_at": datetime.now(timezone.utc).isoformat(),
                    "model": "faster-whisper large-v3",
                    "llm_model": "meta-llama/Llama-3.1-8B-Instruct",
                    "assessment_method": "Hybrid (LLM + Diarization + Eye Detection + Aggregate Analysis)",
                    "llm_criteria": ["kualitas_jawaban", "koherensi", "relevansi"],
                    "static_criteria": ["tempo_bicara", "confidence_score"],
                    "cheating_detection_methods": [
                    "Per-Video: Diarization, Eye Detection, Text Pattern, Audio Quality",
                    "Aggregate: Cross-video pattern analysis, Risk scoring"],
                    "videos_deleted": True,
                    "translation_provider": "DeepL",
                    "translation_language": "Indonesian (ID)"
                }
            }

            results_filename = f"{session_id}.json"
            results_path = os.path.join(RESULTS_DIR, results_filename)

            with open(results_path, 'w', encoding='utf-8') as f:
                json.dump(results_json, f, ensure_ascii=False, indent=2)

            results_url = f"{base_url}/results/{results_filename}"
            print(f'\nüíæ Results saved: {results_url}')

        successful_count = sum(1 for t in transcriptions if 'transcription' in t)

        with processing_lock:
            processing_status[session_id] = {
                'status': 'completed',
                'result': {
                    'success': True,
                    'transcriptions': transcriptions,
                    'processed_videos': len(transcriptions),
                    'successful_videos': successful_count,
                    'failed_videos': len(transcriptions) - successful_count,
                    'results_url': f"{base_url}/results/{session_id}.json" if assessment_results else None
                }
            }

        print(f'\n{"="*70}')
        print(f'‚úÖ SESSION COMPLETED')
        print(f'   Success: {successful_count}/{len(transcriptions)} videos')
        print(f'{"="*70}\n')

    except Exception as e:
        print(f'\n‚ùå SESSION ERROR:\n{traceback.format_exc()}')

        with processing_lock:
            processing_status[session_id] = {
                'status': 'error',
                'error': str(e),
                'error_detail': traceback.format_exc()
            }

<b><h2> ENDPOINT

In [170]:
# ENDPOINTS
@app.post('/upload')
async def receive_videos_and_process(
    request: Request,
    candidate_name: str = Form(...),
    videos: List[UploadFile] = File(...),
    questions: List[str] = Form(...)  # NEW: Accept questions array
):
    """Upload videos and start background transcription"""
    session_id = uuid.uuid4().hex
    print(f'\nüîµ NEW UPLOAD REQUEST - Session: {session_id}')
    print(f'   Candidate: {candidate_name}')
    print(f'   Videos: {len(videos)} file(s)')
    print(f'   Questions: {len(questions)} question(s)')  # NEW

    # NEW: Validate questions count matches videos count
    if len(questions) != len(videos):
        return JSONResponse(
            content={
                'success': False,
                'error': f'Questions count ({len(questions)}) must match videos count ({len(videos)})'
            },
            status_code=400,
            headers={
                'Access-Control-Allow-Origin': '*',
                'Access-Control-Allow-Methods': 'POST, GET, OPTIONS',
                'Access-Control-Allow-Headers': '*',
            }
        )

    # Initialize status FIRST
    with processing_lock:
        processing_status[session_id] = {
            'status': 'uploading',
            'progress': '0/0',
            'message': 'Uploading videos...'
        }

    try:
        # 1. Upload semua video (fast)
        base_url = str(request.base_url).rstrip('/')
        uploaded_videos = []

        print(f'\nüì§ Uploading {len(videos)} video(s)...')
        for idx, (video, question) in enumerate(zip(videos, questions), 1):  # NEW: zip with questions
            try:
                ext = os.path.splitext(video.filename)[1] or '.webm'
                safe_name = f"{datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex}{ext}"
                dest_path = os.path.join(UPLOAD_DIR, safe_name)

                # Update upload progress
                with processing_lock:
                    processing_status[session_id]['message'] = f'Uploading video {idx}/{len(videos)}...'
                    processing_status[session_id]['progress'] = f'{idx}/{len(videos)}'

                with open(dest_path, 'wb') as buffer:
                    shutil.copyfileobj(video.file, buffer)

                file_url = f"{base_url}/uploads/{safe_name}"
                uploaded_videos.append({
                    'positionId': idx,
                    'question': question,  # NEW: Include question
                    'isVideoExist': True,
                    'recordedVideoUrl': file_url,
                    'filename': safe_name
                })
                print(f'   ‚úÖ Uploaded: {safe_name} | Q: {question[:50]}{"..." if len(question) > 50 else ""}')  # NEW

            except Exception as e:
                print(f'   ‚ùå Failed: {str(e)}')
                uploaded_videos.append({
                    'positionId': idx,
                    'question': question if idx <= len(questions) else '',  # NEW: Include question even on error
                    'isVideoExist': False,
                    'recordedVideoUrl': None,
                    'error': str(e)
                })

        # 2. Update status to processing
        with processing_lock:
            processing_status[session_id] = {
                'status': 'processing',
                'progress': '0/' + str(len(uploaded_videos)),
                'message': 'Starting transcription...',
                'uploaded_videos': len(uploaded_videos)
            }

        # 3. Start background thread
        thread = th.Thread(
            target=process_transcriptions_sync,
            args=(session_id, candidate_name, uploaded_videos, base_url),
            daemon=True
        )
        thread.start()

        print(f'‚úÖ Upload complete. Background thread started.')
        print(f'üì§ Returning immediate response with session_id: {session_id}')

        # 4. RETURN IMMEDIATELY - no waiting!
        return JSONResponse(
            content={
                'success': True,
                'session_id': session_id,
                'message': 'Videos uploaded successfully. Processing started.',
                'uploaded_videos': len(uploaded_videos)
            },
            status_code=200,
            headers={
                'Access-Control-Allow-Origin': '*',
                'Access-Control-Allow-Methods': 'POST, GET, OPTIONS',
                'Access-Control-Allow-Headers': '*',
            }
        )

    except Exception as e:
        error_detail = traceback.format_exc()
        print(f'‚ùå Error:\n{error_detail}')

        # Update status to error
        with processing_lock:
            processing_status[session_id] = {
                'status': 'error',
                'error': str(e),
                'error_detail': error_detail
            }

        return JSONResponse(
            content={
                'success': False,
                'session_id': session_id,
                'error': str(e)
            },
            status_code=500,
            headers={
                'Access-Control-Allow-Origin': '*',
                'Access-Control-Allow-Methods': 'POST, GET, OPTIONS',
                'Access-Control-Allow-Headers': '*',
            }
        )

In [171]:
@app.get('/status/{session_id}')
async def get_processing_status(session_id: str):
    """Check processing status"""
    with processing_lock:
        if session_id not in processing_status:
            return JSONResponse(
                {
                    'status': 'not_found',
                    'message': 'Session not found'
                },
                status_code=404,
                headers={
                    'Access-Control-Allow-Origin': '*',
                    'Access-Control-Allow-Methods': 'GET, OPTIONS',
                    'Access-Control-Allow-Headers': '*',
                    'Cache-Control': 'no-cache, no-store, must-revalidate',
                }
            )

        status_copy = processing_status[session_id].copy()

    # Add redirect URL if completed
    if status_copy.get('status') == 'completed':
        status_copy['redirect'] = f"halaman_dasboard.html?session={session_id}"

    return JSONResponse(
        status_copy,
        headers={
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Methods': 'GET, OPTIONS',
            'Access-Control-Allow-Headers': '*',
            'Cache-Control': 'no-cache, no-store, must-revalidate',
        }
    )


In [172]:
@app.get('/results/{session_id}')
async def get_results(session_id: str):
    """Get assessment results for a session"""
    results_filename = f"{session_id}.json"
    results_path = os.path.join(RESULTS_DIR, results_filename)

    if not os.path.exists(results_path):
        return JSONResponse(
            {
                'success': False,
                'message': 'Results not found for this session',
                'session_id': session_id
            },
            status_code=404,
            headers={
                'Access-Control-Allow-Origin': '*',
                'Access-Control-Allow-Methods': 'GET, OPTIONS',
                'Access-Control-Allow-Headers': '*',
            }
        )

    try:
        with open(results_path, 'r', encoding='utf-8') as f:
            results_data = json.load(f)

        return JSONResponse(
            results_data,
            headers={
                'Access-Control-Allow-Origin': '*',
                'Access-Control-Allow-Methods': 'GET, OPTIONS',
                'Access-Control-Allow-Headers': '*',
                'Cache-Control': 'no-cache, no-store, must-revalidate',
            }
        )
    except Exception as e:
        return JSONResponse(
            {
                'success': False,
                'message': f'Error reading results: {str(e)}',
                'session_id': session_id
            },
            status_code=500,
            headers={
                'Access-Control-Allow-Origin': '*',
                'Access-Control-Allow-Methods': 'GET, OPTIONS',
                'Access-Control-Allow-Headers': '*',
            }
        )


In [173]:
@app.get('/')
async def index():
    return {
        'message': 'AI Interview Assessment System',
        'model': 'faster-whisper large-v3',
        'accuracy': '98%+ for clear English speech',
        'speed': '4-5x faster than standard Whisper',
        'endpoints': {
            'upload': 'POST /upload',
            'status': 'GET /status/{session_id}',
            'results': 'GET /results/{session_id}',
            'test_form': 'GET /upload_form'
        }
    }

<b><h2> LOCAL SERVER

In [174]:
# Jalankan server uvicorn di dalam notebook (tanpa ngrok)
nest_asyncio.apply()
PORT = 8888

# Hentikan server sebelumnya jika ada
if 'server_thread' in globals() and server_thread is not None:
    try:
        print('‚è∏Ô∏è  Stopping previous server...')
        if 'server' in globals() and server is not None:
            server.should_exit = True
        # Tunggu thread selesai (dengan timeout)
        if server_thread.is_alive():
            server_thread.join(timeout=2)
        print('‚úÖ Previous server stopped.')
    except Exception as e:
        print(f'‚ö†Ô∏è  Error stopping previous server: {e}')

# Buat server instance baru dengan log level yang lebih rendah
config = uvicorn.Config(
    app=app,
    host='0.0.0.0',
    port=PORT,
    log_level='warning',  # Kurangi verbosity untuk menghindari duplikasi log
    access_log=False  # Nonaktifkan access log di console
)
server = uvicorn.Server(config=config)

# Fungsi untuk menjalankan server di thread
def run_server_in_thread():
    # Buat event loop baru untuk thread ini
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(server.serve())
    except Exception as e:
        print(f'‚ùå Server error: {e}')
    finally:
        loop.close()

# Jalankan server di background thread
server_thread = threading.Thread(target=run_server_in_thread, daemon=True)
server_thread.start()

print('‚îÅ' * 60)
print('üöÄ Server started successfully!')
print(f'üìç Local URL: http://127.0.0.1:{PORT}')
print(f'üìç Network URL: http://0.0.0.0:{PORT}')
print(f'üîß Endpoints:')
print(f'   - POST /upload       (upload videos & process)')
print(f'   - POST /upload_json  (upload JSON & download videos)')
print(f'   - GET  /status/{{id}}  (check processing status)')
print(f'   - GET  /results/{{id}} (get assessment results)')
print(f'   - GET  /upload_form  (test form)')
print('‚ÑπÔ∏è  Use Interrupt Kernel to stop the server')
print('‚îÅ' * 60)

‚è∏Ô∏è  Stopping previous server...
‚úÖ Previous server stopped.
‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ
üöÄ Server started successfully!
üìç Local URL: http://127.0.0.1:8888
üìç Network URL: http://0.0.0.0:8888
üîß Endpoints:
   - POST /upload       (upload videos & process)
   - POST /upload_json  (upload JSON & download videos)
   - GET  /status/{id}  (check processing status)
   - GET  /results/{id} (get assessment results)
   - GET  /upload_form  (test form)
‚ÑπÔ∏è  Use Interrupt Kernel to stop the server
‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ


<b><h2> NGROK

In [175]:
# Configure ngrok
# Set ngrok authtoken (dapatkan dari https://dashboard.ngrok.com/get-started/your-authtoken)
NGROK_AUTH_TOKEN = getpass.getpass('Enter your ngrok authtoken: ')
conf.get_default().auth_token = NGROK_AUTH_TOKEN

print('‚úÖ Ngrok configured successfully')

‚úÖ Ngrok configured successfully


In [None]:
# Start server with ngrok
nest_asyncio.apply()
PORT = 8888

# Stop previous server if exists
if 'server_thread' in globals() and server_thread is not None:
    try:
        print('‚è∏Ô∏è  Stopping previous server...')
        if 'server' in globals() and server is not None:
            server.should_exit = True
        if server_thread.is_alive():
            server_thread.join(timeout=2)
        print('‚úÖ Previous server stopped.')
    except Exception as e:
        print(f'‚ö†Ô∏è  Error stopping previous server: {e}')

# Close previous ngrok tunnels
try:
    ngrok.kill()
except:
    pass

# Create server instance
config = uvicorn.Config(
    app=app,
    host='0.0.0.0',
    port=PORT,
    log_level='warning',
    access_log=False
)
server = uvicorn.Server(config=config)

# Run server in thread
def run_server_in_thread():
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(server.serve())
    except Exception as e:
        print(f'‚ùå Server error: {e}')
    finally:
        loop.close()

server_thread = threading.Thread(target=run_server_in_thread, daemon=True)
server_thread.start()

# Wait for server to start
time.sleep(2)

# Start ngrok tunnel
public_url = ngrok.connect(PORT, bind_tls=True)
ngrok_url = public_url.public_url

print('‚îè' + '‚îÅ' * 70 + '‚îì')
print('üöÄ Server started successfully with ngrok!')
print(f'üìç Local URL: http://127.0.0.1:{PORT}')
print(f'üåê Public URL (ngrok): {ngrok_url}')
print(f'üìã Copy this URL to use in Upload.js:')
print(f'   const VIDEO_ENDPOINT = "{ngrok_url}/upload";')
print(f'üìß Endpoints:')
print(f'   - POST {ngrok_url}/upload')
print(f'   - GET  {ngrok_url}/status/{{id}}')
print(f'   - GET  {ngrok_url}/results/{{id}}')
print(f'   - GET  {ngrok_url}/upload_form')
print('‚ÑπÔ∏è  Ngrok tunnel will stay active while notebook is running')
print('‚ÑπÔ∏è  Use Interrupt Kernel to stop the server')
print('‚îó' + '‚îÅ' * 70 + '‚îõ')

‚è∏Ô∏è  Stopping previous server...
‚úÖ Previous server stopped.


t=2025-12-03T08:35:46+0700 lvl=eror msg="unable to evaluate ngrok agent binary path for symlinks" obj=tunnels.session err="CreateFile C:\\Users\\NFSYNX\\AppData\\Local\\ngrok: The system cannot find the file specified."


‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
üöÄ Server started successfully with ngrok!
üìç Local URL: http://127.0.0.1:8888
üåê Public URL (ngrok): https://redemptory-lavern-fiendishly.ngrok-free.dev
üìã Copy this URL to use in Upload.js:
   const VIDEO_ENDPOINT = "https://redemptory-lavern-fiendishly.ngrok-free.dev/upload";
üìß Endpoints:
   - POST https://redemptory-lavern-fiendishly.ngrok-free.dev/upload
   - GET  https://redemptory-lavern-fiendishly.ngrok-free.dev/status/{id}
   - GET  https://redemptory-lavern-fiendishly.ngrok-free.dev/results/{id}
   - GET  https://redemptory-lavern-fiendishly.ngrok-free.dev/upload_form
‚ÑπÔ∏è  Ngrok tunnel will stay active while notebook is running
‚ÑπÔ∏è  Use Interrupt Kernel to stop the server
‚îó‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ


üîµ NEW UPLOAD REQUEST - Session: a59dfd4cf38a4349ad097976cb8d5ac3
   Candidate: test
   Videos: 2 file(s)
   Questions: 2 question(s)

üì§ Uploading 2 video(s)...
   ‚úÖ Uploaded: 20251203013630_517eb07acb3e4afea3b79aac4f68db58.webm | Q: Can you share any specific challenges you faced wh...
   ‚úÖ Uploaded: 20251203013630_2ad2ae136bee4f47be1a23f34e0be1a4.webm | Q: Can you describe your experience with transfer lea...

üéôÔ∏è  SESSION: a59dfd4cf38a4349ad097976cb8d5ac3
üë§ CANDIDATE: test
üìπ VIDEOS: 2

‚úÖ Upload complete. Background thread started.
üì§ Returning immediate response with session_id: a59dfd4cf38a4349ad097976cb8d5ac3


üé¨ Overall Progress:   0%|                             | 0/2 [00:00<?, ?video/s]


‚îå‚îÄ Video 1/2 ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ ‚ùì Question: Can you share any specific challenges you faced while workin...
‚îÇ 1Ô∏è‚É£  TRANSCRIPTION (17.1 MB)
üìÅ Video: 20251203013630_517eb07acb3e4afea3b79aac4f68db58.webm (17.12 MB)
üîÑ Starting transcription...
   üìù Collecting segments...




   üßπ Cleaned: 763 ‚Üí 755 chars
   ‚úÖ Completed in 51.9s | 9 segments | 128 words
‚îÇ 2Ô∏è‚É£  TRANSLATION
   ‚úÖ Translation: 755 ‚Üí 817 chars
   üìä Confidence: 98% (Very High)
      Length: 100% | Speed: 90% | API: 100% | Coverage: 100%
‚îÇ    üìä Translation Confidence: 98% (Very High)
‚îÇ 2Ô∏è‚É£¬Ω CHEATING DETECTION
   üö® Advanced Cheating Detection:
   ‚îÇ 1Ô∏è‚É£  Speaker Diarization Check
   üé§ Performing speaker diarization (Silero VAD)...
   ‚îÇ ‚úÖ Silero VAD model loaded
   ‚îÇ Attempting to load audio...
   ‚îÇ ‚ö†Ô∏è  torchaudio load failed: Could not load libtorchcodec. Likely causes:
     
   ‚îÇ Fallback: Using pydub to extract audio...
   ‚îÇ ‚úÖ Audio extracted via pydub: 1 channels @ 16000Hz
   ‚îÇ ‚ÑπÔ∏è  Audio duration: 93.0s
   ‚îÇ Analyzing speech patterns...
   ‚îÇ ‚ÑπÔ∏è  Detected 161 speech segments
   ‚îÇ ‚ÑπÔ∏è  Avg segment: 0.1s | Long pauses: 10
   ‚îÇ ‚úÖ Analysis complete: 1 speaker(s)
   ‚îÇ    Confidence: MEDIUM
   ‚îÇ    Reasoning: Monolo

  y, sr = librosa.load(video_path, sr=16000, duration=30)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


   ‚úÖ Audio berhasil diekstrak: d:\Interview_Assesment_System-ngrok-raifal\audio\20251203013630_517eb07acb3e4afea3b79aac4f68db58.wav
üé§ Analisis tempo bicara...
üòä Analisis ekspresi wajah...
üëÅÔ∏è Analisis gerakan mata...

‚úÖ Non-Verbal Analysis Complete
   Confidence: 87% (Very High)
   Components: Speech=95%, Face=95%, Eye=60%, Duration=100%

‚îÇ    üìä Non-Verbal Confidence: 87% (Very High)
‚îÇ 3Ô∏è‚É£  AI ASSESSMENT
‚îÇ ü§ñ Llama-3.1 Inference API Evaluation (3 criteria)...


üé¨ Overall Progress:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå          | 1/2 [01:24<01:24, 84.76s/video]

‚îÇ üì® API Response received (583 chars)
‚îÇ üìä LLM Scores: Quality=60, Coherence=30, Relevance=40
‚îÇ üìå Static: Tempo=85, Confidence=82
‚îÇ ‚úÖ Total Score: 59/100 | Rating: 1/5 | Decision: Tidak Lulus
‚îÇ 4Ô∏è‚É£  SAVING FILES
‚îÇ üóëÔ∏è  Video deleted (17.1 MB freed)
‚îÇ ‚è±Ô∏è  Total: 84.6s
‚îÇ üìä Confidence Scores:
‚îÇ    Translation: 98%
‚îÇ    Non-Verbal: 87%
‚îÇ    Cheating: 90.5%
‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò

‚îå‚îÄ Video 2/2 ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ ‚ùì Question: Can you describe your experience with transfer learning in T...
‚îÇ 1Ô∏è‚É£  TRANSCRIPTION (21.0 MB)
üìÅ Video: 20251203013630_2ad2ae136bee4f47be1a23f34e0be1a4.webm (20.96 MB)
üîÑ Starting 



   üßπ Cleaned: 1008 ‚Üí 992 chars
   ‚úÖ Completed in 76.3s | 17 segments | 163 words
‚îÇ 2Ô∏è‚É£  TRANSLATION
   ‚úÖ Translation: 992 ‚Üí 1109 chars
   üìä Confidence: 98% (Very High)
      Length: 100% | Speed: 90% | API: 100% | Coverage: 100%
‚îÇ    üìä Translation Confidence: 98% (Very High)
‚îÇ 2Ô∏è‚É£¬Ω CHEATING DETECTION
   üö® Advanced Cheating Detection:
   ‚îÇ 1Ô∏è‚É£  Speaker Diarization Check
   üé§ Performing speaker diarization (Silero VAD)...
   ‚îÇ ‚úÖ Silero VAD model loaded
   ‚îÇ Attempting to load audio...
   ‚îÇ ‚ö†Ô∏è  torchaudio load failed: Could not load libtorchcodec. Likely causes:
     
   ‚îÇ Fallback: Using pydub to extract audio...
   ‚îÇ ‚úÖ Audio extracted via pydub: 1 channels @ 16000Hz
   ‚îÇ ‚ÑπÔ∏è  Audio duration: 114.7s
   ‚îÇ Analyzing speech patterns...
   ‚îÇ ‚ÑπÔ∏è  Detected 258 speech segments
   ‚îÇ ‚ÑπÔ∏è  Avg segment: 0.1s | Long pauses: 7
   ‚îÇ ‚úÖ Analysis complete: 1 speaker(s)
   ‚îÇ    Confidence: MEDIUM
   ‚îÇ    Reasoning: Mon

üé¨ Overall Progress: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [03:25<00:00, 102.58s/video]

‚îÇ üì® API Response received (551 chars)
‚îÇ üìä LLM Scores: Quality=60, Coherence=40, Relevance=70
‚îÇ üìå Static: Tempo=85, Confidence=82
‚îÇ ‚úÖ Total Score: 67/100 | Rating: 2/5 | Decision: Tidak Lulus
‚îÇ 4Ô∏è‚É£  SAVING FILES
‚îÇ üóëÔ∏è  Video deleted (21.0 MB freed)
‚îÇ ‚è±Ô∏è  Total: 120.3s
‚îÇ üìä Confidence Scores:
‚îÇ    Translation: 98%
‚îÇ    Non-Verbal: 87%
‚îÇ    Cheating: 89.3%
‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò

üö® AGGREGATE CHEATING ANALYSIS
Overall Status: Tidak (LOW RISK)
Confidence: High
Videos Flagged: 0/2 (0.0%)
Average Score: 15.0/100
Recommendation: LULUS - No significant cheating indicators
Summary: Tidak ditemukan indikasi kecurangan yang signifikan di semua video.


üíæ Results saved: https://redemptory-lavern-fiendishly.ngrok-free.dev/results/a59dfd4cf38a4349ad097976cb8d5a




## System Information

### Whisper Model
- **Library**: `faster-whisper` (optimized implementation)
- **Model**: `large-v3` (most accurate available)
- **Accuracy**: ~98% for clear English speech
- **Speed**: 4-5x faster than `openai-whisper`

### Translation
- **Provider**: DeepL API
- **Target Language**: Indonesian (ID)
- **Source Language**: English (EN)
- **Character Limit**: 5,000 per chunk
- **Setup**: Set `DEEPL_API_KEY` in cell 4
- **Get API Key**: https://www.deepl.com/pro-api (Free tier: 500,000 chars/month)

### LLM Assessment
- **Model**: meta-llama/Llama-2-7b-chat-hf
- **Method**: Hybrid (LLM + Static)
- **LLM Evaluated Criteria** (3):
  1. **Kualitas Jawaban** - Quality of answer (clarity, completeness, depth)
  2. **Koherensi** - Coherence (logical flow, consistency, structure)
  3. **Relevansi** - Relevance (alignment with question, staying on topic)
- **Static Dummy Values** (2):
  4. **Tempo Bicara** - Speaking tempo (fixed at 85/100) üîß *TODO: Replace with audio analysis model*
  5. **Confidence Score** - Confidence (fixed at 82/100) üîß *TODO: Replace with voice analysis model*
- **Cheating Detection**: LLM analyzes for multiple speakers, artificial voice, reading patterns
- **Fallback**: Rule-based assessment if LLM fails

### Performance
- **Device**: Automatically detects CUDA GPU (if available) or CPU
- **Compute Type**:
  - GPU: `float16` (faster with high accuracy)
  - CPU: `int8` (optimized for CPU)
- **VAD Filter**: Enabled (skips silence for efficiency)

### Settings
- **Beam Size**: 5 (higher = more accurate)
- **Best Of**: 5 (samples multiple candidates)
- **Patience**: 2.0 (thorough beam search)
- **Temperature**: 0.0 (deterministic output)
- **Context**: Uses previous text for better accuracy

### Storage Management
- **Auto-delete videos**: ‚úÖ Videos are automatically deleted after successful transcription
- **Storage saved**: Only transcriptions and results are kept
- **Safety**: Deletion only happens after successful transcription
- **Error handling**: If deletion fails, processing continues normally

### Endpoints
- `POST /upload` - Upload videos and start transcription
- `GET /status/{session_id}` - Check processing status
- **`GET /results/{session_id}`** - **Get assessment results**
- `GET /upload_form` - Test form interface
- `GET /` - System information

### Files
- ~~Uploaded videos: `uploads/`~~ (deleted after transcription) ‚ôªÔ∏è
- Transcriptions: `transcriptions/` ‚úÖ (includes English + Indonesian + Assessment)
- **Assessment results: `results/`** ‚úÖ

### Assessment Data Structure
```json
{
  "success": true,
  "name": "Candidate Name",
  "session": "session_id_here",
  "content": [
    {
      "id": 1,
      "question": "What is your experience with Python?",
      "result": {
        "penilaian": {
          "kualitas_jawaban": 85,    // ‚úÖ LLM evaluated
          "koherensi": 83,            // ‚úÖ LLM evaluated
          "relevansi": 80,            // ‚úÖ LLM evaluated
          "tempo_bicara": 85,         // üîß Static dummy (TODO: audio model)
          "confidence_score": 82,     // üîß Static dummy (TODO: voice model)
          "total": 83
        },
        "penilaian_akhir": 4,
        "cheating_detection": "Tidak",
        "keputusan_akhir": "Lulus",
        "transkripsi_en": "...",
        "transkripsi_id": "...",
        "metadata": {
          "assessment_method": "Hybrid (LLM + Static)",
          "llm_evaluated_criteria": ["kualitas_jawaban", "koherensi", "relevansi"],
          "static_criteria": ["tempo_bicara", "confidence_score"]
        }
      }
    }
  ],
  "metadata": {
    "assessment_method": "Hybrid (LLM + Static)",
    "llm_criteria": ["kualitas_jawaban", "koherensi", "relevansi"],
    "static_criteria": ["tempo_bicara", "confidence_score"]
  }
}
```

### Roadmap
- ‚úÖ **Phase 1**: LLM Assessment (kualitas, koherensi, relevansi)
- üîß **Phase 2**: Audio Analysis Model (tempo_bicara) - *Coming Soon*
- üîß **Phase 3**: Voice Analysis Model (confidence_score) - *Coming Soon*
- üîß **Phase 4**: Video Analysis (eye contact, body language) - *Future*

### Notes
- **3 criteria** evaluated by LLM with real intelligence
- **2 criteria** use static dummy values (will be replaced with specialized models)
- Static values: `tempo_bicara=85`, `confidence_score=82`
- Results saved automatically after transcription completes
- **Original video files are deleted after transcription to save storage**
- DeepL API key required for translation (free tier available)
- Access via: `http://127.0.0.1:8888/results/{session_id}`

### DeepL Setup
1. Sign up at https://www.deepl.com/pro-api
2. Get your free API key (500,000 chars/month)
3. Set `DEEPL_API_KEY` in cell 4
4. Restart kernel and run all cells