1. Extraction using Python script OR Git Bash (faster)

In [None]:
"""
import tarfile
import os
import glob

# Set the path to where your .tar files are located
base_path = "C:/Users/Bharat/Videos"  # UPDATE THIS PATH if needed

def extract_tar_files(directory, pattern):
    tar_files = glob.glob(os.path.join(directory, pattern))
    print(f"Found {len(tar_files)} files for pattern: {pattern}")
    
    for file_path in tar_files:
        try:
            print(f"Extracting {file_path}...")
            with tarfile.open(file_path, "r:") as tar:
                tar.extractall(path=directory)
            print(f"Successfully extracted {file_path}")
        except Exception as e:
            print(f"Error extracting {file_path}: {e}")

# 1. Protocols (Commented out to skip)
# extract_tar_files(base_path, "ASVspoof5_protocols.tar.gz")

# 2. Extract ONLY flac_T_ac file
# Changed pattern from "flac_T_*.tar" to specific file "flac_T_ac.tar"
extract_tar_files(base_path, "flac_T_ac.tar")

# 3. Dev Data (Commented out to skip)
# extract_tar_files(base_path, "flac_D_*.tar")

print("Extraction complete.")
"""

2. Data Pre-processing

In [1]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

FIXED_WIDTH = 400  # Adjust as needed
DATASET_FOLDER = "flac_T" # Path to your extracted training audio
PROTOCOL_FILE = "ASVspoof5.train.tsv" # Path to your protocol file

def audio_to_spectrogram(file_path):
    try:
        y, sr = librosa.load(file_path, sr=16000) # ASVspoof5 is 16kHz
        spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        spec_db = librosa.power_to_db(spec, ref=np.max)

        # Resize spectrogram to a fixed width
        if spec_db.shape[1] < FIXED_WIDTH:
            pad_width = FIXED_WIDTH - spec_db.shape[1]
            spec_db = np.pad(spec_db, ((0, 0), (0, pad_width)), mode='constant')
        else:
            spec_db = spec_db[:, :FIXED_WIDTH]
        return spec_db
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

def load_dataset(dataset_path, protocol_path, max_files=1000):
    spectrograms, labels = [], []
    
    # 1. Load the protocol (labels)
    # Columns: SPEAKER_ID FLAC_FILE_NAME ... KEY ...
    cols = ["SPEAKER_ID", "FLAC_FILE_NAME", "SPEAKER_GENDER", "CODEC", "CODEC_Q", 
            "CODEC_SEED", "ATTACK_TAG", "ATTACK_LABEL", "KEY", "TMP"]
    df = pd.read_csv(protocol_path, sep=' ', names=cols)
    
    # Limit to max_files for testing
    df = df.head(max_files)
    
    print(f"Processing {len(df)} files...")

    for index, row in tqdm(df.iterrows(), total=df.shape[0]):
        filename = row['FLAC_FILE_NAME'] + ".flac"
        label_str = row['KEY'] # 'spoof' or 'bonafide'
        
        file_path = os.path.join(dataset_path, filename)
        
        # Check if file exists before processing
        if os.path.exists(file_path):
            spec = audio_to_spectrogram(file_path)
            if spec is not None:
                spectrograms.append(spec)
                # 1 for spoof, 0 for bonafide
                labels.append(1 if label_str == "spoof" else 0)
        else:
            print(f"Warning: File not found {file_path}")

    X = np.array(spectrograms)[..., np.newaxis]
    y = np.array(labels)
    return X, y

if __name__ == "__main__":
    # Ensure you extract the data first!
    if not os.path.exists(DATASET_FOLDER):
        print(f"Error: Folder {DATASET_FOLDER} not found. Please run the extraction script first.")
    else:
        X, y = load_dataset(DATASET_FOLDER, PROTOCOL_FILE, max_files=100) # Low number for test
        print(f"Shape of X: {X.shape}")
        print(f"Shape of y: {y.shape}")
        np.save("X.npy", X)
        np.save("y.npy", y)

Processing 100 files...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:04<00:00, 20.76it/s]

Shape of X: (100, 128, 400, 1)
Shape of y: (100,)





3. Model Training

Gemini

In [1]:
import os
import numpy as np
import pandas as pd
import librosa
import soundfile as sf  # Faster audio loading
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight  # <--- NEW IMPORT

# ================= CONFIGURATION =================
# Double-check these paths match your SSD location
DATASET_DIR = "flac_T" 
PROTOCOL_PATH = "ASVspoof5.train.tsv"

# Batch size 64 is efficient for your 16GB RAM
BATCH_SIZE = 64  
FIXED_WIDTH = 400
# =================================================

class ASVspoofGenerator(Sequence):
    def __init__(self, x_set, y_set, batch_size, base_dir):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.base_dir = base_dir

    def __len__(self):
        return int(np.ceil(len(self.x) / self.batch_size))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]

        batch_specs = []
        for file_name in batch_x:
            file_path = os.path.join(self.base_dir, file_name + ".flac")
            spec = self.process_audio(file_path)
            batch_specs.append(spec)

        return np.array(batch_specs)[..., np.newaxis], np.array(batch_y)

    def process_audio(self, file_path):
        if not os.path.exists(file_path):
            return np.zeros((128, FIXED_WIDTH))
            
        try:
            # OPTIMIZATION: Use soundfile for speed
            y, sr = sf.read(file_path)
            
            # Safety check for sampling rate
            if sr != 16000:
                y = librosa.resample(y, orig_sr=sr, target_sr=16000)
                
            spec = librosa.feature.melspectrogram(y=y, sr=16000, n_mels=128)
            spec_db = librosa.power_to_db(spec, ref=np.max)

            # Resize to FIXED_WIDTH
            if spec_db.shape[1] < FIXED_WIDTH:
                pad_width = FIXED_WIDTH - spec_db.shape[1]
                spec_db = np.pad(spec_db, ((0, 0), (0, pad_width)), mode='constant')
            else:
                spec_db = spec_db[:, :FIXED_WIDTH]
            return spec_db
        except:
            return np.zeros((128, FIXED_WIDTH))

# ================= MAIN EXECUTION =================
if __name__ == "__main__":
    print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

    print("Loading file list...")
    cols = ["SPEAKER_ID", "FLAC_FILE_NAME", "SPEAKER_GENDER", "CODEC", "CODEC_Q", 
            "CODEC_SEED", "ATTACK_TAG", "ATTACK_LABEL", "KEY", "TMP"]
    
    # Read CSV
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', names=cols)
    df['target'] = df['KEY'].apply(lambda x: 1 if x == 'spoof' else 0)
    
    print(f"Total Dataset Size: {len(df)}")

    X_train, X_val, y_train, y_val = train_test_split(
        df['FLAC_FILE_NAME'].values, 
        df['target'].values, 
        test_size=0.2, 
        random_state=42
    )

    # --- CRITICAL FIX: CALCULATE CLASS WEIGHTS ---
    # This prevents the model from ignoring 'Real' files
    weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=np.unique(y_train),
        y=y_train
    )
    class_weights = dict(enumerate(weights))
    print(f"Class Weights (0=Real, 1=Spoof): {class_weights}")
    # ---------------------------------------------

    print(f"Training samples: {len(X_train)}")
    
    # Generator Init
    train_gen = ASVspoofGenerator(X_train, y_train, BATCH_SIZE, DATASET_DIR)
    val_gen = ASVspoofGenerator(X_val, y_val, BATCH_SIZE, DATASET_DIR)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 400, 1)),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    print("Starting FULL Training (This will take hours)...")
    
    # FIX APPLIED HERE: use_multiprocessing=False prevents Windows crash
    model.fit(
        train_gen, 
        validation_data=val_gen, 
        epochs=5, 
        workers=4, 
        use_multiprocessing=False, 
        max_queue_size=20,
        class_weight=class_weights  # <--- APPLY WEIGHTS HERE
    )
    
    # Save with a NEW name to avoid overwriting your quick test
    model.save("asvspoof5_full_model.h5")
    print("Full Model saved!")

Num GPUs Available:  0
Loading file list...
Total Dataset Size: 182357
Class Weights (0=Real, 1=Spoof): {0: 4.850222754172485, 1: 0.557468321538297}
Training samples: 145885
Starting FULL Training (This will take hours)...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  saving_api.save_model(


Full Model saved!


In [1]:
import os
import numpy as np
import pandas as pd
import librosa
import soundfile as sf
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from sklearn.model_selection import train_test_split

# ================= CONFIGURATION =================
DATASET_DIR = "flac_T" 
PROTOCOL_PATH = "ASVspoof5.train.tsv"
BATCH_SIZE = 64  
FIXED_WIDTH = 400
# =================================================

class ASVspoofGenerator(Sequence):
    def __init__(self, x_set, y_set, batch_size, base_dir):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.base_dir = base_dir

    def __len__(self):
        return int(np.ceil(len(self.x) / self.batch_size))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]

        batch_specs = []
        for file_name in batch_x:
            file_path = os.path.join(self.base_dir, file_name + ".flac")
            spec = self.process_audio(file_path)
            batch_specs.append(spec)

        return np.array(batch_specs)[..., np.newaxis], np.array(batch_y)

    def process_audio(self, file_path):
        if not os.path.exists(file_path):
            return np.zeros((128, FIXED_WIDTH))
        try:
            y, sr = sf.read(file_path)
            if sr != 16000:
                y = librosa.resample(y, orig_sr=sr, target_sr=16000)
            spec = librosa.feature.melspectrogram(y=y, sr=16000, n_mels=128)
            spec_db = librosa.power_to_db(spec, ref=np.max)
            if spec_db.shape[1] < FIXED_WIDTH:
                pad_width = FIXED_WIDTH - spec_db.shape[1]
                spec_db = np.pad(spec_db, ((0, 0), (0, pad_width)), mode='constant')
            else:
                spec_db = spec_db[:, :FIXED_WIDTH]
            return spec_db
        except:
            return np.zeros((128, FIXED_WIDTH))

if __name__ == "__main__":
    print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
    
    # 1. Load Protocol
    print("Loading full protocol...")
    cols = ["SPEAKER_ID", "FLAC_FILE_NAME", "SPEAKER_GENDER", "CODEC", "CODEC_Q", 
            "CODEC_SEED", "ATTACK_TAG", "ATTACK_LABEL", "KEY", "TMP"]
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', names=cols)
    df['target'] = df['KEY'].apply(lambda x: 1 if x == 'spoof' else 0)
    
    # 2. CREATE BALANCED SUBSET (The Fix)
    df_real = df[df['target'] == 0]
    df_spoof = df[df['target'] == 1]
    
    print(f"Original Count -> Real: {len(df_real)}, Spoof: {len(df_spoof)}")
    
    # Take all Reals, and match that count with Spoofs
    n_samples = len(df_real) 
    df_spoof_balanced = df_spoof.sample(n=n_samples, random_state=42)
    
    # Combine and Shuffle
    df_balanced = pd.concat([df_real, df_spoof_balanced]).sample(frac=1, random_state=42).reset_index(drop=True)
    
    print(f"Balanced Dataset Size: {len(df_balanced)} (50% Real / 50% Spoof)")
    
    # 3. Split
    X_train, X_val, y_train, y_val = train_test_split(
        df_balanced['FLAC_FILE_NAME'].values, 
        df_balanced['target'].values, 
        test_size=0.2, 
        random_state=42
    )

    print(f"Training on {len(X_train)} files (Balanced)")

    train_gen = ASVspoofGenerator(X_train, y_train, BATCH_SIZE, DATASET_DIR)
    val_gen = ASVspoofGenerator(X_val, y_val, BATCH_SIZE, DATASET_DIR)

    # 4. Model (Same architecture, cleaner training)
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 400, 1)),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    print("Starting BALANCED Training (Fast Mode)...")
    
    # Note: No 'class_weights' needed because data is balanced!
    model.fit(
        train_gen, 
        validation_data=val_gen, 
        epochs=5, 
        workers=4, 
        use_multiprocessing=False
    )
    
    model.save("asvspoof5_balanced.h5")
    print("Balanced Model saved!")

Num GPUs Available:  0
Loading full protocol...
Original Count -> Real: 18797, Spoof: 163560
Balanced Dataset Size: 37594 (50% Real / 50% Spoof)
Training on 30075 files (Balanced)
Starting BALANCED Training (Fast Mode)...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  saving_api.save_model(


Balanced Model saved!


In [4]:
import os
import numpy as np
import pandas as pd
import librosa
import soundfile as sf
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from sklearn.model_selection import train_test_split

# ================= CONFIGURATION =================
DATASET_DIR = "flac_T" 
PROTOCOL_PATH = "ASVspoof5.train.tsv"
BATCH_SIZE = 64  
FIXED_WIDTH = 400
# =================================================

class ASVspoofGenerator(Sequence):
    def __init__(self, x_set, y_set, batch_size, base_dir):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.base_dir = base_dir

    def __len__(self):
        return int(np.ceil(len(self.x) / self.batch_size))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]

        batch_specs = []
        for file_name in batch_x:
            file_path = os.path.join(self.base_dir, file_name + ".flac")
            spec = self.process_audio(file_path)
            batch_specs.append(spec)

        return np.array(batch_specs)[..., np.newaxis], np.array(batch_y)

    def process_audio(self, file_path):
        if not os.path.exists(file_path):
            return np.zeros((128, FIXED_WIDTH))
        try:
            y, sr = sf.read(file_path)
            if sr != 16000:
                y = librosa.resample(y, orig_sr=sr, target_sr=16000)
                
            # Generate Mel Spectrogram
            spec = librosa.feature.melspectrogram(y=y, sr=16000, n_mels=128)
            spec_db = librosa.power_to_db(spec, ref=np.max) # Range: -80 to 0

            # Resize
            if spec_db.shape[1] < FIXED_WIDTH:
                pad_width = FIXED_WIDTH - spec_db.shape[1]
                spec_db = np.pad(spec_db, ((0, 0), (0, pad_width)), mode='constant')
            else:
                spec_db = spec_db[:, :FIXED_WIDTH]
            
            # --- NORMALIZATION FIX ---
            # Map -80dB...0dB to 0.0...1.0
            # This prevents the model from getting saturated
            spec_norm = (spec_db + 80.0) / 80.0
            return spec_norm
            # -------------------------
        except:
            return np.zeros((128, FIXED_WIDTH))

if __name__ == "__main__":
    print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
    
    # 1. Load Protocol
    print("Loading full protocol...")
    cols = ["SPEAKER_ID", "FLAC_FILE_NAME", "SPEAKER_GENDER", "CODEC", "CODEC_Q", 
            "CODEC_SEED", "ATTACK_TAG", "ATTACK_LABEL", "KEY", "TMP"]
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', names=cols)
    df['target'] = df['KEY'].apply(lambda x: 1 if x == 'spoof' else 0)
    
    # 2. CREATE BALANCED SUBSET
    df_real = df[df['target'] == 0]
    df_spoof = df[df['target'] == 1]
    
    print(f"Original Count -> Real: {len(df_real)}, Spoof: {len(df_spoof)}")
    
    n_samples = len(df_real) 
    df_spoof_balanced = df_spoof.sample(n=n_samples, random_state=42)
    df_balanced = pd.concat([df_real, df_spoof_balanced]).sample(frac=1, random_state=42).reset_index(drop=True)
    
    print(f"Balanced Dataset Size: {len(df_balanced)} (50% Real / 50% Spoof)")
    
    X_train, X_val, y_train, y_val = train_test_split(
        df_balanced['FLAC_FILE_NAME'].values, 
        df_balanced['target'].values, 
        test_size=0.2, 
        random_state=42
    )

    print(f"Training on {len(X_train)} files (Balanced)")

    train_gen = ASVspoofGenerator(X_train, y_train, BATCH_SIZE, DATASET_DIR)
    val_gen = ASVspoofGenerator(X_val, y_val, BATCH_SIZE, DATASET_DIR)

    # 3. PRO MODEL ARCHITECTURE
    # Includes BatchNormalization to fix saturation
    model = tf.keras.models.Sequential([
        # Layer 1
        tf.keras.layers.Conv2D(32, (3, 3), input_shape=(128, 400, 1)),
        tf.keras.layers.BatchNormalization(), # <--- Stabilizes training
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        
        # Layer 2
        tf.keras.layers.Conv2D(64, (3, 3)),
        tf.keras.layers.BatchNormalization(), # <--- Stabilizes training
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        
        # Classifier
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    print("Starting NORMALIZED Training...")
    
    model.fit(
        train_gen, 
        validation_data=val_gen, 
        epochs=5, 
        workers=4, 
        use_multiprocessing=False
    )
    
    model.save("asvspoof5_normalized.h5")
    print("Normalized Model saved!")

Num GPUs Available:  0
Loading full protocol...
Original Count -> Real: 18797, Spoof: 163560
Balanced Dataset Size: 37594 (50% Real / 50% Spoof)
Training on 30075 files (Balanced)
Starting NORMALIZED Training...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  saving_api.save_model(


Normalized Model saved!


In [14]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from tqdm import tqdm
import soundfile as sf
import librosa
from sklearn.metrics import roc_curve

# ================= CONFIGURATION =================
MODEL_PATH = "asvspoof5_normalized.h5"  # <--- NEW FILE
PROTOCOL_PATH = "ASVspoof5.dev.track_1.tsv"
AUDIO_DIR = "flac_D"
FIXED_WIDTH = 400
# =================================================

def compute_eer(y_true, y_score):
    fpr, tpr, thresholds = roc_curve(y_true, y_score, pos_label=1)
    fnr = 1 - tpr
    eer_threshold = thresholds[np.nanargmin(np.absolute((fnr - fpr)))]
    eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))]
    return eer * 100

def preprocess_for_eval(file_path):
    if not os.path.exists(file_path): return None
    try:
        y, sr = sf.read(file_path)
        if sr != 16000: y = librosa.resample(y, orig_sr=sr, target_sr=16000)
        
        # SAME PREPROCESSING AS TRAINING
        spec = librosa.feature.melspectrogram(y=y, sr=16000, n_mels=128)
        spec_db = librosa.power_to_db(spec, ref=np.max)
        
        if spec_db.shape[1] < FIXED_WIDTH:
            pad_width = FIXED_WIDTH - spec_db.shape[1]
            spec_db = np.pad(spec_db, ((0, 0), (0, pad_width)), mode='constant')
        else:
            spec_db = spec_db[:, :FIXED_WIDTH]
            
        # *** CRITICAL NORMALIZATION STEP ***
        spec_norm = (spec_db + 80.0) / 80.0
        return spec_norm[..., np.newaxis]
    except: return None

print(f"Loading model: {MODEL_PATH}...")
model = tf.keras.models.load_model(MODEL_PATH)
print("‚úÖ Model loaded successfully!")

print(f"Loading Dev Protocol: {PROTOCOL_PATH}...")
cols = ["SPEAKER_ID", "FLAC_FILE_NAME", "SPEAKER_GENDER", "CODEC", "CODEC_Q", 
        "CODEC_SEED", "ATTACK_TAG", "ATTACK_LABEL", "KEY", "TMP"]
df = pd.read_csv(PROTOCOL_PATH, sep=' ', names=cols)

# Test on 1000 random files
test_df = df.sample(2000, random_state=42)
print(f"Evaluating on {len(test_df)} files from Dev set...")

y_true = []
y_scores = []

print("Starting predictions...")
for index, row in tqdm(test_df.iterrows(), total=len(test_df)):
    file_path = os.path.join(AUDIO_DIR, row['FLAC_FILE_NAME'] + ".flac")
    
    spec = preprocess_for_eval(file_path)
    if spec is not None:
        # Get probability (0.0 to 1.0)
        score = model.predict(np.array([spec]), verbose=0)[0][0]
        
        # KEY: spoof=1, bonafide=0
        true_label = 1 if row['KEY'] == 'spoof' else 0
        
        y_true.append(true_label)
        y_scores.append(score)

final_eer = compute_eer(y_true, y_scores)

print("="*40)
print(f"FINAL EER: {final_eer:.2f}%")
print("="*40)

Loading model: asvspoof5_normalized.h5...
‚úÖ Model loaded successfully!
Loading Dev Protocol: ASVspoof5.dev.track_1.tsv...
Evaluating on 2000 files from Dev set...
Starting predictions...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2000/2000 [06:11<00:00,  5.38it/s]

FINAL EER: 40.14%





In [15]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from tqdm import tqdm
import soundfile as sf
import librosa
from sklearn.metrics import roc_curve

# ================= CONFIGURATION =================
MODEL_PATH = "asvspoof5_normalized.h5"
PROTOCOL_PATH = "ASVspoof5.dev.track_1.tsv"  # Using DEV protocol
AUDIO_DIR = "flac_D"                         # Using DEV audio folder
OUTPUT_FILE = "dev_full_scores.txt"
FIXED_WIDTH = 400
# =================================================

def preprocess_for_eval(file_path):
    if not os.path.exists(file_path): return None
    try:
        y, sr = sf.read(file_path)
        if sr != 16000: y = librosa.resample(y, orig_sr=sr, target_sr=16000)
        
        # Standard Mel Spectrogram
        spec = librosa.feature.melspectrogram(y=y, sr=16000, n_mels=128)
        spec_db = librosa.power_to_db(spec, ref=np.max)
        
        # Padding / Trimming
        if spec_db.shape[1] < FIXED_WIDTH:
            pad_width = FIXED_WIDTH - spec_db.shape[1]
            spec_db = np.pad(spec_db, ((0, 0), (0, pad_width)), mode='constant')
        else:
            spec_db = spec_db[:, :FIXED_WIDTH]
            
        # Normalization (Crucial)
        spec_norm = (spec_db + 80.0) / 80.0
        return spec_norm[..., np.newaxis]
    except: return None

# 1. Load Model
print(f"Loading model: {MODEL_PATH}...")
if not os.path.exists(MODEL_PATH):
    print(f"üö® ERROR: Model file '{MODEL_PATH}' not found.")
    exit()
model = tf.keras.models.load_model(MODEL_PATH)
print("‚úÖ Model loaded successfully!")

# 2. Load Protocol
print(f"Loading Dev Protocol: {PROTOCOL_PATH}...")
try:
    cols = ["SPEAKER_ID", "FLAC_FILE_NAME", "SPEAKER_GENDER", "CODEC", "CODEC_Q", 
            "CODEC_SEED", "ATTACK_TAG", "ATTACK_LABEL", "KEY", "TMP"]
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', names=cols)
except:
    print("Warning: Standard column headers failed. Reading as raw list...")
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', header=None)
    df.rename(columns={1: 'FLAC_FILE_NAME'}, inplace=True)

print(f"Processing ALL {len(df)} files from Dev set...")

# 3. Processing Loop
y_true = []
y_scores = []
has_labels = 'KEY' in df.columns # Dev set usually has labels

print(f"Starting predictions (saving to {OUTPUT_FILE})...")

with open(OUTPUT_FILE, 'w') as f:
    # iterrows is slow, but acceptable for inference script
    for index, row in tqdm(df.iterrows(), total=len(df)):
        file_name = str(row['FLAC_FILE_NAME'])
        file_path = os.path.join(AUDIO_DIR, file_name + ".flac")
        
        spec = preprocess_for_eval(file_path)
        
        if spec is not None:
            score = model.predict(np.array([spec]), verbose=0)[0][0]
            f.write(f"{file_name} {score}\n")
            
            if has_labels:
                y_scores.append(score)
                # 'spoof' = 1, 'bonafide' = 0
                true_label = 1 if row['KEY'] == 'spoof' else 0
                y_true.append(true_label)
        else:
            # If file missing or corrupt, write 0.0
            f.write(f"{file_name} 0.0\n")

print(f"‚úÖ Full Dev evaluation saved to {OUTPUT_FILE}")

# 4. Calculate EER
if has_labels and len(y_scores) > 0:
    fpr, tpr, thresholds = roc_curve(y_true, y_scores, pos_label=1)
    fnr = 1 - tpr
    eer_threshold = thresholds[np.nanargmin(np.absolute((fnr - fpr)))]
    eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))] * 100
    
    print("="*40)
    print(f"FINAL FULL DEV SET EER: {eer:.2f}%")
    print("="*40)

Loading model: asvspoof5_normalized.h5...
‚úÖ Model loaded successfully!
Loading Dev Protocol: ASVspoof5.dev.track_1.tsv...
Processing ALL 140950 files from Dev set...
Starting predictions (saving to dev_full_scores.txt)...


  1%|          | 1454/140950 [05:49<9:18:51,  4.16it/s] 


KeyboardInterrupt: 

In [1]:
import os
import numpy as np
import pandas as pd
import librosa
import soundfile as sf
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

# ================= CONFIGURATION =================
DATASET_DIR = "flac_T" 
PROTOCOL_PATH = "ASVspoof5.train.tsv"
BATCH_SIZE = 64  
FIXED_WIDTH = 400
# =================================================

class ASVspoofGenerator(Sequence):
    def __init__(self, x_set, y_set, batch_size, base_dir):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.base_dir = base_dir

    def __len__(self):
        return int(np.ceil(len(self.x) / self.batch_size))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]

        batch_specs = []
        for file_name in batch_x:
            file_path = os.path.join(self.base_dir, file_name + ".flac")
            spec = self.process_audio(file_path)
            batch_specs.append(spec)

        return np.array(batch_specs)[..., np.newaxis], np.array(batch_y)

    def process_audio(self, file_path):
        if not os.path.exists(file_path):
            return np.zeros((128, FIXED_WIDTH))
        try:
            y, sr = sf.read(file_path)
            if sr != 16000:
                y = librosa.resample(y, orig_sr=sr, target_sr=16000)
            
            # --- NORMALIZATION LOGIC ---
            spec = librosa.feature.melspectrogram(y=y, sr=16000, n_mels=128)
            spec_db = librosa.power_to_db(spec, ref=np.max) 
            
            if spec_db.shape[1] < FIXED_WIDTH:
                pad_width = FIXED_WIDTH - spec_db.shape[1]
                spec_db = np.pad(spec_db, ((0, 0), (0, pad_width)), mode='constant')
            else:
                spec_db = spec_db[:, :FIXED_WIDTH]
            
            # Map -80dB...0dB to 0.0...1.0
            spec_norm = (spec_db + 80.0) / 80.0
            return spec_norm
            # ---------------------------
        except:
            return np.zeros((128, FIXED_WIDTH))

if __name__ == "__main__":
    print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
    
    # 1. Load Protocol
    print("Loading full protocol...")
    cols = ["SPEAKER_ID", "FLAC_FILE_NAME", "SPEAKER_GENDER", "CODEC", "CODEC_Q", 
            "CODEC_SEED", "ATTACK_TAG", "ATTACK_LABEL", "KEY", "TMP"]
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', names=cols)
    df['target'] = df['KEY'].apply(lambda x: 1 if x == 'spoof' else 0)
    
    # 2. CREATE BALANCED SUBSET
    df_real = df[df['target'] == 0]
    df_spoof = df[df['target'] == 1]
    
    n_samples = len(df_real) 
    df_spoof_balanced = df_spoof.sample(n=n_samples, random_state=42)
    df_balanced = pd.concat([df_real, df_spoof_balanced]).sample(frac=1, random_state=42).reset_index(drop=True)
    
    X_train, X_val, y_train, y_val = train_test_split(
        df_balanced['FLAC_FILE_NAME'].values, 
        df_balanced['target'].values, 
        test_size=0.2, 
        random_state=42
    )

    print(f"Training on {len(X_train)} files (Balanced)")

    train_gen = ASVspoofGenerator(X_train, y_train, BATCH_SIZE, DATASET_DIR)
    val_gen = ASVspoofGenerator(X_val, y_val, BATCH_SIZE, DATASET_DIR)

    # 3. MODEL (With Batch Norm)
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), input_shape=(128, 400, 1)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        
        tf.keras.layers.Conv2D(64, (3, 3)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # 4. CALLBACK: SAVE EVERY EPOCH
    # This creates: asvspoof5_epoch_01.h5, asvspoof5_epoch_02.h5, etc.
    checkpoint = ModelCheckpoint(
        "asvspoof5_epoch_{epoch:02d}.h5", 
        monitor="val_loss",
        save_best_only=False, # Save EVERY file
        verbose=1
    )

    print("Starting Training (Saving SEPARATE files for each epoch)...")
    
    model.fit(
        train_gen, 
        validation_data=val_gen, 
        epochs=8, 
        callbacks=[checkpoint],
        workers=4, 
        use_multiprocessing=False
    )
    
    print("Training Complete. Check your folder for 'asvspoof5_epoch_XX.h5' files.")

Num GPUs Available:  0
Loading full protocol...
Training on 30075 files (Balanced)
Starting Training (Saving SEPARATE files for each epoch)...
Epoch 1/8
Epoch 1: saving model to asvspoof5_epoch_01.h5


  saving_api.save_model(


Epoch 2/8
Epoch 2: saving model to asvspoof5_epoch_02.h5
Epoch 3/8
Epoch 3: saving model to asvspoof5_epoch_03.h5
Epoch 4/8
Epoch 4: saving model to asvspoof5_epoch_04.h5
Epoch 5/8
Epoch 5: saving model to asvspoof5_epoch_05.h5
Epoch 6/8
 24/470 [>.............................] - ETA: 36:06 - loss: 0.0698 - accuracy: 0.9688

KeyboardInterrupt: 

4. Evaluation on Development data

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from tqdm import tqdm
import soundfile as sf
import librosa
from sklearn.metrics import roc_curve

# ================= CONFIGURATION =================
# 1. THE WINNING MODEL (Check if Epoch 3 beat this!)
MODEL_PATH = "asvspoof5_epoch_04.h5" 

# 2. DATASET PATHS (Dev Set)
PROTOCOL_PATH = "ASVspoof5.dev.track_1.tsv"
AUDIO_DIR = "flac_D"

# 3. OUTPUT
OUTPUT_FILE = "dev_best_scores.txt"
FIXED_WIDTH = 400
# =================================================

def preprocess_for_eval(file_path):
    if not os.path.exists(file_path): return None
    try:
        y, sr = sf.read(file_path)
        if sr != 16000: y = librosa.resample(y, orig_sr=sr, target_sr=16000)
        
        # --- FIX FOR SHORT AUDIO ---
        # Pad with silence if shorter than 0.12s (2048 samples)
        if len(y) < 2048:
            padding = 2048 - len(y)
            y = np.pad(y, (0, padding), mode='constant')
        # ---------------------------

        # Mel Spectrogram
        spec = librosa.feature.melspectrogram(y=y, sr=16000, n_mels=128)
        spec_db = librosa.power_to_db(spec, ref=np.max)
        
        # Resize to FIXED_WIDTH
        if spec_db.shape[1] < FIXED_WIDTH:
            pad_width = FIXED_WIDTH - spec_db.shape[1]
            spec_db = np.pad(spec_db, ((0, 0), (0, pad_width)), mode='constant')
        else:
            spec_db = spec_db[:, :FIXED_WIDTH]
            
        # *** NORMALIZATION (0.0 to 1.0) ***
        # This matches your training exactly.
        spec_norm = (spec_db + 80.0) / 80.0
        
        return spec_norm[..., np.newaxis]
    except: return None

# --- MAIN EXECUTION ---
print(f"Loading model: {MODEL_PATH}...")
if not os.path.exists(MODEL_PATH):
    print(f"üö® ERROR: Model file '{MODEL_PATH}' not found. Did you mean epoch_03?")
    exit()

model = tf.keras.models.load_model(MODEL_PATH)
print("‚úÖ Model loaded successfully!")

print(f"Loading Protocol: {PROTOCOL_PATH}...")
try:
    cols = ["SPEAKER_ID", "FLAC_FILE_NAME", "SPEAKER_GENDER", "CODEC", "CODEC_Q", 
            "CODEC_SEED", "ATTACK_TAG", "ATTACK_LABEL", "KEY", "TMP"]
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', names=cols)
except:
    print("Warning: Standard headers failed. Reading as raw list...")
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', header=None)
    df.rename(columns={1: 'FLAC_FILE_NAME'}, inplace=True)

# OPTIONAL: Test on smaller subset first to confirm low EER
# Comment this out to run the FULL set
test_df = df.sample(5000, random_state=42)
print(f"Evaluating on {len(test_df)} random files...")

y_true = []
y_scores = []
has_labels = 'KEY' in df.columns

print(f"Starting predictions...")

with open(OUTPUT_FILE, 'w') as f:
    for index, row in tqdm(test_df.iterrows(), total=len(test_df)):
        file_name = str(row['FLAC_FILE_NAME'])
        file_path = os.path.join(AUDIO_DIR, file_name + ".flac")
        
        spec = preprocess_for_eval(file_path)
        
        if spec is not None:
            # Predict
            score = model.predict(np.array([spec]), verbose=0)[0][0]
            f.write(f"{file_name} {score}\n")
            
            if has_labels:
                y_scores.append(score)
                # 1=Spoof, 0=Bonafide
                true_label = 1 if row['KEY'] == 'spoof' else 0
                y_true.append(true_label)
        else:
            f.write(f"{file_name} 0.0\n")

print(f"‚úÖ Scores saved to {OUTPUT_FILE}")

if has_labels and len(y_scores) > 0:
    fpr, tpr, thresholds = roc_curve(y_true, y_scores, pos_label=1)
    fnr = 1 - tpr
    eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))] * 100
    print("\n" + "="*40)
    print(f"üåü FINAL EER (Epoch 02): {eer:.2f}% üåü")
    print("="*40 + "\n")

Loading model: asvspoof5_epoch_04.h5...
‚úÖ Model loaded successfully!
Loading Protocol: ASVspoof5.dev.track_1.tsv...
Evaluating on 5000 random files...
Starting predictions...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5000/5000 [00:01<00:00, 3803.26it/s]

‚úÖ Scores saved to dev_best_scores.txt





In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from tqdm import tqdm
import soundfile as sf
import librosa
from sklearn.metrics import roc_curve

# ================= CONFIGURATION =================
MODEL_PATH = "asvspoof5_epoch_04.h5" 
PROTOCOL_PATH = "ASVspoof5.dev.track_1.tsv"
AUDIO_DIR = "flac_D"
OUTPUT_FILE = "final_dev_full_scores_fast.txt"
FIXED_WIDTH = 400
BATCH_SIZE = 256  # Process 64 files at once
# =================================================

def preprocess_for_eval(file_path):
    if not os.path.exists(file_path): return None
    try:
        y, sr = sf.read(file_path)
        if sr != 16000: y = librosa.resample(y, orig_sr=sr, target_sr=16000)
        
        # Pad short audio
        if len(y) < 2048:
            padding = 2048 - len(y)
            y = np.pad(y, (0, padding), mode='constant')

        # Mel Spectrogram
        spec = librosa.feature.melspectrogram(y=y, sr=16000, n_mels=128)
        spec_db = librosa.power_to_db(spec, ref=np.max)
        
        # Resize
        if spec_db.shape[1] < FIXED_WIDTH:
            pad_width = FIXED_WIDTH - spec_db.shape[1]
            spec_db = np.pad(spec_db, ((0, 0), (0, pad_width)), mode='constant')
        else:
            spec_db = spec_db[:, :FIXED_WIDTH]
            
        # Normalize
        spec_norm = (spec_db + 80.0) / 80.0
        return spec_norm[..., np.newaxis]
    except: return None

# Load Model
print(f"Loading Model: {MODEL_PATH}...")
model = tf.keras.models.load_model(MODEL_PATH)
print("‚úÖ Model loaded!")

# Load Protocol
print("Loading Protocol...")
try:
    cols = ["SPEAKER_ID", "FLAC_FILE_NAME", "SPEAKER_GENDER", "CODEC", "CODEC_Q", 
            "CODEC_SEED", "ATTACK_TAG", "ATTACK_LABEL", "KEY", "TMP"]
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', names=cols)
except:
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', header=None)
    df.rename(columns={1: 'FLAC_FILE_NAME'}, inplace=True)

print(f"üöÄ Processing {len(df)} files in batches of {BATCH_SIZE}...")

# Data containers
filenames = []
specs = []
y_true = []
all_scores = {} # Store results to write later
has_labels = 'KEY' in df.columns

# Open file for writing results incrementally
with open(OUTPUT_FILE, 'w') as f:
    for index, row in tqdm(df.iterrows(), total=len(df)):
        file_name = str(row['FLAC_FILE_NAME'])
        file_path = os.path.join(AUDIO_DIR, file_name + ".flac")
        
        # 1. Preprocess
        s = preprocess_for_eval(file_path)
        
        if s is not None:
            specs.append(s)
            filenames.append(file_name)
            if has_labels:
                label = 1 if row['KEY'] == 'spoof' else 0
                y_true.append(label)
        else:
            # Handle error immediately
            f.write(f"{file_name} 0.0\n")

        # 2. When batch is full, PREDICT
        if len(specs) >= BATCH_SIZE:
            batch_preds = model.predict_on_batch(np.array(specs))
            
            # Write batch to file
            for i, fname in enumerate(filenames):
                score = batch_preds[i][0]
                f.write(f"{fname} {score}\n")
                all_scores[fname] = score # Keep for EER calc
            
            # Clear buffer
            specs = []
            filenames = []

    # 3. Process remaining files (leftovers)
    if len(specs) > 0:
        batch_preds = model.predict_on_batch(np.array(specs))
        for i, fname in enumerate(filenames):
            score = batch_preds[i][0]
            f.write(f"{fname} {score}\n")
            all_scores[fname] = score

print(f"‚úÖ Evaluation complete. Saved to {OUTPUT_FILE}")

# Calculate EER
if has_labels and len(y_true) > 0:
    # Re-align scores with labels (since we skipped errors)
    # This is a quick approximation using the collected lists
    # Ideally, we should match exact indices, but for this dataset, errors are rare.
    valid_scores = list(all_scores.values())
    
    # Ensure lengths match (truncate labels if errors occurred)
    if len(valid_scores) <= len(y_true):
        # We only kept labels for successful loads
        # This aligns y_true with valid_scores
        pass 
    
    fpr, tpr, thresholds = roc_curve(y_true[:len(valid_scores)], valid_scores, pos_label=1)
    fnr = 1 - tpr
    eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))] * 100
    
    print("\n" + "="*50)
    print(f"üèÜ OFFICIAL FINAL EER (Epoch 04): {eer:.4f}% üèÜ")
    print("="*50 + "\n")

Loading Model: asvspoof5_epoch_04.h5...
‚úÖ Model loaded!
Loading Protocol...
üöÄ Processing 140950 files in batches of 256...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 140950/140950 [1:32:09<00:00, 25.49it/s]  


‚úÖ Evaluation complete. Saved to final_dev_full_scores_fast.txt

üèÜ OFFICIAL FINAL EER (Epoch 04): 21.8389% üèÜ



In [13]:
import tensorflow as tf
import numpy as np
import librosa
import os
import sys

# ================= CONFIGURATION =================
# 1. YOUR TRAINED MODEL
MODEL_PATH = "asvspoof5_epoch_04.h5"

# 2. THE FILE TO TEST (Change this filename!)
# You can use .wav, .mp3, .flac, etc.
# TEST_FILE = "bharat.wav"  # <--- CHANGE THIS
TEST_FILE = "flac_D/D_0002136877.flac" # Point to a real Dev file
#TEST_FILE = "flac_D/D_0000128101.flac" # Point to a fake Dev file
# =================================================

# Constants (Must match training)
FIXED_WIDTH = 400  # ~4 seconds of audio

def preprocess_audio(file_path):
    print(f"Processing: {file_path}...")
    if not os.path.exists(file_path):
        print(f"‚ùå ERROR: File not found: {file_path}")
        return None

    try:
        # Load audio (automatically resamples to 16kHz)
        y, sr = librosa.load(file_path, sr=16000)
        
        # Trim silence from beginning and end (optional, but helps)
        y, _ = librosa.effects.trim(y)

        # Fix Length: Pad if too short
        if len(y) < 2048:
            padding = 2048 - len(y)
            y = np.pad(y, (0, padding), mode='constant')

        # Generate Mel Spectrogram
        spec = librosa.feature.melspectrogram(y=y, sr=16000, n_mels=128)
        spec_db = librosa.power_to_db(spec, ref=np.max)
        
        # Fit to Model Input Shape (Truncate or Pad to FIXED_WIDTH)
        if spec_db.shape[1] < FIXED_WIDTH:
            pad_width = FIXED_WIDTH - spec_db.shape[1]
            spec_db = np.pad(spec_db, ((0, 0), (0, pad_width)), mode='constant')
        else:
            # Note: We only test the FIRST 4 seconds
            spec_db = spec_db[:, :FIXED_WIDTH]
            
        # Normalize (Crucial!)
        spec_norm = (spec_db + 80.0) / 80.0
        
        # Add batch and channel dimensions: (1, 128, 400, 1)
        return spec_norm[np.newaxis, ..., np.newaxis]

    except Exception as e:
        print(f"‚ùå Error processing audio: {e}")
        return None

# --- MAIN EXECUTION ---
print("="*50)
print(" üïµÔ∏è  DEEPFAKE DETECTOR - LIVE TEST ")
print("="*50)

# 1. Load Model
if not os.path.exists(MODEL_PATH):
    print("üö® Model file not found! Check the name.")
    sys.exit()

print("Loading AI Brain...")
model = tf.keras.models.load_model(MODEL_PATH)

# 2. Process Audio
input_tensor = preprocess_audio(TEST_FILE)

if input_tensor is not None:
    # 3. Predict
    print("Analyzing audio patterns...")
    prediction = model.predict(input_tensor, verbose=0)[0][0]
    
    # 4. Interpret Result
    # In our training: 0 = Bonafide (Real), 1 = Spoof (Fake)
    
    score_percent = prediction * 100
    
    print("\n" + "-"*30)
    print(f"RAW SCORE: {prediction:.4f}")
    print("-"*30)

    if prediction < 0.50:
        confidence = (1 - prediction) * 100
        print(f"‚úÖ RESULT: REAL HUMAN VOICE")
        print(f"üí™ Confidence: {confidence:.2f}%")
    else:
        confidence = prediction * 100
        print(f"‚ö†Ô∏è RESULT: ARTIFICIAL / DEEPFAKE")
        print(f"üö® Confidence: {confidence:.2f}%")
    print("-"*30 + "\n")

 üïµÔ∏è  DEEPFAKE DETECTOR - LIVE TEST 
Loading AI Brain...
Processing: flac_D/D_0002136877.flac...
Analyzing audio patterns...

------------------------------
RAW SCORE: 0.0000
------------------------------
‚úÖ RESULT: REAL HUMAN VOICE
üí™ Confidence: 100.00%
------------------------------



In [8]:
import pandas as pd
import os

# ================= CONFIGURATION =================
PROTOCOL_PATH = "ASVspoof5.dev.track_1.tsv"
SCORES_FILE = "final_dev_full_scores_fast.txt"
# =================================================

print("üïµÔ∏è  Starting Forensic Analysis...")

# 1. Load the Truth (Protocol)
print(f"Loading Truth from {PROTOCOL_PATH}...")
try:
    # Try reading with headers
    cols = ["SPEAKER_ID", "FLAC_FILE_NAME", "SPEAKER_GENDER", "CODEC", "CODEC_Q", 
            "CODEC_SEED", "ATTACK_TAG", "ATTACK_LABEL", "KEY", "TMP"]
    df_truth = pd.read_csv(PROTOCOL_PATH, sep=' ', names=cols)
except:
    # Fallback for no headers
    df_truth = pd.read_csv(PROTOCOL_PATH, sep=' ', header=None)
    df_truth.rename(columns={1: 'FLAC_FILE_NAME', 8: 'KEY'}, inplace=True)

# Keep only what we need: Filename and Key (bonafide/spoof)
df_truth = df_truth[['FLAC_FILE_NAME', 'KEY']]
df_truth['FLAC_FILE_NAME'] = df_truth['FLAC_FILE_NAME'].astype(str)

# 2. Load the Predictions (Scores)
print(f"Loading Scores from {SCORES_FILE}...")
# The scores file is "filename score"
df_scores = pd.read_csv(SCORES_FILE, sep=' ', names=['FLAC_FILE_NAME', 'SCORE'])
df_scores['FLAC_FILE_NAME'] = df_scores['FLAC_FILE_NAME'].astype(str)

# 3. Merge them
print("Merging data...")
df = pd.merge(df_truth, df_scores, on='FLAC_FILE_NAME')

print(f"Successfully matched {len(df)} files.")

# ================= ANALYSIS =================

# --- CASE 1: FALSE POSITIVES (False Alarms) ---
# Truth = 'bonafide' (Real), but Score is HIGH (Model thinks Fake)
false_positives = df[df['KEY'] == 'bonafide'].copy()
# Sort by score descending (Highest confidence fakes)
worst_fp = false_positives.sort_values(by='SCORE', ascending=False).head(5)

print("\n" + "="*60)
print("üö® TOP 5 FALSE ALARMS (Real Humans flagged as Deepfakes)")
print("These files are likely noisy, short, or have weird microphones.")
print("="*60)
for _, row in worst_fp.iterrows():
    print(f"File: {row['FLAC_FILE_NAME']}.flac  |  Model Confidence: {row['SCORE']*100:.2f}% Fake")

# --- CASE 2: FALSE NEGATIVES (Missed Attacks) ---
# Truth = 'spoof' (Fake), but Score is LOW (Model thinks Real)
false_negatives = df[df['KEY'] == 'spoof'].copy()
# Sort by score ascending (Lowest confidence fakes -> Model thought they were very Real)
worst_fn = false_negatives.sort_values(by='SCORE', ascending=True).head(5)

print("\n" + "="*60)
print("‚ö†Ô∏è TOP 5 MISSED ATTACKS (Deepfakes that tricked the AI)")
print("These are the 'Super-Deepfakes' your model cannot detect.")
print("="*60)
for _, row in worst_fn.iterrows():
    print(f"File: {row['FLAC_FILE_NAME']}.flac  |  Model Confidence: {(1-row['SCORE'])*100:.2f}% Real")

print("\nDone. Copy these filenames and listen to them in your folder!")

üïµÔ∏è  Starting Forensic Analysis...
Loading Truth from ASVspoof5.dev.track_1.tsv...
Loading Scores from final_dev_full_scores_fast.txt...
Merging data...
Successfully matched 140950 files.

üö® TOP 5 FALSE ALARMS (Real Humans flagged as Deepfakes)
These files are likely noisy, short, or have weird microphones.
File: D_0001602028.flac  |  Model Confidence: 100.00% Fake
File: D_0001170373.flac  |  Model Confidence: 100.00% Fake
File: D_0000114661.flac  |  Model Confidence: 100.00% Fake
File: D_0002759149.flac  |  Model Confidence: 100.00% Fake
File: D_0000128101.flac  |  Model Confidence: 100.00% Fake

‚ö†Ô∏è TOP 5 MISSED ATTACKS (Deepfakes that tricked the AI)
These are the 'Super-Deepfakes' your model cannot detect.
File: D_0002534512.flac  |  Model Confidence: 100.00% Real
File: D_0000437431.flac  |  Model Confidence: 100.00% Real
File: D_0001767508.flac  |  Model Confidence: 100.00% Real
File: D_0001546420.flac  |  Model Confidence: 100.00% Real
File: D_0002136877.flac  |  Model 

5. Evaluate on Evaluation data

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from tqdm import tqdm
import soundfile as sf
import librosa
from sklearn.metrics import roc_curve

# ================= CONFIGURATION =================
# 1. THE WINNING MODEL (Check if Epoch 3 beat this!)
MODEL_PATH = "asvspoof5_epoch_04.h5" 

# 2. DATASET PATHS (Dev Set)
PROTOCOL_PATH = "ASVspoof5.eval.track_1.tsv"
AUDIO_DIR = "flac_E_eval"

# 3. OUTPUT
OUTPUT_FILE = "dev_best_scores.txt"
FIXED_WIDTH = 400
# =================================================

def preprocess_for_eval(file_path):
    if not os.path.exists(file_path): return None
    try:
        y, sr = sf.read(file_path)
        if sr != 16000: y = librosa.resample(y, orig_sr=sr, target_sr=16000)
        
        # --- FIX FOR SHORT AUDIO ---
        # Pad with silence if shorter than 0.12s (2048 samples)
        if len(y) < 2048:
            padding = 2048 - len(y)
            y = np.pad(y, (0, padding), mode='constant')
        # ---------------------------

        # Mel Spectrogram
        spec = librosa.feature.melspectrogram(y=y, sr=16000, n_mels=128)
        spec_db = librosa.power_to_db(spec, ref=np.max)
        
        # Resize to FIXED_WIDTH
        if spec_db.shape[1] < FIXED_WIDTH:
            pad_width = FIXED_WIDTH - spec_db.shape[1]
            spec_db = np.pad(spec_db, ((0, 0), (0, pad_width)), mode='constant')
        else:
            spec_db = spec_db[:, :FIXED_WIDTH]
            
        # *** NORMALIZATION (0.0 to 1.0) ***
        # This matches your training exactly.
        spec_norm = (spec_db + 80.0) / 80.0
        
        return spec_norm[..., np.newaxis]
    except: return None

# --- MAIN EXECUTION ---
print(f"Loading model: {MODEL_PATH}...")
if not os.path.exists(MODEL_PATH):
    print(f"üö® ERROR: Model file '{MODEL_PATH}' not found. Did you mean epoch_03?")
    exit()

model = tf.keras.models.load_model(MODEL_PATH)
print("‚úÖ Model loaded successfully!")

print(f"Loading Protocol: {PROTOCOL_PATH}...")
try:
    cols = ["SPEAKER_ID", "FLAC_FILE_NAME", "SPEAKER_GENDER", "CODEC", "CODEC_Q", 
            "CODEC_SEED", "ATTACK_TAG", "ATTACK_LABEL", "KEY", "TMP"]
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', names=cols)
except:
    print("Warning: Standard headers failed. Reading as raw list...")
    df = pd.read_csv(PROTOCOL_PATH, sep=' ', header=None)
    df.rename(columns={1: 'FLAC_FILE_NAME'}, inplace=True)

# OPTIONAL: Test on smaller subset first to confirm low EER
# Comment this out to run the FULL set
test_df = df.sample(5000, random_state=42)
print(f"Evaluating on {len(test_df)} random files...")

y_true = []
y_scores = []
has_labels = 'KEY' in df.columns

print(f"Starting predictions...")

with open(OUTPUT_FILE, 'w') as f:
    for index, row in tqdm(test_df.iterrows(), total=len(test_df)):
        file_name = str(row['FLAC_FILE_NAME'])
        file_path = os.path.join(AUDIO_DIR, file_name + ".flac")
        
        spec = preprocess_for_eval(file_path)
        
        if spec is not None:
            # Predict
            score = model.predict(np.array([spec]), verbose=0)[0][0]
            f.write(f"{file_name} {score}\n")
            
            if has_labels:
                y_scores.append(score)
                # 1=Spoof, 0=Bonafide
                true_label = 1 if row['KEY'] == 'spoof' else 0
                y_true.append(true_label)
        else:
            f.write(f"{file_name} 0.0\n")

print(f"‚úÖ Scores saved to {OUTPUT_FILE}")

if has_labels and len(y_scores) > 0:
    fpr, tpr, thresholds = roc_curve(y_true, y_scores, pos_label=1)
    fnr = 1 - tpr
    eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))] * 100
    print("\n" + "="*40)
    print(f"üåü FINAL EER (Epoch 02): {eer:.2f}% üåü")
    print("="*40 + "\n")