# Libraries

In [None]:
import os
import joblib
import librosa
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler

In [None]:
# Input directories, relative to the notebook's working directory.
AUTHENTIC_DIR_TRAIN = "real"   # recordings the one-class model is fitted on
SPOOFED_DIR_TEST = "fake"      # recordings scored in the spoofed-audio test cell

# Artifacts written with joblib by train_anomaly_model.
MODEL_FILE = "voice_anomaly_detector.joblib"
SCALER_FILE = "anomaly_scaler.joblib"
TRAIN_STATS_FILE = "train_feature_stats.joblib"

# Default analysis parameters for extract_features.
SR = 16000         # target sample rate handed to librosa.load
N_FFT = 2048       # FFT size / analysis frame length
HOP_LENGTH = 512   # hop between successive frames
N_MFCC = 20        # number of MFCC coefficients

# Human-readable feature names; filled in place by the first
# extract_features call and then left untouched.
FEATURE_NAMES = []

# Feature Extraction

In [3]:
def extract_features(audio_path, sr=SR, n_mfcc=N_MFCC, n_fft=N_FFT, hop_length=HOP_LENGTH):
    """Summarise one audio file as a fixed-length 1-D feature vector.

    The file is loaded as mono at ``sr`` and, if shorter than one analysis
    frame, zero-padded to ``n_fft`` samples. Frame-level descriptors are then
    reduced to their mean and standard deviation over time.

    Vector layout (also recorded in the module-level ``FEATURE_NAMES`` list,
    which is filled in place on the first call and left untouched afterwards):

        mfcc mean (n_mfcc), mfcc std (n_mfcc),
        zcr mean/std, spectral centroid mean/std, spectral rolloff mean/std,
        f0 mean/std,
        chroma mean (one per chroma bin), chroma std (one per chroma bin)

    Parameters
    ----------
    audio_path : str
        Path of the audio file, passed to ``librosa.load``.
    sr : int
        Target sample rate; ``librosa.load`` resamples to it.
    n_mfcc : int
        Number of MFCC coefficients.
    n_fft : int
        FFT size / frame length shared by all frame-level features.
    hop_length : int
        Hop between frames.

    Returns
    -------
    numpy.ndarray
        1-D feature vector in the layout described above.
    """
    global FEATURE_NAMES

    # With an explicit `sr`, librosa.load resamples and returns that same rate,
    # so comparing the returned rate against `sr` can never detect a mismatch.
    signal, _ = librosa.load(audio_path, sr=sr, mono=True)

    # Guarantee at least one full analysis frame.
    if len(signal) < n_fft:
        signal = np.pad(signal, (0, n_fft - len(signal)), 'constant')

    mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    mfccs_mean = np.mean(mfccs.T, axis=0)
    mfccs_std = np.std(mfccs.T, axis=0)

    zcr = librosa.feature.zero_crossing_rate(y=signal, frame_length=n_fft, hop_length=hop_length)
    zcr_mean = np.mean(zcr)
    zcr_std = np.std(zcr)

    spectral_centroid = librosa.feature.spectral_centroid(y=signal, sr=sr, n_fft=n_fft, hop_length=hop_length)
    spectral_centroid_mean = np.mean(spectral_centroid)
    spectral_centroid_std = np.std(spectral_centroid)

    spectral_rolloff = librosa.feature.spectral_rolloff(y=signal, sr=sr, n_fft=n_fft, hop_length=hop_length)
    spectral_rolloff_mean = np.mean(spectral_rolloff)
    spectral_rolloff_std = np.std(spectral_rolloff)

    chroma = librosa.feature.chroma_stft(y=signal, sr=sr, n_fft=n_fft, hop_length=hop_length)
    chroma_mean = np.mean(chroma.T, axis=0)
    chroma_std = np.std(chroma.T, axis=0)

    f0, _, _ = librosa.pyin(signal, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=sr, frame_length=n_fft, hop_length=hop_length)
    # f0 is NaN on frames without a pitch estimate. When every frame is NaN
    # the pitch statistics are defined as 0; checking up front avoids the
    # RuntimeWarnings np.nanmean/np.nanstd raise on an all-NaN input.
    if np.isnan(f0).all():
        f0_mean = 0.0
        f0_std = 0.0
    else:
        f0_mean = np.nanmean(f0)
        f0_std = np.nanstd(f0)

    features = np.hstack([
        mfccs_mean, mfccs_std,
        zcr_mean, zcr_std,
        spectral_centroid_mean, spectral_centroid_std,
        spectral_rolloff_mean, spectral_rolloff_std,
        f0_mean, f0_std,
        chroma_mean, chroma_std,
    ])

    # Populate the shared name list once, mutating it in place so that callers
    # already holding a reference to FEATURE_NAMES see the names too.
    if not FEATURE_NAMES:
        FEATURE_NAMES.extend(f"mfcc_mean_{i+1}" for i in range(n_mfcc))
        FEATURE_NAMES.extend(f"mfcc_std_{i+1}" for i in range(n_mfcc))
        FEATURE_NAMES.extend(["zcr_mean", "zcr_std",
                              "spectral_centroid_mean", "spectral_centroid_std",
                              "spectral_rolloff_mean", "spectral_rolloff_std",
                              "f0_mean", "f0_std"])
        FEATURE_NAMES.extend(f"chroma_mean_{i+1}" for i in range(chroma_mean.shape[0]))
        FEATURE_NAMES.extend(f"chroma_std_{i+1}" for i in range(chroma_std.shape[0]))

    return features

# Load Data

In [4]:
def load_authentic_training_data(authentic_dir):
    """Build the training matrix from the audio files in ``authentic_dir``.

    Every ``.wav`` / ``.mp3`` file (extension matched case-insensitively) is
    passed through ``extract_features``; files whose feature vector contains
    NaN are reported and skipped.

    Parameters
    ----------
    authentic_dir : str
        Directory holding the authentic recordings.

    Returns
    -------
    numpy.ndarray
        One row per accepted file, in sorted filename order. An empty array
        (``np.array([])``) is returned when ``authentic_dir`` is not an
        existing directory or contains no usable file.
    """
    print(f"Loading authentic training samples from: {authentic_dir}")
    # isdir, not exists: a path that points at a regular file passes an
    # existence check and would then make os.listdir raise.
    if not os.path.isdir(authentic_dir):
        print(f"ERROR: Authentic training directory '{authentic_dir}' not found.")
        return np.array([])

    features_list = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the training matrix reproducible across runs and platforms.
    for filename in sorted(os.listdir(authentic_dir)):
        if not filename.lower().endswith(('.wav', '.mp3')):
            continue

        feats = extract_features(os.path.join(authentic_dir, filename))
        if np.isnan(feats).any():
            print(f"Skipping {filename} due to feature extraction errors.")
            continue

        features_list.append(feats)

    return np.array(features_list)

# Model Training

In [None]:
def train_anomaly_model(X_train_authentic):
    """Fit a scaler and a One-Class SVM on authentic feature vectors.

    The features are standardised, the per-feature mean and standard deviation
    of the *scaled* training matrix are stored in TRAIN_STATS_FILE, an RBF
    One-Class SVM (nu=0.05, gamma='scale') is fitted, and the model and scaler
    are written to MODEL_FILE and SCALER_FILE.

    Parameters
    ----------
    X_train_authentic : numpy.ndarray
        Training matrix, one row per authentic sample.

    Returns
    -------
    tuple
        ``(model, scaler, stats)`` where ``stats`` is a dict with keys
        ``'means'`` and ``'stds'``; ``(None, None, None)`` when the input has
        no rows.
    """
    n_samples = X_train_authentic.shape[0]
    if n_samples == 0:
        print("No authentic training data to train on. Exiting training.")
        return None, None, None

    feature_scaler = StandardScaler()
    scaled_matrix = feature_scaler.fit_transform(X_train_authentic)

    # Statistics are taken after scaling and persisted before the SVM is fit.
    feature_stats = {
        'means': np.mean(scaled_matrix, axis=0),
        'stds': np.std(scaled_matrix, axis=0),
    }
    joblib.dump(feature_stats, TRAIN_STATS_FILE)

    detector = OneClassSVM(nu=0.05, kernel="rbf", gamma='scale')
    detector.fit(scaled_matrix)

    for artifact, destination in ((detector, MODEL_FILE), (feature_scaler, SCALER_FILE)):
        joblib.dump(artifact, destination)

    print(f"\nAnomaly detection model saved to {MODEL_FILE}")
    print(f"Scaler saved to {SCALER_FILE}")
    print(f"Training feature stats saved to {TRAIN_STATS_FILE}")

    return detector, feature_scaler, feature_stats

# Explainability

In [12]:
def detect_anomaly_and_explain(audio_path, model, scaler, train_feature_stats, feature_names,
                               anom_threshold_std=2.5, score_threshold=-0.025):
    """Score one audio file and print which features make it look anomalous.

    The file's features are extracted, scaled with ``scaler`` and scored with
    ``model.decision_function``. A score at or above ``score_threshold`` is
    reported as authentic. Otherwise the file is reported as potentially
    spoofed and up to ten features whose deviation from the training
    statistics exceeds ``anom_threshold_std`` are listed, largest first.

    Parameters
    ----------
    audio_path : str
        Audio file to analyse.
    model
        Fitted detector exposing ``predict`` and ``decision_function``.
    scaler
        Fitted scaler exposing ``transform``.
    train_feature_stats : dict
        Dict with ``'means'`` and ``'stds'`` arrays describing the scaled
        training features.
    feature_names : list of str
        Names aligned with the feature vector. Names beyond the vector's
        length are ignored.
    anom_threshold_std : float
        Minimum absolute deviation, in training standard deviations, for a
        feature to be listed.
    score_threshold : float
        Decision-function value below which a sample is labelled as spoofed.
        The default of -0.025 keeps the previously hard-coded cut-off.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    print(f"\n--- Analyzing: {audio_path} ---")
    new_features = extract_features(audio_path)
    if np.isnan(new_features).any():
        print("Could not extract valid features from the audio file.")
        return

    new_features_scaled = scaler.transform(new_features.reshape(1, -1))
    prediction = model.predict(new_features_scaled)[0]
    anomaly_score = model.decision_function(new_features_scaled)[0]

    if anomaly_score >= score_threshold:
        print("Prediction: Likely Authentic (Normal)")
        return

    print("Prediction: Potentially Spoofed (Anomaly)")
    print("\nFeatures contributing to anomaly detection (deviation from authentic training data):")

    train_means = train_feature_stats['means']
    train_stds = train_feature_stats['stds']
    scaled_row = new_features_scaled[0]

    deviations = []
    # Bound the loop by the feature vector so a name list that is longer than
    # the vector cannot index past its end.
    for i, name in enumerate(feature_names[:len(scaled_row)]):
        # Near-constant training features would blow the ratio up; treat
        # their spread as 1.
        std_dev = train_stds[i] if train_stds[i] > 1e-6 else 1.0
        deviation_in_stds = (scaled_row[i] - train_means[i]) / std_dev

        if abs(deviation_in_stds) > anom_threshold_std:
            deviations.append({
                "feature": name,
                "value": new_features[i],
                "scaled_value": scaled_row[i],
                "train_mean_scaled": train_means[i],
                "train_std_scaled": train_stds[i],
                "deviation_stds": deviation_in_stds
            })

    if not deviations:
        # One explanation is enough when no single feature stands out.
        if prediction == -1:
            print("  Anomaly detected, but individual feature deviations are not above threshold. Overall pattern is anomalous.")
        elif prediction == 1:
            # Only reachable when score_threshold is set above the model's
            # own decision boundary.
            print("  All features within typical authentic ranges.")
        return

    deviations.sort(key=lambda x: abs(x["deviation_stds"]), reverse=True)
    for dev_info in deviations[:10]:
        direction = "higher" if dev_info["deviation_stds"] > 0 else "lower"
        print(f"  - {dev_info['feature']}: {dev_info['value']:.2f} "
            f"(Scaled: {dev_info['scaled_value']:.2f}, "
            f"Train Mean Scaled: {dev_info['train_mean_scaled']:.2f}). "
            f"This is {abs(dev_info['deviation_stds']):.2f} std devs {direction} than typical authentic samples.")

# Testing

In [7]:
# Start from a clean slate so a failed run cannot leave handles from an
# earlier execution of this cell lying around.
model = scaler = train_feature_stats = None

# Extract features from the authentic recordings, then fit and persist the
# scaler, the training statistics and the One-Class SVM.
X_authentic_train = load_authentic_training_data(AUTHENTIC_DIR_TRAIN)
model, scaler, train_feature_stats = train_anomaly_model(X_authentic_train)

Loading authentic training samples from: real

Anomaly detection model saved to voice_anomaly_detector.joblib
Scaler saved to anomaly_scaler.joblib
Training feature stats saved to train_feature_stats.joblib


In [11]:
# Sanity check: score the authentic recordings the model was trained on.
# Only .wav/.mp3 entries are scored (the same filter the training loader
# applies), in sorted order so the report is reproducible.
for file in sorted(os.listdir(AUTHENTIC_DIR_TRAIN)):
    if not file.lower().endswith(('.wav', '.mp3')):
        continue
    detect_anomaly_and_explain(os.path.join(AUTHENTIC_DIR_TRAIN, file), model, scaler, train_feature_stats, FEATURE_NAMES)


--- Analyzing: real\DonaldTrump.mp3 ---
Prediction: Likely Authentic (Normal)

--- Analyzing: real\Elon Musk Works 120 Hours Every Week.mp3 ---
Prediction: Likely Authentic (Normal)

--- Analyzing: real\ModiVoice.mp3 ---
Prediction: Likely Authentic (Normal)

--- Analyzing: real\MorganFreeman.mp3 ---
Prediction: Likely Authentic (Normal)

--- Analyzing: real\MrBeast.mp3 ---
Prediction: Likely Authentic (Normal)


In [14]:
# Score the spoofed recordings. Only .wav/.mp3 entries are scored (the same
# filter the training loader applies), in sorted order so the report is
# reproducible.
for file in sorted(os.listdir(SPOOFED_DIR_TEST)):
    if not file.lower().endswith(('.wav', '.mp3')):
        continue
    detect_anomaly_and_explain(os.path.join(SPOOFED_DIR_TEST, file), model, scaler, train_feature_stats, FEATURE_NAMES)


--- Analyzing: fake\DonaldTrump1.mp3 ---
Prediction: Potentially Spoofed (Anomaly)

Features contributing to anomaly detection (deviation from authentic training data):
  - chroma_std_10: 0.35 (Scaled: 4.94, Train Mean Scaled: 0.00). This is 4.94 std devs higher than typical authentic samples.
  - chroma_std_11: 0.36 (Scaled: 2.79, Train Mean Scaled: -0.00). This is 2.79 std devs higher than typical authentic samples.
  - mfcc_std_9: 16.70 (Scaled: 2.68, Train Mean Scaled: 0.00). This is 2.68 std devs higher than typical authentic samples.
  - chroma_std_7: 0.33 (Scaled: 2.67, Train Mean Scaled: 0.00). This is 2.67 std devs higher than typical authentic samples.

--- Analyzing: fake\DonaldTrump2.wav ---
Prediction: Potentially Spoofed (Anomaly)

Features contributing to anomaly detection (deviation from authentic training data):
  - mfcc_std_4: 49.75 (Scaled: 6.65, Train Mean Scaled: 0.00). This is 6.65 std devs higher than typical authentic samples.
  - mfcc_std_2: 87.65 (Scaled: 6.6