# Fake-or-Real Dataset Evaluation

This notebook evaluates the same model on the **Fake-or-Real** dataset for comparison.

- **Dataset**: [Fake-or-Real](https://www.kaggle.com/datasets/mohammedabdeldayem/the-fake-or-real-dataset)  
- **Model**: [MelodyMachine/Deepfake-audio-detection-V2](https://huggingface.co/MelodyMachine/Deepfake-audio-detection-V2)

In [None]:
import os
import glob
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's "darkgrid" theme for every plot drawn in this notebook.
sns.set_theme(style="darkgrid")
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Root directory of the Fake-or-Real dataset. The path is relative, so it is
# resolved against the notebook's current working directory.
DATASET_PATH = "../data/fake_or_real"

# TODO: adjust based on dataset structure
# NOTE(review): the sketch below assumes `fake/` and `real/` sub-folders that
# contain .wav files directly — confirm against the downloaded dataset layout.
# fake_files = glob.glob(f"{DATASET_PATH}/fake/*.wav")
# real_files = glob.glob(f"{DATASET_PATH}/real/*.wav")
# print(f"Fake: {len(fake_files)}, Real: {len(real_files)}")

In [None]:
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
import librosa
import torch

# Hugging Face Hub checkpoint under evaluation, and the sample rate that
# predict() passes to both librosa and the feature extractor.
MODEL_ID = "MelodyMachine/Deepfake-audio-detection-V2"
SAMPLE_RATE = 16_000

# Load the feature extractor and the classification model for MODEL_ID.
extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
model = AutoModelForAudioClassification.from_pretrained(MODEL_ID)
# Switch to evaluation mode; this notebook only runs inference.
model.eval()
# The check mark was previously mis-encoded (UTF-8 bytes decoded as cp1252).
print("Model loaded ✓")

In [None]:
def predict(path):
    """Run inference on a single audio file.

    Args:
        path: Path to an audio file that ``librosa.load`` can read.

    Returns:
        A dict with three keys:

        * ``"file"`` - base name of ``path``.
        * ``"pred"`` - ``"fake"`` when the top-scoring label equals "fake"
          (case-insensitive); any other label is reported as ``"real"``.
        * ``"confidence"`` - softmax probability of the top-scoring class,
          as a percentage rounded to two decimals (plain Python ``float``).
    """
    # Resample to SAMPLE_RATE and downmix to mono. The returned rate is
    # discarded because it is forced by the ``sr`` argument.
    audio, _ = librosa.load(path, sr=SAMPLE_RATE, mono=True)
    inputs = extractor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt", padding=True)
    # Keep the inputs on the model's device so the function also works when
    # the model has been moved to a GPU (no-op on CPU).
    inputs = inputs.to(model.device)

    with torch.no_grad():
        logits = model(**inputs).logits

    # Single-file batch: take row 0. ``.cpu()`` is required before
    # ``.numpy()`` for tensors that live on an accelerator.
    probs = torch.softmax(logits, dim=-1)[0].cpu().numpy()
    # Convert the NumPy scalar to a built-in int before using it as a key.
    idx = int(probs.argmax())
    label = model.config.id2label[idx]

    return {
        "file": os.path.basename(path),
        "pred": "fake" if label.lower() == "fake" else "real",
        # Built-in float avoids float32 display artefacts (99.87000274...)
        # and keeps the result JSON-serialisable.
        "confidence": round(float(probs[idx]) * 100, 2),
    }

## Run Inference

TODO: run on dataset sample

In [None]:
# TODO: implement inference loop
# results = [predict(f) for f in tqdm(sample_files)]
# results_df = pd.DataFrame(results)

## Results

TODO: calculate metrics

In [None]:
# from sklearn.metrics import accuracy_score, classification_report
# print(classification_report(y_true, y_pred))