<a href="https://colab.research.google.com/github/g-nazos/audio-deepfake-detection/blob/main/notebooks/gender_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#imports

import gradio as gr
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor
import torch
import librosa
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Hugging Face checkpoint that supplies both the classifier weights and the
# matching feature-extractor configuration.
model_name = "prithivMLmods/Common-Voice-Geneder-Detection"
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
processor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)

# Run on the GPU when torch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Inference only: place the model on the selected device and switch it to
# evaluation mode.
model.to(device)
model.eval()

# Class index (as a string key) -> human-readable label.
id2label = {str(index): label for index, label in enumerate(("female", "male"))}

def classify_audio(audio_path):
    """Return the model's gender probabilities for one audio file.

    The file is loaded with librosa at a 16 kHz sampling rate, turned into
    model inputs by ``processor`` and scored in a gradient-free forward pass.

    Args:
        audio_path: Path of the audio file to classify.

    Returns:
        dict mapping each label of ``id2label`` to its softmax probability,
        rounded to three decimals.
    """
    waveform, sr = librosa.load(audio_path, sr=16000)

    features = processor(waveform, sampling_rate=sr, return_tensors="pt", padding=True)
    # The input tensors have to live on the same device as the model.
    features = {name: tensor.to(device) for name, tensor in features.items()}

    with torch.no_grad():
        scores = torch.nn.functional.softmax(model(**features).logits, dim=1)

    # One file per call, so squeezing leaves a flat list with one value per class.
    probabilities = scores.squeeze().tolist()
    return {
        id2label[str(idx)]: round(prob, 3)
        for idx, prob in enumerate(probabilities)
    }

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/378M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

cuda


In [None]:
# Colab-only step: mount Google Drive at /content/drive. The dataset folders
# read by the analysis cells are given as paths under /content/drive/MyDrive.
# NOTE(review): force_remount=True presumably remounts even when the drive is
# already mounted -- confirm against the Colab documentation.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Predict the speaker gender of every real recording in the three splits and
# plot how the predictions are distributed per split.
#
# NOTE(review): these folders live under "for-original", whereas the
# fake-sample cell reads from "for-2sec" -- confirm that comparing the two
# plots across different dataset variants is intended.
folder_path_train_real = "/content/drive/MyDrive/for-dataset/for-original/for-original/training/real"
folder_path_test_real = "/content/drive/MyDrive/for-dataset/for-original/for-original/testing/real"
folder_path_validation_real = "/content/drive/MyDrive/for-dataset/for-original/for-original/validation/real"

# Insertion order (Train, Test, Validation) drives both the processing order
# and the tuple unpacking further down.
split_folders = {
    "Train": folder_path_train_real,
    "Test": folder_path_test_real,
    "Validation": folder_path_validation_real,
}

raw_predictions = {}  # split -> {filename: {"female": prob, "male": prob}}
score_frames = {}     # split -> frame with columns filename / female / male
result_frames = {}    # split -> frame with columns filename / predictions / dataset

for split_name, folder in split_folders.items():
    # Sorted so the row order does not depend on the directory listing order.
    wav_files = sorted(name for name in os.listdir(folder) if name.endswith(".wav"))
    if not wav_files:
        # Without this guard an empty folder only surfaces later as a KeyError
        # on the missing 'female'/'male' columns.
        raise FileNotFoundError(f"No .wav files found in {folder}")

    split_predictions = {
        name: classify_audio(os.path.join(folder, name)) for name in wav_files
    }
    df_scores = (
        pd.DataFrame.from_dict(split_predictions, orient="index")  # rows = filenames
        .rename_axis("filename")
        .reset_index()  # make filename a column instead of the index
    )
    # Predicted label = class with the highest probability.
    df_prediction = df_scores[["female", "male"]].idxmax(axis=1).rename("predictions")

    # Preview only the first rows instead of printing the whole frame.
    print(f"{split_name}: {len(wav_files)} files")
    print(pd.concat([df_scores, df_prediction], axis=1).head())

    raw_predictions[split_name] = split_predictions
    score_frames[split_name] = df_scores
    # Columns are selected by name rather than dropped by position.
    result_frames[split_name] = df_scores[["filename"]].assign(
        predictions=df_prediction,
        dataset=split_name,
    )

# Keep the per-split variable names this cell has always defined.
results_train, results_test, results_validation = raw_predictions.values()
df_train, df_test, df_validation = score_frames.values()
df_gender_train, df_gender_test, df_gender_validation = (
    frame.copy() for frame in score_frames.values()
)
df_result_train, df_result_test, df_result_validation = result_frames.values()

# Combine all three DataFrames; ignore_index gives the combined frame unique
# row labels instead of three overlapping 0..n ranges.
combined = pd.concat(
    [df_result_test, df_result_train, df_result_validation],
    ignore_index=True,
)

fig, ax = plt.subplots(figsize=(8, 5))

# Plot gender distribution grouped by dataset. Explicit orders keep the bars
# in the same place on every run, whatever the row order of `combined`.
sns.countplot(
    x="predictions",
    hue="dataset",
    data=combined,
    order=["female", "male"],
    hue_order=list(split_folders),
    ax=ax,
)

# Label the bars through their containers rather than through ax.patches,
# which can hold non-bar patches and makes int(height) fail on a NaN height.
for bars in ax.containers:
    ax.bar_label(bars, fmt="%d", padding=2)

ax.set_title('Gender Distribution: Train vs Test vs Validation for Real Samples')
ax.set_xlabel('Predicted Gender')
ax.set_ylabel('Count')
plt.show()

/content


In [None]:
# Predict the speaker gender of every fake recording in the three splits and
# plot how the predictions are distributed per split.
#
# NOTE(review): these folders live under "for-2sec", whereas the real-sample
# cell reads from "for-original" -- confirm that comparing the two plots
# across different dataset variants is intended.
folder_path_train_fake = "/content/drive/MyDrive/for-dataset/for-2sec/for-2seconds/training/fake"
folder_path_test_fake = "/content/drive/MyDrive/for-dataset/for-2sec/for-2seconds/testing/fake"
folder_path_validation_fake = "/content/drive/MyDrive/for-dataset/for-2sec/for-2seconds/validation/fake"

# Insertion order (Train, Test, Validation) drives both the processing order
# and the tuple unpacking further down.
split_folders = {
    "Train": folder_path_train_fake,
    "Test": folder_path_test_fake,
    "Validation": folder_path_validation_fake,
}

raw_predictions = {}  # split -> {filename: {"female": prob, "male": prob}}
score_frames = {}     # split -> frame with columns filename / female / male
result_frames = {}    # split -> frame with columns filename / predictions / dataset

for split_name, folder in split_folders.items():
    # Sorted so the row order does not depend on the directory listing order.
    wav_files = sorted(name for name in os.listdir(folder) if name.endswith(".wav"))
    if not wav_files:
        # Without this guard an empty folder only surfaces later as a KeyError
        # on the missing 'female'/'male' columns.
        raise FileNotFoundError(f"No .wav files found in {folder}")

    split_predictions = {
        name: classify_audio(os.path.join(folder, name)) for name in wav_files
    }
    df_scores = (
        pd.DataFrame.from_dict(split_predictions, orient="index")  # rows = filenames
        .rename_axis("filename")
        .reset_index()  # make filename a column instead of the index
    )
    # Predicted label = class with the highest probability.
    df_prediction = df_scores[["female", "male"]].idxmax(axis=1).rename("predictions")

    # Preview only the first rows instead of printing the whole frame.
    print(f"{split_name}: {len(wav_files)} files")
    print(pd.concat([df_scores, df_prediction], axis=1).head())

    raw_predictions[split_name] = split_predictions
    score_frames[split_name] = df_scores
    # Columns are selected by name rather than dropped by position.
    result_frames[split_name] = df_scores[["filename"]].assign(
        predictions=df_prediction,
        dataset=split_name,
    )

# Keep the per-split variable names this cell has always defined.
results_train, results_test, results_validation = raw_predictions.values()
df_train, df_test, df_validation = score_frames.values()
df_gender_train, df_gender_test, df_gender_validation = (
    frame.copy() for frame in score_frames.values()
)
df_result_train, df_result_test, df_result_validation = result_frames.values()

# Combine all three DataFrames; ignore_index gives the combined frame unique
# row labels instead of three overlapping 0..n ranges.
combined = pd.concat(
    [df_result_test, df_result_train, df_result_validation],
    ignore_index=True,
)

fig, ax = plt.subplots(figsize=(8, 5))

# Plot gender distribution grouped by dataset. Explicit orders keep the bars
# in the same place on every run, whatever the row order of `combined`.
sns.countplot(
    x="predictions",
    hue="dataset",
    data=combined,
    order=["female", "male"],
    hue_order=list(split_folders),
    ax=ax,
)

# Label the bars through their containers rather than through ax.patches,
# which can hold non-bar patches and makes int(height) fail on a NaN height.
for bars in ax.containers:
    ax.bar_label(bars, fmt="%d", padding=2)

ax.set_title('Gender Distribution: Train vs Test vs Validation for Fake Samples')
ax.set_xlabel('Predicted Gender')
ax.set_ylabel('Count')
plt.show()

In [1]:
# FIXME: the recorded run of this cell failed with
# "ModuleNotFoundError: No module named 'utils'", so the `utils` package has
# to be importable from the runtime before this line can work.
# TODO: the wildcard import hides which names the notebook relies on --
# replace it with explicit names once they are known.
from utils.audio_utils import *

ModuleNotFoundError: No module named 'utils'