In [1]:
from textgrid import TextGrid
import os
import librosa
import pandas as pd
from IPython.display import display, Audio

In [31]:
dataset_dir = '../audioData/VAD_Kaggle/Data/'
annot_dir = os.path.join(dataset_dir, 'Annotation')
audio_dir = os.path.join(dataset_dir, 'Audio')

# Parallel lists: entry n of each list describes the same recording.
audioFile_list = []    # path of each audio file found under audio_dir
annotation_list = []   # path constructed for the matching .TextGrid (existence is not checked)
label_list = []        # name of the top-level folder the audio file was found under

# The walk assumes the layout <audio_dir>/<label>/<group>/<audio file>; the annotation
# path is built with the same <label>/<group> components under annot_dir.
# Listings are sorted because os.listdir returns entries in arbitrary order, which
# would otherwise make the row order differ between runs and machines.
for label_name in sorted(os.listdir(audio_dir)):
    label_path = os.path.join(audio_dir, label_name)
    # The 'Noizeus' folder is excluded; plain files at this level (e.g. .DS_Store)
    # are not label folders and would make the nested listdir raise.
    if label_name == 'Noizeus' or not os.path.isdir(label_path):
        continue
    for group_name in sorted(os.listdir(label_path)):
        group_path = os.path.join(label_path, group_name)
        if not os.path.isdir(group_path):
            continue
        for audio_name in sorted(os.listdir(group_path)):
            # splitext removes only the final extension, so a name such as
            # 'take.01.wav' maps to 'take.01.TextGrid' rather than 'take.TextGrid'.
            stem = os.path.splitext(audio_name)[0]
            audioFile_list.append(os.path.join(group_path, audio_name))
            label_list.append(label_name)
            annotation_list.append(
                os.path.join(annot_dir, label_name, group_name, stem + '.TextGrid')
            )
    


In [6]:
# The table was presumably first assembled from the lists gathered by the directory walk:
#   pd.DataFrame({'audio_file': audioFile_list,
#                 'annotation_file': annotation_list,
#                 'label': label_list})
# That construction is disabled; the table is read from a CSV file instead.
dataset_df = pd.read_csv(filepath_or_buffer='kaggle_VAD.csv')

# Show which columns the CSV provides.
print(dataset_df.columns)

Index(['audio_file', 'annotation_file', 'label'], dtype='object')


In [14]:
import numpy as np
import librosa

def extract_features(file_path):
    """Summarise one audio file as a single 1-D feature vector.

    The file is read with ``librosa.load`` using its default arguments. Five
    feature matrices are then computed and each is averaged along its second
    axis (the frame axis in librosa's feature layout). The averages are
    concatenated in this order: MFCC (``n_mfcc=13``), chroma, mel spectrogram,
    spectral contrast, tonnetz.

    Parameters
    ----------
    file_path
        Location of the audio file; handed unchanged to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Concatenation of the per-feature averages.
    """
    signal, sample_rate = librosa.load(file_path)

    # Chroma, mel and contrast use a 512-sample FFT window; MFCC and tonnetz
    # keep librosa's defaults.
    feature_matrices = (
        librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13),
        librosa.feature.chroma_stft(y=signal, sr=sample_rate, n_fft=512),
        librosa.feature.melspectrogram(y=signal, sr=sample_rate, n_fft=512),
        librosa.feature.spectral_contrast(y=signal, sr=sample_rate, n_fft=512),
        librosa.feature.tonnetz(y=signal, sr=sample_rate),
    )

    # Collapse every matrix to one mean value per feature row, then join them.
    averaged = [np.mean(matrix.T, axis=0) for matrix in feature_matrices]
    return np.hstack(averaged)

# One feature vector per row of the 'audio_file' column, stacked into an array.
audio_paths = dataset_df['audio_file']
features = np.array(list(map(extract_features, audio_paths)))

# Targets come straight from the 'label' column.
labels = dataset_df['label']



In [15]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Turn the label column into integer class codes.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Hold out 30% of the rows for testing; the fixed seed keeps the shuffle repeatable.
split_options = dict(test_size=0.3, random_state=42, shuffle=True)
X_train, X_test, y_train, y_test = train_test_split(
    features, encoded_labels, **split_options
)

# Fit the scaler on the training rows only, then apply that same scaling to both
# splits so no statistics from the test rows influence it.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [17]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# Seed for the estimators that draw random numbers while fitting. Without it the
# tree-based models, and therefore their scores, change from run to run.
# The value matches the random_state used for the train/test split.
MODEL_SEED = 42

# Initialise models (every other hyper-parameter is left at its library default)
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=MODEL_SEED),
    'Random Forest': RandomForestClassifier(random_state=MODEL_SEED),
    'SVM': SVC(),
    'KNN': KNeighborsClassifier(),
    'AdaBoost': AdaBoostClassifier(random_state=MODEL_SEED),
    'Gradient Boosting': GradientBoostingClassifier(random_state=MODEL_SEED)
}

# Train and store models
trained_models = {}
for name, model in models.items():
    # The fitted estimator is the same object that sits in `models`;
    # `trained_models` just re-registers it under the same display name.
    model.fit(X_train, y_train)
    trained_models[name] = model



In [18]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score every fitted model on the held-out split.
# Display name -> scoring function; the dict order fixes the order of the report rows.
scorers = {
    'Accuracy': accuracy_score,
    'Precision': precision_score,
    'Recall': recall_score,
    'F1-Score': f1_score,
}

evaluation_results = {}
for model_name, model in trained_models.items():
    y_pred = model.predict(X_test)
    evaluation_results[model_name] = {
        metric_name: scorer(y_test, y_pred)
        for metric_name, scorer in scorers.items()
    }

# Text report: one paragraph per model, values shown with two decimals.
for model_name, metrics in evaluation_results.items():
    report_lines = [f"Model: {model_name}"]
    report_lines.extend(
        f"{metric_name}: {metric_value:.2f}"
        for metric_name, metric_value in metrics.items()
    )
    print("\n".join(report_lines))
    print()


Model: Logistic Regression
Accuracy: 0.98
Precision: 1.00
Recall: 0.95
F1-Score: 0.97

Model: Decision Tree
Accuracy: 0.94
Precision: 0.93
Recall: 0.91
F1-Score: 0.92

Model: Random Forest
Accuracy: 0.97
Precision: 1.00
Recall: 0.91
F1-Score: 0.95

Model: SVM
Accuracy: 0.96
Precision: 1.00
Recall: 0.90
F1-Score: 0.95

Model: KNN
Accuracy: 0.98
Precision: 0.98
Recall: 0.97
F1-Score: 0.97

Model: AdaBoost
Accuracy: 0.96
Precision: 1.00
Recall: 0.90
F1-Score: 0.95

Model: Gradient Boosting
Accuracy: 0.96
Precision: 1.00
Recall: 0.90
F1-Score: 0.95



In [19]:
# Tabular view of the scores: one column per model, one row per metric.
pd.DataFrame(data=evaluation_results)

Unnamed: 0,Logistic Regression,Decision Tree,Random Forest,SVM,KNN,AdaBoost,Gradient Boosting
Accuracy,0.980392,0.941176,0.96732,0.960784,0.980392,0.960784,0.960784
Precision,1.0,0.929825,1.0,1.0,0.982456,1.0,1.0
Recall,0.948276,0.913793,0.913793,0.896552,0.965517,0.896552,0.896552
F1-Score,0.973451,0.921739,0.954955,0.945455,0.973913,0.945455,0.945455


In [23]:
import joblib

# Save the models with their names
# joblib.dump does not create missing parent folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs('trained_models/speakerGender_KaggleVAD', exist_ok=True)
for name, model in trained_models.items():
    # The display name (spaces included) becomes the file name.
    joblib.dump(model, f'trained_models/speakerGender_KaggleVAD/{name}.pkl')

In [26]:
# Persist the fitted scaler and label encoder in the same folder as the models.
# joblib.dump does not create missing parent folders, so ensure the directory exists.
os.makedirs('trained_models/speakerGender_KaggleVAD', exist_ok=True)
joblib.dump(scaler, 'trained_models/speakerGender_KaggleVAD/scaler.pkl')
joblib.dump(label_encoder, 'trained_models/speakerGender_KaggleVAD/label_encoder.pkl')

['trained_models/speakerGender_KaggleVAD/label_encoder.pkl']

In [30]:
import huggingface_hub

In [24]:
# Location of the extended VoxCeleb metadata table.
vox_celebNew = '../audioData/VoxCeleb/voxceleb_enrichment_age_gender/dataset/final_dataframe_extended.csv'

# Read the whole table and preview its first ten rows.
voxceleb_metadata = pd.read_csv(vox_celebNew)
voxceleb_metadata.head(n=10)

Unnamed: 0,Name,gender_wiki,birth_date_wiki,nationality_wiki,gender_dbpedia,birth_date_dbpedia,nationality_dbpedia,gender_gkg,birth_date_gkg,nationality_gkg,...,description,year_in_title,VoxCeleb_ID,gender,birth_year,year_upload_yt,recording_year,recording_year_title_only,speaker_age,speaker_age_title_only
0,Arnd Peiffer,male,1987-03-18,Germany,male,1987-3-18,,male,1987-03-18,Germany,...,"""Interview mit Arnd Peiffer beim abschlie\u00d...",2013.0,id00778,male,1987.0,2013.0,2013.0,2013.0,26.0,26.0
1,Arnd Peiffer,male,1987-03-18,Germany,male,1987-3-18,,male,1987-03-18,Germany,...,"""It was a successful first competition for the...",2017.0,id00778,male,1987.0,2017.0,,2017.0,,30.0
2,Arnd Peiffer,male,1987-03-18,Germany,male,1987-3-18,,male,1987-03-18,Germany,...,"""\u00d6stersund-2016. Comments from Benedikt D...",2016.0,id00778,male,1987.0,2016.0,2016.0,2016.0,29.0,29.0
3,Arnd Peiffer,male,1987-03-18,Germany,male,1987-3-18,,male,1987-03-18,Germany,...,"""Interview mit Arnd Peiffer bei der DSV-Einkle...",2012.0,id00778,male,1987.0,2012.0,,2012.0,,25.0
4,Arnd Peiffer,male,1987-03-18,Germany,male,1987-3-18,,male,1987-03-18,Germany,...,"""Interview nach dem Verfolgungsrennen in Konti...",,id00778,male,1987.0,2010.0,,,,
5,Arnd Peiffer,male,1987-03-18,Germany,male,1987-3-18,,male,1987-03-18,Germany,...,"""Drei Schie\u00dffehler im 20-Kilometer-Einzel...",2013.0,id00778,male,1987.0,2013.0,,2013.0,,26.0
6,Arnd Peiffer,male,1987-03-18,Germany,male,1987-3-18,,male,1987-03-18,Germany,...,"""Drei Schie\u00dffehler und knapp dreieinhalb ...",2013.0,id00778,male,1987.0,2013.0,,2013.0,,26.0
7,Arnd Peiffer,male,1987-03-18,Germany,male,1987-3-18,,male,1987-03-18,Germany,...,"""Peiffer und Lesser blicken zur\u00fcck""",,id00778,male,1987.0,2017.0,,,,
8,Arnd Peiffer,male,1987-03-18,Germany,male,1987-3-18,,male,1987-03-18,Germany,...,"""Arnd Peiffer weiss genau, was er will!""",,id00778,male,1987.0,2015.0,,,,
9,Arnd Peiffer,male,1987-03-18,Germany,male,1987-3-18,,male,1987-03-18,Germany,...,"""Endlich Edelmetall f\u00fcr die DSV-Biathlete...",2013.0,id00778,male,1987.0,2013.0,,2013.0,,26.0
