In [2]:
import os
import librosa
import pandas as pd

In [3]:
def load_audio_files(directory):
    audio_data = []
    for filename in os.listdir(directory):
        if filename.endswith(".wav"):
            file_path = os.path.join(directory, filename)
            audio, sr = librosa.load(file_path, sr=None)  # Load the audio file
            audio_data.append((filename, audio, sr))  # Store the filename, audio data, and sample rate
    return audio_data

audio_directory = r'/home/gautham/release_in_the_wild'
audio_list = load_audio_files(audio_directory)

In [4]:
def extract_features(audio_data):
    features = []
    for filename, audio, sr in audio_data:
        # Extract MFCCs (Mel-frequency cepstral coefficients) as features
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
        features.append((filename, mfccs))
    return features

# Extract MFCC features from the loaded audio
audio_features = extract_features(audio_list)

In [5]:
metadata_file = r'/home/gautham/release_in_the_wild/meta.csv'
metadata_df = pd.read_csv(metadata_file)
print(metadata_df.head(10))

    file               speaker      label
0  0.wav         Alec Guinness      spoof
1  1.wav         Alec Guinness      spoof
2  2.wav          Barack Obama      spoof
3  3.wav         Alec Guinness      spoof
4  4.wav  Christopher Hitchens  bona-fide
5  5.wav              Ayn Rand  bona-fide
6  6.wav          Barack Obama      spoof
7  7.wav          Donald Trump  bona-fide
8  8.wav          Donald Trump  bona-fide
9  9.wav         Alec Guinness  bona-fide


In [6]:
metadata_df.columns = metadata_df.columns.str.strip()

In [7]:
merged_data = pd.merge(metadata_df, pd.DataFrame(audio_list, columns=['Filename', 'Audio', 'SampleRate']), left_on='file', right_on='Filename', how='inner')

In [8]:
print(merged_data.head(10))

    file               speaker      label Filename  \
0  0.wav         Alec Guinness      spoof    0.wav   
1  1.wav         Alec Guinness      spoof    1.wav   
2  2.wav          Barack Obama      spoof    2.wav   
3  3.wav         Alec Guinness      spoof    3.wav   
4  4.wav  Christopher Hitchens  bona-fide    4.wav   
5  5.wav              Ayn Rand  bona-fide    5.wav   
6  6.wav          Barack Obama      spoof    6.wav   
7  7.wav          Donald Trump  bona-fide    7.wav   
8  8.wav          Donald Trump  bona-fide    8.wav   
9  9.wav         Alec Guinness  bona-fide    9.wav   

                                               Audio  SampleRate  
0  [0.0008559248, 5.8470447e-05, 0.0007754833, 0....       16000  
1  [-0.00036084154, 0.000937727, -0.00047797145, ...       16000  
2  [5.2883388e-05, 0.00010718728, 0.00014177158, ...       16000  
3  [0.0036943506, 0.0015072429, -0.0018338299, -0...       16000  
4  [-0.00015450451, -0.0002064928, 0.00040216927,...       16000  
5  

In [15]:
from sklearn.model_selection import train_test_split

# Split the data into train (70%) and test (30%) sets
train_data, test_data = train_test_split(merged_data, test_size=0.3, random_state=42)

# Further split the test data into test (15%) and evaluation (15%) sets
test_data, eval_data = train_test_split(test_data, test_size=0.5, random_state=42)

In [16]:
labels = merged_data['label']

# Convert MFCCs to a format suitable for machine learning
X = [mfccs for filename, mfccs in audio_features]
y = [1 if label == 'spoof' else 0 for label in labels]  # Convert labels to binary (1 for spoof, 0 for bona-fide)

In [19]:
from sklearn.preprocessing import StandardScaler
import numpy as np

# Determine the maximum length of MFCC feature vectors
max_length = max(len(mfccs[0]) for mfccs in X_train)

# Function to pad or truncate and reshape MFCCs
def preprocess_mfccs(mfccs, max_length):
    if len(mfccs[0]) < max_length:
        padding = max_length - len(mfccs[0])
        mfccs = np.pad(mfccs, ((0, 0), (0, padding)), mode='constant')
    elif len(mfccs[0]) > max_length:
        mfccs = mfccs[:, :max_length]
    return mfccs.reshape(-1)  # Reshape to a 1D array

# Apply padding or truncation and reshape to all feature vectors
X_train = [preprocess_mfccs(mfccs, max_length) for mfccs in X_train]
X_test = [preprocess_mfccs(mfccs, max_length) for mfccs in X_test]
X_eval = [preprocess_mfccs(mfccs, max_length) for mfccs in X_eval]

# Apply StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_eval = scaler.transform(X_eval)

In [20]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Create an SVM classifier
svm_model = SVC(kernel='linear', random_state=42)

# Train the model on the training data
svm_model.fit(X_train, y_train)

# Predict on the evaluation set
y_eval_pred = svm_model.predict(X_eval)

# Calculate accuracy
accuracy = accuracy_score(y_eval, y_eval_pred)
print("Accuracy:", accuracy)

# Calculate precision
precision = precision_score(y_eval, y_eval_pred)
print("Precision:", precision)

# Calculate recall
recall = recall_score(y_eval, y_eval_pred)
print("Recall:", recall)

# Calculate F1-score
f1 = f1_score(y_eval, y_eval_pred)
print("F1-Score:", f1)

# Confusion matrix
conf_matrix = confusion_matrix(y_eval, y_eval_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Classification report
class_report = classification_report(y_eval, y_eval_pred)
print("Classification Report:")
print(class_report)

Accuracy: 0.6232431298510593
Precision: 0.0
Recall: 0.0
F1-Score: 0.0
Confusion Matrix:
[[2971    0]
 [1796    0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.62      1.00      0.77      2971
           1       0.00      0.00      0.00      1796

    accuracy                           0.62      4767
   macro avg       0.31      0.50      0.38      4767
weighted avg       0.39      0.62      0.48      4767



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [21]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Create a Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the training data
rf_model.fit(X_train, y_train)

# Predict on the evaluation set
y_eval_pred = rf_model.predict(X_eval)

# Calculate accuracy
accuracy = accuracy_score(y_eval, y_eval_pred)
print("Accuracy:", accuracy)

# Calculate precision
precision = precision_score(y_eval, y_eval_pred)
print("Precision:", precision)

# Calculate recall
recall = recall_score(y_eval, y_eval_pred)
print("Recall:", recall)

# Calculate F1-score
f1 = f1_score(y_eval, y_eval_pred)
print("F1-Score:", f1)

# Confusion matrix
conf_matrix = confusion_matrix(y_eval, y_eval_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Classification report
class_report = classification_report(y_eval, y_eval_pred)
print("Classification Report:")
print(class_report)

Accuracy: 0.604992657856094
Precision: 0.3753581661891118
Recall: 0.07293986636971046
F1-Score: 0.12214452214452215
Confusion Matrix:
[[2753  218]
 [1665  131]]
Classification Report:
              precision    recall  f1-score   support

           0       0.62      0.93      0.75      2971
           1       0.38      0.07      0.12      1796

    accuracy                           0.60      4767
   macro avg       0.50      0.50      0.43      4767
weighted avg       0.53      0.60      0.51      4767

