First, we import the required libraries and load the data.

In [1]:
import pandas as pd
import os
import joblib  # For saving the model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Load dataset
# The original absolute path is kept as the default (it looks like a Colab
# working directory); set the AUDIO_FEATURES_CSV environment variable to run
# the notebook somewhere else without editing this cell.
FILE_PATH = os.environ.get('AUDIO_FEATURES_CSV', '/content/audio_features.csv')
df = pd.read_csv(FILE_PATH)

# Fail early, with a readable message, if the metadata columns that the
# pre-processing step drops / uses as the target are not present in the file.
REQUIRED_COLUMNS = ['filename', 'member_id', 'phrase', 'augmentation']
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise ValueError(f"{FILE_PATH} is missing required columns: {missing_columns}")

print("Data loaded successfully:")
print(df.head())

Data loaded successfully:
                  filename member_id   phrase augmentation      mfcc_1  \
0  confirm_transaction.wav     Alice  confirm     original -340.400543   
1  confirm_transaction.wav     Alice  confirm  pitch_shift -359.502777   
2  confirm_transaction.wav     Alice  confirm        noise -258.759150   
3          yes_approve.wav     Alice  approve     original -339.216797   
4          yes_approve.wav     Alice  approve  pitch_shift -357.222809   

       mfcc_2     mfcc_3     mfcc_4     mfcc_5     mfcc_6     mfcc_7  \
0  153.440659  11.435617  35.139812  -8.880391  33.800430  -2.553989   
1  137.338013  16.110950  17.321766   2.134316  25.634069  -4.171609   
2   81.416022  36.868308  19.619398   7.742003  17.061521   9.398395   
3  150.816895   9.160224  44.852089 -10.228117  32.047153 -10.446824   
4  133.149139  19.207508  25.644648  -0.325113  20.085651  -9.758201   

      mfcc_8    mfcc_9    mfcc_10   mfcc_11   mfcc_12   mfcc_13  \
0  14.789642  3.226717   9.45

Second, we pre-process the data: we separate the features from the target, encode the target labels as integers, and scale the features.

In [2]:
# --- Features (X) and target (y) ---

# Columns that are not model inputs: file metadata plus the label itself.
non_feature_columns = ['filename', 'member_id', 'phrase', 'augmentation']

# The value to predict is the speaker, stored in 'member_id'.
y_labels = df['member_id']

# Every remaining column is kept as a feature.
X = df.drop(columns=non_feature_columns)

print("\nOriginal Features (X):")
print(X.head())

print("\nOriginal Target (y):")
print(y_labels.head())

# --- Encode the target ---
# LabelEncoder replaces each distinct member name with an integer class id.
encoder = LabelEncoder()
y = encoder.fit_transform(y_labels)

# Show which integer stands for which person.
print("\nLabel Encoding:")
for i, name in enumerate(encoder.classes_):
    print(f"{name}  ->  {i}")

# --- Scale the features ---
# Fit the scaler on X, then apply it to the same rows.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

print("\nScaled features (first 5 rows):")
print(X_scaled[:5])


Original Features (X):
       mfcc_1      mfcc_2     mfcc_3     mfcc_4     mfcc_5     mfcc_6  \
0 -340.400543  153.440659  11.435617  35.139812  -8.880391  33.800430   
1 -359.502777  137.338013  16.110950  17.321766   2.134316  25.634069   
2 -258.759150   81.416022  36.868308  19.619398   7.742003  17.061521   
3 -339.216797  150.816895   9.160224  44.852089 -10.228117  32.047153   
4 -357.222809  133.149139  19.207508  25.644648  -0.325113  20.085651   

      mfcc_7     mfcc_8    mfcc_9    mfcc_10   mfcc_11   mfcc_12   mfcc_13  \
0  -2.553989  14.789642  3.226717   9.459011  9.104712  0.221739  4.942339   
1  -4.171609  17.237194 -0.098724  13.077312 -3.479074  5.789270  0.994323   
2   9.398395   8.086155  8.769283   5.778931  7.310243  3.426549  3.627400   
3 -10.446824  14.669823  0.494543   2.670831  6.588624 -5.343949  6.006868   
4  -9.758201  16.647306 -7.959214  11.459781 -4.397164  9.486970  5.184935   

   spectral_rolloff  rms_energy  
0       4686.679232    0.054667  


Next, we split the data into training and test sets and train the model.

In [3]:
# Split data: 70% for training, 30% for testing.
# The split is made on the raw feature table X (not on X_scaled) so that the
# scaler below can be fitted on the training rows only. Fitting it on every
# row lets test-set means and variances leak into the preprocessing step.
# random_state and stratify are unchanged, so the same rows end up in each subset.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y
)

# NOTE(review): the CSV holds several augmented variants of each recording
# (see the `augmentation` column). This split is stratified by speaker only, so
# variants of one recording can fall into both subsets and inflate the test
# score - consider a group-aware split keyed on speaker + filename.

# Re-fit the scaler on the training rows only and apply it to both subsets.
# `scaler` is re-bound on purpose: it is the object saved at the end of the
# notebook, so it must match the features the model is actually trained on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

print(f"\nTraining samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")

# Train the Model
print("\nTraining the Random Forest model...")

# Initialize the model (fixed random_state keeps the forest reproducible)
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the training data
model.fit(X_train, y_train)

print("Model training complete.")


Training samples: 16
Testing samples: 8

Training the Random Forest model...
Model training complete.


Finally, we evaluate the model: we use it to make predictions on the test data (which it has never seen) and compare those predictions to the true labels. This tells us how good our model is.

In [4]:
# Predict the encoded speaker id for every test row.
y_pred = model.predict(X_test)

# Decode the integer ids back into member names for a readable comparison.
y_test_names = encoder.inverse_transform(y_test)
y_pred_names = encoder.inverse_transform(y_pred)
comparison = pd.DataFrame(
    {
        'True_Label': y_test_names,
        'Predicted_Label': y_pred_names,
    }
)

print("\n--- Model Evaluation ---")
print("Test Set Predictions vs. True Labels:")
print(comparison)

# Overall metrics, computed on the encoded labels.
accuracy = accuracy_score(y_test, y_pred)
# average='weighted': per-class F1 weighted by each class's support.
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"\nAccuracy: {accuracy * 100:.2f}%")
print(f"Weighted F1-Score: {f1:.4f}")

# Per-person precision / recall / F1 breakdown.
print("\nDetailed Classification Report:")
report = classification_report(
    y_test,
    y_pred,
    target_names=encoder.classes_,
)
print(report)


--- Model Evaluation ---
Test Set Predictions vs. True Labels:
  True_Label Predicted_Label
0      Alice           Alice
1      Alice           Alice
2     cedric          cedric
3  Armstrong       Armstrong
4  Armstrong       Armstrong
5     yassin          yassin
6     yassin          yassin
7     cedric           Alice

Accuracy: 87.50%
Weighted F1-Score: 0.8667

Detailed Classification Report:
              precision    recall  f1-score   support

       Alice       0.67      1.00      0.80         2
   Armstrong       1.00      1.00      1.00         2
      cedric       1.00      0.50      0.67         2
      yassin       1.00      1.00      1.00         2

    accuracy                           0.88         8
   macro avg       0.92      0.88      0.87         8
weighted avg       0.92      0.88      0.87         8



Now we save the trained model, together with the fitted scaler and label encoder.

In [5]:
# os and joblib are already imported in the first cell; repeated here as in the original.
import joblib
import os

# Output folder for the persisted artifacts; created if it does not exist yet.
MODEL_DIR = '../models/'
os.makedirs(MODEL_DIR, exist_ok=True)

# One file per fitted object: the classifier, the feature scaler and the label encoder.
MODEL_PATH, SCALER_PATH, ENCODER_PATH = (
    os.path.join(MODEL_DIR, file_name)
    for file_name in ('voice_model.joblib', 'voice_scaler.joblib', 'voice_encoder.joblib')
)

# Write the three objects in the same order as before: model, scaler, encoder.
for fitted_object, target_path in (
    (model, MODEL_PATH),
    (scaler, SCALER_PATH),
    (encoder, ENCODER_PATH),
):
    joblib.dump(fitted_object, target_path)

print("\nArtifacts Saved")
print(f"Model saved to: {MODEL_PATH}")
print(f"Scaler saved to: {SCALER_PATH}")
print(f"Encoder saved to: {ENCODER_PATH}")


Artifacts Saved
Model saved to: ../models/voice_model.joblib
Scaler saved to: ../models/voice_scaler.joblib
Encoder saved to: ../models/voice_encoder.joblib
