In [None]:
# Install required packages in Colab
!pip install librosa scikit-learn numpy pandas matplotlib seaborn soundfile

In [None]:
import os
import pickle
import warnings

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from google.colab import files
warnings.filterwarnings('ignore')

## Step 1: Define Feature Extraction (Must Match Backend)

This function must extract features EXACTLY as the backend does.

In [None]:
def extract_voice_features(audio_path):
    """
    Turn one audio file into the fixed-length voice feature vector.

    The layout MUST MATCH the backend's preprocess_audio function:
    MFCC mean (13), MFCC std (13), spectral centroid mean/std (2),
    zero crossing rate mean/std (2), chroma mean (12), chroma std (12),
    RMS mean/std (2) -- concatenated in exactly that order.

    Args:
        audio_path: Path of the audio file handed to librosa.load.

    Returns:
        1-D numpy array holding the concatenated statistics.
    """
    def _row_stats(frames):
        # One mean and one std per row (per coefficient / chroma bin)
        return np.mean(frames, axis=1), np.std(frames, axis=1)

    def _global_stats(frames):
        # A single mean/std pair taken over every value in the array
        return [np.mean(frames), np.std(frames)]

    # Decode the file, resampling to 22050 Hz
    y, sr = librosa.load(audio_path, sr=22050)

    # MFCCs (standard for voice analysis) and chroma are summarised per row
    mfcc_mean, mfcc_std = _row_stats(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13))
    chroma_mean, chroma_std = _row_stats(librosa.feature.chroma_stft(y=y, sr=sr))

    # Spectral centroid, zero crossing rate and RMS energy collapse to a pair each
    centroid_stats = _global_stats(librosa.feature.spectral_centroid(y=y, sr=sr))
    zcr_stats = _global_stats(librosa.feature.zero_crossing_rate(y))
    rms_stats = _global_stats(librosa.feature.rms(y=y))

    # Order is part of the contract with the backend -- do not rearrange
    ordered_parts = [
        mfcc_mean,       # 13 features
        mfcc_std,        # 13 features
        centroid_stats,  # 2 features
        zcr_stats,       # 2 features
        chroma_mean,     # 12 features
        chroma_std,      # 12 features
        rms_stats,       # 2 features
    ]
    return np.concatenate(ordered_parts)

## Step 2: Load Your Audio Dataset

**Option A: Upload your own labeled audio files**
- Create folders: `stressed_audio/` and `calm_audio/`
- Put the audio files (`.wav`, `.mp3`, `.webm`, or `.ogg`) in their respective folders
- Upload to Colab

**Option B: Use a public dataset (e.g., RAVDESS)**
- Map emotions to stress/calm
- Angry, Fearful, Sad → Stressed (label 1)
- Happy, Calm, Neutral → Not Stressed (label 0)

In [None]:
# File extensions treated as audio (compared case-insensitively)
_AUDIO_EXTENSIONS = ('.wav', '.mp3', '.webm', '.ogg')


def _load_labeled_folder(folder, label, description):
    """Extract features for every audio file in one folder, all tagged with `label`.

    Returns a (features, labels) pair of equal-length lists. Files that fail
    to process are reported and skipped, so one bad file does not abort the load.
    """
    print(f"Loading {description} audio files...")
    features, labels = [], []

    if not os.path.isdir(folder):
        # Say so explicitly: a typo in the path would otherwise yield an
        # empty class with no hint as to why.
        print(f"Warning: folder not found, skipping: {folder}")
        return features, labels

    # sorted() keeps the row order stable across runs and machines;
    # os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(folder)):
        # lower() so that e.g. "take1.WAV" is not silently ignored
        if not filename.lower().endswith(_AUDIO_EXTENSIONS):
            continue
        try:
            vector = extract_voice_features(os.path.join(folder, filename))
        except Exception as e:
            # Deliberate best-effort: report the failure and move on
            print(f"Error processing {filename}: {e}")
            continue
        features.append(vector)
        labels.append(label)

    return features, labels


def load_audio_dataset(stressed_folder, calm_folder):
    """
    Load audio files from folders and extract features.

    Args:
        stressed_folder: Folder of recordings labeled stressed (label 1).
        calm_folder: Folder of recordings labeled not stressed (label 0).

    Returns:
        Tuple (X, y) of numpy arrays: X stacks one feature vector per
        successfully processed file (stressed files first, then calm, each
        in sorted filename order) and y holds the matching 0/1 labels.
        Both are empty when no file could be loaded.
    """
    stressed_X, stressed_y = _load_labeled_folder(stressed_folder, 1, "stressed")
    calm_X, calm_y = _load_labeled_folder(calm_folder, 0, "calm")

    return np.array(stressed_X + calm_X), np.array(stressed_y + calm_y)

# Load your dataset
# X, y = load_audio_dataset('path/to/stressed_audio', 'path/to/calm_audio')

# OR for quick testing, create synthetic data (REPLACE WITH REAL DATA FOR PRODUCTION)
print("Creating synthetic training data for demonstration...")
print("⚠️ REPLACE THIS WITH REAL LABELED AUDIO FILES FOR PRODUCTION!")

# Seed the generator so that Restart & Run All reproduces the same dataset,
# and therefore the same split, scores and plots further down.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

n_samples = 500
n_features = 56  # Must match feature vector size (13+13+2+2+12+12+2)
X = rng.standard_normal((n_samples, n_features))  # pure noise: no real signal to learn
y = rng.integers(0, 2, n_samples)                 # random 0/1 labels

print(f"Dataset loaded: {len(X)} samples, {X.shape[1]} features")
print(f"Class distribution: Stressed={np.sum(y==1)}, Calm={np.sum(y==0)}")

## Step 3: Visualize Features (Optional)

In [None]:
# Compare how three representative features are distributed in each class.
# Each entry: (column index in X, x-axis label, panel title).
feature_panels = [
    (0, 'MFCC 1 Mean', 'Feature Distribution: MFCC 1'),
    (26, 'Spectral Centroid Mean', 'Feature Distribution: Spectral Centroid'),
    (28, 'Zero Crossing Rate Mean', 'Feature Distribution: ZCR'),
]

plt.figure(figsize=(15, 5))

for panel_number, (feature_col, panel_xlabel, panel_title) in enumerate(feature_panels, start=1):
    plt.subplot(1, 3, panel_number)
    # Overlay the calm (0) and stressed (1) histograms on the same axes
    for class_value, class_name in ((0, 'Calm'), (1, 'Stressed')):
        plt.hist(X[y == class_value, feature_col], alpha=0.5, label=class_name, bins=20)
    plt.xlabel(panel_xlabel)
    plt.ylabel('Frequency')
    plt.legend()
    plt.title(panel_title)

plt.tight_layout()
plt.show()

## Step 4: Prepare Training Data

In [None]:
# Hold out 20% of the samples for testing; stratify so that both splits
# keep the same stressed/calm ratio as the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

print(f"Training set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")

# Standardize features: learn mean/variance on the training split only,
# then apply that same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Step 5: Train Model

In [None]:
# Random Forest hyperparameters, gathered in one place for easy tuning
rf_params = {
    'n_estimators': 150,
    'max_depth': 15,
    'min_samples_split': 5,
    'min_samples_leaf': 2,
    'random_state': 42,
    'n_jobs': -1,  # use every available core
}

print("Training Random Forest model...")
model = RandomForestClassifier(**rf_params)
model.fit(X_train_scaled, y_train)
print("‚úì Model training complete!")

# 5-fold cross-validation on the training split
cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=5)
print(f"\nCross-validation scores: {cv_scores}")
print(f"Mean CV accuracy: {cv_scores.mean():.3f} (+/- {cv_scores.std():.3f})")

## Step 6: Evaluate Model

In [None]:
# Display names for labels 0 and 1, in that order
class_labels = ['Calm', 'Stressed']

# Predict classes and class probabilities for the held-out test split
y_pred = model.predict(X_test_scaled)
y_pred_proba = model.predict_proba(X_test_scaled)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.3f}")

# Per-class precision / recall / F1
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=class_labels))

# Confusion matrix as a heatmap
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Greens',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.title('Confusion Matrix - Voice Stress Detection')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Feature names, listed in the same order as the extracted feature vector
feature_names = (
    [f'MFCC_mean_{i}' for i in range(13)]
    + [f'MFCC_std_{i}' for i in range(13)]
    + ['SpectralCentroid_mean', 'SpectralCentroid_std', 'ZCR_mean', 'ZCR_std']
    + [f'Chroma_mean_{i}' for i in range(12)]
    + [f'Chroma_std_{i}' for i in range(12)]
    + ['RMS_mean', 'RMS_std']
)

# Rank features by importance (descending) and keep the top 15
importances = model.feature_importances_
indices = np.argsort(importances)[::-1][:15]
bar_positions = range(15)

plt.figure(figsize=(12, 6))
plt.title('Top 15 Most Important Features')
plt.bar(bar_positions, importances[indices])
plt.xticks(bar_positions, [feature_names[i] for i in indices], rotation=45, ha='right')
plt.tight_layout()
plt.show()

## Step 7: Export Model as .pkl

In [None]:
# Bundle the already-fitted scaler and classifier into one pipeline so that
# whoever loads the file can pass raw (unscaled) feature vectors and have
# the same standardization applied automatically.
# (Pipeline is imported in the imports cell at the top of the notebook.)
voice_pipeline = Pipeline([
    ('scaler', scaler),
    ('classifier', model)
])

# Save model
model_filename = 'voice_stress_model.pkl'
with open(model_filename, 'wb') as f:
    pickle.dump(voice_pipeline, f)

print(f"✓ Model saved as {model_filename}")
print(f"Model size: {os.path.getsize(model_filename) / 1024:.2f} KB")

# Download the model (files comes from google.colab, so this step is Colab-only)
print("\nDownloading model...")
files.download(model_filename)
print("✓ Download complete! Place this file in backend/models/ folder")

## Step 8: Test the Model (Optional)

In [None]:
# Read the pipeline back from disk to confirm the saved file is usable.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust
# (this one was written by the export cell of this notebook).
with open(model_filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Score the first held-out sample. Raw (unscaled) features are passed
# because the saved pipeline contains the scaler as its first step.
sample = X_test[:1]
prediction = loaded_model.predict(sample)
probability = loaded_model.predict_proba(sample)

print("Test prediction:")
print(f"Predicted class: {prediction[0]} ({'Stressed' if prediction[0] == 1 else 'Calm'})")
print(f"Probabilities: Calm={probability[0][0]:.3f}, Stressed={probability[0][1]:.3f}")
print(f"Actual class: {y_test[0]} ({'Stressed' if y_test[0] == 1 else 'Calm'})")

---

## 🎉 Voice Model Training Complete!

**Next Steps:**
1. Download the `voice_stress_model.pkl` file
2. Place it in your project's `backend/models/` folder
3. Ensure you also have the face model trained
4. Start the backend server and test the complete pipeline!

**Tips for Better Voice Models:**
- Use a diverse audio dataset (different speakers, ages, languages)
- Balance classes (equal stressed/calm samples)
- Consider pitch, tempo, and prosody features
- Try SVM with RBF kernel for potentially better results
- Add data augmentation (time stretching, pitch shifting)
- Experiment with deep learning (CNN, LSTM) for even better accuracy

---