# Voice Data Analysis - Emotion Recognition

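This notebook walks through the voice emotion recognition pipeline using machine learning: it loads the preprocessed audio, labels, spectrograms, and extracted features from `../data/`, visualizes them, and compares the saved model results from `../results/`.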

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import warnings
# Silence every warning from here on: no category, message, or module filter
# is given, so the 'ignore' action applies to all of them.
# NOTE(review): this also hides deprecation warnings from the plotting and
# audio libraries — consider narrowing it if something misbehaves silently.
warnings.filterwarnings('ignore')

# Notebook-wide plot styling: the 'seaborn-v0_8' matplotlib style sheet and a
# default figure size of (12, 6); individual cells override the size as needed.
plt.style.use('seaborn-v0_8')
plt.rcParams['figure.figsize'] = (12, 6)

## 1. Data Loading and Exploration

In [None]:
# Read the preprocessed arrays and the metadata table from ../data
audio_data = np.load('../data/processed_audio.npy')
labels = np.load('../data/label_names.npy')
metadata = pd.read_csv('../data/metadata.csv')

# Summarise what was loaded: one print call, one item per output line
print(
    f"Dataset shape: {audio_data.shape}",
    f"Number of samples: {len(labels)}",
    f"Unique emotions: {np.unique(labels)}",
    sep="\n",
)

# Preview the first rows of the metadata table as the cell's output
metadata.head()

In [None]:
# Bar chart of how many samples carry each emotion label
fig, ax = plt.subplots(figsize=(10, 6))
emotion_counts = pd.Series(labels).value_counts()
emotion_counts.plot(kind='bar', color='skyblue', ax=ax)

ax.set_title('Distribution of Emotions in Dataset')
ax.set_xlabel('Emotion')
ax.set_ylabel('Count')
# Tilt the category labels so longer emotion names do not overlap
plt.setp(ax.get_xticklabels(), rotation=45)

fig.tight_layout()
plt.show()

## 2. Audio Visualization

In [None]:
# One waveform panel per emotion, for at most four emotions on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
axes = axes.flatten()

unique_emotions = np.unique(labels)
for panel, emotion_name in zip(axes, unique_emotions[:4]):
    # Position of the first sample whose label equals this emotion
    first_idx = np.nonzero(labels == emotion_name)[0][0]

    # Draw that sample's values against their position in the array
    panel.plot(audio_data[first_idx])
    panel.set(
        title=f'{emotion_name.title()} - Waveform',
        xlabel='Sample',
        ylabel='Amplitude',
    )

plt.tight_layout()
plt.show()

In [None]:
# Spectrogram panels for the same (at most four) emotions, one per 2x2 cell
spectrograms = np.load('../data/spectrograms.npy')

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
axes = axes.flatten()

# Display options shared by every panel: time on x, mel-scaled frequency on y
specshow_options = dict(x_axis='time', y_axis='mel', cmap='viridis')

for panel, emotion_name in zip(axes, unique_emotions[:4]):
    # Position of the first sample whose label equals this emotion
    first_idx = np.nonzero(labels == emotion_name)[0][0]

    librosa.display.specshow(spectrograms[first_idx], ax=panel, **specshow_options)
    panel.set_title(f'{emotion_name.title()} - Mel Spectrogram')

plt.tight_layout()
plt.show()

## 3. Feature Analysis

In [None]:
# Load and analyze features
# Read the feature summary table from ../data into a DataFrame.
features_df = pd.read_csv('../data/features_summary.csv')

# Report the table's (rows, columns) shape, then preview its first rows as
# the cell's displayed output.
print(f"Feature matrix shape: {features_df.shape}")
features_df.head()

In [None]:
# Feature correlation heatmap
fig, ax = plt.subplots(figsize=(15, 12))

# Correlate the features only: drop the 'label' column, then keep numeric
# columns. Recent pandas versions no longer skip non-numeric columns in
# .corr() and raise instead, so any text column in the CSV would break the
# cell. When every remaining column is numeric this selection changes nothing.
numeric_features = features_df.drop(columns='label').select_dtypes(include='number')
correlation_matrix = numeric_features.corr()

# Diverging colour map centred on 0 so positive and negative correlations
# are visually symmetric; square cells with thin separators.
sns.heatmap(correlation_matrix, cmap='coolwarm', center=0,
            square=True, linewidths=0.5, ax=ax)
ax.set_title('Feature Correlation Matrix')
fig.tight_layout()
plt.show()

In [None]:
# Feature distribution by emotion
# One panel per selected feature; within a panel, one translucent histogram
# per emotion so the distributions can be compared by overlay.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
axes = axes.flatten()

important_features = ['mfcc_0', 'mfcc_1', 'spectral_centroid', 'zero_crossing_rate']

for ax, feature in zip(axes, important_features):
    # Use the same 10 bin edges for every emotion in this panel, computed from
    # the whole column. Letting each histogram pick its own edges gives bars
    # of different widths, so their heights cannot be compared across emotions.
    bin_edges = np.histogram_bin_edges(features_df[feature].dropna(), bins=10)

    # Rows are selected by comparing 'label' with the emotion's position in
    # unique_emotions (the enumerate index).
    # NOTE(review): assumes 'label' holds integer codes in the same order as
    # np.unique(labels) — confirm against the feature-extraction script.
    for label_code, emotion in enumerate(unique_emotions):
        emotion_data = features_df[features_df['label'] == label_code]
        ax.hist(emotion_data[feature], alpha=0.7, label=emotion, bins=bin_edges)

    ax.set_title(f'{feature} Distribution by Emotion')
    ax.set_xlabel(feature)
    ax.set_ylabel('Frequency')
    ax.legend()

plt.tight_layout()
plt.show()

## 4. Model Results Analysis

In [None]:
# Show the saved per-model results and chart their accuracies; if the results
# file has not been generated yet, print a hint instead.
try:
    results_df = pd.read_csv('../results/detailed_results.csv')
    print("Model Performance Comparison:")
    print(results_df)

    # One bar per model, coloured from a fixed palette
    palette = ['skyblue', 'lightgreen', 'salmon', 'gold', 'lightcoral']
    fig, ax = plt.subplots(figsize=(12, 6))
    bars = ax.bar(results_df['Model'], results_df['Accuracy'], color=palette)
    ax.set_title('Model Accuracy Comparison')
    ax.set_ylabel('Accuracy')
    plt.setp(ax.get_xticklabels(), rotation=45)

    # Write each accuracy (3 decimals) slightly above its bar, centred on it
    for rect, accuracy in zip(bars, results_df['Accuracy']):
        label_x = rect.get_x() + rect.get_width() / 2
        label_y = rect.get_height() + 0.01
        ax.text(label_x, label_y, f'{accuracy:.3f}', ha='center', va='bottom')

    fig.tight_layout()
    plt.show()

except FileNotFoundError:
    print("Results not found. Run the training and evaluation scripts first.")

## 5. Conclusions and Insights

### Key Findings:
1. **Feature Importance**: MFCC features show strong discriminative power for emotion classification
2. **Model Performance**: Deep learning models (CNN/RNN) generally outperform classical ML approaches
3. **Spectral Features**: Spectral centroid and zero-crossing rate provide valuable emotion-specific information

### Technical Achievements:
- Implemented complete audio preprocessing pipeline
- Extracted multiple types of audio features (MFCC, Chroma, Spectral)
- Compared classical ML vs deep learning approaches
- Created interactive web application for real-time prediction

### Future Improvements:
- Experiment with larger datasets (RAVDESS, IEMOCAP)
- Implement attention mechanisms in neural networks
- Add real-time audio recording capability
- Explore transfer learning with pre-trained audio models