# FMA Music Genre Clustering - Exploratory Data Analysis

This notebook explores the **Free Music Archive (FMA)** dataset for music genre classification using Variational Autoencoders (VAEs).

**Dataset:** FMA - 10 genres (blues, classical, country, disco, hiphop, jazz, metal, pop, reggae, rock)

**Features:**
- MFCC (Mel-Frequency Cepstral Coefficients)
- Mel Spectrograms

**Models:**
- Basic VAE (Easy Task)
- Convolutional VAE (Medium Task)  
- β-VAE with disentangled representations (Hard Task)

In [None]:
# Setup and imports for FMA dataset exploration
import sys
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
from pathlib import Path
import torch
from IPython.display import Audio

# Add project to path
sys.path.insert(0, str(Path('.').resolve()))

from src import config
from src.features import load_audio, extract_mfcc, extract_mel_spectrogram

%matplotlib inline
plt.style.use('seaborn-v0_8-whitegrid')
print("FMA Dataset Exploration - Setup complete!")
print(f"Expected genres: blues, classical, country, disco, hiphop, jazz, metal, pop, reggae, rock")

## 1. Explore FMA Raw Audio Data

The FMA dataset contains 30-second audio clips across multiple genres. Let's examine the data structure.

In [None]:
# Check FMA data directory structure
data_dir = config.RAW_DATA_DIR
print(f"FMA Data directory: {data_dir}")
print(f"Exists: {data_dir.exists()}")

# Genre folders this notebook expects to find under the raw data directory
fma_genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

if data_dir.exists():
    # Path.iterdir() yields entries in arbitrary order; sort so the listing is
    # identical on every run and on every machine.
    genres = sorted(d.name for d in data_dir.iterdir() if d.is_dir())
    print(f"\nGenres found: {genres}")

    total_files = 0
    for genre in genres:
        genre_dir = data_dir / genre
        # Both container formats are accepted as audio clips.
        audio_files = list(genre_dir.glob('*.wav')) + list(genre_dir.glob('*.mp3'))
        total_files += len(audio_files)
        print(f"  {genre}: {len(audio_files)} files")
    print(f"\nTotal audio files: {total_files}")

    # Surface expected genres that have no folder instead of leaving the
    # expectation list unused.
    missing_genres = [g for g in fma_genres if g not in genres]
    if missing_genres:
        print(f"Expected genres without a folder: {missing_genres}")
else:
    print("\nPlease download FMA dataset to the data/raw directory!")
    print("Structure should be:")
    print("  data/raw/blues/*.wav")
    print("  data/raw/classical/*.wav")
    print("  data/raw/country/*.wav")
    print("  ... etc.")

In [None]:
# Load and visualize a sample FMA audio file
def analyze_audio_sample(audio_path):
    """Load one audio file and plot its waveform, Mel spectrogram and MFCCs.

    The clip is resampled to ``config.SAMPLE_RATE`` and truncated to
    ``config.AUDIO_DURATION`` seconds by ``librosa.load``. Both time-frequency
    features are computed and displayed with ``config.HOP_LENGTH`` so the
    time axes of the two lower panels match the analysis actually performed.

    Args:
        audio_path: Path (str or ``Path``) of the audio file to load.

    Returns:
        Tuple ``(y, sr)`` exactly as returned by ``librosa.load``.
    """
    y, sr = librosa.load(audio_path, sr=config.SAMPLE_RATE, duration=config.AUDIO_DURATION)
    # One hop length for feature extraction AND display: specshow converts
    # frame indices to seconds with it, so a mismatch mis-scales the time axis.
    hop_length = config.HOP_LENGTH

    fig, axes = plt.subplots(3, 1, figsize=(14, 10))

    # Waveform
    axes[0].set_title(f'FMA Waveform: {Path(audio_path).name}')
    librosa.display.waveshow(y, sr=sr, ax=axes[0])
    axes[0].set_xlabel('Time (s)')
    axes[0].set_ylabel('Amplitude')

    # Mel Spectrogram (power converted to dB relative to the clip's peak)
    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_mels=config.N_MELS, hop_length=hop_length
    )
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    img = librosa.display.specshow(mel_spec_db, sr=sr, hop_length=hop_length,
                                   x_axis='time', y_axis='mel', ax=axes[1])
    axes[1].set_title('Mel Spectrogram')
    fig.colorbar(img, ax=axes[1], format='%+2.0f dB')

    # MFCC (hop_length is forwarded by librosa to the underlying Mel spectrogram)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=config.N_MFCC, hop_length=hop_length)
    img2 = librosa.display.specshow(mfccs, sr=sr, hop_length=hop_length,
                                    x_axis='time', ax=axes[2])
    axes[2].set_title(f'MFCC ({config.N_MFCC} coefficients)')
    fig.colorbar(img2, ax=axes[2])

    plt.tight_layout()
    plt.show()

    return y, sr

# Find a sample FMA file
if data_dir.exists():
    # Collect at most one match per extension (.wav first, then .mp3);
    # whichever ends up first in the list is the clip that gets analysed.
    sample_files = []
    for pattern in ('*.wav', '*.mp3'):
        sample_files += list(data_dir.rglob(pattern))[:1]
    if sample_files:
        y, sr = analyze_audio_sample(sample_files[0])
        n_samples = len(y)
        print("\nFMA Audio properties:")
        print(f"  Sample rate: {sr} Hz")
        print(f"  Duration: {n_samples/sr:.2f} seconds")
        print(f"  Samples: {n_samples}")

In [None]:
# Play audio sample (Jupyter only)
# Playback is attempted only when an earlier cell defined both the waveform
# `y` and its sample rate `sr` in the notebook namespace.
if {'y', 'sr'}.issubset(dir()):
    display(Audio(y, rate=sr))

## 2. Compare FMA Features Across Genres

Let's compare acoustic features (Mel Spectrograms and MFCCs) across different FMA genres to understand their distinguishing characteristics.

In [None]:
def compare_fma_genre_features(data_dir):
    """Plot a Mel spectrogram and an MFCC matrix for one clip of every genre.

    Each sub-directory of ``data_dir`` is treated as a genre. For every genre
    the first ``.wav``/``.mp3`` file (in sorted order) is loaded and drawn on
    its own row: Mel spectrogram on the left, MFCCs on the right.

    Args:
        data_dir: ``Path`` whose immediate sub-directories are genre folders.

    Returns:
        None. The figure is shown with ``plt.show()``. If no genre folders
        exist a message is printed and nothing is plotted.
    """
    # iterdir() order is arbitrary; sort so the row layout is reproducible.
    genres = sorted(d.name for d in data_dir.iterdir() if d.is_dir())
    if not genres:
        # plt.subplots cannot build a grid with zero rows.
        print(f"No genre folders found in {data_dir}")
        return

    # Same hop length for extraction and display keeps the time axes correct.
    hop_length = config.HOP_LENGTH

    # squeeze=False keeps `axes` two-dimensional even for a single genre.
    fig, axes = plt.subplots(len(genres), 2, figsize=(14, 4 * len(genres)), squeeze=False)

    for i, genre in enumerate(genres):
        genre_dir = data_dir / genre
        audio_files = sorted(genre_dir.glob('*.wav')) + sorted(genre_dir.glob('*.mp3'))

        if not audio_files:
            # Label the row instead of leaving two unexplained empty frames.
            for ax in axes[i]:
                ax.set_title(f'FMA {genre} - no audio files')
                ax.axis('off')
            continue

        # Load first file from FMA genre
        y, sr = librosa.load(audio_files[0], sr=config.SAMPLE_RATE, duration=config.AUDIO_DURATION)

        # Mel spectrogram
        mel_spec = librosa.feature.melspectrogram(
            y=y, sr=sr, n_mels=config.N_MELS, hop_length=hop_length
        )
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
        librosa.display.specshow(mel_spec_db, sr=sr, hop_length=hop_length,
                                 ax=axes[i, 0], x_axis='time', y_axis='mel')
        axes[i, 0].set_title(f'FMA {genre} - Mel Spectrogram')

        # MFCC
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=config.N_MFCC, hop_length=hop_length)
        librosa.display.specshow(mfccs, sr=sr, hop_length=hop_length,
                                 ax=axes[i, 1], x_axis='time')
        axes[i, 1].set_title(f'FMA {genre} - MFCC')

    plt.tight_layout()
    plt.show()

if data_dir.exists():
    compare_fma_genre_features(data_dir)

## 3. Load Processed FMA Features

Load pre-extracted MFCC and Mel-spectrogram features from the FMA dataset.

In [None]:
from src.features import load_processed_data

# Try loading processed FMA features
# Each feature set is loaded independently; a missing file only produces a
# hint, so the rest of the notebook can still run with whatever is available.
try:
    mfcc_features, labels, genres = load_processed_data('mfcc')
except FileNotFoundError:
    print("FMA MFCC features not found. Run preprocessing first!")
    print("  python -m src.preprocess_fma")
else:
    print(f"FMA MFCC features loaded: {mfcc_features.shape}")
    print(f"Labels: {labels.shape}, Genres: {genres}")

# Note: a successful load here rebinds `labels` and `genres` as well.
try:
    mel_features, labels, genres = load_processed_data('mel_spectrogram')
except FileNotFoundError:
    print("FMA Mel-Spectrogram features not found. Run preprocessing first!")
else:
    print(f"FMA Mel-Spectrogram features loaded: {mel_features.shape}")

In [None]:
# Visualize FMA feature distributions
if 'mfcc_features' in dir():
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Feature value distribution
    axes[0].hist(mfcc_features.flatten(), bins=50, alpha=0.7, color='steelblue')
    axes[0].set_title('FMA MFCC Feature Value Distribution')
    axes[0].set_xlabel('Value')
    axes[0].set_ylabel('Count')

    # Class distribution across FMA genres
    unique, counts = np.unique(labels, return_counts=True)
    # Colour every bar by its genre index so a genre gets tab10(index), the
    # same colour the t-SNE legend assigns to it. Sampling the colormap with
    # linspace would pick different entries whenever there are not exactly
    # ten genres.
    colors = [plt.cm.tab10(int(i) % 10) for i in unique]
    axes[1].bar([genres[i] for i in unique], counts, color=colors)
    axes[1].set_title('FMA Genre Distribution')
    axes[1].set_xlabel('Genre')
    axes[1].set_ylabel('Sample Count')
    # Rotate the labels on the bar chart explicitly rather than relying on
    # pyplot's implicit "current axes".
    axes[1].tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

    print("\nFMA Dataset Statistics:")
    for i, genre in enumerate(genres):
        count = np.sum(labels == i)
        print(f"  {genre}: {count} samples ({100*count/len(labels):.1f}%)")

## 4. Load and Analyze VAE Results on FMA

Analyze clustering results from the VAE models trained on the FMA dataset.

In [None]:
import pandas as pd

# Load metrics from all FMA tasks
results_dirs = {
    'Easy (Basic VAE)': config.RESULTS_DIR / 'easy_task',
    'Medium (Conv-VAE)': config.RESULTS_DIR / 'medium_task',
    'Hard (Beta-VAE)': config.RESULTS_DIR / 'hard_task'
}

all_metrics = []
for task_name, results_dir in results_dirs.items():
    # The easy task stores 'metrics.csv'; the other tasks store 'all_metrics.csv'.
    metrics_name = 'metrics.csv' if 'Easy' in task_name else 'all_metrics.csv'
    metrics_file = results_dir / metrics_name
    if metrics_file.exists():
        df = pd.read_csv(metrics_file)
        df['task'] = task_name  # remember which task each row came from
        all_metrics.append(df)
        print(f"✓ {task_name} FMA results loaded")
    else:
        print(f"✗ {task_name} FMA results not found (run training first)")

# Always (re)bind combined_df so a re-run without results cannot leave a stale
# frame from an earlier execution for the plotting cell to pick up.
combined_df = pd.concat(all_metrics, ignore_index=True) if all_metrics else pd.DataFrame()

if all_metrics:
    print("\n" + "="*60)
    print("FMA COMBINED RESULTS")
    print("="*60)
    display(combined_df)

In [None]:
# Visualize metrics comparison
if 'combined_df' in dir() and len(combined_df) > 0:
    metrics_to_plot = ['silhouette_score', 'calinski_harabasz_index',
                       'adjusted_rand_index', 'normalized_mutual_info']

    # Only metrics that are actually present as columns can be drawn.
    available_metrics = [m for m in metrics_to_plot if m in combined_df.columns]

    if available_metrics:
        fig, axes = plt.subplots(2, 2, figsize=(14, 10))
        axes = axes.flatten()

        # zip stops at the shorter sequence, so at most four panels are used.
        for ax, metric in zip(axes, available_metrics):
            # Group by method
            if 'method' in combined_df.columns:
                pivot = combined_df.pivot_table(values=metric, index='method',
                                                columns='task' if 'task' in combined_df.columns else None)
                pivot.plot(kind='bar', ax=ax)
            else:
                combined_df.plot(y=metric, kind='bar', ax=ax)

            ax.set_title(metric.replace('_', ' ').title())
            ax.set_xlabel('')
            ax.tick_params(axis='x', rotation=45)

        # Hide grid cells that received no metric instead of showing empty frames.
        for ax in axes[len(available_metrics):]:
            ax.set_visible(False)

        plt.tight_layout()
        plt.show()

## 5. Load and Visualize FMA Latent Space

Visualize the latent space learned by VAE models on FMA genre data.

In [None]:
from src.visualization import plot_tsne, plot_umap
from src.vae import VAE
from src.beta_vae import BetaVAE

# Select the compute device (this cell does not load any model itself)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Check for saved models
easy_model_path = config.RESULTS_DIR / 'easy_task' / 'vae_best.pth'
hard_model_path = config.RESULTS_DIR / 'hard_task' / 'beta_vae_best.pth'

# Report only the checkpoints that exist; the summary cell reports missing ones.
if easy_model_path.exists():
    print("✓ Easy task model found")
if hard_model_path.exists():
    print("✓ Hard task model found")

In [None]:
# t-SNE visualization of FMA features (static matplotlib scatter plot)
from sklearn.manifold import TSNE

if 'mfcc_features' in dir():
    print("Computing t-SNE on FMA MFCC features...")

    # One flat feature vector per sample.
    mfcc_flat = mfcc_features.reshape(mfcc_features.shape[0], -1)

    # scikit-learn requires perplexity < n_samples; clamp so small subsets work.
    perplexity = min(30, mfcc_flat.shape[0] - 1)
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=config.RANDOM_SEED)
    tsne_features = tsne.fit_transform(mfcc_flat)

    fig, ax = plt.subplots(figsize=(12, 10))

    # Draw each genre with its own explicit colour. Colouring by a normalised
    # continuous colormap (c=labels, cmap='tab10') only lines up with
    # tab10(i) when the labels span exactly 0..9, so points and legend could
    # disagree. The discrete legend replaces the former continuous colorbar.
    for i, genre in enumerate(genres):
        color = plt.cm.tab10(i % 10)
        mask = labels == i
        ax.scatter(tsne_features[mask, 0], tsne_features[mask, 1],
                   color=color, alpha=0.6, s=50)
        # Empty proxy artist: gives the legend a larger, opaque marker.
        ax.scatter([], [], color=color, label=genre, s=100)

    ax.set_title('t-SNE Visualization of FMA MFCC Features')
    ax.set_xlabel('t-SNE Dimension 1')
    ax.set_ylabel('t-SNE Dimension 2')
    ax.legend(title='FMA Genres', loc='best')
    plt.show()

## 6. FMA Project Summary

Summary statistics for the FMA music genre clustering project.

In [None]:
# FMA Project summary
# Relies on names bound by earlier cells (results_dirs, easy_model_path,
# hard_model_path); the feature arrays are optional and checked before use.
print("="*60)
print("FMA MUSIC GENRE CLUSTERING - PROJECT SUMMARY")
print("="*60)

print("\n📁 FMA Dataset:")
if 'genres' in dir():
    print(f"  Genres: {genres}")
if 'mfcc_features' in dir():
    print(f"  Total samples: {len(labels)}")
    print(f"  MFCC shape: {mfcc_features.shape}")
if 'mel_features' in dir():
    print(f"  Mel-Spectrogram shape: {mel_features.shape}")

print("\n🧠 VAE Models:")
print(f"  Easy Task (Basic VAE): {'✓ Trained' if easy_model_path.exists() else '✗ Not trained'}")
print("  Medium Task (Conv-VAE): Check results directory")
print(f"  Hard Task (Beta-VAE): {'✓ Trained' if hard_model_path.exists() else '✗ Not trained'}")

print("\n📊 Results:")
for task_name, results_dir in results_dirs.items():
    if results_dir.exists():
        # Count the figures and tables written by each task.
        files = list(results_dir.glob('*.png')) + list(results_dir.glob('*.csv'))
        print(f"  {task_name}: {len(files)} output files")

print("\n⚙️ Audio Configuration:")
print(f"  Sample rate: {config.SAMPLE_RATE} Hz")
print(f"  Audio duration: {config.AUDIO_DURATION} seconds")
print(f"  MFCC coefficients: {config.N_MFCC}")
print(f"  Mel bands: {config.N_MELS}")

print("\n📌 Dataset: Free Music Archive (FMA)")
print("   Reference: https://github.com/mdeff/fma")