# Feature Selection and Dimensionality Reduction Demo

This notebook demonstrates dimensionality reduction with PCA and feature selection with mutual information and correlation-based filtering, applied to previously extracted audio features.

In [None]:
import numpy as np
import pandas as pd
from AFX.utils.selectors import pca_reducer, mutual_info_selector, correlation_selector

# Read the feature set saved by the batch demo. The frame is transposed after
# loading (presumably the JSON is keyed by file, giving one row per file after
# .T -- confirm against the batch demo output), then the index is reset.
features_path = 'batch_features.json'
df = pd.read_json(features_path).T.reset_index(drop=True)

# Every column except the bookkeeping ones ('file', 'error') is treated as a
# feature; the exclusion is by name only, no dtype check is performed.
non_feature_cols = ('file', 'error')
feature_cols = [name for name in df.columns if name not in non_feature_cols]

# Discard rows with a missing value in any feature column, then build the
# 2-D matrix (rows = samples, columns ordered as in feature_cols).
df = df.dropna(subset=feature_cols)
X = df[feature_cols].to_numpy()

## Dimensionality Reduction with PCA

In [None]:
import matplotlib.pyplot as plt

# pca_reducer is handed a mapping of feature name -> column values and asked
# for 2 components so the result can be drawn in a plane.
feature_arrays = {name: df[name].values for name in feature_cols}
X_pca, pca = pca_reducer(feature_arrays, n_components=2)

# Scatter the first projected column against the second.
fig, ax = plt.subplots(figsize=(6, 5))
ax.scatter(X_pca[:, 0], X_pca[:, 1], alpha=0.7)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_title('PCA of Audio Features')
fig.tight_layout()
plt.show()

## Feature Selection with Mutual Information

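Suppose we have labels (e.g., class IDs) for supervised selection. Here, we simulate random binary labels purely for demonstration. Because random labels are unrelated to the features, the resulting ranking carries no real signal; it only illustrates how the selector is called.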

In [None]:
# Simulate random binary labels, one per row of X (replace with real labels
# for your dataset). A seeded generator is used so that the labels -- and
# therefore the printed selection -- do not change with the global NumPy
# random state between "Restart & Run All" executions.
label_rng = np.random.default_rng(42)
y = label_rng.integers(0, 2, size=X.shape[0])  # values in {0, 1}; upper bound is exclusive

# Ask the selector for k=5 features; its result is used below as positional
# indices into feature_cols to recover the feature names.
selected_idx = mutual_info_selector(X, y, k=5)
selected_features = [feature_cols[i] for i in selected_idx]
print('Top 5 features by mutual information:', selected_features)

## Correlation-Based Feature Filtering

In [None]:
# Run the correlation-based selector on the feature matrix.
# NOTE(review): the 0.9 threshold is presumably the correlation level above
# which features are considered redundant -- confirm in AFX.utils.selectors.
corr_threshold = 0.9
selected_corr_idx = correlation_selector(X, threshold=corr_threshold)

# Translate the returned positions back into feature names.
selected_corr_features = []
for col_idx in selected_corr_idx:
    selected_corr_features.append(feature_cols[col_idx])
print('Features after correlation filtering:', selected_corr_features)

## Next Steps
- Use selected features for ML models.
- Try with real class labels for supervised selection.
- Experiment with different thresholds and selectors.