# Feature Visualization and Analysis

This notebook analyzes the quantitative features extracted from histopathology images.

In [None]:
# Data handling, plotting, and dimensionality-reduction imports.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
# NOTE(review): scikit-learn's `sklearn.manifold` does not appear to provide a
# `UMAP` class (UMAP ships in the separate `umap-learn` package as
# `umap.UMAP`), so this line likely raises ImportError -- confirm and fix.
from sklearn.manifold import UMAP

%matplotlib inline

## Load Features

In [None]:
# Read the extracted feature table from its Parquet file, report its
# dimensions, and preview the first rows as the cell output.
features_path = '../experiments/features_test.parquet'
features_df = pd.read_parquet(features_path)

print(f"Feature shape: {features_df.shape}")
features_df.head()

## Deep Features - PCA Visualization

In [None]:
# Project the `deep_feature_*` columns onto their first two principal
# components and plot the samples coloured by the `label` column.
deep_cols = [name for name in features_df.columns if name.startswith('deep_feature_')]
X_deep = features_df[deep_cols].values
y = features_df['label'].values

pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_deep)
pc1_ratio, pc2_ratio = pca.explained_variance_ratio_[:2]

# Object-oriented Matplotlib calls on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis', alpha=0.6)
fig.colorbar(scatter, ax=ax, label='Class')
ax.set_xlabel(f'PC1 ({pc1_ratio:.2%})')
ax.set_ylabel(f'PC2 ({pc2_ratio:.2%})')
ax.set_title('PCA of Deep Features')
plt.show()

## Classic Features - Correlation Analysis

In [None]:
# Correlation heatmap of the "classic" features: every column that is neither
# prefixed with `deep_` nor one of the metadata columns listed below.
metadata_cols = {'image_path', 'label', 'class_name'}
classic_cols = [
    c for c in features_df.columns
    if not c.startswith('deep_') and c not in metadata_cols
]
classic_features = features_df[classic_cols]

# Correlate numeric/boolean columns only. Since pandas 2.0, `DataFrame.corr`
# raises on non-numeric columns instead of silently dropping them, so a single
# string column missing from the exclusion list would break this cell.
numeric_features = classic_features.select_dtypes(include=['number', 'bool'])

plt.figure(figsize=(12, 10))
sns.heatmap(numeric_features.corr(), cmap='coolwarm', center=0, square=True)
plt.title('Correlation of Classic Features')
plt.tight_layout()
plt.show()

## Feature Distributions by Class

In [None]:
# Box plots of selected features, grouped by the `label` column.
top_features = ['R_mean', 'G_mean', 'B_mean', 'nuclei_count', 'nuclei_density']

# Resolve availability before assigning axes so the plots fill the grid
# contiguously instead of leaving an empty frame where a feature is missing.
available = [feat for feat in top_features if feat in features_df.columns]
missing = [feat for feat in top_features if feat not in features_df.columns]
if missing:
    print(f"Skipping features not found in features_df: {missing}")

fig, axes = plt.subplots(2, 3, figsize=(15, 8))
axes = axes.flatten()

for ax, feat in zip(axes, available):
    features_df.boxplot(column=feat, by='label', ax=ax)
    # Replace the title pandas generates with the bare feature name.
    ax.set_title(feat)

# The 2x3 grid has more cells than features; hide the ones that got no plot.
for ax in axes[len(available):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()