# Component 4: Feature Visualization

Extract deep features and visualize them using t-SNE, UMAP, and hierarchical clustering.

In [None]:
# --- Standard library ---
import os

# --- Deep learning ---
import tensorflow as tf

# --- Numerical / tabular data ---
import numpy as np
import pandas as pd

# --- Plotting ---
import matplotlib.pyplot as plt
import seaborn as sns

# --- Dimensionality reduction and clustering ---
from sklearn.manifold import TSNE
import umap
from scipy.cluster.hierarchy import dendrogram, linkage

# --- Progress bars ---
from tqdm.auto import tqdm

# A single seed is applied to both TensorFlow and NumPy's global generators.
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Every artifact written by this notebook goes under OUTPUT_DIR;
# the directory is created on demand and reused if it already exists.
OUTPUT_DIR = '../outputs/features'
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("✓ Setup complete")

## 4.1 Load Data & Extract Features

In [None]:
# Read the manifest that lists the whole dataset.
# The cells below read its 'filepath', 'class_label' and 'class_name' columns.
full_df = pd.read_csv('../outputs/dataset_manifest.csv')

# Work on a random subset of at most 4000 rows to keep extraction time manageable.
# Seeding the sampler with SEED makes the chosen subset repeatable.
MAX_SAMPLES = min(4000, len(full_df))
sample_df = full_df.sample(n=MAX_SAMPLES, random_state=SEED)

class_counts = sample_df['class_name'].value_counts()
print(f"Extracting features from {len(sample_df)} images...")
print("Class distribution in sample:")
print(class_counts)

# ImageNet-pretrained ResNet50 used purely as a feature extractor:
# the classification head is dropped and the final feature map is average-pooled.
resnet_options = dict(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(224, 224, 3),
)
base_model = tf.keras.applications.ResNet50(**resnet_options)

print("\n✓ Loaded ResNet50 (feature dim: 2048)")

In [None]:
# Extract one ResNet50 feature vector per sampled image.
#
# Images are decoded one at a time (so a single unreadable file can be skipped),
# but the network is run on mini-batches: calling `predict` once per image in a
# Python loop is slow and is discouraged by the Keras documentation.
#
# Produces:
#   features    -- array with one row per successfully processed image
#   labels      -- array of the matching 'class_label' values
#   class_names -- list of the matching 'class_name' values (same order)
BATCH_SIZE = 32
IMAGE_SIZE = (224, 224)

features = []
labels = []
class_names = []
failed_paths = []

# Plain dict records avoid the per-row Series construction of `iterrows`.
records = sample_df[['filepath', 'class_label', 'class_name']].to_dict('records')

print("\nExtracting features...")
for start in tqdm(range(0, len(records), BATCH_SIZE), desc="batches"):
    batch_images = []
    batch_records = []

    for record in records[start:start + BATCH_SIZE]:
        try:
            # Load and resize the image, then convert it to a float array
            img = tf.keras.preprocessing.image.load_img(
                record['filepath'], target_size=IMAGE_SIZE
            )
            batch_images.append(tf.keras.preprocessing.image.img_to_array(img))
            batch_records.append(record)
        except Exception as e:
            # Best effort: report the bad file and carry on with the rest.
            failed_paths.append(record['filepath'])
            print(f"Error processing {record['filepath']}: {e}")

    if not batch_images:
        # Every image in this batch failed to load; nothing to predict.
        continue

    # np.stack builds a fresh (batch, 224, 224, 3) array, so in-place
    # preprocessing cannot touch the per-image arrays.
    x = tf.keras.applications.resnet50.preprocess_input(np.stack(batch_images))

    # A failure here is a model/runtime problem rather than a bad file,
    # so it is allowed to propagate instead of being swallowed.
    batch_features = base_model.predict(x, verbose=0)

    features.extend(batch_features)
    labels.extend(record['class_label'] for record in batch_records)
    class_names.extend(record['class_name'] for record in batch_records)

if failed_paths:
    print(f"\n⚠ Skipped {len(failed_paths)} of {len(records)} images that could not be loaded")

if not features:
    # Without this guard an empty array would be saved and every later cell
    # would fail with a far less helpful error.
    raise RuntimeError(
        "No features were extracted; check the 'filepath' column of the manifest."
    )

features = np.array(features)
labels = np.array(labels)

print(f"\n✓ Extracted features: {features.shape}")

# Save features
np.save(f'{OUTPUT_DIR}/features_resnet50.npy', features)
np.save(f'{OUTPUT_DIR}/labels.npy', labels)
print(f"✓ Features saved to {OUTPUT_DIR}/")

## 4.2 t-SNE Visualization

In [None]:
# Project the extracted features to 2-D with t-SNE, save the embedding as CSV,
# and draw a scatter plot coloured by class.
print("\nComputing t-SNE (this may take a few minutes)...")

# scikit-learn requires perplexity < n_samples; clamp it so small samples still run.
tsne_perplexity = min(30, len(features) - 1)

# The iteration count is left at its default (1000, the value previously passed
# explicitly). The keyword was renamed from `n_iter` to `max_iter` in
# scikit-learn 1.5, so omitting it keeps the cell working on old and new releases.
tsne = TSNE(n_components=2, random_state=SEED, perplexity=tsne_perplexity)
tsne_embeddings = tsne.fit_transform(features)

# Save embeddings
tsne_df = pd.DataFrame(tsne_embeddings, columns=['x', 'y'])
tsne_df['label'] = labels
tsne_df['class_name'] = class_names
tsne_df.to_csv(f'{OUTPUT_DIR}/tsne_embeddings.csv', index=False)

# Visualize
# `unique_classes` and `colors` are also used by the UMAP cell so that both
# plots share the same class-to-colour mapping.
unique_classes = sorted(set(class_names))
colors = sns.color_palette('husl', len(unique_classes))

# Build the comparison array once instead of once per class.
class_name_array = np.asarray(class_names)

fig, ax = plt.subplots(figsize=(12, 10))
for idx, class_name in enumerate(unique_classes):
    mask = class_name_array == class_name
    ax.scatter(
        tsne_embeddings[mask, 0],
        tsne_embeddings[mask, 1],
        label=class_name,
        alpha=0.6,
        s=50,
        color=colors[idx]
    )

ax.set_title('t-SNE Visualization of MRI Features', fontsize=16, fontweight='bold')
ax.set_xlabel('t-SNE Dimension 1', fontsize=12)
ax.set_ylabel('t-SNE Dimension 2', fontsize=12)
ax.legend(fontsize=10)
ax.grid(alpha=0.3)
fig.tight_layout()
fig.savefig(f'{OUTPUT_DIR}/tsne_plot.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ t-SNE visualization saved")

## 4.3 UMAP Visualization

In [None]:
# Reduce the feature matrix to two dimensions with UMAP, store the coordinates
# as CSV, and render a per-class scatter plot.
print("\nComputing UMAP...")
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, random_state=SEED)
umap_embeddings = reducer.fit_transform(features)

# Persist the 2-D coordinates together with their labels.
umap_df = pd.DataFrame(umap_embeddings, columns=['x', 'y'])
umap_df['label'] = labels
umap_df['class_name'] = class_names
umap_df.to_csv(f'{OUTPUT_DIR}/umap_embeddings.csv', index=False)

# Scatter plot, one colour per class.
# `unique_classes` and `colors` come from the t-SNE cell, so both figures use
# the same class-to-colour mapping.
class_name_array = np.array(class_names)
fig, ax = plt.subplots(figsize=(12, 10))
for class_name, class_color in zip(unique_classes, colors):
    mask = class_name_array == class_name
    xs = umap_embeddings[mask, 0]
    ys = umap_embeddings[mask, 1]
    ax.scatter(xs, ys, label=class_name, alpha=0.6, s=50, color=class_color)

ax.set_title('UMAP Visualization of MRI Features', fontsize=16, fontweight='bold')
ax.set_xlabel('UMAP Dimension 1', fontsize=12)
ax.set_ylabel('UMAP Dimension 2', fontsize=12)
ax.legend(fontsize=10)
ax.grid(alpha=0.3)
fig.tight_layout()
fig.savefig(f'{OUTPUT_DIR}/umap_plot.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ UMAP visualization saved")

## 4.4 Hierarchical Clustering Tree

In [None]:
# Build a Ward-linkage dendrogram on a random subset of the feature vectors
# and save it as an image.
#
# Sample subset for tree visualization (hierarchical clustering is O(n²))
TREE_SAMPLES = min(500, len(features))
if TREE_SAMPLES < 2:
    raise ValueError(
        f"Hierarchical clustering needs at least 2 samples, got {TREE_SAMPLES}."
    )

# A dedicated, locally seeded generator makes the chosen subset independent of
# how much of NumPy's global random stream earlier cells consumed, so re-running
# this cell always clusters the same samples.
tree_rng = np.random.default_rng(SEED)
sample_indices = tree_rng.choice(len(features), size=TREE_SAMPLES, replace=False)
sample_features = features[sample_indices]

print(f"\nComputing hierarchical clustering on {TREE_SAMPLES} samples...")
Z = linkage(sample_features, method='ward')

# Visualize dendrogram (leaf labels are suppressed: there are too many to read)
fig, ax = plt.subplots(figsize=(15, 8))
dendrogram(Z, no_labels=True, ax=ax)
ax.set_title('Hierarchical Clustering of MRI Features', fontsize=16, fontweight='bold')
ax.set_xlabel('Sample Index', fontsize=12)
ax.set_ylabel('Distance', fontsize=12)
fig.tight_layout()
fig.savefig(f'{OUTPUT_DIR}/tmap_plot.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Tree map saved")

## 4.5 Summary

In [None]:
# Final report: list the artifacts written by the cells above and remind the
# reader what to look for in the plots. The lines are collected first and
# emitted with a single print call.
banner = "=" * 60

summary_lines = [
    "\n" + banner,
    "✅ FEATURE VISUALIZATION COMPLETE",
    banner,
    f"\nArtifacts saved to: {OUTPUT_DIR}/",
    f"  - features_resnet50.npy ({features.shape})",
    f"  - labels.npy ({labels.shape})",
    "  - tsne_embeddings.csv",
    "  - umap_embeddings.csv",
    "  - tsne_plot.png",
    "  - umap_plot.png",
    "  - tmap_plot.png",
    "\nKey observations:",
    "  - Check if classes form distinct clusters",
    "  - Overlapping regions may indicate similar features",
    "  - t-SNE preserves local structure, UMAP preserves global",
]

print("\n".join(summary_lines))