# 🧪 Lab 3: Unsupervised Learning - Mice Protein Expression

## Step 1: Load and preprocess the dataset

In [None]:
import pandas as pd

# Load the raw table from the CSV file (path is relative to the working
# directory).
df = pd.read_csv(filepath_or_buffer="Data_Cortex_Nuclear.csv")

# Preview the first rows as a quick check of what was loaded.
df.head()

## Step 2: Drop metadata columns and remove rows with missing values

In [None]:
# Remove the five listed columns, then discard every row that still has at
# least one missing value, so only complete rows remain.
# NOTE(review): dropna() may remove a large share of the rows — confirm the
# remaining sample count (df_cleaned.shape) is acceptable for clustering.
df_cleaned = df.drop(
    ["MouseID", "Genotype", "Treatment", "Behavior", "class"],
    axis="columns",
).dropna(axis="index", how="any")

# Preview the cleaned table.
df_cleaned.head()

## Step 3: Standardize the features and apply PCA for visualization

In [None]:
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Learn the per-column scaling from the cleaned table, then apply it.
# With StandardScaler's defaults each column is centered and scaled to
# unit variance.
scaler = StandardScaler()
scaler.fit(df_cleaned)
data_scaled = scaler.transform(df_cleaned)

# Project the scaled data onto its first two principal components.
pca = PCA(n_components=2)
data_pca = pca.fit_transform(data_scaled)

# Expose the two component columns under readable names for plotting.
df_pca = pd.DataFrame({"PC1": data_pca[:, 0], "PC2": data_pca[:, 1]})
df_pca.head()

## Step 4: Apply Clustering Algorithms

In [None]:
from sklearn.cluster import KMeans, DBSCAN
from sklearn.mixture import GaussianMixture

# K-Means with 8 clusters on the scaled data.
# n_init is set explicitly: scikit-learn changed its default from 10 to
# "auto" in version 1.4 (and warns about it in 1.2-1.3), so relying on the
# default gives different results depending on the installed version.
kmeans = KMeans(n_clusters=8, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(data_scaled)

# Gaussian mixture with 8 components, fitted on the same scaled data.
gmm = GaussianMixture(n_components=8, random_state=42)
gmm_labels = gmm.fit_predict(data_scaled)

# Density-based clustering; the number of clusters is not fixed in advance.
# NOTE(review): eps=3 is a distance in the scaled feature space — confirm it
# suits this data. Per the scikit-learn docs, DBSCAN labels noise points -1.
dbscan = DBSCAN(eps=3, min_samples=5)
dbscan_labels = dbscan.fit_predict(data_scaled)

## Step 4.1: Apply Hierarchical Clustering

In [None]:
from scipy.cluster.hierarchy import linkage, fcluster

# Build the agglomerative merge tree with Ward linkage on the scaled data.
linkage_matrix = linkage(data_scaled, "ward")

# Cut the tree into flat clusters, allowing at most 8 of them.
hier_labels = fcluster(linkage_matrix, 8, criterion="maxclust")

## Step 5: Visualize clustering results using PCA-reduced data

In [None]:
# One row per algorithm, all drawn on the same PC1/PC2 coordinates so the
# four label assignments can be compared side by side.
fig, axs = plt.subplots(4, 1, figsize=(10, 20))

# (panel title, label array) in top-to-bottom display order.
# NOTE(review): per the scikit-learn docs DBSCAN uses -1 for noise; here it
# is colored like any other label — consider drawing it separately.
cluster_panels = [
    ("K-Means Clustering", kmeans_labels),
    ("Gaussian Mixture Model Clustering", gmm_labels),
    ("Hierarchical Clustering", hier_labels),
    ("DBSCAN Clustering", dbscan_labels),
]

for panel_ax, (panel_title, panel_labels) in zip(axs, cluster_panels):
    panel_ax.scatter(
        df_pca["PC1"], df_pca["PC2"], c=panel_labels, cmap="tab10", s=30
    )
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel("PC1")
    panel_ax.set_ylabel("PC2")

plt.tight_layout()
plt.show()