# In [None]:
import os
from scipy.io import loadmat

# Read the MATLAB file from the current working directory and pull out the
# two variables this notebook uses ("geneinfo" and "leukemia").
# presumably "geneinfo" holds the features and "leukemia" the class labels
# -- TODO confirm against the .mat file.
# NOTE(review): X is rebound from a CSV file further down in this script;
# confirm which data source is actually intended.
data = loadmat('leukemia.mat')
X, y = data["geneinfo"], data['leukemia']
print(y.shape)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import TruncatedSVD, PCA
from mpl_toolkits.mplot3d import Axes3D

# Load the leukemia table from CSV. The file name is a placeholder: point it
# at the real dataset before running.
# NOTE(review): this rebinds X, replacing the matrix read from leukemia.mat
# earlier in the file -- confirm which source is intended.
df = pd.read_csv("leukemia_data.csv")
# Every column except the last is used as a feature; the last column is
# assumed to be the target.
X = df.iloc[:, :-1].to_numpy()

# Step 1: reduced SVD, so that X = U @ diag(S) @ Vt
U, S, Vt = np.linalg.svd(X, full_matrices=False)

# Step 2: scree plot -- singular values against their 1-based index, used to
# pick the elbow by eye
plt.figure(figsize=(8, 5))
plt.plot(np.arange(1, S.size + 1), S, marker="o", linestyle="-")
plt.gca().set(
    xlabel="Singular Value Index",
    ylabel="Singular Value",
    title="Singular Values of Leukemia Dataset",
)
plt.grid()
plt.show()

# Step 3: number of components read off the scree plot (picked manually).
# It is recorded here only; the projections below are fixed at 2 and 3.
k = 3  # Example: Assume elbow occurs at k=3

# Step 4: coordinates of each sample on the leading singular directions.
# Scaling column j of U by S[j] is the same as U[:, :n] @ diag(S[:n]).
X_svd_2d = U[:, :2] * S[:2]  # 2D representation
X_svd_3d = U[:, :3] * S[:3]  # 3D representation

# Step 5: 2D scatter of the first two SVD coordinates, coloured by the last
# column of df. seaborn draws on the current figure's axes and returns them,
# so the labels are set on that Axes object.
plt.figure(figsize=(8, 5))
sns.scatterplot(
    x=X_svd_2d[:, 0],
    y=X_svd_2d[:, 1],
    hue=df.iloc[:, -1],
    palette="coolwarm",
).set(
    xlabel="SVD Component 1",
    ylabel="SVD Component 2",
    title="Leukemia Data in 2D using SVD",
)
plt.legend()
plt.grid()
plt.show()

# Step 6: 3D scatter of the first three SVD coordinates, coloured by the last
# column of df.
# Matplotlib's `c=` must be numeric (or valid colour specs) to go through the
# colormap, so string class labels would raise here even though the 2D plot's
# `hue=` accepts them. Non-numeric labels are therefore mapped to integer
# codes (in sorted label order); numeric labels are passed through unchanged.
svd_point_colors = df.iloc[:, -1]
if not pd.api.types.is_numeric_dtype(svd_point_colors):
    svd_point_colors = pd.factorize(svd_point_colors, sort=True)[0]

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(
    X_svd_3d[:, 0],
    X_svd_3d[:, 1],
    X_svd_3d[:, 2],
    c=svd_point_colors,
    cmap='coolwarm',
    marker="o",
)
ax.set_xlabel("SVD Component 1")
ax.set_ylabel("SVD Component 2")
ax.set_zlabel("SVD Component 3")
ax.set_title("Leukemia Data in 3D using SVD")
plt.show()

# Step 7: fit a 3-component PCA on the same X for comparison with the SVD
# projection, and keep the transformed samples.
pca = PCA(n_components=3)
X_pca = pca.fit_transform(X)

# Step 8: 2D scatter of the first two principal components, coloured by the
# last column of df. The Axes returned by seaborn carries the labels.
plt.figure(figsize=(8, 5))
sns.scatterplot(
    x=X_pca[:, 0],
    y=X_pca[:, 1],
    hue=df.iloc[:, -1],
    palette="coolwarm",
).set(
    xlabel="PCA Component 1",
    ylabel="PCA Component 2",
    title="Leukemia Data in 2D using PCA",
)
plt.legend()
plt.grid()
plt.show()

# Step 9: 3D scatter of the three principal components, coloured by the last
# column of df.
# Matplotlib's `c=` must be numeric (or valid colour specs) to go through the
# colormap, so string class labels would raise here even though the 2D plot's
# `hue=` accepts them. Non-numeric labels are therefore mapped to integer
# codes (in sorted label order); numeric labels are passed through unchanged.
pca_point_colors = df.iloc[:, -1]
if not pd.api.types.is_numeric_dtype(pca_point_colors):
    pca_point_colors = pd.factorize(pca_point_colors, sort=True)[0]

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(
    X_pca[:, 0],
    X_pca[:, 1],
    X_pca[:, 2],
    c=pca_point_colors,
    cmap='coolwarm',
    marker="o",
)
ax.set_xlabel("PCA Component 1")
ax.set_ylabel("PCA Component 2")
ax.set_zlabel("PCA Component 3")
ax.set_title("Leukemia Data in 3D using PCA")
plt.show()
