In [1]:
# 📘 PCA Analysis Notebook (Step 2)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Load the cleaned dataset and separate the feature matrix from the label
# column ('target'), which must not take part in the decomposition.
df = pd.read_csv('../data/heart_cleaned.csv')
X = df.drop('target', axis=1)

# Fit PCA keeping as many components as there are feature columns, so the
# explained-variance ratios below describe the full spectrum.
# NOTE(review): scikit-learn's PCA centers the data but does not scale it.
# This assumes heart_cleaned.csv already holds standardized features --
# confirm against the cleaning step.
pca = PCA(n_components=X.shape[1])
X_pca = pca.fit_transform(X)

# Report the fraction of total variance captured by each component,
# numbered from 1.
explained_var = pca.explained_variance_ratio_
for component_no, ratio in enumerate(explained_var, start=1):
    print(f"Component {component_no}: {ratio:.4f}")

# Cumulative variance plot.
# The x-values are given explicitly as 1..n: with y-values alone, matplotlib
# would index the points 0..n-1 and the "Number of Components" axis would be
# off by one (the variance of a single component would sit at x=0).
cumulative_var = np.cumsum(explained_var)
component_counts = np.arange(1, len(explained_var) + 1)

plt.figure(figsize=(8, 4))
plt.plot(component_counts, cumulative_var, marker='o')
plt.title('PCA - Cumulative Explained Variance')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Variance')
plt.grid(True)
plt.tight_layout()
plt.savefig('../data/pca_variance_plot.png')
# Close the figure so it is not kept open after being written to disk.
plt.close()


Component 1: 0.2367
Component 2: 0.1230
Component 3: 0.0954
Component 4: 0.0846
Component 5: 0.0759
Component 6: 0.0677
Component 7: 0.0662
Component 8: 0.0598
Component 9: 0.0530
Component 10: 0.0434
Component 11: 0.0353
Component 12: 0.0317
Component 13: 0.0274
