In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
from matplotlib.patches import FancyArrowPatch

In [2]:
# Set random seed for reproducibility
np.random.seed(23)

# Generate synthetic data for two classes
mu_vec1 = np.array([0,0,0])
cov_mat1 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)
df = pd.DataFrame(class1_sample, columns=['feature1','feature2','feature3'])
df['target'] = 1

mu_vec2 = np.array([1,1,1])
cov_mat2 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20)
df1 = pd.DataFrame(class2_sample, columns=['feature1','feature2','feature3'])
df1['target'] = 0

# Concatenate dataframes
df = pd.concat([df, df1], ignore_index=True)
df = df.sample(40)

# Standardize the dataset
scaler = StandardScaler()
df.iloc[:,0:3] = scaler.fit_transform(df.iloc[:,0:3])

# Compute the covariance matrix correctly
covariance_matrix = df.iloc[:,0:3].cov().values
print('Covariance Matrix:\n', covariance_matrix)

# Compute eigenvalues and eigenvectors
eigen_values, eigen_vectors = np.linalg.eig(covariance_matrix)

# Sort eigenvalues and corresponding eigenvectors in descending order
sorted_indices = np.argsort(eigen_values)[::-1]
eigen_values = eigen_values[sorted_indices]
eigen_vectors = eigen_vectors[:, sorted_indices]

# Select top 2 principal components
pc = eigen_vectors[:, 0:2]

# Transform data using PCA
transformed_df = np.dot(df.iloc[:, 0:3], pc)
new_df = pd.DataFrame(transformed_df, columns=['PC1', 'PC2'])
new_df['target'] = df['target'].values
new_df['target'] = new_df['target'].astype('str')

# 3D Scatter Plot
fig = px.scatter_3d(df, x='feature1', y='feature2', z='feature3', color=df['target'].astype('str'))
fig.update_traces(marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey')))
fig.show()

# Define class for 3D Arrows
class Arrow3D(FancyArrowPatch):
    def __init__(self, xs, ys, zs, *args, **kwargs):
        FancyArrowPatch.__init__(self, (0,0), (0,0), *args, **kwargs)
        self._verts3d = xs, ys, zs

    def draw(self, renderer):
        xs3d, ys3d, zs3d = self._verts3d
        xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, renderer.M)
        self.set_positions((xs[0],ys[0]),(xs[1],ys[1]))
        FancyArrowPatch.draw(self, renderer)

# Plot eigenvectors
fig = plt.figure(figsize=(7,7))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(df['feature1'], df['feature2'], df['feature3'], color='blue', alpha=0.2)
ax.scatter(df['feature1'].mean(), df['feature2'].mean(), df['feature3'].mean(), color='red', alpha=0.5)

for i in range(eigen_vectors.shape[1]):
    vec = eigen_vectors[:, i] * np.sqrt(eigen_values[i])  # Scale vectors for visualization
    arrow = Arrow3D([df['feature1'].mean(), df['feature1'].mean() + vec[0]],
                    [df['feature2'].mean(), df['feature2'].mean() + vec[1]],
                    [df['feature3'].mean(), df['feature3'].mean() + vec[2]],
                    mutation_scale=20, lw=3, arrowstyle="-|>", color="r")
    ax.add_artist(arrow)

ax.set_xlabel('x_values')
ax.set_ylabel('y_values')
ax.set_zlabel('z_values')
plt.title('Eigenvectors')
plt.show()

# 2D Scatter Plot after PCA
fig = px.scatter(x=new_df['PC1'], y=new_df['PC2'], color=new_df['target'], color_discrete_sequence=px.colors.qualitative.G10)
fig.update_traces(marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey')))
fig.show()

Covariance Matrix:
 [[1.02564103 0.20478114 0.080118  ]
 [0.20478114 1.02564103 0.19838882]
 [0.080118   0.19838882 1.02564103]]


AttributeError: 'Arrow3D' object has no attribute 'do_3d_projection'

<Figure size 700x700 with 1 Axes>