In [14]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [17]:
np.random.seed(23)

mu_vec1 = np.array([0,0,0])
cov_mat1 = np.array([[1,0,0],
                      [0,1,0],
                        [0,0,1]])
class1_sample = np.random.multivariate_normal(mu_vec1 , cov_mat1 , 20)

df = pd.DataFrame(class1_sample , columns = ["feature1" , "feature2" , "feature3"])
df["target"] = 1

mu_vec2 = np.array([1,1,1])
cov_mat2 = np.array([[1,0,0],
                      [0,1,0],
                        [0,0,1]])
class2_sample = np.random.multivariate_normal(mu_vec2 , cov_mat2 , 20)

df1 = pd.DataFrame(class2_sample , columns = ["feature1" , "feature2" , "feature3"])
df1["target"] = 0

df = pd.concat([df , df1] , axis = 0)

df.sample(20)


Unnamed: 0,feature1,feature2,feature3,target
2,-0.367548,-1.13746,-1.322148,1
14,0.177061,-0.598109,1.226512,0
14,0.420623,0.41162,-0.071324,1
11,1.968435,-0.547788,-0.679418,1
12,-2.50623,0.14696,0.606195,1
9,1.42514,1.441152,0.182561,0
11,2.224431,0.230401,1.19212,0
4,0.322272,0.060343,-1.04345,1
12,-0.723253,1.461259,-0.085367,0
13,2.823378,-0.332863,2.637391,0


In [18]:
import plotly.express as px

fig = px.scatter_3d(df , x = "feature1" , y = "feature2" , z = "feature3" , color = "target")
fig.update_traces(
    marker = dict(size = 5 ,
                  line = dict(width = 2 , color = "DarkSlateGrey")),
    selector = dict(mode = "markers")
)
fig.show()

In [19]:
### Step 1 : Apply Standard Scaler
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df.iloc[:, 0:3] = scaler.fit_transform(df.iloc[:,0:3])

In [21]:
## Step 2 : Finding Covariacne Matrix
covariance_matrix = np.cov([df.iloc[:,0] , df.iloc[:,1] , df.iloc[:,2]])
print(covariance_matrix)

[[1.02564103 0.20478114 0.080118  ]
 [0.20478114 1.02564103 0.19838882]
 [0.080118   0.19838882 1.02564103]]


In [22]:
## Step 3 : Finding EIgen Vectors and Values
eigen_values , eigen_vectors = np.linalg.eig(covariance_matrix)


In [23]:
eigen_values

array([1.3536065 , 0.94557084, 0.77774573])

In [24]:
eigen_vectors

array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442],
       [-0.52848211,  0.72025103,  0.44938304]])

In [26]:
pc = eigen_vectors[0:2]
pc

array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442]])

In [30]:
transformed_df = np.dot(df.iloc[:,0:3] , pc.T)
## 40,3 - 3,2 =>> 40,2
new_df = pd.DataFrame(transformed_df , columns = ["PC1" , "PC2"])
new_df["target"] = df["target"].values
new_df.head(20)

Unnamed: 0,PC1,PC2,target
0,-0.429384,0.829265,1
1,-1.12452,0.842226,1
2,0.599433,1.795862,1
3,-0.094556,-0.761566,1
4,-0.401542,1.203061,1
5,0.886733,0.522664,1
6,1.631677,1.894527,1
7,-0.278732,-0.447581,1
8,0.688605,0.658084,1
9,-0.997408,0.778938,1


In [32]:

new_df['target'] = new_df['target'].astype('str')
fig = px.scatter(x=new_df['PC1'],
                 y=new_df['PC2'],
                 color=new_df['target'],
                 color_discrete_sequence=px.colors.qualitative.G10
                )

fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
fig.show()