In [61]:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import StandardScaler

In [62]:
np.random.seed(23) 

mu_vec1 = np.array([0,0,0])
cov_mat1 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 25)

In [63]:
df = pd.DataFrame(class1_sample,columns=['f1','f2','f3'])
df['target'] = 1

mu_vec2 = np.array([1,1,1])
cov_mat2 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 25)

In [64]:
df1 = pd.DataFrame(class2_sample,columns=['f1','f2','f3'])

df1['target'] = 0

df = df.append(df1,ignore_index=True)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [65]:
df = df.sample(50)

In [66]:
df.head()

Unnamed: 0,f1,f2,f3,target
3,1.772258,-0.347459,0.67014,1
27,2.011059,1.920996,2.93309,0
21,-0.268142,-0.482559,1.24461,1
2,-0.367548,-1.13746,-1.322148,1
14,0.420623,0.41162,-0.071324,1


In [67]:
#y_train_trf = y_train.astype(str)
fig = px.scatter_3d(df, x=df['f1'], y=df['f2'], z=df['f3'],
              color=df['target'].astype('str'))
fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkTurquoise')),
                  selector=dict(mode='markers'))

fig.show()

In [68]:
# Step 1 - Apply standard scaling
scaler = StandardScaler()

df.iloc[:,0:3] = scaler.fit_transform(df.iloc[:,0:3])

In [69]:
# Step 2 - Find Covariance Matrix
covariance_matrix = np.cov([df.iloc[:,0],df.iloc[:,1],df.iloc[:,2]])
print('Covariance Matrix:\n', covariance_matrix)

Covariance Matrix:
 [[ 1.02040816  0.14034257  0.04032334]
 [ 0.14034257  1.02040816 -0.02348885]
 [ 0.04032334 -0.02348885  1.02040816]]


In [70]:
# Step 3 - Finding EV and EVs
eigen_values, eigen_vectors = np.linalg.eig(covariance_matrix)

In [71]:
eigen_values

array([0.86677182, 1.16180684, 1.03264583])

In [72]:
eigen_vectors

array([[-0.68632863,  0.71427656,  0.13697449],
       [ 0.67014634,  0.69426275, -0.26249403],
       [ 0.28258962,  0.08836421,  0.95516222]])

In [73]:
pc = eigen_vectors[0:2]
pc

array([[-0.68632863,  0.71427656,  0.13697449],
       [ 0.67014634,  0.69426275, -0.26249403]])

In [74]:
transformed_df = np.dot(df.iloc[:,0:3],pc.T)
# 40,3 - 3,2
new_df = pd.DataFrame(transformed_df,columns=['PC1','PC2'])
new_df['target'] = df['target'].values
new_df.head()

Unnamed: 0,PC1,PC2,target
0,-1.33707,0.221851,1
1,0.328428,1.295524,0
2,-0.128863,-1.202792,1
3,-0.832844,-1.067903,1
4,-0.108409,0.103381,1


In [75]:
new_df['target'] = new_df['target'].astype('str')
fig = px.scatter(x=new_df['PC1'],
                 y=new_df['PC2'],
                 color=new_df['target'],
                 color_discrete_sequence=px.colors.qualitative.G10
                )

fig.update_traces(marker=dict(size=12,
                              line=dict(width=2)),
                  selector=dict(mode='markers'))
fig.show()