In [None]:
import numpy as np
import pandas as pd

# Seed NumPy's global generator so the draws and the shuffle below are
# reproducible from a fresh kernel.
np.random.seed(23)

feature_names = ['feature1', 'feature2', 'feature3']

# Class 1: 20 points from a 3-D normal centred at the origin with an
# identity covariance matrix; these rows are labelled target = 1.
mu_vec1 = np.zeros(3, dtype=int)
cov_mat1 = np.eye(3, dtype=int)
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, size=20)
df = pd.DataFrame(class1_sample, columns=feature_names).assign(target=1)

# Class 2: 20 points from the same distribution shifted to (1, 1, 1);
# these rows are labelled target = 0.
mu_vec2 = np.ones(3, dtype=int)
cov_mat2 = np.eye(3, dtype=int)
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, size=20)
df1 = pd.DataFrame(class2_sample, columns=feature_names).assign(target=0)

# Stack both classes under a fresh 0..39 index, then shuffle by sampling
# every row without replacement (the shuffled index labels are kept).
df = pd.concat([df, df1], ignore_index=True)
df = df.sample(n=len(df))


# Preview the generated dataset

In [None]:
# Show the first five rows of the shuffled frame (index labels are the
# pre-shuffle row numbers).
df.head(n=5)


Unnamed: 0,feature1,feature2,feature3,target
2,-0.367548,-1.13746,-1.322148,1
34,0.177061,-0.598109,1.226512,0
14,0.420623,0.41162,-0.071324,1
11,1.968435,-0.547788,-0.679418,1
12,-2.50623,0.14696,0.606195,1


In [None]:
import plotly.express as px
# Interactive 3-D scatter of the three features. The target is cast to
# string before being passed as the colour so each class is drawn as its
# own category rather than on a numeric scale.
class_labels = df['target'].astype('str')
fig = px.scatter_3d(
    df,
    x=df['feature1'],
    y=df['feature2'],
    z=df['feature3'],
    color=class_labels,
)

# Enlarge the markers and give them a dark outline; the selector limits
# the update to traces drawn in 'markers' mode.
marker_style = dict(size=12, line=dict(width=2, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style, selector=dict(mode='markers'))

fig.show()

In [None]:
# Step 1 - Standardise the features
# The first three columns (the features) are replaced in place by their
# scaled values; the 'target' column is left untouched.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
feature_block = df.iloc[:, 0:3]
df.iloc[:, 0:3] = scaler.fit_transform(feature_block)

In [None]:
# Step 2 - Covariance matrix of the scaled features
# np.cov treats each input row as one variable, so the three feature
# columns are passed as three separate rows, giving a 3 x 3 matrix.
# NOTE(review): the diagonal printed below is 40/39 rather than exactly 1,
# presumably because np.cov divides by N-1 while the scaler normalises by N.
feature_rows = [df.iloc[:, k] for k in range(3)]
covariance_matrix = np.cov(feature_rows)
print('Covariance Matrix:\n', covariance_matrix)

Covariance Matrix:
 [[1.02564103 0.20478114 0.080118  ]
 [0.20478114 1.02564103 0.19838882]
 [0.080118   0.19838882 1.02564103]]


In [None]:
# Step 3 - Eigendecomposition of the covariance matrix
# A covariance matrix is symmetric, so use eigh: it is the routine meant
# for symmetric/Hermitian input and always returns real eigenvalues.
# (np.linalg.eig makes no promise about the order of the eigenvalues.)
eigen_values, eigen_vectors = np.linalg.eigh(covariance_matrix)

# eigh returns eigenvalues in ascending order. Reorder to descending so
# that column 0 of `eigen_vectors` is the direction of largest variance,
# column 1 the next largest, and so on. Column i of `eigen_vectors`
# always pairs with eigen_values[i].
descending = np.argsort(eigen_values)[::-1]
eigen_values = eigen_values[descending]
eigen_vectors = eigen_vectors[:, descending]


In [None]:
# Display the eigenvalues of the covariance matrix (one per eigenvector).
eigen_values


array([1.3536065 , 0.94557084, 0.77774573])

In [None]:
# Display the eigenvector matrix. Each COLUMN eigen_vectors[:, i] is the
# eigenvector that belongs to eigen_values[i]; the rows are not eigenvectors.
eigen_vectors

array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442],
       [-0.52848211,  0.72025103,  0.44938304]])

In [None]:
# Overlay the eigenvectors of the covariance matrix on the 3-D scatter of
# the (now scaled) features.

import plotly.graph_objects as go

class_labels = df['target'].astype('str')
fig = px.scatter_3d(
    df,
    x=df['feature1'],
    y=df['feature2'],
    z=df['feature3'],
    color=class_labels,
)

# Enlarge the markers and outline them; only 'markers'-mode traces match.
marker_style = dict(size=12, line=dict(width=2, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style, selector=dict(mode='markers'))

# Each column of `eigen_vectors` is one eigenvector, so iterating over the
# transpose yields them one at a time. Draw each as a red segment from the
# origin to the vector's tip.
for direction in eigen_vectors.T:
    segment = go.Scatter3d(
        x=[0, direction[0]],
        y=[0, direction[1]],
        z=[0, direction[2]],
        mode='lines',
        line=dict(color='red', width=3),
    )
    fig.add_trace(segment)


fig.show()


In [None]:
# Plot the data points together with all three eigenvectors, each drawn
# as a line that starts at the mean of the feature columns.

import numpy as np
import plotly.graph_objects as go

# Column-wise mean of the three feature columns, converted to a plain
# NumPy array. The pandas result is a Series labelled by column name, and
# indexing it with integers (mean_vector[0]) relies on positional fallback,
# which emits "Series.__getitem__ treating keys as positions is deprecated".
# With an ndarray the integer indexing below is unambiguously positional.
mean_vector = df.iloc[:, 0:3].mean(axis=0).to_numpy()

# Create a figure
fig = go.Figure()

# Add the scatter plot of the data points
fig.add_trace(go.Scatter3d(
    x=df['feature1'],
    y=df['feature2'],
    z=df['feature3'],
    mode='markers',
    marker=dict(
        size=5,
        color=df['target'],  # Set color based on target variable
        opacity=0.8
    )
))


# Add the eigenvectors as lines starting from the mean. Column i of
# `eigen_vectors` is the i-th eigenvector; each segment runs from the mean
# to mean + eigenvector.
for i in range(3):
    eigen_vector = eigen_vectors[:, i]
    tip = mean_vector + eigen_vector
    fig.add_trace(go.Scatter3d(
        x=[mean_vector[0], tip[0]],
        y=[mean_vector[1], tip[1]],
        z=[mean_vector[2], tip[2]],
        mode='lines',
        line=dict(color='red', width=3)  # Customize the eigenvector lines
    ))

# Set axis labels
fig.update_layout(scene=dict(
    xaxis_title='Feature 1',
    yaxis_title='Feature 2',
    zaxis_title='Feature 3'
))

fig.show()



Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



In [None]:
# Step 4 - Select the top two principal components
# The eigendecomposition stores eigenvectors as the COLUMNS of
# `eigen_vectors` (column i pairs with eigen_values[i]); slicing rows with
# eigen_vectors[0:2] would mix components of different eigenvectors.
# The eigenvalue order is also not relied upon: rank the eigenvalues
# explicitly and keep the two columns with the largest ones.
top_two = np.argsort(eigen_values)[::-1][:2]

# Transpose so that each ROW of `pc` is one principal component
# (shape 2 x 3), which is the layout the projection step expects (pc.T).
pc = eigen_vectors[:, top_two].T
pc

array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442]])

In [None]:
# Step 5 - Project the scaled features onto the selected components
# Shapes: (40 x 3 feature matrix) . (3 x 2 transposed components) -> 40 x 2
feature_matrix = df.iloc[:, 0:3].to_numpy()
transformed_df = feature_matrix.dot(pc.T)

# Wrap the projected coordinates in a frame and attach the class labels
# by position (.values) so the shuffled index of `df` is ignored.
new_df = pd.DataFrame(transformed_df, columns=['PC1', 'PC2']).assign(
    target=df['target'].values
)
new_df.head()

Unnamed: 0,PC1,PC2,target
0,0.599433,1.795862,1
1,1.056919,-0.212737,0
2,-0.271876,0.498222,1
3,-0.621586,0.02311,1
4,1.567286,1.730967,1


In [None]:
# 2-D scatter of the projected data. The target column is converted to
# string (in place) so it is coloured per class with the discrete G10
# palette.
new_df['target'] = new_df['target'].astype('str')

fig = px.scatter(
    x=new_df['PC1'],
    y=new_df['PC2'],
    color=new_df['target'],
    color_discrete_sequence=px.colors.qualitative.G10,
)

# Enlarge the markers and outline them; only 'markers'-mode traces match.
outlined_markers = dict(size=12, line=dict(width=2, color='DarkSlateGrey'))
fig.update_traces(marker=outlined_markers, selector=dict(mode='markers'))
fig.show()
