In [4]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
from matplotlib.patches import FancyArrowPatch


In [5]:
# Load the income / loan-approval records from the CSV in the working directory.
df = pd.read_csv('demo.csv')
# Preview five random rows. The fixed seed keeps this preview identical on every
# Restart & Run All instead of showing different rows each execution.
df.sample(5, random_state=42)

Unnamed: 0,father_income,mother_income,child_income,loan_approved
55,66000,49000,27000,yes
6,70000,48000,28000,no
71,63000,47000,26000,yes
57,69000,51000,29000,yes
62,50000,37000,19000,no


In [7]:
# Interactive 3-D scatter of the three income columns, coloured by loan decision.
fig = px.scatter_3d(
    df,
    x='father_income',
    y='mother_income',
    z='child_income',
    color='loan_approved',
    color_discrete_sequence=['blue', 'red'],
)
# Enlarge the points and give them a dark outline; the selector restricts the
# update to traces drawn in marker mode.
fig.update_traces(
    marker={'size': 12, 'line': {'width': 2, 'color': 'DarkSlateGrey'}},
    selector={'mode': 'markers'},
)
fig.show()


In [8]:
# Standardise the first three columns of df (the income features) with
# StandardScaler so each has zero mean and unit variance before PCA.
feature_columns = list(df.columns[:3])
scaler = StandardScaler()
# Replace the columns by label rather than writing through df.iloc[:, :3] = ...:
# the scaled values are floats, and the preview shows whole-number incomes
# (presumably integer dtype), so an in-place iloc write is an incompatible-dtype
# assignment that pandas >= 2.1 warns about and plans to reject.
df[feature_columns] = scaler.fit_transform(df[feature_columns])


In [9]:
# Covariance between the first three columns of df. rowvar=False tells numpy
# that each column is a variable and each row an observation.
covariance_matrix = np.cov(
    df.iloc[:, :3].to_numpy(),
    rowvar=False,
)
print('Covariance Matrix:\n', covariance_matrix)


Covariance Matrix:
 [[1.01098901 1.00478112 0.99817111]
 [1.00478112 1.01098901 1.00220469]
 [0.99817111 1.00220469 1.01098901]]


In [10]:
# Eigen-decomposition of the covariance matrix. A covariance matrix is
# symmetric, so use np.linalg.eigh: it is the solver intended for symmetric
# matrices and returns real eigenvalues in a documented (ascending) order,
# whereas np.linalg.eig gives no ordering guarantee.
eigen_values, eigen_vectors = np.linalg.eigh(covariance_matrix)
# Flip to descending order so column 0 of eigen_vectors is the direction of
# largest variance and slicing the leading columns picks the top components.
# Note: the sign of each eigenvector is arbitrary and may differ between solvers.
eigen_values, eigen_vectors = eigen_values[::-1], eigen_vectors[:, ::-1]
print('Eigenvalues:\n', eigen_values)
print('Eigenvectors:\n', eigen_vectors)


Eigenvalues:
 [3.01442942 0.01311634 0.00542127]
Eigenvectors:
 [[-0.57725763 -0.61250116  0.54001477]
 [-0.57803099 -0.16060135 -0.80005461]
 [-0.57676148  0.7739829   0.26133629]]


In [20]:
# Projecting Data onto Principal Components
# Select the two eigenvectors with the largest eigenvalues explicitly instead of
# assuming the decomposition returned its columns in descending-eigenvalue order.
top_two = np.argsort(eigen_values)[::-1][:2]
pc = eigen_vectors[:, top_two]
# Project the three feature columns onto the two chosen directions.
transformed_df = np.dot(df.iloc[:, 0:3], pc)
new_df = pd.DataFrame(transformed_df, columns=['PC1', 'PC2'])
# Carry the label over by position (.values drops the index) and store it as str
# so plotly treats it as a discrete colour category.
new_df['loan_approved'] = df['loan_approved'].values
new_df['loan_approved'] = new_df['loan_approved'].astype(str)
print(new_df.head())

# 2D Scatter Plot with Principal Components
fig = px.scatter(new_df, x='PC1', y='PC2', color='loan_approved', color_discrete_sequence=px.colors.qualitative.G10)
fig.update_traces(marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey')), selector=dict(mode='markers'))
fig.show()

        PC1       PC2 loan_approved
0 -1.375851  0.396411           yes
1  0.525008  0.072765            no
2 -2.518664  0.121459           yes
3  2.260732  0.008981           yes
4  0.009577  0.028421            no
