In [67]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [47]:
# Load the scikit-learn digits dataset and wrap features/labels in pandas containers.
dataset = load_digits()
X = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
y = pd.Series(data=dataset.target)

In [48]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [49]:
# Standardise the features. The scaler is fitted on the training split only and
# then re-used on the test split, so no test-set statistics leak into training.
sc = StandardScaler()
# Keep the original row index and column names. Wrapping the bare ndarray would
# reset the index to 0..n-1, which no longer lines up with y_train / y_test
# (they keep the shuffled index produced by train_test_split).
X_train_scaled = pd.DataFrame(
    sc.fit_transform(X_train), index=X_train.index, columns=X_train.columns
)
X_test_scaled = pd.DataFrame(
    sc.transform(X_test), index=X_test.index, columns=X_test.columns
)

In [50]:
# LDA reducer keeping 9 discriminant components.
# NOTE(review): the confusion matrix further down is 10x10, so 9 is presumably
# n_classes - 1 — confirm against the LinearDiscriminantAnalysis docs.
lda = LinearDiscriminantAnalysis(n_components=9)

In [59]:
# Fit LDA on the scaled training data (supervised, so it needs y_train) and
# keep the projected training set as a DataFrame.
X_train_scaled_lda = pd.DataFrame(
    data=lda.fit_transform(X=X_train_scaled, y=y_train)
)

In [60]:
# Project the scaled test data with the already-fitted LDA (transform only, no refit).
X_test_scaled_lda = pd.DataFrame(
    data=lda.transform(X=X_test_scaled)
)

In [61]:
# Random forest with 100 trees; the fixed seed makes the fitted model repeatable.
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

In [62]:
# Train the forest on the LDA-projected training features.
rf.fit(X_train_scaled_lda,y_train)

In [63]:
# Predict labels for the LDA-projected test features.
y_pred = rf.predict(X=X_test_scaled_lda)

In [65]:
# Overall fraction of test samples whose predicted label matches the true label.
accuracy_score(y_test,y_pred)

0.9527777777777777

In [72]:
# Confusion matrix of true test labels versus predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [74]:
# Diagonal entry for label 6 divided by the total of row 6.
# With y_test passed first to confusion_matrix, rows are true labels, so this
# is the recall for digit 6.
cm[6,6]/cm[6].sum()

np.float64(0.9714285714285714)

In [75]:
# Display the full confusion matrix (rows: true label, columns: predicted label).
cm

array([[31,  0,  0,  1,  1,  0,  0,  0,  0,  0],
       [ 0, 26,  2,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0, 32,  1,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 33,  0,  1,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 45,  0,  0,  1,  0,  0],
       [ 0,  0,  1,  0,  0, 45,  0,  0,  0,  1],
       [ 0,  0,  0,  0,  1,  0, 34,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 33,  0,  1],
       [ 0,  2,  0,  0,  0,  0,  0,  0, 27,  1],
       [ 0,  0,  0,  1,  1,  0,  0,  0,  1, 37]])

In [77]:
# Diagonal entry for label 5 divided by the total of column 5 (all samples
# predicted as 5), i.e. the precision for digit 5.
cm[5,5]/cm[:,5].sum()

np.float64(0.9782608695652174)

In [78]:
# Hand check of the previous ratio: in the displayed matrix cm[5,5] is 45 and
# column 5 sums to 46. The numbers are hard-coded and will go stale if cm changes.
45/46

0.9782608695652174

In [79]:
# Diagonal entry for label 3 divided by the total of column 3 (all samples
# predicted as 3), i.e. the precision for digit 3.
cm[3,3]/cm[:,3].sum()

np.float64(0.9166666666666666)

In [80]:
# Hand check of the previous ratio: in the displayed matrix cm[3,3] is 33 and
# column 3 sums to 36. The numbers are hard-coded and will go stale if cm changes.
33/36

0.9166666666666666

In [81]:
# Hard-coded ratio 33/34. In the displayed matrix this matches the diagonal over
# the row total for label 3 (and also for label 7).
# NOTE(review): presumably a recall check — confirm which class was meant.
33/34

0.9705882352941176

In [24]:
# Smallest entry anywhere in the feature covariance matrix
# (column-wise minimum, then the minimum of those).
# NOTE(review): this cell's execution count (In [24]) predates the cell that
# loads X (In [47]), and the covariance shown in the next cell has a diagonal of
# about 1.0006 with integer column labels. That suggests X held standardised
# data when this ran — re-run top-to-bottom to refresh the output.
X.cov().min().min()

-0.5705136332381097

In [42]:
# Display the feature-by-feature covariance matrix of X.
# NOTE(review): the recorded output below has integer column labels and a
# diagonal of about 1.0006, which does not look like the raw X built from
# feature_names in the load cell — presumably stale output from an earlier,
# standardised X; confirm by re-running from a fresh kernel.
X.cov()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.0,1.000557,0.556928,0.207929,-0.018771,0.060810,0.048415,-0.038948,0.032338,0.556682,...,0.030970,-0.045368,-0.007910,0.856086,0.556036,0.147728,-0.102406,-0.029887,0.026562,-0.043913
2,0.0,0.556928,1.000557,0.560492,-0.084282,0.043593,0.002842,-0.062313,0.022324,0.582583,...,0.050877,-0.003806,-0.025851,0.515563,0.938145,0.500118,-0.134829,-0.041206,0.072639,0.082569
3,0.0,0.207929,0.560492,1.000557,0.023952,-0.171473,-0.115796,-0.040162,0.035683,0.328527,...,0.139475,0.075376,-0.049112,0.175902,0.560638,0.768373,-0.065993,-0.054966,0.053467,0.082016
4,0.0,-0.018771,-0.084282,0.023952,1.000557,0.508014,0.127835,0.010070,0.042089,0.051686,...,-0.265116,-0.212339,0.017362,-0.047250,-0.020187,-0.008872,-0.082171,-0.215929,-0.250838,-0.215469
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,0.0,0.147728,0.500118,0.768373,-0.008872,-0.068076,-0.045871,0.001013,0.043398,0.305361,...,0.094770,0.050060,-0.038269,0.113035,0.481979,1.000557,0.058423,-0.095009,0.006853,0.099727
60,0.0,-0.102406,-0.134829,-0.065993,-0.082171,-0.351342,-0.383735,-0.178343,0.049024,-0.000604,...,0.394499,0.105160,0.005700,-0.117973,-0.208072,0.058423,1.000557,0.609854,0.243441,0.103073
61,0.0,-0.029887,-0.041206,-0.054966,-0.215929,-0.268967,-0.304281,-0.141253,0.033428,0.071528,...,0.707913,0.262942,-0.003058,-0.043913,-0.091890,-0.095009,0.609854,1.000557,0.648689,0.262137
62,0.0,0.026562,0.072639,0.053467,-0.250838,-0.267808,-0.179044,-0.063256,0.020700,0.111631,...,0.602602,0.512011,-0.011939,0.014565,0.035388,0.006853,0.243441,0.648689,1.000557,0.620774


In [12]:
# Keep the feature covariance matrix for the eigen-decomposition in the next cell.
# NOTE(review): the execution count (In [12]) predates the cell that loads X
# (In [47]); confirm whether raw or standardised features were intended here.
X_cov = X.cov()

In [14]:
# Eigen-decomposition of the covariance matrix.
# A covariance matrix is symmetric, so use eigh rather than the general-purpose
# eig: it always returns real eigenvalues/eigenvectors and a defined (ascending)
# order. Passing a plain ndarray guarantees plain ndarrays come back.
eig_val, eig_vec = np.linalg.eigh(X_cov.to_numpy())
# Flip to descending order so the highest-variance direction comes first;
# column i of eig_vec still pairs with eig_val[i].
eig_val, eig_vec = eig_val[::-1], eig_vec[:, ::-1]

In [16]:
# Shape check: one eigenvalue per feature is expected.
eig_val.shape

(64,)

In [17]:
# Shape check: a square matrix with one eigenvector column per eigenvalue.
eig_vec.shape

(64, 64)

In [26]:
# PCA reducer keeping the first 20 principal components.
pca = PCA(n_components=20)

In [29]:
# Fit PCA on X as-is; this cell applies no scaling of its own.
# NOTE(review): the execution count (In [29]) predates the cell that loads X
# (In [47]), so the recorded results may come from a different (presumably
# standardised) X — confirm by re-running from a fresh kernel.
pca.fit(X)

In [31]:
# Largest share of total variance explained by any single fitted component.
pca.explained_variance_ratio_.max()

np.float64(0.12033916097734915)