# PCA

In [1]:
# Reload the notebook if an external file is updated
%load_ext autoreload
%autoreload 2

import sys
    
from pathlib import Path

path = (
    Path
    .cwd()
    .parent
    .parent
    .joinpath('project')
)

sys.path.append(path)

In [2]:
from __future__ import annotations

import pandas as pd

from project.reduction import (
    DimensionalityReduction,
    PCAStrategy
)
from project.visualizer.pca import (
    MatplotlibVisualizer,
    PCAVisualizer,
    PlotlyVisualizer
)
from project.visualizer.settings import Settings
from sklearn.datasets import (
    load_digits,
    load_iris,
    load_wine
)
from sklearn.preprocessing import StandardScaler

In [3]:
# Switch read by the next cell to decide whether the features are standardised
scale = True

# Object returned by the scikit-learn Iris loader
loader = load_iris()

# Pull out the pieces used by the rest of the notebook, one name at a time
columns = loader.feature_names
data = loader.data
target = loader.target

# Tabular view of the raw feature values, labelled with the feature names
dataframe = pd.DataFrame(data, columns=columns)

In [4]:
if scale:
    # Fit the scaler on the frame first, then apply it to the same frame
    scaler = StandardScaler().fit(dataframe)
    data = scaler.transform(dataframe)

    # Rebuild the frame so the scaled values keep the original column labels
    dataframe = pd.DataFrame(data, columns=columns)

# Plain NumPy copy of the (possibly scaled) frame, passed to the strategy later
dataset = dataframe.to_numpy()

In [5]:
# Shape of the column-label array, i.e. how many feature columns the frame has
dataframe.columns.to_numpy().shape

(4,)

In [6]:
# (rows, columns) of the working frame
dataframe.shape

(150, 4)

In [7]:
# Preview the first five rows of the working frame
dataframe.head(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,-0.900681,1.019004,-1.340227,-1.315444
1,-1.143017,-0.131979,-1.340227,-1.315444
2,-1.385353,0.328414,-1.397064,-1.315444
3,-1.506521,0.098217,-1.283389,-1.315444
4,-1.021849,1.249201,-1.340227,-1.315444


In [8]:
# Number of components requested from the reduction; also used in the title
n_components = 3

# Wrap the data in the PCA strategy and hand it to the generic reducer
strategy = PCAStrategy(
    dataframe=dataframe,
    dataset=dataset,
    target=target
)
reduction = DimensionalityReduction(strategy=strategy)
transformation = reduction.reduce(n_components=n_components)

# NOTE(review): `dimension=3` repeats the literal used for `n_components`;
# whether the two must stay equal depends on PlotlyVisualizer, which is not
# visible here. Confirm before changing either value.
visualizer = PlotlyVisualizer(
    dimension=3,
    reduction=reduction,
    transformation=transformation
)
pca = PCAVisualizer(visualizer=visualizer)

# Compose the figure title from the component count and the strategy's str()
method = str(strategy)
dimensionality = f"{n_components}-Component"
title = f"{dimensionality} {method} for the Iris Dataset"

pca.visualizer.engine.settings.title = title

In [9]:
# `components_` of the estimator held by the strategy; presumably the
# scikit-learn PCA principal axes, one row per component (TODO confirm)
reduction.strategy.instance.components_

array([[ 0.52106591, -0.26934744,  0.5804131 ,  0.56485654],
       [ 0.37741762,  0.92329566,  0.02449161,  0.06694199]])

In [10]:
# `explained_variance_` of the estimator held by the strategy; presumably the
# variance captured by each component (TODO confirm against project.reduction)
reduction.strategy.instance.explained_variance_

array([2.93808505, 0.9201649 ])

In [11]:
# `explained_variance_ratio_` of the estimator held by the strategy; presumably
# each component's share of the total variance (TODO confirm)
reduction.strategy.instance.explained_variance_ratio_

array([0.72962445, 0.22850762])

In [12]:
# `singular_values_` of the estimator held by the strategy
reduction.strategy.instance.singular_values_

array([20.92306556, 11.7091661 ])

In [13]:
# `mean_` of the estimator held by the strategy; the saved values are near
# zero, consistent with the features having been standardised when `scale` is True
reduction.strategy.instance.mean_

array([-1.69031455e-15, -1.84297022e-15, -1.69864123e-15, -1.40924309e-15])

In [14]:
# `n_components_` of the estimator held by the strategy.
# NOTE(review): the saved output reports 2, while the cell that builds the
# reduction sets `n_components = 3`. The outputs look stale; restart the
# kernel and run all cells to confirm.
reduction.strategy.instance.n_components_

2

In [15]:
# Number of input features seen when the estimator was fitted.
# `n_features_` was deprecated in scikit-learn 1.2 and removed in 1.4, so this
# reads the supported replacement `n_features_in_`, which keeps the cell
# working on a fresh run with newer scikit-learn releases.
reduction.strategy.instance.n_features_in_


Attribute `n_features_` was deprecated in version 1.2 and will be removed in 1.4. Use `n_features_in_` instead.



4

In [16]:
# `n_samples_` of the estimator held by the strategy; presumably the number of
# rows it was fitted on (TODO confirm it exists on the installed version)
reduction.strategy.instance.n_samples_

150

In [17]:
# `noise_variance_` of the estimator held by the strategy
reduction.strategy.instance.noise_variance_

0.08429784161070522

In [18]:
# `n_features_in_` of the estimator held by the strategy: the feature count
# recorded at fit time
reduction.strategy.instance.n_features_in_

4

In [19]:
# Call the visualizer's `blender` step, then `show`; presumably the first
# prepares a figure and the second renders it (defined in
# project.visualizer.pca, not visible here; confirm)
pca.blender()
pca.show()

In [20]:
# Call the visualizer's `biplot` step, then `show`; presumably prepares and
# renders a biplot of the reduction (confirm in project.visualizer.pca)
pca.biplot()
pca.show()

In [21]:
# Call the visualizer's `explain` step, then `show`; what is plotted is defined
# in project.visualizer.pca (not visible here; confirm)
pca.explain()
pca.show()

In [22]:
# Call the visualizer's `heatmap` step, then `show`; what is plotted is defined
# in project.visualizer.pca (not visible here; confirm)
pca.heatmap()
pca.show()

In [23]:
# Call the visualizer's `ratio` step, then `show`; what is plotted is defined
# in project.visualizer.pca (not visible here; confirm)
pca.ratio()
pca.show()

In [24]:
# Call the visualizer's `transform` step, then `show`; what is plotted is
# defined in project.visualizer.pca (not visible here; confirm)
pca.transform()
pca.show()