In [52]:
import numpy as np
import scipy
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

In [2]:
data, target = load_iris(return_X_y=True)

In [211]:
class PCAKLT:
    def __init__(self, n_components):
        self.mu_ = None
        self.cov_ = None
        self.eigenvalues_ = None
        self.eigenvectors_ = None
        self.explained_variance_ratio_ = None
        self.components_ = None

        self.n_components = n_components


    def fit(self, X):
        self.mu_ = np.mean(X, axis=0)
        X_zero_mean = np.subtract(X, self.mu_)
        self.cov_ = (X_zero_mean.T @ X_zero_mean) / (X_zero_mean.shape[0]-1)
        # FIXME: eigenvectors have different sign
        self.eigenvalues_, self.eigenvectors_ = scipy.linalg.eigh(self.cov_)
        self.explained_variance_ratio_ = self.eigenvalues_ / np.sum(self.eigenvalues_)

        if isinstance(self.n_components, int):
            # number of components
            vari = []
            for i, j in enumerate(np.argsort(-self.explained_variance_ratio_)):
                if i >= self.n_components:
                    break
                vari.append(j)
            self.n_components = i
        elif isinstance(self.n_components, float):
            # variance explained
            varsum = 0
            vari = []
            for i, j in enumerate(np.argsort(-self.explained_variance_ratio_)):
                if varsum >= self.n_components:
                    break
                varsum += self.explained_variance_ratio_[j]
                vari.append(j)
            self.n_components = i
        self.components_ = self.eigenvectors_[:, vari].T

        return self

    def transform(self, X):
        X_zero_mean = np.subtract(X, self.mu_)
        # FIXME: transformed values have different sign compared to sklearn
        return (self.components_ @ X_zero_mean.T).T

In [204]:
test_data = np.array([[1,2,3,2],[2,3,5,2],[1,1,1,1]]).T

In [205]:
pca = PCAKLT(2).fit(test_data)
pca.transform(test_data)

array([[-1.35353252, -0.40981667],
       [ 0.        ,  0.        ],
       [ 2.23520712, -0.06204125],
       [-0.8816746 ,  0.47185793]])

In [207]:
skpca = PCA(2, svd_solver="full").fit(test_data)
skpca.transform(test_data)

array([[-1.35353252, -0.40981667],
       [ 0.        ,  0.        ],
       [ 2.23520712, -0.06204125],
       [-0.8816746 ,  0.47185793]])

# Q1

In [216]:
q1 = np.array([
    [6.2, 2.6, 6.6, 1.9],
    [7.2, 2.6, 3.0, 1.0],
    [6.9, 2.4, 2.8, 0.4],
    [4.5, 2.6, 3.4, 1.6],
    [4.8, 2.7, 2.8, 1.0]
])

pca_iris = PCA(4).fit(data)
pca_iris.transform(q1)

array([[ 2.85324867, -0.64535598,  0.11807362,  0.57689597],
       [-0.19183919,  0.7033073 , -1.2296544 , -0.7877199 ],
       [-0.66965831,  0.44026152, -1.51737368, -0.4007917 ],
       [-0.60994123, -1.18412006,  0.69981949, -0.19616334],
       [-1.23895339, -0.76481508,  0.21176112, -0.09454612]])

In [217]:
own_iris = PCAKLT(4).fit(data)
own_iris.transform(q1)

array([[ 2.85324867,  0.64535598, -0.11807362, -0.57689597],
       [-0.19183919, -0.7033073 ,  1.2296544 ,  0.7877199 ],
       [-0.66965831, -0.44026152,  1.51737368,  0.4007917 ],
       [-0.60994123,  1.18412006, -0.69981949,  0.19616334],
       [-1.23895339,  0.76481508, -0.21176112,  0.09454612]])

# Q2

In [None]:
from sklearn.decomposition import sparse_encode