In [2]:
import numpy as np

class MyPCA:
    """
    A small class to perform principal component analysis (PCA).

    The principal axes are the eigenvectors of the scatter matrix
    X_c^T X_c of the column-centered data X_c, ordered by decreasing
    eigenvalue.
    ------------
    Parameters:
    n_components: int (optional)
        number of principal components we want to project on
        (default None keeps all p components)
    ------------
    Attributes (populated by fit_transform, None before that):
    eigvecs_: np.array
        all p eigenvectors stored as columns (pxp), sorted by decreasing
        eigenvalue
    eigvals_: np.array
        the matching eigenvalues (p,). They belong to the unnormalized
        scatter matrix, i.e. they are (n - 1) times the sample variance
        along each principal axis.
    """

    def __init__(self, n_components=None):
        """
        Constructor for PCA class.
        ----------
        Parameters:
        n_components : int (optional)
            The number of principal components to keep (default is None,
            which keeps every component).
        """
        self.n_components = n_components
        self.eigvecs_ = None
        self.eigvals_ = None

    def fit_transform(self, X):
        """
        Fit the PCA model to data and transform it.
        ----------
        Parameters:
        X : numpy array
            The data to fit the PCA model and transform. (n x p)

        Returns:
        projected_data : numpy array
            The centered data projected on the leading principal axes
            (n x n_components, or n x p when n_components is None).

        Raises:
        ValueError
            If X is not two-dimensional.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be a 2-D array of shape (n_samples, n_features)"
            )

        # center the data in the feature space
        X_centered = X - np.mean(X, axis=0)

        # scatter matrix (p x p); deliberately NOT divided by (n - 1), so the
        # eigenvalues keep the scale this class has always reported
        scatter_matrix = np.dot(X_centered.T, X_centered)

        # the scatter matrix is symmetric, so use eigh: it guarantees real
        # eigenvalues/eigenvectors (eig may return complex values because of
        # round-off) and returns the eigenvalues in ascending order
        eigvals, eigvecs = np.linalg.eigh(scatter_matrix)

        # flip to decreasing order of eigenvalues
        eigvals = eigvals[::-1]
        eigvecs = eigvecs[:, ::-1]

        self.eigvecs_ = eigvecs
        self.eigvals_ = eigvals

        # select the top n_components eigenvectors (all of them by default)
        if self.n_components is None:
            top_eigvecs = eigvecs
        else:
            top_eigvecs = eigvecs[:, :self.n_components]

        # project the *centered* data onto the selected eigenvectors, so the
        # scores are expressed relative to the data mean
        projected_data = np.dot(X_centered, top_eigvecs)

        return projected_data
   


In [3]:
# Load the feature matrix (X) and the labels (y) from the text files of the
# kPCA dataset; the paths are relative to the notebook's working directory.
X, y = (
    np.genfromtxt("../Datasets/data_kPCA_2022-2023.txt"),
    np.genfromtxt("../Datasets/labels_kPCA_2022-2023.txt"),
)

In [8]:
# Fit a PCA that keeps two components. The projected data returned by
# fit_transform is not used here; the call is made to populate eigvals_.
pca = MyPCA(n_components=2)
pca.fit_transform(X)

# Eigenvalue spectrum, sorted in decreasing order
print(pca.eigvals_)


[6358.14630977 4969.25600835 4151.09816656 3033.60251736 2914.96445963
 2867.75671971 2658.86156706 2428.27203624 2348.22985331 2019.01144932
 1977.13821224 1897.19536404 1760.30939215 1708.81544232 1662.49746474
 1480.56460599 1446.04494589 1298.8441453  1255.88173552  397.49897469]
