In [1]:
import numpy as np

from sklearn.decomposition import PCA

In [2]:
class PrincipalComponentAnalysis:
    """Principal component analysis via eigendecomposition of the covariance matrix.

    Layout conventions used by this class (they differ from scikit-learn):

    * ``components_`` has shape ``(n_features, n_components_)`` -- each COLUMN
      is one principal axis.
    * ``transform`` / ``fit_transform`` return an array of shape
      ``(n_components_, n_samples)`` -- each ROW holds the scores of one
      component.  ``inverse_transform`` expects that same layout and returns
      an array of shape ``(n_samples, n_features)``.

    Attributes set by ``fit``:

    * ``mean_`` -- per-feature mean of the training data.
    * ``n_samples_``, ``n_features_`` -- shape of the training data.
    * ``n_components_`` -- number of components actually kept.
    * ``explained_variance_`` -- the kept covariance eigenvalues, largest first.
    * ``singular_values_`` -- singular values of the centred data matrix,
      i.e. ``sqrt(explained_variance_ * (n_samples_ - 1))``.
    * ``components_`` -- the kept eigenvectors (see layout above).
    """

    def __init__(self, n_components=None):
        # None means "keep min(n_samples, n_features) components"; resolved in fit().
        self.n_components = n_components

    def set_params(self, n_components=None):
        """Update ``n_components`` (ignored when ``None``) and return ``self``."""
        if n_components is not None:
            self.n_components = n_components
        return self

    @staticmethod
    def _check_data(data):
        """Reject a missing data argument with a descriptive error."""
        if data is None:
            # ValueError subclasses Exception, so existing `except Exception` callers still work.
            raise ValueError('Feature data has not been provided')

    def fit(self, data):
        """Estimate the mean and principal axes of ``data``.

        Parameters
        ----------
        data : array-like of shape (n_samples, n_features)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``data`` is None, is not 2-D, has fewer than two samples, or
            ``n_components`` is smaller than 1.
        """
        self._check_data(data)

        data = np.asarray(data, dtype=float)
        if data.ndim != 2:
            raise ValueError('Feature data must be a 2-D array of shape (n_samples, n_features)')
        if data.shape[0] < 2:
            # The covariance estimate divides by (n_samples - 1).
            raise ValueError('At least two samples are required to estimate the covariance')

        self.n_samples_, self.n_features_ = data.shape
        self.mean_ = data.mean(axis=0)
        centered = data - self.mean_

        # Resolve the number of components without overwriting the user's
        # setting, so the same instance can be re-fitted on differently shaped data.
        if self.n_components is None:
            n_keep = min(self.n_samples_, self.n_features_)
        else:
            if self.n_components < 1:
                raise ValueError('n_components must be at least 1')
            # There are only n_features eigenvectors; clip instead of failing.
            n_keep = min(int(self.n_components), self.n_features_)
        self.n_components_ = n_keep

        cov = centered.T.dot(centered) / (self.n_samples_ - 1)

        # The covariance matrix is symmetric, so eigh applies: it returns real
        # values in ASCENDING order.  Reverse to put the largest variance first.
        eig_vals, eig_vecs = np.linalg.eigh(cov)
        order = np.argsort(eig_vals)[::-1]
        # Round-off can make a zero eigenvalue slightly negative.
        eig_vals = np.clip(eig_vals[order], 0.0, None)
        eig_vecs = eig_vecs[:, order]

        # Eigenvectors are only defined up to sign; make the result
        # deterministic by forcing each vector's largest-magnitude entry positive.
        pivot = np.argmax(np.abs(eig_vecs), axis=0)
        signs = np.sign(eig_vecs[pivot, np.arange(eig_vecs.shape[1])])
        signs[signs == 0] = 1.0
        eig_vecs = eig_vecs * signs

        self.explained_variance_ = eig_vals[:n_keep]
        self.singular_values_ = np.sqrt(self.explained_variance_ * (self.n_samples_ - 1))
        self.components_ = eig_vecs[:, :n_keep]

        return self

    def transform(self, data):
        """Project ``data`` onto the fitted components.

        ``data`` has shape (n_samples, n_features); the result has shape
        (n_components_, n_samples).  Requires a prior call to ``fit``.
        """
        self._check_data(data)

        # Centre with the training mean so that inverse_transform, which adds
        # the mean back, is the true inverse of this projection.
        centered = np.asarray(data, dtype=float) - self.mean_
        return np.dot(self.components_.T, centered.T)

    def fit_transform(self, data):
        """Fit on ``data`` and return its projection, shape (n_components_, n_samples)."""
        self._check_data(data)

        return self.fit(data).transform(data)

    def inverse_transform(self, data):
        """Map scores of shape (n_components_, n_samples) back to feature space.

        Returns an array of shape (n_samples, n_features).
        """
        self._check_data(data)

        return np.matmul(self.components_, np.asarray(data)).T + self.mean_

In [3]:
class LossyDataCompression:
    """Compress data in a lossy way by keeping only a chosen number of principal components.

    The instance owns a single ``PrincipalComponentAnalysis`` model
    (``self.pca``); ``compress`` re-fits it on every call and ``decompress``
    relies on the state left by the most recent ``compress``.
    """

    def __init__(self):
        self.pca = PrincipalComponentAnalysis()

    def compress(self, data, desired_components=200):
        """Fit the PCA model on ``data`` and return its reduced representation.

        ``desired_components`` is forwarded to the model as ``n_components``.
        The return value is whatever ``self.pca.fit_transform`` produces.
        """
        self.pca.set_params(n_components=desired_components)

        # Use this instance's own model, not a module-level `pca` variable
        # that may not exist or may refer to an unrelated object.
        return self.pca.fit_transform(data)

    def decompress(self, data):
        """Reconstruct an approximation of the original data from ``compress`` output."""
        return self.pca.inverse_transform(data)

In [4]:
# Toy dataset: six two-dimensional samples (one sample per row).
X = np.array(
    [
        [-1, -1],
        [-2, -1],
        [-3, -2],
        [1, 1],
        [2, 1],
        [3, 2],
    ]
)

In [5]:
# Reference run: scikit-learn's PCA on the toy data.
pca = PCA(n_components=2)

# fit_transform() fits the model itself, so no separate fit() call is needed.
x_reduced = pca.fit_transform(X)
print(x_reduced)

# Both components are kept here, so the inverse transform reproduces X.
x_recovered = pca.inverse_transform(x_reduced)
print(x_recovered)

[[ 1.38340578  0.2935787 ]
 [ 2.22189802 -0.25133484]
 [ 3.6053038   0.04224385]
 [-1.38340578 -0.2935787 ]
 [-2.22189802  0.25133484]
 [-3.6053038  -0.04224385]]
[[-1. -1.]
 [-2. -1.]
 [-3. -2.]
 [ 1.  1.]
 [ 2.  1.]
 [ 3.  2.]]


In [6]:
# Same experiment with the hand-written implementation.
pca_i = PrincipalComponentAnalysis(n_components=2)

# fit_transform() calls fit() internally, so no separate fit() call is needed.
# Note: the result is laid out as (n_components, n_samples), i.e. one row per
# component -- the transpose of what scikit-learn returns.
x_reduced = pca_i.fit_transform(X)
print(x_reduced)

x_recovered = pca_i.inverse_transform(x_reduced)
print(x_recovered)

[[-1.38340578 -2.22189802 -3.6053038   1.38340578  2.22189802  3.6053038 ]
 [-0.2935787   0.25133484 -0.04224385  0.2935787  -0.25133484  0.04224385]]
[[-1. -1.]
 [-2. -1.]
 [-3. -2.]
 [ 1.  1.]
 [ 2.  1.]
 [ 3.  2.]]
