In this notebook I will go through many different techniques for dimension reduction:
* PCA
* SVD
* Autoencoder
* VAE Autoencoder
    
I will use the MNIST dataset, only to be using a high-dimensional data.

In [2]:
import numpy as np
from time import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as dset
import torchvision.transforms as transforms

In [3]:
use_cuda = torch.cuda.is_available()

### Loading data

In [4]:
PATH = './data'

In [5]:
trans = transforms.Compose([transforms.ToTensor(), 
                            transforms.Normalize((0.5,), (1.0,))])
train_set = dset.MNIST(root=PATH, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=PATH, train=False, transform=trans, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [6]:
# Parameters
params = {'batch_size': 8,
         'shuffle': True,
         'num_workers': 4}

In [7]:
train_loader = torch.utils.data.DataLoader(
        dataset=train_set,
        **params)

In [8]:
test_loader = torch.utils.data.DataLoader(
        dataset=test_set, 
        **params)

### PCA

In [20]:
import sklearn as sk
import sklearn.decomposition as decomposition

In [32]:
timer = time()
S = np.matmul(np.transpose(x), x)
duration = time() - timer
print(duration)

4.492894649505615


In [33]:
timer = time()
eig_val, eig_vec = np.linalg.eig(S)
duration = time() - timer
print(duration)

0.7613091468811035


In [12]:
sorted_eig_val = sorted(eig_val)

In [34]:
np.min(eig_val)

-14927.79463706756

In [35]:
np.sum(eig_val)

97854.99999999958

In [36]:
pca = decomposition.PCA()

In [37]:
pca.fit(x)

PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [38]:
x_transform = pca.transform(x)

In [40]:
x_transform.shape

(70000, 784)

In [41]:
pca.explained_variance_ratio_

array([9.74611592e-02, 7.15544459e-02, 6.14953098e-02, 5.40338453e-02,
       4.88893370e-02, 4.30522703e-02, 3.27826152e-02, 2.88964157e-02,
       2.75836420e-02, 2.34214017e-02, 2.10668855e-02, 2.03755324e-02,
       1.70706430e-02, 1.69401978e-02, 1.58338224e-02, 1.48634490e-02,
       1.31935849e-02, 1.27901425e-02, 1.18727249e-02, 1.15293670e-02,
       1.06603978e-02, 1.00979434e-02, 9.59151230e-03, 9.09635078e-03,
       8.83286819e-03, 8.38818208e-03, 8.09831794e-03, 7.85717898e-03,
       7.40262230e-03, 6.90014160e-03, 6.56143991e-03, 6.45470291e-03,
       6.00819302e-03, 5.85611746e-03, 5.66753559e-03, 5.43470659e-03,
       5.04717070e-03, 4.87079420e-03, 4.78955138e-03, 4.67593650e-03,
       4.54368629e-03, 4.44918011e-03, 4.18216598e-03, 3.96163907e-03,
       3.83707746e-03, 3.75784804e-03, 3.61426284e-03, 3.49024600e-03,
       3.38731603e-03, 3.19696169e-03, 3.16854028e-03, 3.10144912e-03,
       2.96480352e-03, 2.87101448e-03, 2.82513796e-03, 2.69422713e-03,
      

### SVD

### autoencoder

### VAE autoencoder