## 3.1.1 Dimensionality Reduction

To do this we can perform an SVD
operation on the original data and select the top k largest singular values in Σ. The corresponding
k columns are selected from Σ and the corresponding k rows from V^T. An approximation B of the
original matrix A can then be reconstructed.

In [18]:
# data reduction with SVD
from numpy import array
from numpy import diag
from numpy import zeros
from scipy.linalg import svd

# 3 x 10 example matrix holding the integers 1..30 in row-major order
A = array(range(1, 31)).reshape(3, 10)
print(A)

[[ 1  2  3  4  5  6  7  8  9 10]
 [11 12 13 14 15 16 17 18 19 20]
 [21 22 23 24 25 26 27 28 29 30]]


In [19]:
# factorize A into U, the singular values s, and the right factor
# NOTE: scipy returns the right factor already transposed (Vh), so `V` holds V^T here
U, s, V = svd(A, full_matrices=True)

In [20]:
# allocate an all-zero Sigma with the same m x n shape as A
Sigma = zeros(A.shape)

In [21]:
# put the singular values on the leading diagonal of Sigma
# s has min(m, n) entries, so the diagonal block is sized from s itself;
# this works whether A has fewer rows than columns (as here) or more
Sigma[:s.size, :s.size] = diag(s)

In [22]:
# number of leading singular values / vectors to keep
n_elements = 2

# show the full right factor before it is truncated
print(V)
print('\n')

# keep the first n_elements columns of Sigma and the first n_elements rows of V
# NOTE: Sigma and V are overwritten here, so re-run the factorize and Sigma
# cells before re-running this cell with a larger n_elements
Sigma, V = Sigma[:, :n_elements], V[:n_elements]
print(V)

[[-0.24139304 -0.25728686 -0.27318068 -0.2890745  -0.30496832 -0.32086214
  -0.33675595 -0.35264977 -0.36854359 -0.38443741]
 [ 0.53589546  0.42695236  0.31800926  0.20906617  0.10012307 -0.00882003
  -0.11776313 -0.22670623 -0.33564933 -0.44459242]
 [ 0.09975293 -0.01037753  0.23987452  0.05273845  0.0996209  -0.46517509
  -0.52300449 -0.23591804  0.593137    0.14935136]
 [-0.259848   -0.19489762 -0.10988774  0.9273013  -0.07153728 -0.05251768
  -0.04829915 -0.05415057 -0.07582496 -0.0603383 ]
 [-0.26312174 -0.0585767  -0.12150761 -0.05740164  0.9302533  -0.0441965
  -0.05005435 -0.07728074 -0.13808364 -0.12003039]
 [-0.34412869 -0.09157816  0.43274424 -0.05958094 -0.05492318  0.70339002
  -0.33412339 -0.23272698  0.08693936 -0.10601229]
 [-0.3607094   0.01575684  0.51799467 -0.04727562 -0.05090152 -0.33292338
   0.61579275 -0.27850803  0.07385978 -0.15308609]
 [-0.33345757  0.21857007  0.28415895 -0.02511566 -0.05422893 -0.22221261
  -0.27509879  0.75032187 -0.10121342 -0.24172391]
 

In [25]:
# reconstruct an approximation of A from the truncated factors: B = U . (Sigma . V)
# A has rank 2, so two components are enough to reproduce it exactly
B = U @ (Sigma @ V)
B

array([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
       [11., 12., 13., 14., 15., 16., 17., 18., 19., 20.],
       [21., 22., 23., 24., 25., 26., 27., 28., 29., 30.]])

In [26]:
# transform: reduced representation of the rows of A, T = U . Sigma
T = U @ Sigma
print(T)

[[-18.52157747  -6.47697214]
 [-49.81310011  -1.91182038]
 [-81.10462276   2.65333138]]


In [27]:
# same projection computed directly from the data: multiply A by the retained
# right singular vectors (V holds them as rows, hence V.T)
T = A @ V.T
print(T)

[[-18.52157747  -6.47697214]
 [-49.81310011  -1.91182038]
 [-81.10462276   2.65333138]]


#### TruncatedSVD

In [29]:
from sklearn.decomposition import TruncatedSVD

# show the input matrix again; the same A is reduced with TruncatedSVD below
print(A)

[[ 1  2  3  4  5  6  7  8  9 10]
 [11 12 13 14 15 16 17 18 19 20]
 [21 22 23 24 25 26 27 28 29 30]]


In [30]:
# create transform that keeps 2 components
# NOTE(review): this rebinds the name `svd`, shadowing the scipy.linalg `svd`
# function imported at the top; re-running the factorize cell after this one
# fails until that import is executed again
svd = TruncatedSVD(n_components=2)
svd

TruncatedSVD()

In [31]:
# fit the transform to A (fit only; the projection is applied in the next cell)
svd.fit(A)

TruncatedSVD()

In [32]:
# apply transform: project A onto the 2 fitted components
# the values match the manual projection T up to sign; singular vectors are
# only determined up to a sign flip, so both results are valid
result = svd.transform(A)
print(result)

[[18.52157747  6.47697214]
 [49.81310011  1.91182038]
 [81.10462276 -2.65333138]]
