In [1]:
import pandas as pd
import numpy as np

In [2]:
# understanding SVD (SINGULAR VALUE DECOMPOSITION)

# The SVD factorization is M = U * diag(s) * Vh
# U: Contains all the information about the rows (your observations).
# Vh: Contains all the information about the columns (your features).
# s: Holds the singular values; each one weights how much the matching
#    column of U and row of Vh contribute when M is rebuilt.

In [4]:
# Dimensionality reduction demo: a small 4x3 integer matrix to decompose.

M = np.array([
    [1, 3, 4],
    [2, 3, 5],
    [1, 2, 3],
    [5, 4, 6],
])
print(M)

[[1 3 4]
 [2 3 5]
 [1 2 3]
 [5 4 6]]


In [5]:
# Reduced (thin) SVD of M: full_matrices=False keeps U at (4, 3) instead of (4, 4).
U, s, Vh = np.linalg.svd(M, full_matrices=False)
print(U.shape, s.shape, Vh.shape)
print(s)

(4, 3) (3,) (3, 3)
[ 12.26362747   2.11085464   0.38436189]


In [7]:
# Full matrix reconstruction: multiply U, diag(s) and Vh back together.
print(U.dot(np.diag(s)).dot(Vh))

[[ 1.  3.  4.]
 [ 2.  3.  5.]
 [ 1.  2.  3.]
 [ 5.  4.  6.]]


In [8]:
# Show the docstring of np.round, which is used in the next cell to round
# the reduced reconstruction.
np.round.__doc__

'\n    Round an array to the given number of decimals.\n\n    Refer to `around` for full documentation.\n\n    See Also\n    --------\n    around : equivalent function\n\n    '

In [9]:
# Reduced reconstruction: use only the first two columns of U, the first two
# singular values and the first two rows of Vh, rounded to one decimal.
print(np.round(U[:, :2].dot(np.diag(s[:2])).dot(Vh[:2, :]), 1))

[[ 1.   2.8  4.1]
 [ 2.   3.2  4.8]
 [ 1.   2.   3. ]
 [ 5.   3.9  6. ]]


In [11]:
# Looking for hidden factors: fit a FactorAnalysis model on the iris data.

from sklearn.datasets import load_iris
from sklearn.decomposition import FactorAnalysis

iris = load_iris()
x = iris.data
y = iris.target

# Request as many components as were asked for originally (4), with a fixed
# random_state so the fit is repeatable.
factor = FactorAnalysis(
    n_components=4,
    random_state=101,
).fit(x)

In [18]:
# Tabulate the fitted components_, labelling the columns with the iris feature names.
pd.DataFrame(data=factor.components_, columns=iris.feature_names)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,0.707227,-0.153147,1.653151,0.701569
1,0.114676,0.159763,-0.045604,-0.014052
2,-0.0,0.0,0.0,0.0
3,-0.0,0.0,0.0,-0.0


In [21]:
# Achieving dimensionality reduction using PCA: fit on the iris data, then
# report the variance ratio per component and the component matrix.
from sklearn.decomposition import PCA

pca = PCA().fit(x)
print('explained variance by component: %s' % (pca.explained_variance_ratio_,))
print(pd.DataFrame(data=pca.components_, columns=iris.feature_names))

explained variance by component: [ 0.92461621  0.05301557  0.01718514  0.00518309]
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0           0.361590         -0.082269           0.856572          0.358844
1           0.656540          0.729712          -0.175767         -0.074706
2          -0.580997          0.596418           0.072524          0.549061
3           0.317255         -0.324094          -0.479719          0.751121


In [39]:
# Recognizing faces with PCA
#
# Fetch the Olivetti faces dataset (shuffled with a fixed random_state) and
# split it into a training part and a test part at a single cut-off index.
# NOTE: fetch_olivetti_faces downloads the data when it is not cached locally,
# so this cell needs network access on first use.

from sklearn.datasets import fetch_olivetti_faces

# Number of leading (shuffled) samples used for training; the rest are the
# test set. Defined once so the data and target slices cannot drift apart.
n_train = 350

dataset = fetch_olivetti_faces(shuffle=True, random_state=101)
train_faces = dataset.data[:n_train, :]
test_faces = dataset.data[n_train:, :]
train_answers = dataset.target[:n_train]
test_answers = dataset.target[n_train:]

downloading Olivetti faces from http://cs.nyu.edu/~roweis/data/olivettifaces.mat to C:\SPB_Data\scikit_learn_data


URLError: <urlopen error [Errno 11004] getaddrinfo failed>

In [23]:
import sklearn

In [24]:
# Inspect the load_sample_image function object; it is referenced, not called,
# so the cell only displays its repr.
sklearn.datasets.load_sample_image

<function sklearn.datasets.base.load_sample_image>

In [25]:
# Print the help text of the sklearn.datasets.olivetti_faces module.
# NOTE(review): only `import sklearn` precedes this cell, so attribute access to
# sklearn.datasets presumably relies on the earlier `from sklearn.datasets import ...`
# cells having run — confirm on a fresh kernel.
help (sklearn.datasets.olivetti_faces)

Help on module sklearn.datasets.olivetti_faces in sklearn.datasets:

NAME
    sklearn.datasets.olivetti_faces - Modified Olivetti faces dataset.

DESCRIPTION
    The original database was available from
    
        http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html
    
    The version retrieved here comes in MATLAB format from the personal
    web page of Sam Roweis:
    
        http://www.cs.nyu.edu/~roweis/
    
    There are ten different images of each of 40 distinct subjects. For some
    subjects, the images were taken at different times, varying the lighting,
    facial expressions (open / closed eyes, smiling / not smiling) and facial
    details (glasses / no glasses). All the images were taken against a dark
    homogeneous background with the subjects in an upright, frontal position (with
    tolerance for some side movement).
    
    The original dataset consisted of 92 x 112, while the Roweis version
    consists of 64x64 images.

FUNCTIONS
    fetch_oli

In [26]:
# Display the raw module docstring of sklearn.datasets.olivetti_faces
# (the dataset description, as an unformatted string).
sklearn.datasets.olivetti_faces.__doc__

'Modified Olivetti faces dataset.\n\nThe original database was available from\n\n    http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html\n\nThe version retrieved here comes in MATLAB format from the personal\nweb page of Sam Roweis:\n\n    http://www.cs.nyu.edu/~roweis/\n\nThere are ten different images of each of 40 distinct subjects. For some\nsubjects, the images were taken at different times, varying the lighting,\nfacial expressions (open / closed eyes, smiling / not smiling) and facial\ndetails (glasses / no glasses). All the images were taken against a dark\nhomogeneous background with the subjects in an upright, frontal position (with\ntolerance for some side movement).\n\nThe original dataset consisted of 92 x 112, while the Roweis version\nconsists of 64x64 images.\n'

In [31]:
from sklearn.decomposition import NMF


In [33]:
# Create an NMF estimator with default parameters; it is not fitted here.
NM = NMF()

In [35]:
# Display the raw docstring of the estimator's fit_transform method.
NM.fit_transform.__doc__

"Learn a NMF model for the data X and returns the transformed data.\n\n        This is more efficient than calling fit followed by transform.\n\n        Parameters\n        ----------\n        X: {array-like, sparse matrix}, shape (n_samples, n_features)\n            Data matrix to be decomposed\n\n        W : array-like, shape (n_samples, n_components)\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like, shape (n_components, n_features)\n            If init='custom', it is used as initial guess for the solution.\n\n        Returns\n        -------\n        W: array, shape (n_samples, n_components)\n            Transformed data.\n        "

In [37]:
import scipy

In [38]:
# Display the docstring of the complex-angle function.
# Use numpy's `angle` directly: `scipy.angle` is only a deprecated top-level
# alias of the NumPy function (the docstring's own examples call `np.angle`).
np.angle.__doc__

'\n    Return the angle of the complex argument.\n\n    Parameters\n    ----------\n    z : array_like\n        A complex number or sequence of complex numbers.\n    deg : bool, optional\n        Return angle in degrees if True, radians if False (default).\n\n    Returns\n    -------\n    angle : ndarray or scalar\n        The counterclockwise angle from the positive real axis on\n        the complex plane, with dtype as numpy.float64.\n\n    See Also\n    --------\n    arctan2\n    absolute\n\n\n\n    Examples\n    --------\n    >>> np.angle([1.0, 1.0j, 1+1j])               # in radians\n    array([ 0.        ,  1.57079633,  0.78539816])\n    >>> np.angle(1+1j, deg=True)                  # in degrees\n    45.0\n\n    '