In [60]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [61]:
# Load the iris dataset: X holds the feature matrix, y the class labels.
from sklearn import datasets
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [62]:
# Wrap the features in a DataFrame; the class label is used as the row index.
df = pd.DataFrame(data=iris.data, index=iris.target, columns=iris.feature_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
0,4.9,3.0,1.4,0.2
0,4.7,3.2,1.3,0.2
0,4.6,3.1,1.5,0.2
0,5.0,3.6,1.4,0.2


In [63]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; train_test_split lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples. The fixed random_state makes the split (and
# everything fitted on it in later cells) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [8]:
# Data scaling: fit the scaler on the training split only, then apply the
# same fitted scaler to both the training and the test split.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [54]:
# PCA: fit a model that keeps two components on the training set.
from sklearn.decomposition import PCA
n_components = 2
pca = PCA(n_components)
# Last expression of the cell, so the fitted estimator's repr is displayed.
pca.fit(X=X_train)

PCA(copy=True, n_components=2, whiten=False)

In [10]:
# PCA transform (data reduction) by hand: subtract the fitted mean, then
# project onto the principal axes (the rows of components_).
X_reduced = (X_train - pca.mean_).dot(pca.components_.T)

In [11]:
# PCA transform (data reduction) delegated to the fitted estimator.
X_reduced_1 = pca.transform(X=X_train)

In [14]:
# Inverse transform by hand: map the reduced coordinates back through the
# principal axes and add the fitted mean back.
X_restored = X_reduced.dot(pca.components_) + pca.mean_

In [15]:
# Inverse transform delegated to the fitted estimator.
X_restored_1 = pca.inverse_transform(X=X_reduced)

In [34]:
# Loadings of each original feature on the principal components, obtained by
# pushing one unit vector per feature (the identity matrix) through the model.
#
# pca.transform() subtracts pca.mean_ before projecting, so feeding it the bare
# identity matrix only yields the loadings when the fitted mean happens to be
# zero. Offsetting the input by pca.mean_ cancels that centring, so for a
# non-whitened PCA the result equals components_.T however the data was scaled.
I = np.identity(df.shape[1])
coef = pca.transform(I + pca.mean_)
# Derive the column labels from the number of components actually returned
# instead of hard-coding them for one particular n_components.
pc_labels = ['PC%d' % (k + 1) for k in range(coef.shape[1])]
coef_df = pd.DataFrame(coef, columns=pc_labels, index=df.columns)
coef_df

Unnamed: 0,PC1,PC2
sepal length (cm),-0.516091,-0.403596
sepal width (cm),0.266876,-0.913401
petal length (cm),-0.584526,-0.010324
petal width (cm),-0.566354,-0.051978


In [37]:
# Component matrix of the fitted model as a table: one labelled row per
# component, one column per original feature.
coef_df = pd.DataFrame(data=pca.components_, index=['PC-1', 'PC-2'], columns=df.columns)
coef_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
PC-1,-0.516091,0.266876,-0.584526,-0.566354
PC-2,-0.403596,-0.913401,-0.010324,-0.051978


In [42]:
# Full set of principal axes (one row per component, default integer index).
# The table only has one row per feature when the PCA keeps every component,
# so fit a dedicated full-rank model here instead of relying on whichever
# `pca` object happens to be left in the kernel by another cell.
pca_full = PCA(n_components=df.shape[1]).fit(X_train)
coef_df = pd.DataFrame(pca_full.components_, columns=df.columns)
coef_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,-0.516091,0.266876,-0.584526,-0.566354
1,-0.403596,-0.913401,-0.010324,-0.051978
2,-0.699233,0.270591,0.108447,0.652758
3,-0.286067,0.145793,0.804029,-0.500448


In [71]:
### PCA reduction
# Keep all four components, then plot the first two coloured by class label.
pca = PCA(n_components=4)
X_reduced = pca.fit(X_train).transform(X_train)
plt.scatter(x=X_reduced[:, 0], y=X_reduced[:, 1], c=y_train)
plt.show()

In [56]:
# Check orthogonality: dot product of the first two principal axes.
print(pca.components_[0].dot(pca.components_[1]))

-5.55111512313e-17


In [65]:
### REDUCTION TO ONE DIMENSION
from scipy.stats import uniform
# Fit a one-component PCA and project the training set in a single step.
pca = PCA(n_components=1)
X_reduced = pca.fit_transform(X_train)
# Random offsets drawn uniformly from [0, 0.0005), used as the y coordinate
# so the one-dimensional projection can be drawn as a scatter plot.
z = uniform.rvs(loc=0, scale=0.0005, size=X_reduced.shape[0])
plt.scatter(x=X_reduced[:, 0], y=z, c=y_train)
plt.show()

In [95]:
from sklearn.decomposition import MiniBatchSparsePCA
# Sparse PCA
# MiniBatchSparsePCA is a stochastic solver: without a fixed random_state the
# learned components (and therefore the plot) change on every run.
n_components = 2
pca = MiniBatchSparsePCA(n_components=n_components, alpha=0.1, random_state=0)
pca.fit(X_train)
X_reduced = pca.transform(X_train)
# First two sparse components, coloured by class label.
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y_train)
plt.show()

In [73]:
### REDUCTION TO ONE DIMENSION
from scipy.stats import uniform
# Sparse PCA
# MiniBatchSparsePCA is a stochastic solver: fix random_state so the single
# learned component (and the plot) is the same on every run.
n_components = 1
pca = MiniBatchSparsePCA(n_components=n_components, alpha=5, random_state=0)
X_reduced = pca.fit_transform(X_train)
# Random offsets drawn uniformly from [0, 0.0005), used as the y coordinate
# so the one-dimensional projection can be drawn as a scatter plot.
z = uniform.rvs(0, 0.0005, size=X_reduced.shape[0])
### Sparse PCA reduction
plt.scatter(X_reduced[:, 0], z, c=y_train)
plt.show()

In [125]:
# Kernel PCA
n_components = 2
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=n_components, kernel="rbf", fit_inverse_transform=True, gamma=0.1)
# fit_transform() fits the model itself; the separate kpca.fit() call that
# used to precede it only fitted the same model a second time.
X_reduced = kpca.fit_transform(X_train)
# First two kernel principal components, coloured by class label.
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y_train)
plt.show()

In [127]:
### REDUCTION TO ONE DIMENSION
from scipy.stats import uniform
# Kernel PCA with an RBF kernel, reduced to a single component.
n_components = 1
kpca = KernelPCA(kernel="rbf", n_components=n_components, fit_inverse_transform=True)
X_reduced = kpca.fit_transform(X_train)
# Random offsets drawn uniformly from [0, 0.0005), used as the y coordinate
# so the one-dimensional projection can be drawn as a scatter plot.
z = uniform.rvs(loc=0, scale=0.0005, size=X_reduced.shape[0])
plt.scatter(x=X_reduced[:, 0], y=z, c=y_train)
plt.show()

In [117]:
# Reference plot: first two columns of the training matrix, coloured by class label.
plt.scatter(x=X_train[:, 0], y=X_train[:, 1], c=y_train)
plt.show()

In [132]:
# Fast ICA
n_components = 3
from sklearn.decomposition import FastICA
# FastICA starts from a random unmixing matrix; fix random_state so the
# recovered components (and the plot) are reproducible.
ica = FastICA(n_components=n_components, fun='cube', random_state=0)
# fit_transform() fits the model itself; the separate ica.fit() call that
# used to precede it only produced a fit that was immediately discarded.
X_reduced = ica.fit_transform(X_train)
# Three components are extracted; only the first two are plotted.
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y_train)
plt.show()

In [129]:
# Display the (rows, columns) shape of the most recent reduced matrix.
np.shape(X_reduced)

(105, 3)