In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets

In [2]:
from sklearn import datasets
# Load the iris dataset bundled with scikit-learn and unpack its `data`
# (feature matrix) and `target` (class codes) arrays
iris = datasets.load_iris()
X, y = iris.data, iris.target

### Exploratory data analysis

In [3]:
# Class names and feature names as stored in the dataset bunch
labels = iris['target_names']
features = iris['feature_names']
# class name -> integer code present in iris.target
label_dict = {name: code for name, code in zip(labels, np.unique(iris.target))}
# column index (0..3) -> feature name
feature_dict = {col: title for col, title in zip(range(4), features)}

In [4]:
# data exploration: one histogram panel per feature, classes overlaid.
# The 'seaborn-whitegrid' style was renamed to 'seaborn-v0_8-whitegrid' in
# newer matplotlib releases, so use whichever name this installation provides
# and fall back to the default style instead of raising.
grid_style = next(
    (name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
     if name in plt.style.available),
    'default',
)
with plt.style.context(grid_style):
    plt.figure(figsize=(8, 6))
    for i in range(4):
        plt.subplot(2, 2, i+1)
        for label in labels:
            # boolean mask keeps only the rows belonging to the current class
            plt.hist(X[y==label_dict[label], i],
                     label=label,
                     bins=10,
                     alpha=0.5)
        plt.xlabel(feature_dict[i])
    # called once after the loop, so the legend lands on the last panel only
    plt.legend(loc='upper right', fancybox=True, fontsize=8)

    plt.tight_layout()
    plt.show()

In [5]:
# data exploration, hand-drawn (xkcd) look: one panel per feature with the
# per-class histograms overlaid
with plt.xkcd():
    plt.figure(figsize=(8, 6))
    for panel, (col, axis_title) in enumerate(feature_dict.items(), start=1):
        plt.subplot(2, 2, panel)
        for species, code in label_dict.items():
            # select the rows of this class, then the current feature column
            plt.hist(X[y == code, col], bins=10, alpha=0.5, label=species)
        plt.xlabel(axis_title)
    # issued after the loop, so only the last panel carries the legend
    plt.legend(loc='upper right', fancybox=True, fontsize=8)

    plt.tight_layout()
    plt.show()

### Types of learning:
 1. Supervised
 2. Unsupervised
 3. Semi-supervised

# Unsupervised learning (PCA and Kernel PCA)

In [6]:
# PCA (unsupervised dimensionality reduction)
# (1) Split data: hold out 30% of the samples as a test set.
# `sklearn.cross_validation` was removed from scikit-learn; `train_test_split`
# is provided by `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3,
    random_state=0)  # fixed seed so every run produces the same split

In [7]:
# (2) Scale data: the scaler is fitted on the training split only and the
# same transformation is then applied to both splits
from sklearn.preprocessing import StandardScaler
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [8]:
# (3) Fit model: PCA on the scaled training data, then project that same data
from sklearn.decomposition import PCA
n_components = 4
pca = PCA(n_components=n_components).fit(X_train)
X_reduced = pca.transform(X_train)

In [482]:
# One row per principal component, one column per original feature.
# `df` was never defined in this notebook, so the column labels are taken
# straight from the dataset instead.
pd.DataFrame(pca.components_, columns=iris.feature_names)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,0.52433,-0.271561,0.578087,0.563159
1,-0.367225,-0.926271,-0.022551,-0.081602
2,0.716814,-0.224034,-0.123511,-0.648638
3,0.276407,-0.134481,-0.806258,0.505432


In [583]:
# plot eigenvectors: the first two principal axes, restricted to the first two
# feature coordinates, drawn as arrows from the origin over the training points
principal_2d = pca.components_[:2, :2]
# column 0 holds the x-components of both arrows, column 1 the y-components
arrow_dx, arrow_dy = principal_2d[:, 0], principal_2d[:, 1]
tail_x, tail_y = [0, 0], [0, 0]
ax = plt.gca()
ax.quiver(tail_x, tail_y, arrow_dx, arrow_dy,
          angles='xy', scale_units='xy', scale=1)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train)
ax.set(xlim=[-4, 4], ylim=[-4, 4])
plt.show()

In [11]:
# PCA 3D PLOT WITH EIGENVECTORS
# Axes3D is imported for its side effect of enabling projection='3d'
# (required by older matplotlib releases).
from mpl_toolkits.mplot3d import Axes3D
n_components = 4
from sklearn.decomposition import PCA
pca = PCA(n_components=n_components)
# fit_transform fits the model and projects the data in one step; the extra
# fit() call that used to precede it repeated the same work.
X_reduced = pca.fit_transform(X_train)
# first three principal axes, restricted to the first three feature coordinates
eig_vec = pca.components_[:3,:3]
# all arrows start at the origin
X0 = [0,0,0]
Y0 = [0,0,0]
Z0 = [0,0,0]
# transpose: U/V/W hold the x/y/z components of the three arrows
U,V,W = zip(*eig_vec)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.quiver(X0,Y0,Z0,U,V,W,length=2)
# NOTE(review): the arrows are expressed in the original feature axes while
# the points below are in PCA coordinates - confirm this mix is intended.
ax.scatter(X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], c=y_train, s = 30)
ax.set_title('3D Vector Field')             # title
ax.view_init(elev=18, azim=30)              # camera elevation and angle
ax.dist=8                                   # camera distance
plt.show()

In [124]:
### PCA reduction: keep the two leading components and plot the projected
### training data coloured by class
pca = PCA(n_components=2).fit(X_train)
X_reduced = pca.transform(X_train)
first_pc, second_pc = X_reduced[:, 0], X_reduced[:, 1]
plt.scatter(first_pc, second_pc, c=y_train)
plt.show()

In [107]:
# check orthogonality: the dot product of two principal axes should be ~0
first_axis, second_axis = pca.components_[0], pca.components_[1]
print(first_axis.dot(second_axis))

3.33066907388e-16


In [169]:
### REDUCTION TO ONE DIMENSION (PCA)
from scipy.stats import uniform
pca = PCA(n_components=1)
X_reduced = pca.fit_transform(X_train)
# Small random vertical offset so points with similar 1-D coordinates do not
# hide each other; seeded so the figure is identical on every run.
z = uniform.rvs(0,0.0005,size=X_reduced.shape[0],random_state=0)
# x: the single principal component, y: jitter only (carries no information)
plt.scatter(X_reduced[:, 0], z, c=y_train)
plt.show()

In [512]:
# Kernel PCA with an RBF kernel, two components
n_components = 2
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=n_components,kernel="rbf",fit_inverse_transform=True, gamma = 0.05)
# fit_transform already fits the model; the separate fit() on the same data
# that used to precede it only repeated the computation.
X_reduced = kpca.fit_transform(X_train)
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y_train)
plt.show()

In [579]:
# Kernel PCA 3D PLOT
# Axes3D is imported for its side effect of enabling projection='3d'
# (required by older matplotlib releases).
from mpl_toolkits.mplot3d import Axes3D
n_components = 4
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=n_components,kernel="rbf",fit_inverse_transform=True, gamma = 1.0)
# fit_transform already fits the model; the separate fit() on the same data
# that used to precede it only repeated the computation.
X_reduced = kpca.fit_transform(X_train)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# axes show components 1, 2 and 0 (in that order); the fourth is not plotted
ax.scatter(X_reduced[:, 1], X_reduced[:, 2], X_reduced[:,0], c=y_train, s = 30)
plt.show()

In [580]:
# INITIAL COORDINATE LINES LATTICE
# Dedicated names for the grid ticks: assigning to `y` here would overwrite
# the iris target vector loaded at the top of the notebook and break any
# earlier cell that is re-run afterwards.
grid_x = np.arange(-5,5.1,0.5)
grid_y = np.arange(-5,5.1,0.5)
# Ix[i, j] / Iy[i, j] are the x / y coordinates of lattice node (row i, column j)
Ix, Iy = np.meshgrid(grid_x, grid_y)
for i in range(Ix.shape[0]):
    plt.plot(Ix[i],Iy[i],color='b')          # horizontal line: row i
    plt.plot(Ix[:, i],Iy[:, i],color='r')    # vertical line: column i
plt.show()

In [581]:
# LATTICE AFTER KERNEL TRANSFORMATION
from sklearn.base import clone
# Dedicated names for the grid ticks: assigning to `y` here would overwrite
# the iris target vector loaded at the top of the notebook.
grid_x = np.arange(-5,5.1,0.5)
grid_y = np.arange(-5,5.1,0.5)
mesh_x, mesh_y = np.meshgrid(grid_x, grid_y)
# one lattice node per row: (x, y)
nodes = np.column_stack([mesh_x.ravel(), mesh_y.ravel()])
# Fit an unfitted copy with the same hyper-parameters on the lattice, so the
# `kpca` model fitted on the iris data is not overwritten.
lattice_kpca = clone(kpca)
# keep only the first two kernel components for the 2-D drawing
embedded = lattice_kpca.fit_transform(nodes)[:, :2]
Ix = embedded[:, 0].reshape(mesh_x.shape)
Iy = embedded[:, 1].reshape(mesh_x.shape)
# Rows of the grid are the horizontal lines, columns the vertical ones. The
# vertical lines are drawn from the columns of the embedded grid rather than
# by swapping the coordinates of the horizontal ones, which would only mirror
# the blue curves.
for i in range(Ix.shape[0]):
    plt.plot(Ix[i],Iy[i],color='b')
for j in range(Ix.shape[1]):
    plt.plot(Ix[:, j],Iy[:, j],color='r')
plt.show()

In [534]:
# LATTICE AFTER KERNEL TRANSFORMATION (COMIC FONT)
from sklearn.base import clone
# Dedicated names for the grid ticks: assigning to `y` here would overwrite
# the iris target vector loaded at the top of the notebook.
grid_x = np.arange(-5,5.1,0.5)
grid_y = np.arange(-5,5.1,0.5)
mesh_x, mesh_y = np.meshgrid(grid_x, grid_y)
# one lattice node per row: (x, y)
nodes = np.column_stack([mesh_x.ravel(), mesh_y.ravel()])
# Fit an unfitted copy with the same hyper-parameters on the lattice, so the
# `kpca` model fitted on the iris data is not overwritten.
lattice_kpca = clone(kpca)
# keep only the first two kernel components for the 2-D drawing
embedded = lattice_kpca.fit_transform(nodes)[:, :2]
Ix = embedded[:, 0].reshape(mesh_x.shape)
Iy = embedded[:, 1].reshape(mesh_x.shape)
with plt.xkcd():
    # Rows of the grid are the horizontal lines, columns the vertical ones.
    # The vertical lines are drawn from the columns of the embedded grid
    # rather than by swapping the coordinates of the horizontal ones, which
    # would only mirror the blue curves.
    for i in range(Ix.shape[0]):
        plt.plot(Ix[i],Iy[i],color='b')
    for j in range(Ix.shape[1]):
        plt.plot(Ix[:, j],Iy[:, j],color='r')
    plt.show()

In [498]:
### REDUCTION TO ONE DIMENSION (Kernel PCA)
from scipy.stats import uniform
# Kernel PCA with an RBF kernel, single component
n_components = 1
kpca = KernelPCA(n_components=n_components,kernel="rbf",fit_inverse_transform=True)
X_reduced = kpca.fit_transform(X_train)
# Small random vertical offset so points with similar 1-D coordinates do not
# hide each other; seeded so the figure is identical on every run.
z = uniform.rvs(0,0.0005,size=X_reduced.shape[0],random_state=0)
with plt.xkcd():
    # x: the single kernel component, y: jitter only (carries no information)
    plt.scatter(X_reduced[:, 0], z, c=y_train)
    plt.show()