#  Dimensionality Reduction

#### Dimensionality Reduction via
      Principal Component Analysis (PCA)
      Kernel Principal Component Analysis (KPCA)


In [None]:
 #Added version check for recent scikit-learn 0.18 checks
from distutils.version import LooseVersion as Version
from sklearn import __version__ as sklearn_version

#### The main goal is to project the data onto the most relevant directions.



The eigendecomposition of the covariance matrix allows us to find the directions along which the data has the largest spread.

     the eigenvectors give these directions
     the eigenvalues give the spread (variance) of the data along each direction

In [None]:
import numpy as np
import pandas as pd
import math
# graphics and maths
#Numerical python functions written for compatibility with MATLAB commands with the same names (matplotlib.mlab).
%matplotlib inline 
import matplotlib.pyplot as plt 
import matplotlib.gridspec as gridspec 
import matplotlib.mlab as mlab


### Example 1: Half-moon shapes

In [None]:
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons

# Toy data set for Example 1: 100 half-moon samples, seeded so the figure is reproducible.
Xtoy, ytoy = make_moons(n_samples=100, random_state=123)

# One scatter call per class so that each class gets its own colour and marker.
for label, colour, symbol in ((0, 'red', '^'), (1, 'blue', 'o')):
    members = ytoy == label
    plt.scatter(Xtoy[members, 0], Xtoy[members, 1], color=colour, marker=symbol, alpha=0.5)

plt.tight_layout()
# plt.savefig('./figures/half_moon_1.png', dpi=300)
plt.show()

## PCA 

In [1]:
from sklearn.decomposition import PCA


# Linear PCA on the half-moon data, keeping both principal components.
pca = PCA(n_components=2)
X_spca = pca.fit_transform(Xtoy)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(7, 3))

# Left panel: the data expressed in the (PC1, PC2) coordinate system.
ax[0].scatter(X_spca[ytoy == 0, 0], X_spca[ytoy == 0, 1],
              color='red', marker='^', alpha=0.5)
ax[0].scatter(X_spca[ytoy == 1, 0], X_spca[ytoy == 1, 1],
              color='blue', marker='o', alpha=0.5)

# Right panel: projection onto PC1 only. The two classes are drawn at
# y = +0.02 and y = -0.02 so that overlapping points remain distinguishable.
inx = np.sum(ytoy == 0)
print(inx)
ax[1].scatter(X_spca[ytoy == 0, 0], np.zeros((inx, 1)) + 0.02,
              color='red', marker='^', alpha=0.5)
# The y-vector must have as many entries as there are class-1 samples;
# counting class 0 here only worked because the classes are balanced.
inx = np.sum(ytoy == 1)
print(inx)
ax[1].scatter(X_spca[ytoy == 1, 0], np.zeros((inx, 1)) - 0.02,
              color='blue', marker='o', alpha=0.5)

ax[0].set_xlabel('PC1')
ax[0].set_ylabel('PC2')
ax[1].set_ylim([-1, 1])
ax[1].set_yticks([])
ax[1].set_xlabel('PC1')

plt.tight_layout()
# plt.savefig('./figures/half_moon_2.png', dpi=300)
plt.show()

ModuleNotFoundError: No module named 'sklearn'

### Questions
1. Interpret the code. What is represented in the figures?
2. Reduce the dimension to one. With this projection, is it possible to discriminate between the blue and red examples?
3. What are the directions onto which the data are projected?

In [None]:
# Optional scratch checks (uncomment to compare raw vs. projected samples
# and to round-trip the projection back to the input space):
#print(Xtoy[0:2,:])
#print(X_spca[0:2,:])
#print(X_spca.shape)
#aa=pca.inverse_transform(X_spca[0:2,:])
#print(aa)

# The eigenvectors (one principal axis per row of `components_`)
principal_axes = pca.components_
print('eigenvectors\n', principal_axes)

# Singular values associated with the selected components
singular_vals = pca.singular_values_
print('singular_ values\n', singular_vals)

## Kernel PCA

In [None]:
from sklearn.decomposition import KernelPCA


# Kernel PCA with an RBF kernel (gamma=15) on the half-moon data.
# NOTE: this overwrites `X_spca` from the linear-PCA cell above.
kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
X_spca = kpca.fit_transform(Xtoy)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(7, 3))

# Left panel: the data in the space of the first two kernel principal components.
ax[0].scatter(X_spca[ytoy == 0, 0], X_spca[ytoy == 0, 1],
              color='red', marker='^', alpha=0.5)
ax[0].scatter(X_spca[ytoy == 1, 0], X_spca[ytoy == 1, 1],
              color='blue', marker='o', alpha=0.5)

# Right panel: projection onto the first component only (all points at y = 0).
inx = np.sum(ytoy == 0)
print(inx)
ax[1].scatter(X_spca[ytoy == 0, 0], np.zeros((inx, 1)) ,
              color='red', marker='^', alpha=0.5)
# Size the y-vector from the class-1 count; using the class-0 count here
# only worked because the two classes happen to be balanced.
inx = np.sum(ytoy == 1)
print(inx)
ax[1].scatter(X_spca[ytoy == 1, 0], np.zeros((inx, 1)),
              color='blue', marker='o', alpha=0.5)

ax[0].set_xlabel('PC1')
ax[0].set_ylabel('PC2')
ax[1].set_ylim([-1, 1])
ax[1].set_yticks([])
ax[1].set_xlabel('PC1')

plt.tight_layout()
# plt.savefig('./figures/circles_2.png', dpi=300)
plt.show()

## Question

1.  Interpret the results.
2.  Which kernel was used?
3.  Reduce the dimension to one. With this projection, is it possible to discriminate between the blue and red examples?

### Example 2: Separating concentric circles

Try PCA and Kernel PCA.

In [None]:
from sklearn.datasets import make_circles

# Toy data set for Example 2: 1000 noisy samples on two concentric circles
# (inner/outer radius ratio 0.2), seeded for reproducibility.
Xtoy1, ytoy1 = make_circles(n_samples=1000, random_state=123, noise=0.1, factor=0.2)

# Plot the circles data generated above (Xtoy1 / ytoy1); the previous version
# plotted Xtoy / ytoy and therefore redrew the half-moons from Example 1.
plt.scatter(Xtoy1[ytoy1 == 0, 0], Xtoy1[ytoy1 == 0, 1], color='red', marker='^', alpha=0.5)
plt.scatter(Xtoy1[ytoy1 == 1, 0], Xtoy1[ytoy1 == 1, 1], color='blue', marker='o', alpha=0.5)

plt.tight_layout()
# plt.savefig('./figures/circles_1.png', dpi=300)
plt.show()

### Example 3:
1. Reading a data set.
2. Two processing blocks before the classifier.

In [None]:
# Load the data set from a CSV file in the working directory;
# header=0 takes the column names from the first row of the file.
df = pd.read_csv("wdbcBBB.csv",header = 0)
# Preview the first rows (rendered as the cell output).
df.head()

In [None]:
# Remove the identifier column. Rebinding `df` (instead of inplace=True) together
# with errors='ignore' keeps this cell safe to re-run: a second execution no
# longer raises KeyError because 'ID' is already gone.
df = df.drop('ID', axis=1, errors='ignore')
print(df.diagnosis.unique())

# Encode the class labels as integers: 'M' -> 1, 'B' -> 0.
# Skip the mapping when the column is already encoded; mapping the integer
# codes a second time would silently replace every label with NaN.
diagnosis_codes = {'M': 1, 'B': 0}
if not df['diagnosis'].isin(list(diagnosis_codes.values())).all():
    df['diagnosis'] = df['diagnosis'].map(diagnosis_codes)


#### Prepare the data for a classification task

In [None]:
# Import train_test_split from its current location, falling back to the
# pre-0.18 module. Probing the import directly avoids relying on
# distutils' LooseVersion, which is unavailable on Python >= 3.12.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split

# Target: first column of the frame (expected to be 'diagnosis' once 'ID' has
# been dropped). Selecting with a list keeps it 2-D, shape (n_samples, 1),
# exactly as the removed DataFrame.as_matrix(columns=[...]) call returned it.
y = df[[df.columns[0]]].to_numpy()

# features: every remaining column
X = df[df.columns[1:]].to_numpy()
print(X.shape)

# Hold out 30% of the samples for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.3, random_state=0)

### Transform the data: zero mean and unit variance


# Question:
Interpret the procedure to standardize the data.


**Comment**
 The correct way is to re-use the parameters estimated on the training set whenever we apply any kind of transformation -- the test set should stand for "new, unseen" data.

In [None]:
from sklearn.preprocessing import StandardScaler

# Estimate the per-feature mean and standard deviation on the training set only...
sc = StandardScaler().fit(X_train)
# ...then apply those same parameters to both splits, so the test set is
# treated as new, unseen data.
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

## PCA

In [None]:
from sklearn.decomposition import PCA

# No n_components given, so no reduction is requested here; this fit is used
# to inspect how the variance is distributed over the components.
pca = PCA()
# Fit on the standardized training data and project it in one step.
X_train_pca = pca.fit_transform(X_train_std)

# Fraction of the total variance explained by each component (cell output).
pca.explained_variance_ratio_

In [None]:
# Derive the x positions from the fitted model instead of hard-coding 30, so
# the plot still works when the number of components differs (other data set,
# or `pca` refit with fewer components before this cell is re-run).
explained = pca.explained_variance_ratio_
component_idx = range(len(explained))

# Bars: variance ratio of each component; step line: cumulative ratio.
plt.bar(component_idx, explained, alpha=0.5, align='center')
plt.step(component_idx, np.cumsum(explained), where='mid')
plt.ylabel('Explained variance ratio')
plt.xlabel('Principal components')
plt.show()

# Dimension reduction before classification


In [None]:
# Only two components for illustration
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_std)
print(pca)

### Question:
1. With two components, how much of the variance is explained (kept) by the two projections?

In [None]:
# Project the standardized test data with the PCA fitted on the training set
# (transform only -- the model is not refit on the test data).
X_test_pca = pca.transform(X_test_std)

### Question:
Explain data manipulations in train and test sets.

In [None]:
plt.rcParams.update({'font.size': 10})
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10,5))
axes = axes.ravel()


def _scatter_by_diagnosis(ax, scores, labels, title):
    """Scatter the first two PCA scores on `ax`, one series per class label.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    scores : 2-D array whose columns 0 and 1 are plotted as PC 1 / PC 2.
    labels : array of class codes (1 drawn as 'M', 0 drawn as 'B'); it is
        flattened first, so a column vector of shape (n, 1) is accepted.
    title : title of the panel.
    """
    flat_labels = np.ravel(labels)
    # Same drawing order as before (M first, then B) so the legend is unchanged.
    for code, marker, colour, name in ((1, 's', 'r', 'M'), (0, 'o', 'g', 'B')):
        mask = flat_labels == code
        ax.scatter(scores[mask, 0], scores[mask, 1],
                   marker=marker, color=colour, label=name)
    ax.set_title(title)
    ax.set_xlabel('PC 1')
    ax.set_ylabel('PC 2')
    ax.legend()
    ax.grid()


# Both panels share one code path instead of two copy-pasted sections.
_scatter_by_diagnosis(axes[0], X_train_pca, y_train, "Training set")
_scatter_by_diagnosis(axes[1], X_test_pca, y_test,
                      "Test set: data not used to adapt PCA")
plt.show()

### Questions:
A classifier with the principal components of the data.

1. How many attributes are at the input of the classifier?

2. How can the attributes be related to the raw data?

3. Try to use a Kernel PCA


<br>
<br>