## PCA plus SVM Classifier

### In this example you can adjust the number of components to keep and observe the output after performing classification with an SVM

Change the number of components (eigenvectors) to keep by setting the value of the `numbComponents` variable.


In [29]:
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.svm import SVC

try:
    # scikit-learn >= 0.18: the splitting helpers live in model_selection.
    # The legacy name `cross_validation` is kept bound for existing code.
    from sklearn import model_selection as cross_validation
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the legacy module
    # (deprecated in 0.18, removed in 0.20).
    from sklearn import cross_validation
    from sklearn.cross_validation import train_test_split

# --- Data ---------------------------------------------------------------
# Load the Iris data set and hold out 40% of the samples for testing.
# random_state is fixed so the split is the same on every run.
iris = datasets.load_iris()  # Using Iris data set
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

# --- PCA ----------------------------------------------------------------
numbComponents = 1  # Adjust number of components yourself (e.g. 1, 2, ... as long as components don't exceed # features!)
pca = PCA(n_components=numbComponents)  # Selecting number of components to keep
pca.fit(X_train)  # Fit on the training split only; the test split is only transformed below

# Share of the variance captured by each kept component (eigenvector).
# Printed unconditionally so there is always output, whatever value
# numbComponents was given.
explained_ratio = pca.explained_variance_ratio_
print('Explained variance ratio: %s' % str(explained_ratio))

# The per-component breakdown needs at least two fitted components; test the
# fitted result rather than the requested value so indexing [1] is always safe.
if len(explained_ratio) > 1:
    print("\nSpecifically for the first two components...")
    print('Varience explained by the first component: {:2.2f}%'.format(explained_ratio[0]*100))
    print('Varience explained by the second component: {:2.2f}% \n'.format(explained_ratio[1]*100))

# --- Classification -----------------------------------------------------
# Project both splits onto the components learned from the training data.
X_t_train = pca.transform(X_train)  # Transform
X_t_test = pca.transform(X_test)

# NOTE(review): SVC() uses library defaults, which may differ between
# scikit-learn releases, so the score can deviate from the recorded output.
clf = SVC()  # SVM Classifier
clf.fit(X_t_train, y_train)  # Train SVM Classifier

# Mean accuracy on the held-out test split, shown as a percentage.
print('Score: {:2.3f}%'.format(clf.score(X_t_test, y_test)*100))

# Uncomment to inspect the predicted labels for the test split:
# print('Prediction Labels:', clf.predict(X_t_test))

Explained variance ratio: [ 0.9348456]
Score: 88.333%
