# Module 11 - Dimensionality Reduction - Hands On 1 - PCA

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
iris = pd.read_csv(url, names = names)

In [3]:
iris.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,Class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
X = iris.drop('Class', axis = 1)
Y = iris['Class']

In [5]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = .7, random_state = 0)

In [6]:
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.fit_transform(X_test)

In [7]:
pca = PCA()
X_train = pca.fit_transform(X_train)
X_test = pca.fit_transform(X_test)

In [8]:
explained_variance = pca.explained_variance_ratio_
explained_variance

array([0.77006591, 0.17467955, 0.0476828 , 0.00757174])

In [9]:
def perform_pca(n):
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = .7, random_state = 0)
    pca = PCA(n_components = n)
    pca_X_train = pca.fit_transform(X_train)
    pca_X_test = pca.fit_transform(X_test)
    rfc = RandomForestClassifier(max_depth = 2, random_state = 0)
    rfc.fit(pca_X_train, Y_train)
    Y_pred = rfc.predict(pca_X_test)
    print(confusion_matrix(Y_test, Y_pred))
    print(classification_report(Y_test, Y_pred))
    print('Accuracy Score: {0}\n\n'.format(accuracy_score(Y_test, Y_pred)))

In [10]:
for x in range(1,5): perform_pca(x)

[[16  0  0]
 [ 0 12  6]
 [ 0  0 11]]
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        16
Iris-versicolor       1.00      0.67      0.80        18
 Iris-virginica       0.65      1.00      0.79        11

       accuracy                           0.87        45
      macro avg       0.88      0.89      0.86        45
   weighted avg       0.91      0.87      0.87        45

Accuracy Score: 0.8666666666666667


[[16  0  0]
 [ 0 12  6]
 [ 0  0 11]]
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        16
Iris-versicolor       1.00      0.67      0.80        18
 Iris-virginica       0.65      1.00      0.79        11

       accuracy                           0.87        45
      macro avg       0.88      0.89      0.86        45
   weighted avg       0.91      0.87      0.87        45

Accuracy Score: 0.8666666666666667


[[14  2  0]
 [ 0  3 15]
 [ 0  2  9]]
           