## Import necessary modules into our project.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sb

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


## Read in the Iris dataset and view the data.

In [None]:
# Load the Iris measurements from a CSV file in the current working directory
# into a DataFrame; the bare `df` below renders it as the cell output.
df = pd.read_csv(filepath_or_buffer='iris.csv')

df

## Remove the first column ("Id") since it just contains running numbers.

In [None]:
# Drop the 'Id' column in place; it is only a running row number and carries
# no information about the flowers.
# errors='ignore' keeps this cell safe to re-run: without it a second
# execution raises KeyError because 'Id' has already been removed.
df.drop(columns=['Id'], inplace=True, errors='ignore')

df

## View unscaled features by Species.

In [None]:
# Kernel density estimate of every raw (unscaled) feature, one subplot per
# feature, with a separate curve for each species.
sb.set(style='darkgrid')
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12,8))

# ax.flat walks the 2x2 grid row by row: [0,0], [0,1], [1,0], [1,1].
raw_features = ('SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm')
for panel, feature in zip(ax.flat, raw_features):
    sb.kdeplot(data=df, x=feature, hue='Species', ax=panel)

### Extract the "SepalLengthCm", "SepalWidthCm", "PetalLengthCm" and "PetalWidthCm" columns as our features.

In [None]:
# Feature matrix: every column from 'SepalLengthCm' through 'PetalWidthCm'
# (label-based slice, both ends inclusive) as a NumPy array.
X = df.loc[:, 'SepalLengthCm':'PetalWidthCm'].to_numpy()
X

### Extract and view the "Species" column. The values are our labels.

In [None]:
# Label vector: the 'Species' column as a NumPy array, one entry per row of df.
y = df.loc[:, 'Species'].to_numpy()
y

## Perform Standardization before applying PCA.

In [None]:
# Standardize every feature column to zero mean and unit variance so that no
# single feature dominates the principal components because of its scale.
scaler = StandardScaler()
scaler.fit(X)  # learn the per-column mean and standard deviation

X_scaled = scaler.transform(X)  # apply the learned scaling to the same data
X_scaled

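## Display plots after data scaling. Each feature is now standardized to zero mean and unit variance (the shape of each distribution is unchanged; scaling does not make the data normally distributed).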

In [None]:
# Plot the standardized features: one subplot per feature, one KDE curve per
# species, so the result can be compared with the unscaled plots.
sb.set(style='darkgrid')
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12,8))

# Assumes the columns of X_scaled are in this order, i.e. the order produced
# by the 'SepalLengthCm':'PetalWidthCm' slice used to build X.
feature_names = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']

# Select rows by their species label instead of hard-coded 0:50 / 50:100 /
# 100:150 slices, so the plot stays correct even if the rows are not sorted
# by species or the classes are not exactly 50 samples each.
# pd.unique keeps the order of first appearance.
species_names = pd.unique(y)

# ax.flat walks the 2x2 grid row by row, matching the column index k.
for k, (axis, feature) in enumerate(zip(ax.flat, feature_names)):
    for species in species_names:
        sb.kdeplot(x=X_scaled[y == species, k], label=species, ax=axis)
    axis.set_xlabel(feature)
    axis.legend(title='Species')  # identify which curve belongs to which species


In [None]:
# Apply PCA to the standardized features, keeping the first two principal
# components.
pca = PCA(n_components=2)

# fit_transform fits the PCA model on X_scaled and returns the data projected
# onto the two components (one row per sample, two columns).
pc = pca.fit_transform(X_scaled)
pc

### Determine the amount of variance captured by each of our principal components.

In [None]:
# Fraction of the total variance explained by each fitted principal component
# (one value per component, in component order).
pca.explained_variance_ratio_


### Determine the total variance captured by the two principal components.

In [None]:
# Add up the per-component ratios to get the share of the total variance
# retained by the two principal components together.
np.sum(pca.explained_variance_ratio_)