In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA


In [None]:
# Function that will be used later to draw vectors on the graph (run this cell now, it won't have any effect until later.)
def draw_vector(v0, v1, ax=None):
    """Draw an arrow from point ``v0`` to point ``v1``.

    Parameters
    ----------
    v0 : pair of coordinates
        Tail of the arrow (where the arrow starts).
    v1 : pair of coordinates
        Head of the arrow (where the arrow points to).
    ax : axes object, optional
        Axes to draw on. When omitted (``None``), the current axes returned
        by ``plt.gca()`` are used.
    """
    # Compare against None explicitly so that a caller-supplied axes object is
    # never swapped for the current axes just because it evaluates as falsy.
    if ax is None:
        ax = plt.gca()
    # Plain '->' arrow; shrinkA/shrinkB of 0 are passed so neither end is
    # shortened away from the given points.
    arrowprops = dict(arrowstyle='->',
                      linewidth=2,
                      shrinkA=0, shrinkB=0)
    # Empty label text: only the arrow is drawn. annotate() takes the
    # annotated point (v1) first and the text position (v0) second.
    ax.annotate('', v1, v0, arrowprops=arrowprops)

In [None]:
# Seeded generator so the synthetic data is the same on every run.
rng = np.random.RandomState(1)
# 200 two-dimensional samples: standard-normal draws (2 x 200) mixed by a
# random 2 x 2 matrix, then transposed so each row of X is one sample.
X = rng.rand(2, 2).dot(rng.randn(2, 200)).T

# PCA reference: http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
# Keep both components (eigenvectors); whiten=True is the scikit-learn option
# that rescales the transformed outputs to unit variance per component.
pca = PCA(n_components=2, whiten=True)
# With the parameters chosen above, fit the model to the data.
pca.fit(X)

In [None]:
# Fraction of the total variance captured by each principal component
# (eigenvector), first as the raw array and then as per-component percentages.
print('Explained variance ratio (first two components): %s' % str(pca.explained_variance_ratio_))
print("")
print("In other words... we can see that:")
# The ratios are fractions, so multiply by 100 to report a percentage.
for ordinal, ratio in zip(('first', 'second'), pca.explained_variance_ratio_):
    print('Variance explained by the {} component: {:2.2f}%'.format(ordinal, ratio * 100))

In [None]:
# One 8x6 panel for the raw data, with the same margins used elsewhere in the notebook.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)

# Question for the reader before the principal components are revealed.
print("Plot of what the data looks like. Can you estimate the first and second eigenvectors (principal components)?")

# X.T unpacks into the first and second column of X (the x and y coordinates).
ax.scatter(*X.T, alpha=0.2)
# Equal scaling on both axes so directions in the data are not distorted.
ax.axis('equal')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Plot of Original Data')

# Save a copy of the figure next to the notebook.
fig.savefig('Plot_Orig_Data.png')

# *Stop here!!!*

### If this (PCA_Example - [2] Eigenvectors Shown) is the first file you ran, please *STOP HERE* and follow the instructions below. However, if you already ran the first file (PCA_Example - [1] ...), you can skip this cell and go on to the next one! Thank you.
## -----------

Do not go any further or run any other files. Before showing the final result of the principal components...

Look at the plot created of the data (generated by the cell immediately above this one). Can you estimate what the first and second eigenvectors (principal components) will be? Where could they be placed? Discuss with a partner.

You may very roughly sketch the plot on paper and draw where you feel the first and second eigenvectors would be. If you have any questions or are unsure what to do, please raise your hand and ask the professor.


## *You may now continue...*

You may now run the last cell! Observe the output. Discuss with a partner. 


In [None]:
# Two panels side by side: ax[0] shows the data in its original coordinates,
# ax[1] shows the same data after the PCA transform.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)

# --- Left panel: original data plus the two eigenvectors (principal components) ---
ax[0].scatter(X[:, 0], X[:, 1], alpha=0.2)
for variance, direction in zip(pca.explained_variance_, pca.components_):
    # Scale each component direction by 3 * sqrt(its explained variance) and
    # draw it as an arrow starting at the data mean.
    arrow = direction * 3 * np.sqrt(variance)
    draw_vector(pca.mean_, pca.mean_ + arrow, ax=ax[0])
ax[0].axis('equal')
ax[0].set(xlabel='x', ylabel='y', title='Original Data with first and second eigenvectors (principal components) shown')

# --- Right panel: data expressed in the coordinates of the two principal components ---
X_pca = pca.transform(X)  # project X onto the two fitted eigenvectors
ax[1].scatter(X_pca[:, 0], X_pca[:, 1], alpha=0.2)
# In the transformed space the components lie along the plot axes, so the
# arrows run from the origin along component 2 and then component 1.
for tip in ([0, 3], [3, 0]):
    draw_vector([0, 0], tip, ax=ax[1])
ax[1].axis('equal')
ax[1].set(xlabel='component 1', ylabel='component 2',
          title='Transformed data with principal components shown',
          xlim=(-5, 5), ylim=(-3, 3.1))

# Save a copy of the two-panel figure next to the notebook.
fig.savefig('PCA-rotation.png')

Were you successful in guessing where the eigenvectors would be?