In [32]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import plotly.plotly as py
from plotly.graph_objs import *
import plotly.tools as tls
from sklearn.preprocessing import StandardScaler
%matplotlib notebook

In [33]:
def pca(input_csv, feature_cols=slice(1, 9), label_col=9, class_names=None):
    """Run a two-component PCA on a CSV file and plot the results with plotly.

    The function standardizes the selected feature columns, eigendecomposes
    their covariance matrix, prints the intermediate results, and sends two
    figures to plotly via ``py.plot``: the explained variance per principal
    component, and a scatter of the samples projected onto the first two
    principal components (one trace per class name).

    Parameters
    ----------
    input_csv : str
        Path of the CSV file passed to ``pandas.read_csv``.
    feature_cols : slice or list of int, optional
        Positional selector of the feature columns. Defaults to columns
        1..8 (``slice(1, 9)``).
    label_col : int, optional
        Positional index of the class-label column. Defaults to 9.
    class_names : iterable of str, optional
        Label values to draw, one scatter trace each, in this order.
        Defaults to the nine activity names this notebook was written for.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If fewer than two feature columns are selected.
    AssertionError
        If an eigenvector returned by numpy does not have unit length.
    """
    if class_names is None:
        # Default labels; strings (including the trailing space in the first
        # one) must match the label column values exactly.
        class_names = (
            'don px components ',
            'walk on treadmill (2)',
            'walk on treadmill (1)',
            'sit rest with px simulator doffed',
            'sit rest with px simulator donned (3)',
            'sit rest bare limb',
            'doff px components',
            'sit rest with px simulator donned (2)',
            'sit rest with px simulator donned (1)',
        )

    # Loading the Dataset
    # .iloc is purely positional; the previously used .ix indexer has been
    # removed from pandas.
    df = pd.read_csv(input_csv)
    X = df.iloc[:, feature_cols].values
    y = df.iloc[:, label_col].values

    n_features = X.shape[1]
    if n_features < 2:
        raise ValueError(
            'pca needs at least 2 feature columns, got %d' % n_features)

    # Standardizing
    X_std = StandardScaler().fit_transform(X)

    # Eigendecomposition - Computing Eigenvectors and Eigenvalues
    # np.cov uses the same (n - 1) normalisation as the explicit formula
    # (X - mean).T.dot(X - mean) / (n - 1), so it is computed only once.
    cov_mat = np.cov(X_std.T)
    print('Covariance matrix \n%s' %cov_mat)

    # NOTE: cov_mat is symmetric, so np.linalg.eigh would apply as well;
    # eig is kept so that the printed order and signs match earlier runs.
    eig_vals, eig_vecs = np.linalg.eig(cov_mat)

    print('Eigenvectors \n%s' %eig_vecs)
    print('\nEigenvalues \n%s' %eig_vals)

    # Selecting Principal Components
    # Eigenvectors are the COLUMNS of eig_vecs, so norms are taken along axis 0.
    np.testing.assert_array_almost_equal(
        np.linalg.norm(eig_vecs, axis=0), np.ones(n_features))
    print('Everything ok!')

    # Rank components by eigenvalue magnitude, high to low. Sorting indices
    # (rather than (value, vector) tuples) never compares the vectors
    # themselves, so tied eigenvalues cannot raise.
    eig_magnitudes = np.abs(eig_vals)
    order = np.argsort(eig_magnitudes)[::-1]
    sorted_vals = eig_magnitudes[order]
    sorted_vecs = eig_vecs[:, order]

    # Visually confirm that the list is correctly sorted by decreasing eigenvalues
    print('Eigenvalues in descending order:')
    for value in sorted_vals:
        print(value)

    # Explained Variance (in percent), using the same values as the ranking
    var_exp = (sorted_vals / sorted_vals.sum()) * 100
    cum_var_exp = np.cumsum(var_exp)
    pc_labels = ['PC %s' %i for i in range(1, n_features + 1)]

    trace1 = Bar(
            x=pc_labels,
            y=var_exp,
            showlegend=False)

    trace2 = Scatter(
            x=pc_labels,
            y=cum_var_exp,
            name='cumulative explained variance')

    # Plain lists/dicts are used instead of the Data/YAxis/XAxis/Marker/Line
    # wrapper classes, which plotly has deprecated.
    layout = Layout(
            yaxis=dict(title='Explained variance in percent'),
            title='Explained variance by different principal components')

    fig = Figure(data=[trace1, trace2], layout=layout)
    py.plot(fig)

    # Projection Onto the New Feature Space
    # The projection matrix holds the two leading eigenvectors as columns.
    matrix_w = sorted_vecs[:, :2]
    Y = X_std.dot(matrix_w)

    traces = []
    for name in class_names:
        mask = (y == name)
        traces.append(Scatter(
            x=Y[mask, 0],
            y=Y[mask, 1],
            mode='markers',
            name=name,
            marker=dict(
                size=12,
                line=dict(
                    color='rgba(217, 217, 217, 0.14)',
                    width=0.5),
                opacity=0.8)))

    # Axis titles belong directly on the layout for a 2-D scatter; `scene`
    # only affects 3-D plots, so titles placed there were never shown.
    layout = Layout(showlegend=True,
                    xaxis=dict(title='PC1'),
                    yaxis=dict(title='PC2'))

    fig = Figure(data=traces, layout=layout)
    py.plot(fig)
    

In [34]:
# NOTE(review): this option configures matplotlib's inline backend, while pca()
# renders its figures through plotly (py.plot) — presumably it has no effect
# on this cell; confirm before relying on it.
%config InlineBackend.close_figures = False
# Run the PCA on ./goodfit1.csv: prints the covariance matrix, eigenvectors and
# eigenvalues, then sends the two figures to plotly.
pca('./goodfit1.csv')

Covariance matrix 
[[ 1.00011461  0.92288055  0.9099399   0.46749589  0.79953475  0.88415634
   0.88860384  0.90643156]
 [ 0.92288055  1.00011461  0.93810683  0.44739292  0.88438843  0.94931778
   0.90474184  0.9154363 ]
 [ 0.9099399   0.93810683  1.00011461  0.67331851  0.86723337  0.8890719
   0.91513549  0.91903443]
 [ 0.46749589  0.44739292  0.67331851  1.00011461  0.44507371  0.35704125
   0.54316992  0.51948415]
 [ 0.79953475  0.88438843  0.86723337  0.44507371  1.00011461  0.96104961
   0.96021737  0.95669224]
 [ 0.88415634  0.94931778  0.8890719   0.35704125  0.96104961  1.00011461
   0.95745196  0.96620253]
 [ 0.88860384  0.90474184  0.91513549  0.54316992  0.96021737  0.95745196
   1.00011461  0.99595169]
 [ 0.90643156  0.9154363   0.91903443  0.51948415  0.95669224  0.96620253
   0.99595169  1.00011461]]
Eigenvectors 
[[-0.35680576  0.06776584 -0.63190817 -0.5112935  -0.10590913 -0.4198527
  -0.07932012  0.11654933]
 [-0.36725946  0.13113332 -0.33765047  0.56206169  0.560391