In [1]:
# import needed libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import plotly.plotly as py
from plotly.graph_objs import *
import plotly.tools as tls
import plotly.graph_objs as go
import os

# Store the Plotly account credentials used by the online plotting calls.
# The values are taken from the PLOTLY_USERNAME / PLOTLY_API_KEY environment
# variables so that a real API key never has to be typed into (and saved with)
# the notebook; the original placeholders are used when a variable is unset.
tls.set_credentials_file(
    username=os.environ.get('PLOTLY_USERNAME', 'enter info'),
    api_key=os.environ.get('PLOTLY_API_KEY', 'enter info'))

In [2]:
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Print the version of the plotly package imported above
print(__version__)
# Initialise plotly's notebook mode (imported from plotly.offline)
init_notebook_mode(connected=True)

2.5.1


In [3]:
# Load the data to analyze from a header-less, comma-separated file and
# transpose it in the same step.
# After the transpose the rows stand for 'Linearity', 'Products & Markets',
# 'Software Design', and 'Mechanics'.
df = pd.read_csv('example_data.csv', header=None, sep=',').transpose()
df

Unnamed: 0,0,1,2,3,4,5,6,7
0,8.0,9.0,6.0,8.0,6.0,6.0,15.0,4.0
1,2.5,6.0,2.0,8.0,2.0,2.0,8.0,9.0
2,20.0,10.0,5.0,8.0,6.0,6.0,13.0,6.5
3,6.5,5.0,4.0,3.0,6.0,5.0,8.5,6.5


In [4]:
# Another way to obtain a NumPy representation of the DataFrame:
# take the first eight columns and read their underlying array.
D = df.iloc[:, :8].values
print(D)

[[  8.    9.    6.    8.    6.    6.   15.    4. ]
 [  2.5   6.    2.    8.    2.    2.    8.    9. ]
 [ 20.   10.    5.    8.    6.    6.   13.    6.5]
 [  6.5   5.    4.    3.    6.    5.    8.5   6.5]]


In [5]:
# Center the rows of the data matrix.
# The scaler is given the transpose of D and its result is transposed back,
# so B has the same orientation as D; with_std=False means the values are
# only shifted by the mean, not rescaled.
B = (
    StandardScaler(copy=True, with_mean=True, with_std=False)
    .fit_transform(D.T)
    .T
)

In [6]:
# Show the centered data matrix B
print(B)

[[  0.25     1.25    -1.75     0.25    -1.75    -1.75     7.25    -3.75  ]
 [ -2.4375   1.0625  -2.9375   3.0625  -2.9375  -2.9375   3.0625   4.0625]
 [ 10.6875   0.6875  -4.3125  -1.3125  -3.3125  -3.3125   3.6875  -2.8125]
 [  0.9375  -0.5625  -1.5625  -2.5625   0.4375  -0.5625   2.9375   0.9375]]


In [7]:
# Standardize each row of the data matrix: subtract the row mean, then divide
# every entry by the sample standard deviation (ddof=1) of that row.
# B is rebuilt from the raw values in df rather than rescaled in place, so
# re-running this cell cannot divide by the standard deviation a second time.
# No fixed (4, 8) shape is assumed any more; the matrix keeps whatever shape
# the selected columns have.
# np.matrix is kept so that B has the same type as before.
x = np.matrix(df.iloc[:, 0:8].values)
B = (x - x.mean(1)) / x.std(1, ddof=1)
B

matrix([[ 0.07513429,  0.37567144, -0.52594002,  0.07513429, -0.52594002,
         -0.52594002,  2.17889436, -1.12701433],
        [-0.78080353,  0.34035025, -0.94096835,  0.98100956, -0.94096835,
         -0.94096835,  0.98100956,  1.30133921],
        [ 2.11662566,  0.13615721, -0.85407702, -0.25993648, -0.65603018,
         -0.65603018,  0.73029774, -0.55700675],
        [ 0.55162399, -0.33097439, -0.91937332, -1.50777224,  0.25742453,
         -0.33097439,  1.72842184,  0.55162399]])

In [8]:
# Singular value decomposition of B, the centered and scaled data matrix.
# U holds the left singular vectors as columns and S the singular values;
# the third factor returned by numpy is already transposed, so the rows of V
# (not its columns) are the right singular vectors.
U, S, V = np.linalg.svd(B)
np.round(U, 2)

array([[-0.59,  0.09, -0.36,  0.72],
       [-0.29,  0.88, -0.06, -0.37],
       [-0.52, -0.44, -0.43, -0.59],
       [-0.55, -0.13,  0.83, -0.02]])

In [9]:
#Singular Value Decomposition
# Make a list of (singular value, left singular vector) tuples, pairing S[i]
# with column i of U. Singular values are non-negative, so no abs() is needed.
eig_pairs = [(S[i], U[:, i]) for i in range(len(S))]

# Sort the (singular value, vector) tuples from high to low.
# The key restricts the comparison to the singular value: a plain tuple sort
# would fall through to comparing the vectors whenever two singular values
# are equal, and comparing arrays that way raises a ValueError.
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)

# Visually confirm that the list is correctly sorted by decreasing singular value
print('Singular values in descending order:')
for singular_value, left_vector in eig_pairs:
    print(singular_value.round(2))
    print(left_vector.round(2))

Singular values in descending order:
3.81
[[-0.59]
 [-0.29]
 [-0.52]
 [-0.55]]
2.64
[[ 0.09]
 [ 0.88]
 [-0.44]
 [-0.13]]
1.91
[[-0.36]
 [-0.06]
 [-0.43]
 [ 0.83]]
1.69
[[ 0.72]
 [-0.37]
 [-0.59]
 [-0.02]]


In [10]:
#Using SVD
# Create a matrix containing the two dominant left singular vectors as
# column vectors. reshape(-1, 1) turns each vector into a column whatever its
# length, instead of assuming the data has exactly four rows.
U2 = np.hstack((eig_pairs[0][1].reshape(-1, 1),
                eig_pairs[1][1].reshape(-1, 1)))

print('Matrix U2:\n', U2.round(2))

Matrix U2:
 [[-0.59  0.09]
 [-0.29  0.88]
 [-0.52 -0.44]
 [-0.55 -0.13]]


In [11]:
#Using SVD
# Reduce the dimension of the data matrix: multiply the transpose of B by U2,
# which leaves two coordinates (one per column of U2) for every column of B.
B2 = B.transpose().dot(U2)
print(B2)

[[-1.22907548 -1.69410919]
 [-0.20718982  0.31630406]
 [ 1.52900573 -0.37590027]
 [ 0.63819146  1.18592995]
 [ 0.77951918 -0.61885692]
 [ 1.10251356 -0.5413215 ]
 [-2.88847521  0.5042988 ]
 [ 0.27551058  1.22365507]]


In [12]:
#Plot the data projected onto the two dominant left singular vectors from SVD
traces = []

trace1 = go.Scatter(
    # B2 can be an np.matrix, whose column slices stay two-dimensional;
    # flatten each column so the trace receives plain 1-D coordinates.
    x=np.asarray(B2[:, 0]).ravel(),
    y=np.asarray(B2[:, 1]).ravel(),
    mode='markers+text',
    text=['Dan', 'Cassandra', 'Will', 'Josh', 'Junwon', 'Anna', 'Jessie', 'Ashley'],
    # 'bottom center' is the full, valid name of the position; a bare
    # 'bottom' is not one of the accepted textposition values.
    textposition='bottom center',
    # Plain dicts describe the marker and its outline, so the cell does not
    # depend on the legacy Marker/Line classes from the star import.
    marker=dict(
        size=12,
        line=dict(
            color='rgba(217, 217, 217, 0.14)',
            width=0.5),
        opacity=0.8))

traces.append(trace1)

# The figure takes the list of traces directly; no Data wrapper is required.
data = traces

layout = go.Layout(showlegend=False,
                   xaxis=dict(title='PC1 of U',),
                   yaxis=dict(title='PC2 of U',),
                   title='2D SVD of Class Time Data',
                   margin=dict(t=50, b=50, r=0, l=60, autoexpand=False),
                   font=dict(size=16)
                   )

fig = go.Figure(data=data, layout=layout)
# py is the online plotting module, so this call uses the stored credentials.
py.iplot(fig, filename='2d-SVD')