In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from numpy.linalg import norm

In [76]:
samples = 4000
th = np.random.rand(samples)
r = np.random.rand(samples)

x = 1*r*np.sin(th*2*np.pi)
y = 2.2*r*np.cos(th*2*np.pi)
z = 0.07*np.random.randn(samples)

# Bases of rotated hyperplane
v1 = np.array([0.4,1,-0.5])
v2 = np.array([1,-0.4,0])
v1 = v1/norm(v1)
v2 = v2/norm(v2)
v3 = np.cross(v1,v2)
# Supposing tha x,y,z are the cordinates on bases v1,v2,v3
# I want to read them in canonical base
M = np.matrix(np.concatenate([v1.reshape(-1,1),v2.reshape(-1,1),v3.reshape(-1,1)],axis=1))
X = np.matrix(np.concatenate([x.reshape(-1,1),y.reshape(-1,1),z.reshape(-1,1)],axis=1))
X = X
Y = X@M.T
# Inertia
I2 = norm(Y@v2.reshape(-1,1))**2
I1 = norm(Y@v1.reshape(-1,1))**2
I3 = norm(Y@v3.reshape(-1,1))**2


In [77]:
print(f'mean: {Y.mean(0)}, std: {Y.std(0)}')
print(norm(Y@v2.reshape(-1,1))**2)
print(norm(Y@v1.reshape(-1,1))**2)
print(norm(Y@v3.reshape(-1,1))**2)

mean: [[ 0.00527671 -0.00270695  0.00111856]], std: [[0.83907263 0.4743681  0.18046786]]
3181.200114073397
646.2530251370628
19.23899242767712


In [108]:
import plotly.express as px
import plotly.graph_objects as go


df = pd.DataFrame(Y,columns=['X','Y','Z'])
df_v1 = pd.DataFrame([[0,0,0],v1], columns=['X','Y','Z'])
df_v2 = pd.DataFrame([[0,0,0],v2], columns=['X','Y','Z'])
df_v3 = pd.DataFrame([[0,0,0],v3], columns=['X','Y','Z'])

#fig = go.Scatter3D(df, x='X', y='Y', z='Z',width=800, height=600,
#                   marker={'size':1})
fig = go.Figure(data=[go.Scatter3d(x=df.X, y=df.Y, z=df.Z,
                                   name='samples',
                                   mode='markers',    
                                   marker=dict(size=2, colorscale='greys', opacity=0.5),
                                   )])
## v2
fig.add_scatter3d(connectgaps=None, x = df_v2.X.values, y = df_v2.Y.values, z = df_v2.Z.values,
                 name='First Principal Component',
                 mode='lines+markers',
                 line=dict(width=6),
                 marker=dict(symbol='diamond', size=3),
                 visible='legendonly'
                 )
fig.add_scatter3d(connectgaps=None, x = df_v1.X.values, y = df_v1.Y.values, z = df_v1.Z.values,
                 name='Second Principal Component',
                 mode='lines+markers',
                 line=dict(width=6),
                 marker=dict(symbol='diamond', size=3),
                 visible='legendonly'
                 )

fig.add_scatter3d(connectgaps=None, x = df_v3.X.values, y = df_v3.Y.values, z = df_v3.Z.values,
                 name='Third Principal Component',
                 mode='lines+markers',
                 line=dict(width=6),
                 marker=dict(symbol='diamond', size=3),
                 visible='legendonly'
                 )
# Projections
fig.add_scatter3d(x=df.X, y=df.Y, z=df.Z,
                   name='projections',
                   mode='markers',
                   opacity=0,
                   marker=dict(size=2, colorscale='greys_r'),
                   projection=dict(x=dict(show=True, opacity=1), y=dict(show=True, opacity=1), z=dict(show=True, opacity=1)),
                   visible='legendonly'
                  )


fig.update_layout(scene_aspectmode='cube',
                  scene =dict(
                      xaxis=dict(nticks=10, range=[-2.5,2.5]),
                      yaxis=dict(nticks=10, range=[-2.5,2.5]),
                      zaxis=dict(nticks=10, range=[-2.5,2.5])
                  )
                 )
fig.write_html("PCA_3D.html")

In [113]:
import plotly.express as px
import plotly.graph_objects as go


df2_v1 = pd.DataFrame([[0,0],[1,0]], columns=['X','Y'])
df2_v2 = pd.DataFrame([[0,0],[0,1]], columns=['X','Y'])

#fig = go.Scatter3D(df, x='X', y='Y', z='Z',width=800, height=600,
#                   marker={'size':1})
fig = go.Figure(data=[go.Scatter(x=x, y=y,
                                   name='samples',
                                   mode='markers',    
                                   marker=dict(size=3, colorscale='greys', opacity=1),
                                   )])
## v2
fig.add_scatter(connectgaps=None, x = df2_v2.X.values, y = df2_v2.Y.values,
                 name='First Principal Component',
                 mode='lines+markers',
                 line=dict(width=6),
                 marker=dict(symbol='diamond', size=3),
                 visible='legendonly'
                 )
fig.add_scatter(connectgaps=None, x = df2_v1.X.values, y = df2_v1.Y.values,
                 name='Second Principal Component',
                 mode='lines+markers',
                 line=dict(width=6),
                 marker=dict(symbol='diamond', size=3),
                 visible='legendonly'
                 )



fig.update_yaxes(
    scaleanchor = "x",
    scaleratio = 1,
  )
fig.write_html("PCA_2D.html")

In [8]:
v1

array([ 0.25916053,  0.86386843, -0.43193421])