In [36]:
import numpy as np
import pandas as pd
from typing import Final

VARIABLES_COUNT: Final[int] = 10
LINES_COUNT: Final[int] = 100

rng = np.random.default_rng()
scales: np.ndarray = (10 ** rng.uniform(1, 6, size=VARIABLES_COUNT)).astype(int)

data: dict[str, list[int]] = {
    f"x{i + 1}": (rng.random(LINES_COUNT) * scales[i]).astype(int)
    for i in range(VARIABLES_COUNT)
}

df: pd.DataFrame = pd.DataFrame(data)
df.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
0,203,263,469,32,37,16853,4493,8539,7,3
1,741,2613,17,46,26,13591,4783,870,20,44
2,617,2663,650,6,21,3626,1098,34234,0,26
3,423,1177,482,23,36,1500,5399,38490,10,12
4,15,1995,426,34,23,3699,5983,34165,26,44


In [37]:
df.describe()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,520.11,1721.63,298.07,26.15,25.91,8889.38,3809.42,46853.2,13.28,27.1
std,305.406895,1025.807971,204.168343,14.338882,14.300593,5163.761842,2265.441265,27613.494473,7.593298,16.30548
min,3.0,12.0,0.0,0.0,0.0,48.0,62.0,870.0,0.0,0.0
25%,244.5,885.25,103.25,12.75,14.0,4616.25,1897.75,22380.25,7.0,13.75
50%,562.5,1690.5,276.0,27.5,28.0,8516.5,4048.0,43354.5,14.0,25.0
75%,793.25,2677.25,472.25,38.0,38.25,13698.0,5613.0,68074.25,19.0,41.75
max,958.0,3272.0,653.0,48.0,50.0,17193.0,8171.0,93666.0,27.0,55.0


In [38]:
centered_df: pd.DataFrame = df - df.mean()
centered_df.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
0,-317.11,-1458.63,170.93,5.85,11.09,7963.62,683.58,-38314.2,-6.28,-24.1
1,220.89,891.37,-281.07,19.85,0.09,4701.62,973.58,-45983.2,6.72,16.9
2,96.89,941.37,351.93,-20.15,-4.91,-5263.38,-2711.42,-12619.2,-13.28,-1.1
3,-97.11,-544.63,183.93,-3.15,10.09,-7389.38,1589.58,-8363.2,-3.28,-15.1
4,-505.11,273.37,127.93,7.85,-2.91,-5190.38,2173.58,-12688.2,12.72,16.9


In [None]:
V: pd.DataFrame = centered_df.cov(ddof=0)
print(V.to_string())

                x1            x2             x3            x4            x5            x6            x7            x8           x9           x10
x1    93273.371616  3.080245e+04   -2252.098687   -139.834848   -244.757677  2.513168e+05 -4.157417e+04  4.076470e+05  -146.253333    460.140404
x2    30802.445152  1.052282e+06    4992.005960  -1979.075253   -491.872020 -7.931555e+03 -7.787770e+05  6.378390e+06  -281.157980   2256.582828
x3    -2252.098687  4.992006e+03   41684.712222   -278.152020    262.440707  5.857136e+04  5.884227e+04 -6.678005e+05   -70.231919    -91.168687
x4     -139.834848 -1.979075e+03    -278.152020    205.603535      8.791414  3.715064e+03  4.998758e+02 -7.100480e+04    21.907071     34.863636
x5     -244.757677 -4.918720e+02     262.440707      8.791414    204.506970 -2.033282e+04 -4.140366e+03  3.193665e+03     3.944646    -31.344444
x6   251316.836566 -7.931555e+03   58571.356970   3715.063636 -20332.824040  2.666444e+07  1.395351e+06 -1.308487e+07  4543.650101

In [42]:
eigenvalues, eigenvectors = np.linalg.eig(V.values)
print(eigenvalues)
print(eigenvectors)

[7.62836893e+08 2.65124839e+07 5.14086093e+06 8.69132531e+05
 8.96545266e+04 4.00102441e+04 5.16199885e+01 2.68411620e+02
 1.96671830e+02 1.59474340e+02]
[[-5.29229921e-04 -9.68222375e-03 -1.14449944e-02 -2.17126694e-02
   9.98638895e-01  4.48601267e-02  2.18107075e-03  1.03067305e-03
   1.80486567e-03 -1.54801878e-03]
 [-8.37916650e-03 -2.42090441e-03 -1.74654450e-01 -9.84083166e-01
  -2.24397331e-02 -2.21211327e-02  4.92081328e-04  9.72702956e-04
   2.22245811e-03 -1.02978621e-03]
 [ 8.77176086e-04 -1.88243733e-03  9.45021417e-03 -2.30982267e-02
  -4.52727696e-02  9.98586330e-01  4.44053899e-04 -6.81336316e-03
  -3.06339945e-03 -9.62111738e-03]
 [ 9.31723185e-05 -9.20026900e-05  9.49663018e-06  1.62514607e-03
  -8.99515159e-04 -7.96267633e-03 -1.20993682e-01 -4.74311544e-01
   5.55031567e-01 -6.72504406e-01]
 [-4.69562099e-06  7.72676722e-04 -5.36227041e-04  1.10830757e-03
  -8.35860337e-04  8.77495009e-03 -3.94202424e-02  6.94031002e-02
   7.88953133e-01  6.09181743e-01]
 [ 1.778480