In [1]:
import numpy as np
import matplotlib.pyplot as plt
import mlgrad.af as af
import mlgrad.funcs as funcs
import mlgrad.utils as utils
import mlgrad.pca as pca
import pandas as pd
import sklearn.datasets as datasets
import sklearn.decomposition as decomposition

ModuleNotFoundError: No module named 'mlgrad.func'

In [None]:
# Read only the colour-index ('ci') and luminosity ('lum') columns and
# discard every row in which either of them is missing.
df = pd.read_csv(
    "data/hygdata_v3-abridged.csv",
    sep=',',
    usecols=['ci', 'lum'],
).dropna()
print(df.columns)
df

In [None]:
# Build the two-feature data matrix X0 = [log(lum), log(T)].
ci = df['ci'].values
lum = df['lum'].values

# Keep only rows with strictly positive luminosity so that log(lum) is defined.
iii = lum > 0
lum = lum[iii]
ci = ci[iii]
lum = np.log(lum)

# Effective temperature from the colour index, Ballesteros (2012):
#     T = 4600 K * (1 / (0.92*ci + 1.7) + 1 / (0.92*ci + 0.62))
# The previous expression, 10.92/(ci + 1.7) + 10.92/(ci + 0.62), was a garbled
# form of this formula and overestimated T by roughly an order of magnitude
# (about 6e4 K instead of about 5.8e3 K for ci = 0.65).
# NOTE(review): log(T) is undefined where 0.92*ci + 0.62 <= 0; confirm the
# catalogue contains no such colour indices.
T = np.log(4600 * (1 / (0.92 * ci + 1.7) + 1 / (0.92 * ci + 0.62)))

# One row per star, columns (log-luminosity, log-temperature).
X0 = np.c_[lum, T]

In [None]:
# Scatter of the two features that make up X0, before any decomposition.
fig, ax = plt.subplots()
ax.scatter(lum, T, s=9, c='w', edgecolors='k', linewidth=0.5)
ax.set_xlabel('lum')
ax.set_ylabel('T')
ax.set_title('Hertzsprung-Russell Diagram')
ax.minorticks_on()
plt.show()

In [None]:
# Reference decomposition: scikit-learn PCA with default settings, fitted on X0.
# `fit` returns the estimator itself, so construction and fitting can be chained.
pca_sk = decomposition.PCA().fit(X0)

In [None]:
# Pull the fitted quantities out of the scikit-learn model:
# the centre (mean_) and the first two rows of components_.
sk_c0 = pca_sk.mean_
sk_cs = pca_sk.components_
sk_a1, sk_a2 = sk_cs[0, :], sk_cs[1, :]

In [None]:
# Data cloud with the scikit-learn centre and both component directions,
# each drawn as a segment of two direction-vector lengths to either side.
fig, ax = plt.subplots(figsize=(20, 3))
ax.scatter(lum, T, s=9, c='w', edgecolors='k', linewidth=0.5)
ax.set_xlabel('lum')
ax.set_ylabel('T')
ax.scatter([sk_c0[0]], [sk_c0[1]], s=81, c='w', edgecolors='k')
for axis_label, direction in (('1', sk_a1), ('2', sk_a2)):
    ax.plot(
        [sk_c0[0] - 2*direction[0], sk_c0[0] + 2*direction[0]],
        [sk_c0[1] - 2*direction[1], sk_c0[1] + 2*direction[1]],
        label=axis_label,
    )
ax.set_title('Hertzsprung-Russell Diagram')
ax.minorticks_on()
ax.legend()
plt.show()

In [None]:
# Location estimate from pca.location (the non-robust counterpart of
# pca.robust_location), followed by the sorted point-to-centre distances.
c0 = pca.location(X0)
dist_to_c0 = sorted(pca.distance_center(X0, c0))

plt.figure(figsize=(12, 5))
plt.plot(dist_to_c0, marker='s', markersize=2)
plt.minorticks_on()
plt.show()
print(c0)

In [None]:
# Robust location estimate driven by a "WM" averaging function.
# NOTE(review): the arguments of Quantile_Sqrt (0.9, 0.001) are presumably a
# quantile level and a smoothing constant -- confirm against mlgrad.funcs.
rho = funcs.Quantile_Sqrt(0.9, 0.001)
wma = af.averaging_function("WM", rhofunc=rho)
c = pca.robust_location(X0, wma)

# Sorted point-to-centre distances for the robust centre.
dist_to_c = sorted(pca.distance_center(X0, c))
plt.plot(dist_to_c, marker='s', markersize=3)
plt.show()
print(c)

In [None]:
# Per-point distance to the pca.location centre (x axis) against the distance
# to the pca.robust_location centre (y axis).
D0 = pca.distance_center(X0, c0)
D1 = pca.distance_center(X0, c)

fig, ax = plt.subplots()
ax.scatter(D0, D1)
ax.minorticks_on()
ax.grid(True)
plt.show()

In [None]:
# Centre the data twice: X1_0 around the pca.location centre c0,
# X1 around the pca.robust_location centre c.
X1_0 = X0 - c0
X1 = X0 - c

In [None]:
# First component of the non-robust pipeline, fitted on X1_0.
# pca.find_pc returns a pair, unpacked here as (direction a1_0, value L1_0).
a1_0, L1_0 = pca.find_pc(X1_0)

# Sorted distances of the points to the line spanned by a1_0.
line_dist_0 = sorted(pca.distance_line(X1_0, a1_0))
plt.plot(line_dist_0, marker='o', markersize=4)
plt.show()
print(L1_0, a1_0)

In [None]:
# First component of the robust pipeline, fitted on X1 with a "WM"
# averaging function built on Quantile_Sqrt(0.9, 0.001).
rho = funcs.Quantile_Sqrt(0.9, 0.001)
wma = af.averaging_function("WM", rhofunc=rho)
a1, L1 = pca.find_robust_pc(X1, wma)

# Sorted distances of the points to the line spanned by a1.
line_dist = sorted(pca.distance_line(X1, a1))
plt.plot(line_dist, marker='o', markersize=4)
plt.show()
print(L1, a1)

# pca.project_line results for both pipelines along their first direction.
U1 = pca.project_line(X1, a1)
U1_0 = pca.project_line(X1_0, a1_0)

In [None]:
# Distance to the first-component line: non-robust fit (x) vs. robust fit (y).
dist_classic = pca.distance_line(X1_0, a1_0)
dist_robust = pca.distance_line(X1, a1)

fig, ax = plt.subplots()
ax.scatter(dist_classic, dist_robust)
ax.minorticks_on()
ax.grid(True)
plt.show()

In [None]:
# Inputs for the second-component search in each pipeline.
# NOTE(review): pca.project presumably removes the component along the given
# direction from every row -- confirm against mlgrad.pca.
X2_0 = pca.project(X1_0, a1_0)
X2 = pca.project(X1, a1)

In [None]:
# Second component of the non-robust pipeline.
# It must be fitted on X2_0 (the residual of the non-robust pipeline). The
# previous code passed X2, the residual of the robust pipeline, which mixed the
# two pipelines while the distances below were still computed on X2_0.
a2_0, L2_0 = pca.find_pc(X2_0)

# Sorted distances of the X2_0 points to the line spanned by a2_0.
plt.plot(sorted(pca.distance_line(X2_0, a2_0)), marker='o', markersize=4)
plt.show()
print(L2_0, a2_0)

In [None]:
# Second component of the robust pipeline, fitted on X2 with a "WM"
# averaging function built on Quantile_Sqrt(0.9, 0.001).
rho = funcs.Quantile_Sqrt(0.9, 0.001)
wma = af.averaging_function("WM", rhofunc=rho)
a2, L2 = pca.find_robust_pc(X2, wma)

# Sorted distances of the X2 points to the line spanned by a2.
line_dist = sorted(pca.distance_line(X2, a2))
plt.plot(line_dist, marker='o', markersize=3)
plt.show()
print(L2, a2)

# pca.project_line results for both pipelines along their second direction.
U2 = pca.project_line(X2, a2)
U2_0 = pca.project_line(X2_0, a2_0)

In [None]:
# Distance to the second-component line: non-robust fit (x) vs. robust fit (y).
dist_classic = pca.distance_line(X2_0, a2_0)
dist_robust = pca.distance_line(X2, a2)

fig, ax = plt.subplots()
ax.scatter(dist_classic, dist_robust)
ax.minorticks_on()
ax.grid(True)
plt.show()

In [None]:
def _draw_axis(center, direction, half_length, **line_kw):
    """Draw a segment through `center` along `direction`.

    The segment extends `half_length` times the direction vector to each side
    of the centre; `line_kw` is forwarded unchanged to `plt.plot`.
    """
    dx = half_length * direction[0]
    dy = half_length * direction[1]
    plt.plot([center[0] - dx, center[0] + dx],
             [center[1] - dy, center[1] + dy],
             **line_kw)


# Final figure: data cloud, both centres, and both pairs of component
# directions (scikit-learn PCA dashed, robust PCA solid).
plt.figure(figsize=(13,3))
plt.scatter(lum, T, s=9, c='w', edgecolors='k', linewidth=0.5)
plt.scatter([sk_c0[0]], [sk_c0[1]], s=81, c='w', edgecolors='k')
# Was `c[[1]]`; plain scalar indexing matches the x argument and does not
# depend on fancy-indexing support in the type of `c`.
plt.scatter([c[0]], [c[1]], s=81, c='w', edgecolors='k')

# First axes are drawn at 4x the direction vector, second axes at 2x.
_draw_axis(sk_c0, sk_a1, 4, label='pca', linestyle='--', c='k')
_draw_axis(sk_c0, sk_a2, 2, linestyle='--', c='k')
_draw_axis(c, a1, 4, label='robust_pca', linestyle='-', c='k')
_draw_axis(c, a2, 2, linestyle='-', c='k')

plt.xlabel('lum')
# The y axis shows T, not ci; the label was wrong and disagreed with the
# earlier figures of the same data.
plt.ylabel('T')
plt.title('Hertzsprung-Russell Diagram')
plt.minorticks_on()
plt.legend()
plt.tight_layout()
plt.savefig('robust_pca_hygdata.eps')
plt.show()

In [None]:
# Show both centre estimates side by side: c0 from pca.location, c from pca.robust_location.
print(c0, c)

In [None]:
# First line: the two directions obtained with pca.find_pc.
# Second line: the two directions obtained with pca.find_robust_pc.
print(a1_0, a2_0)
print(a1, a2)