In [2]:
import numpy as np
import pandas as pd
# import csv
import time
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib notebook
# %matplotlib inline

In [3]:
def normalize(a):
    return (a - min(a)) / (max(a)-min(a))

def weighted_cosine_dists(center, weights=False):
    dists = np.zeros(n_stars)
    for i in range(n_stars):
        val = np.dot(center, pos.iloc[i])
        if val > 0.9999:
            dists[i] = 0.
        else:
            # dists[i] = dark.iloc[center] * dark.iloc[i] * np.arccos(val)
            # dists[i] = np.arccos(val)  # spherical separation
            dists[i] = 1- val  # cosine dissimilarity
            if weights:
                dists[i] *= dark.iloc[i]
    return dists

def ra_dec_2_cart(ra, dec):
    return np.array([np.cos(dec) * np.cos(ra), np.cos(dec) * np.sin(ra), np.sin(dec)])

In [4]:
# from  Hipparcos, the New Reduction (van Leeuwen, 2007) from VizieR
data = pd.read_csv('data_big.csv')
data = data[data["Hpmag"]<6]
len(data)

4559

In [5]:
truth = pd.read_csv("constellation_centers.csv")
true_centers = np.zeros((88, 3))
for i in range(88):
    ra0 = truth["RA"].iloc[i]
    dec0 = truth["Dec"].iloc[i]
    true_centers[i, :] = ra_dec_2_cart(ra0, dec0)

In [6]:
# ra = np.pi / 12 * (data["ra_hours"] + data["ra_minutes"] / 60 + data["ra_seconds"] / 3600);
# dec = np.pi / 180 * (data["dec_degrees"] + data["dec_minutes"] / 60 + data["dec_seconds"] / 3600);
star_ra = data["Radeg"] * np.pi / 180
star_dec = data["Dedeg"] * np.pi / 180
star_mag = data["Hpmag"]
bright = 0.9 * normalize(star_mag)
dark = 1 - 0.9 * bright
star_x = np.cos(star_dec) * np.cos(star_ra)
star_y = np.cos(star_dec) * np.sin(star_ra)
star_z = np.sin(star_dec)
pos = pd.concat([star_x, star_y, star_z], axis=1)
pos = pos.rename(columns = {0:"x", 1:"y", "Dedeg":"z"})

In [7]:
# dists_tot = np.zeros((n_stars, n_stars))
# for i in range(n_stars):
#     dists_tot[i, :] = weighted_cosine_dists(i)

In [11]:
n_stars = len(data)
n_const = 88
# cluster = np.zeros((n_const, n_stars))
# cluster[0, :] = np.ones(n_stars)
cluster = np.zeros(n_stars)
centers = np.zeros((n_const, 3))
kmeans = True
weights = True

start = time.time()
if kmeans:
    tol = 1e-5
    for i in range(n_const):
        hold = np.random.uniform(low=-1.0, high=1.0, size=3)
        centers[i, :] = hold/np.linalg.norm(hold)
    delta = 1
    dists = weighted_cosine_dists(centers[0, :], weights=weights)
    while delta > tol:
        
        # assign to updated cluster centers
        dist_tot_1 = np.sum(dists)
        for i in range(n_const):
            dists_temp = weighted_cosine_dists(centers[i, :], weights=weights)
            for j in range(0, n_stars):
                if dists_temp[j] < dists[j]:
                    cluster[j] = i
                    dists[j] = dists_temp[j]
        dist_tot_2 = np.sum(dists)
        delta = abs(dist_tot_1 - dist_tot_2) / dist_tot_1
        print(delta)
#         delta = 0
        
        #calculate new cluster centers
        for i in range(n_const):
            hold = np.random.uniform(low=0.0, high=1.0, size=3)
            sx = np.array([np.sum(pos["x"].iloc[np.where(cluster == i)]), np.sum(pos["y"].iloc[np.where(cluster == i)]), np.sum(pos["z"].iloc[np.where(cluster == i)])])
            ra0 = np.arctan2(sx[1], sx[0])
            dec0 = np.arctan2(sx[2], (np.cos(ra0) * sx[0] * (1 + (sx[1] / sx[0]) ** 2 )))
            centers[i, :] = ra_dec_2_cart(ra0, dec0)
            
        end = time.time()
        print("clustering has taken %f seconds"%(end - start))
    
    for i in range(n_const):
        dists_temp = weighted_cosine_dists(centers[i, :], weights=weights)
        for j in range(n_stars):
            if dists_temp[j] < dists[j]:
                cluster[j] = i
                dists[j] = dists_temp[j]
            
else:
    centers[0, :] = pos.iloc[np.random.randint(0, high=n_stars)]
    dists = weighted_cosine_dists(centers[0, :])
    for i in range(1, n_const):
        # print(i)
        centers[i, :] = pos.iloc[np.argmax(dists)]
        dists_temp = weighted_cosine_dists(centers[i, :])
        for j in range(n_stars):
            if dists_temp[j] < dists[j]:
                cluster[j] = i
                dists[j] = dists_temp[j]
end = time.time()
print("clustering took %f seconds"%(end - start))

0.9720345789794085
clustering has taken 22.484505 seconds
0.5417787001720347
clustering has taken 44.566577 seconds
0.13187519164387346
clustering has taken 66.814759 seconds
0.03820407892035168
clustering has taken 88.938912 seconds
0.016252528354750675
clustering has taken 112.064243 seconds
0.008873990316965043
clustering has taken 134.459087 seconds
0.004551729150047877
clustering has taken 156.385452 seconds
0.0012422002940757238
clustering has taken 178.308966 seconds
0.00020232443403667475
clustering has taken 200.524631 seconds
0.0
clustering has taken 222.605539 seconds
clustering took 244.514966 seconds


In [15]:
bump = 1.05
fig = plt.figure(figsize=[8, 8])
ax = fig.add_subplot(111, projection='3d')
ax.scatter(star_x, star_y, star_z, marker='o', c=normalize(cluster), cmap="tab20", s=4e4/n_stars, alpha=.8)
ax.scatter(bump * centers[:, 0], bump * centers[:, 1], bump * centers[:, 2], marker='X', c="k", s=50)
#ax.scatter(bump * true_centers[:, 0], bump * true_centers[:, 1], bump * true_centers[:, 2], marker='X', c="r", s=50)
ax.grid(False)

ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')

plt.show()

<IPython.core.display.Javascript object>

In [13]:
def f1(th):
    return 1-np.cos(th)
x = np.linspace(0, np.pi, 1000)
fig = plt.figure(figsize=[8, 8])
plt.plot(x, f1(x))
plt.plot(x, 2 * x / np.pi)
plt.show()

<IPython.core.display.Javascript object>