In [7]:
# importing packages

# data manipulation
import numpy as np
import pandas as pd

# benchmarking
import time

# importing and saving analysis data
import os.path

# letting me know when large calculations are done
import winsound

# plotting and image exporting
import matplotlib.pyplot as plt
from matplotlib import cm, colors
from mpl_toolkits.mplot3d import Axes3D
from PIL import Image
%matplotlib notebook
# %matplotlib inline

# my custom functions for this project
# see file for specifics
import project_functions as pf

In [8]:
# importing star data from VizieR
# only rows whose "Hpmag" value is below this cut are kept
HPMAG_LIMIT = 6
data = pd.read_csv('data_big.csv')
data = data[data["Hpmag"] < HPMAG_LIMIT]
n_stars = len(data)
print(n_stars)

# getting star positions from the data (degrees -> radians)
star_ra = data["Radeg"] * np.pi / 180
star_dec = data["Dedeg"] * np.pi / 180
# converting right ascension and declination to cartesian coordinates on unit celestial sphere
# conversion from https://en.wikipedia.org/wiki/Equatorial_coordinate_system
star_x = np.cos(star_dec) * np.cos(star_ra)
star_y = np.cos(star_dec) * np.sin(star_ra)
star_z = np.sin(star_dec)
# Name the columns explicitly.  The previous concat + rename relied on pandas
# labelling the two unnamed product Series 0 and 1 and keeping "Dedeg" as the
# name of the third, which silently breaks if any expression above changes.
pos = pd.DataFrame({"x": star_x, "y": star_y, "z": star_z})

# importing centers and areas of the modern constellations
# data from https://en.wikipedia.org/wiki/88_modern_constellations_by_area
truth = pd.read_csv("constellation_centers.csv")
# Size everything from the table itself instead of a hard-coded 88, so the
# centers array always has one row per entry of `truth` (and so stays aligned
# with `percent_const`, which is computed from every row).
n_const = len(truth)
true_centers = np.zeros((n_const, 3))
for i in range(n_const):
    ra0 = truth["RA"].iloc[i]
    dec0 = truth["Dec"].iloc[i]
    # NOTE(review): pf.ra_dec_2_cart is defined in project_functions; it must
    # return something assignable to a length-3 row -- units of RA/Dec are not
    # visible from here, confirm against that module.
    true_centers[i, :] = pf.ra_dec_2_cart(ra0, dec0)

# total sky area used to turn truth["SA"] into a fraction of the sky
# (presumably square degrees, matching the Wikipedia table -- confirm)
FULL_SKY_AREA = 41253
percent_const = truth["SA"] / FULL_SKY_AREA  # fraction of sky that each constellation takes up
percent_const /= np.sum(percent_const)  # make sure it normalizes to one

4559


In [39]:
# Run pf.bootstrap_array against the results file "random_1000.npy" and keep
# whatever it returns in `rand_tots`.
# NOTE(review): the recorded output shows 1000 entries although resample=769;
# presumably earlier results stored in the .npy file are included -- confirm
# in project_functions.
rand_tots = pf.bootstrap_array(
    resample=769,
    filename="random_1000.npy",
    lams=1000.,
    save_iteration=False,
)

# report how many entries came back
print(len(rand_tots))

1000


In [45]:
# k-center runs of pf.bootstrap_array: one with default weighting, one with
# use_weights=True.  Both pass resample=0.
# NOTE(review): presumably resample=0 means "only load what is already saved
# in the .npy file" -- confirm in project_functions.
kc_nw_tots = pf.bootstrap_array(
    resample=0,
    filename="kc_nw_1000.npy",
    lams=1000.,
    updates=True,
    cluster_func='kcenter',
)
kc_w_tots = pf.bootstrap_array(
    resample=0,
    filename="kc_w_1000.npy",
    lams=1000.,
    updates=True,
    cluster_func='kcenter',
    use_weights=True,
)

# one length per line, unweighted first
print(len(kc_nw_tots), len(kc_w_tots), sep="\n")

100
100


In [44]:
# k-means runs of pf.bootstrap_array, 40 resamples each:
#   km_nw_no -- default weighting, default `online`
#   km_w_no  -- use_weights=True, default `online`
#   km_w_oa  -- use_weights=True, online="anneal"
# With updates=True the calls print progress lines (see the cell output).
km_nw_no_tots = pf.bootstrap_array(
    resample=40,
    filename="km_nw_no_1000.npy",
    lams=1000.,
    updates=True,
    cluster_func='kmeans',
)
km_w_no_tots = pf.bootstrap_array(
    resample=40,
    filename="km_w_no_1000.npy",
    lams=1000.,
    updates=True,
    cluster_func='kmeans',
    use_weights=True,
)
km_w_oa_tots = pf.bootstrap_array(
    resample=40,
    filename="km_w_oa_1000.npy",
    lams=1000.,
    updates=True,
    cluster_func='kmeans',
    use_weights=True,
    online="anneal",
)

# one length per line, in the order the runs were made
print(len(km_nw_no_tots), len(km_w_no_tots), len(km_w_oa_tots), sep="\n")

working on bootstrap 1 for kmeans clustering method
0.9715404560426701
clustering has taken 22.720461 seconds
0.5165200325731883
clustering has taken 45.349347 seconds
0.13775294198325086
clustering has taken 67.925863 seconds
0.05591846502279064
clustering has taken 90.809907 seconds
0.02416726101383861
clustering has taken 113.437602 seconds
0.011069194546260945
clustering has taken 135.890388 seconds
0.006359182629691185
clustering has taken 158.493155 seconds
0.0018366867063115786
clustering has taken 181.175543 seconds
0.0012890162302811225
clustering has taken 203.832681 seconds
0.00047219402647954094
clustering has taken 226.642887 seconds
clustering took 249.324735 seconds
working on bootstrap 2 for kmeans clustering method
0.9736365454891748
clustering has taken 22.747518 seconds
0.531494092289715
clustering has taken 45.774141 seconds
0.13739493925614937
clustering has taken 68.499502 seconds
0.06080643768331309
clustering has taken 91.242937 seconds
0.0320578243576416
cluste

0.47965135455049673
clustering has taken 47.005564 seconds
0.1387793845439608
clustering has taken 70.723215 seconds
0.04559280767330741
clustering has taken 93.818456 seconds
0.02473264667618842
clustering has taken 117.347678 seconds
0.012971560569558744
clustering has taken 140.684792 seconds
0.005866313882966202
clustering has taken 163.723686 seconds
0.002842683147208184
clustering has taken 187.217885 seconds
0.000894985899868948
clustering has taken 210.149070 seconds
clustering took 233.239814 seconds
working on bootstrap 13 for kmeans clustering method
0.9739449235253536
clustering has taken 23.464814 seconds
0.5081069790758032
clustering has taken 46.529538 seconds
0.12409448819751467
clustering has taken 70.039745 seconds
0.04743886186165466
clustering has taken 93.357505 seconds
0.02627252711652392
clustering has taken 117.674133 seconds
0.016873023963528345
clustering has taken 142.241108 seconds
0.009512882855718998
clustering has taken 165.033522 seconds
0.00479089058829

0.02012632272739044
clustering has taken 116.359820 seconds
0.0102314764043549
clustering has taken 139.022631 seconds
0.0030420628362247256
clustering has taken 161.560326 seconds
0.0005168915337628485
clustering has taken 184.208725 seconds
clustering took 206.914970 seconds
working on bootstrap 24 for kmeans clustering method
0.9770109376203576
clustering has taken 22.942612 seconds
0.46118125040985
clustering has taken 45.529177 seconds
0.13061228679760967
clustering has taken 68.002045 seconds
0.05301591053774259
clustering has taken 90.627506 seconds
0.028017746693890115
clustering has taken 113.155228 seconds
0.01635686991119179
clustering has taken 135.796646 seconds
0.00887034012672778
clustering has taken 158.422107 seconds
0.004029956962111305
clustering has taken 180.898965 seconds
0.003366648233542846
clustering has taken 203.437658 seconds
0.003873121790902338
clustering has taken 226.101017 seconds
0.002231777663700002
clustering has taken 248.603806 seconds
0.0
clusteri

clustering took 295.726778 seconds
working on bootstrap 36 for kmeans clustering method
0.9768611312977066
clustering has taken 22.712920 seconds
0.43438884519737514
clustering has taken 46.153706 seconds
0.14180131709665372
clustering has taken 69.229678 seconds
0.04869313615087676
clustering has taken 92.735326 seconds
0.01890606539740608
clustering has taken 115.753448 seconds
0.013889855955055146
clustering has taken 139.134454 seconds
0.006045120347754763
clustering has taken 162.685072 seconds
0.0030741341799913474
clustering has taken 185.857621 seconds
0.001579420043723951
clustering has taken 209.255178 seconds
0.00026472561599539434
clustering has taken 233.970822 seconds
clustering took 259.092434 seconds
working on bootstrap 37 for kmeans clustering method
0.9764854025107854
clustering has taken 24.530107 seconds
0.4523752946277383
clustering has taken 48.846291 seconds
0.13341713206055086
clustering has taken 73.193948 seconds
0.04041296445796985
clustering has taken 97.69

0.9808088741043728
clustering has taken 22.617482 seconds
0.38284988916732554
clustering has taken 45.070404 seconds
0.11811132329546985
clustering has taken 67.458500 seconds
0.04958224609958651
clustering has taken 89.812686 seconds
0.02296227896419433
clustering has taken 112.567800 seconds
0.013069374900648472
clustering has taken 135.090536 seconds
0.005375858008129392
clustering has taken 157.680092 seconds
0.0026884660805821608
clustering has taken 181.760660 seconds
0.0021464026479624536
clustering has taken 204.900743 seconds
0.0007828450753704388
clustering has taken 228.159510 seconds
clustering took 250.736101 seconds
working on bootstrap 9 for kmeans clustering method
0.9754051832014731
clustering has taken 24.370790 seconds
0.4695267967575778
clustering has taken 47.356288 seconds
0.1246383244670549
clustering has taken 70.379683 seconds
0.04724172705850038
clustering has taken 92.828617 seconds
0.03778683622456299
clustering has taken 115.206738 seconds
0.029018052541688



0.4982097066436964
clustering has taken 45.243940 seconds
0.13527661068761213
clustering has taken 67.735758 seconds
0.048669174584457055
clustering has taken 90.194664 seconds
0.019296870231148955
clustering has taken 112.556830 seconds
0.007009872912866234
clustering has taken 135.210215 seconds
0.005763301355449097
clustering has taken 157.779825 seconds
0.005222475005800406
clustering has taken 180.484076 seconds
0.0033779972009065503
clustering has taken 203.019776 seconds
0.0018879020180134491
clustering has taken 225.613322 seconds
0.0010094657845301969
clustering has taken 247.950554 seconds
0.0
clustering has taken 270.507199 seconds
clustering took 292.875348 seconds
working on bootstrap 20 for kmeans clustering method
0.9699329416483081
clustering has taken 22.842879 seconds
0.52191439731397
clustering has taken 45.328713 seconds
0.13441331552172364
clustering has taken 67.846462 seconds
0.06811036835339836
clustering has taken 90.271459 seconds
0.03732614915869389
clusterin

0.0010946253971940252
clustering has taken 249.105464 seconds
0.0
clustering has taken 271.717959 seconds
clustering took 294.245681 seconds
working on bootstrap 31 for kmeans clustering method
0.9806979526331502
clustering has taken 22.761097 seconds
0.38332945524672546
clustering has taken 45.399523 seconds
0.11588148685313063
clustering has taken 67.950184 seconds
0.058345281550192946
clustering has taken 90.599580 seconds
0.03167006995309608
clustering has taken 113.210081 seconds
0.015064791148897983
clustering has taken 135.794651 seconds
0.009488644067038993
clustering has taken 158.316389 seconds
0.004828600721788366
clustering has taken 181.089455 seconds
0.0007408703818726065
clustering has taken 203.761790 seconds
clustering took 226.354339 seconds
working on bootstrap 32 for kmeans clustering method
0.9745830773262188
clustering has taken 22.847866 seconds
0.49000595140068043
clustering has taken 45.430441 seconds
0.11731589535869004
clustering has taken 67.982099 seconds
0

clustering took 581.403403 seconds
working on bootstrap 2 for kmeans clustering method
0.9766891263949173
clustering has taken 25.500767 seconds
0.39159148210801226
clustering has taken 50.830991 seconds
0.1891396606421087
clustering has taken 75.046201 seconds
0.09352513876133929
clustering has taken 100.346501 seconds
0.04242851128673345
clustering has taken 123.967298 seconds
0.025587899915081154
clustering has taken 147.742212 seconds
0.01626768482551091
clustering has taken 171.018930 seconds
0.010981568057810311
clustering has taken 194.583877 seconds
0.00872123796643268
clustering has taken 218.066045 seconds
0.0066733469846647
clustering has taken 241.567162 seconds
0.005518298185139373
clustering has taken 264.957576 seconds
0.004509554130343266
clustering has taken 288.331036 seconds
0.0038706308836454558
clustering has taken 311.733417 seconds
0.00340587827060998
clustering has taken 337.210249 seconds
0.003170479714562363
clustering has taken 361.783498 seconds
0.0025270567

0.005010826663729284
clustering has taken 304.682755 seconds
0.004446794944401075
clustering has taken 329.553208 seconds
0.0039028574403833774
clustering has taken 354.329913 seconds
0.0034343934748088624
clustering has taken 379.167454 seconds
0.0028848802114531938
clustering has taken 404.160580 seconds
0.002457821018262371
clustering has taken 431.896367 seconds
0.0020439974902823096
clustering has taken 458.615873 seconds
0.001752553365906827
clustering has taken 486.752589 seconds
0.0014960519811203804
clustering has taken 514.668891 seconds
0.001292854045122033
clustering has taken 540.070922 seconds
0.0011319742657277011
clustering has taken 565.151813 seconds
0.0009480729861819782
clustering has taken 590.050192 seconds
clustering took 613.884418 seconds
working on bootstrap 8 for kmeans clustering method
0.9713092552339694
clustering has taken 25.535674 seconds
0.5084156894483892
clustering has taken 50.621551 seconds
0.1701816672577424
clustering has taken 75.336421 seconds


0.3368089533660811
clustering has taken 59.627418 seconds
0.14685587979022122
clustering has taken 87.215726 seconds
0.0690175350132704
clustering has taken 112.761054 seconds
0.03842835485386465
clustering has taken 138.708814 seconds
0.022532577993401977
clustering has taken 164.642504 seconds
0.016622801062759637
clustering has taken 190.099028 seconds
0.012022044684826783
clustering has taken 215.750620 seconds
0.009318826261291976
clustering has taken 241.297349 seconds
0.007538248142321402
clustering has taken 266.787289 seconds
0.006817510521520469
clustering has taken 292.001114 seconds
0.006493245322942286
clustering has taken 317.490266 seconds
0.005344779902038942
clustering has taken 342.812128 seconds
0.004302559111919032
clustering has taken 368.140901 seconds
0.003435490361665455
clustering has taken 393.910523 seconds
0.002922337343624048
clustering has taken 419.301629 seconds
0.0025230822951341713
clustering has taken 444.656705 seconds
0.0020548777313216296
clusterin

clustering has taken 302.605313 seconds
0.0038714852442036513
clustering has taken 325.937882 seconds
0.003415869858844567
clustering has taken 349.415064 seconds
0.00305366517139896
clustering has taken 372.689787 seconds
0.002759712943105371
clustering has taken 395.969497 seconds
0.002569130705917509
clustering has taken 419.289101 seconds
0.0023879412451115077
clustering has taken 442.619674 seconds
0.0020364072857785047
clustering has taken 465.941272 seconds
0.0018352272657677275
clustering has taken 489.279825 seconds
0.0015988321528804218
clustering has taken 512.537594 seconds
0.0013663680666028885
clustering has taken 535.745496 seconds
0.0011661536666335974
clustering has taken 558.976337 seconds
0.000987046946886473
clustering has taken 582.317882 seconds
clustering took 604.673065 seconds
working on bootstrap 19 for kmeans clustering method
0.9780291659739869
clustering has taken 23.653709 seconds
0.3901457554499054
clustering has taken 47.139867 seconds
0.1644448759490825

0.4556360526486739
clustering has taken 47.177765 seconds
0.18470939829883784
clustering has taken 70.752686 seconds
0.07105821831228053
clustering has taken 94.116172 seconds
0.03187493123785662
clustering has taken 117.604324 seconds
0.019586041460689974
clustering has taken 141.026652 seconds
0.013274026336792871
clustering has taken 164.522784 seconds
0.010476437047894728
clustering has taken 187.916189 seconds
0.007764883577189469
clustering has taken 211.340512 seconds
0.006415024993479043
clustering has taken 234.736910 seconds
0.00555417617930368
clustering has taken 258.207110 seconds
0.004879931183783564
clustering has taken 281.793001 seconds
0.0043076837160021106
clustering has taken 305.315063 seconds
0.0034168931900068313
clustering has taken 328.627685 seconds
0.00293168187113905
clustering has taken 352.142765 seconds
0.0025366675934287283
clustering has taken 375.571078 seconds
0.0021084452244638
clustering has taken 398.970467 seconds
0.0017525074574405866
clustering 

0.0016528396554670488
clustering has taken 467.809273 seconds
0.00140221977578744
clustering has taken 491.272493 seconds
0.0012674017017126663
clustering has taken 514.608054 seconds
0.00107776433679127
clustering has taken 538.092217 seconds
0.000926763990778578
clustering has taken 561.477644 seconds
clustering took 584.064209 seconds
working on bootstrap 30 for kmeans clustering method
0.9734486730779944
clustering has taken 23.648722 seconds
0.4646191013089749
clustering has taken 46.922449 seconds
0.18091539642275203
clustering has taken 70.200164 seconds
0.08362342124984755
clustering has taken 93.437986 seconds
0.04294718828319822
clustering has taken 116.745622 seconds
0.02785994382541854
clustering has taken 140.041290 seconds
0.018994001124386435
clustering has taken 163.302050 seconds
0.014600555330994993
clustering has taken 186.515936 seconds
0.011058217608928044
clustering has taken 209.797641 seconds
0.008428882877699844
clustering has taken 232.994572 seconds
0.0073036

clustering has taken 164.091936 seconds
0.011877262173111454
clustering has taken 187.372644 seconds
0.009846306883380933
clustering has taken 211.031340 seconds
0.00892832593819425
clustering has taken 234.511513 seconds
0.00755695521794493
clustering has taken 257.959772 seconds
0.006309231933266729
clustering has taken 281.217541 seconds
0.005496597696653032
clustering has taken 304.684749 seconds
0.004498925676404901
clustering has taken 328.033275 seconds
0.003867631306263091
clustering has taken 351.491508 seconds
0.003228448773333603
clustering has taken 374.829063 seconds
0.0028618136700466704
clustering has taken 398.185568 seconds
0.0023037005900180324
clustering has taken 421.551048 seconds
0.0019039518292941397
clustering has taken 444.900571 seconds
0.0017092284407220517
clustering has taken 468.245108 seconds
0.001491973420792625
clustering has taken 491.632530 seconds
0.0012874648726204146
clustering has taken 514.989035 seconds
0.0010793428967925517
clustering has taken

0.0023165066922025573
clustering has taken 397.731782 seconds
0.0018922843096733776
clustering has taken 421.005508 seconds
0.0015806216104242675
clustering has taken 444.276242 seconds
0.0013428967244825888
clustering has taken 467.544982 seconds
0.0011237208622864165
clustering has taken 490.857604 seconds
0.000939424101701123
clustering has taken 514.079469 seconds
clustering took 536.361847 seconds
40
40
40
