In [143]:
import pandas as pd
import numpy as np

from collections import Counter
from sklearn.cluster import AffinityPropagation
from sklearn import metrics
from sklearn.decomposition import PCA


In [144]:
exoplanet_data = pd.read_pickle('exoplanet_cluster_input')

In [168]:
ap = AffinityPropagation(preference=-0.95).fit(exoplanet_data)
label_counts = Counter(ap.labels_)
print(label_counts)

Counter({46: 64, 49: 45, 33: 44, 22: 38, 51: 37, 43: 35, 10: 27, 35: 25, 21: 24, 9: 23, 45: 20, 32: 20, 0: 18, 15: 16, 18: 14, 27: 14, 24: 12, 47: 12, 30: 11, 26: 10, 20: 10, 7: 10, 50: 9, 6: 9, 19: 9, 14: 8, 31: 8, 17: 8, 37: 8, 40: 8, 28: 7, 16: 6, 48: 6, 8: 5, 4: 5, 13: 5, 41: 5, 3: 4, 2: 4, 39: 4, 11: 4, 29: 4, 1: 3, 34: 3, 12: 3, 23: 3, 42: 3, 5: 2, 25: 1, 36: 1, 38: 1, 44: 1})


In [169]:
print(metrics.silhouette_score(exoplanet_data, ap.labels_, metric='euclidean'))
print(len(label_counts.keys()))

0.2513168120383756
52


In [170]:
pca = PCA(n_components=4)
X_r = pca.fit(exoplanet_data).transform(exoplanet_data)
ap2 = AffinityPropagation(preference=-0.95).fit(X_r)

In [171]:
metrics.silhouette_score(X_r, ap.labels_, metric='euclidean')

0.2638354524596665

In [172]:
ap2_counts = Counter(ap2.labels_)
print(len(ap2_counts.keys()))

50


In [173]:
print(sum(label_counts.values())/len(label_counts.values()))
print(sum(ap2_counts.values())/len(ap2_counts.values()))

13.0
13.52


In [174]:
ap.labels_[0:5]

array([14, 49, 14, 15, 33])

In [180]:
cluster_assign = pd.DataFrame(ap.labels_, columns= ['cluster'], index=exoplanet_data.index)


In [181]:
cluster_assign

Unnamed: 0,cluster
508,14
520,49
546,14
551,15
552,33
...,...
13979,46
13980,43
13981,51
13982,26


In [182]:
exoplanet_extract = pd.read_pickle('current_exo_ext')


In [183]:
exoplanet_extract.loc[508]

mpl_bmassj        0.00579
mpl_dens              5.6
mpl_discmethod    Transit
mpl_eqt               525
mpl_hostname       GJ 357
mpl_insol            12.6
mpl_letter              b
mpl_orbeccen          NaN
mpl_orbincl         89.12
mpl_orbper        3.93072
mpl_orbsmax         0.035
mpl_pnum                3
mpl_radj            0.109
mpl_status              3
mst_age               NaN
mst_lum            -1.798
mst_mass            0.342
mst_metratio       [Fe/H]
mst_rad              0.34
mst_teff             3505
Name: 508, dtype: object

In [184]:
exoplanet_data.loc[508]

Star Temp. Eff.      8.162516
Star Mass            0.851005
Star Size            0.850151
Planet Orb. Dist.    0.710496
Planet period        1.780146
Planet Mass          0.696038
Planet Size          0.746214
Planet Temp. Est.    6.267201
Planet Density       2.028148
Name: 508, dtype: float64

In [185]:
exoplanet_extract['cluster']=np.nan

exoplanet_extract.iloc[exoplanet_data.index]

Unnamed: 0,mpl_bmassj,mpl_dens,mpl_discmethod,mpl_eqt,mpl_hostname,mpl_insol,mpl_letter,mpl_orbeccen,mpl_orbincl,mpl_orbper,...,mpl_pnum,mpl_radj,mpl_status,mst_age,mst_lum,mst_mass,mst_metratio,mst_rad,mst_teff,cluster
508,0.00579,5.600,Transit,525.0,GJ 357,12.6,b,,89.120,3.930720,...,3,0.109,3,,-1.798,0.3420,[Fe/H],0.34,3505.0,
520,0.59500,0.975,Transit,1170.0,HAT-P-3,,b,,86.310,2.899738,...,1,0.911,3,2.900,,0.9250,[Fe/H],0.85,5190.0,
546,0.00692,4.400,Transit,433.0,LTT 1445 A,,b,0.190,89.400,5.358820,...,1,0.123,3,,,0.2560,[Fe/H],0.28,3337.0,
551,2.51000,1.680,Transit,1405.0,TOI-150,,b,0.262,88.090,5.857487,...,1,1.255,3,2.346,0.497,1.3510,[Fe/H],1.53,6255.0,
552,1.22000,0.490,Transit,1669.0,TOI-163,,b,0.000,87.240,4.231306,...,1,1.489,3,1.823,0.636,1.4352,[Fe/H],1.65,6495.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13979,0.37100,0.216,Transit,1457.0,Qatar-8,,b,0.000,89.290,3.714950,...,1,1.285,3,8.300,0.228,1.0290,[Fe/H],1.31,5738.0,
13980,1.19000,1.430,Transit,1134.0,Qatar-9,,b,0.000,89.230,1.540731,...,1,1.009,3,7.500,-0.821,0.7190,[Fe/H],0.70,4309.0,
13981,0.73600,0.248,Transit,1955.0,Qatar-10,,b,0.000,85.870,1.645321,...,1,1.543,3,3.200,0.300,1.1560,[Fe/H],1.25,6124.0,
13982,0.09439,0.340,Transit,628.0,TOI-216,25.9,b,0.132,88.364,17.089000,...,2,0.686,3,,,0.8740,[M/H],0.84,5026.0,


In [186]:
exoplanet_extract.loc[cluster_assign.index,'cluster']=cluster_assign['cluster']

In [187]:
exoplanet_extract.loc[exoplanet_data.index,'cluster']

508      14.0
520      49.0
546      14.0
551      15.0
552      33.0
         ... 
13979    46.0
13980    43.0
13981    51.0
13982    26.0
13983    50.0
Name: cluster, Length: 676, dtype: float64

In [188]:
exoplanet_extract[exoplanet_extract['cluster']==14.0]

Unnamed: 0,mpl_bmassj,mpl_dens,mpl_discmethod,mpl_eqt,mpl_hostname,mpl_insol,mpl_letter,mpl_orbeccen,mpl_orbincl,mpl_orbper,...,mpl_pnum,mpl_radj,mpl_status,mst_age,mst_lum,mst_mass,mst_metratio,mst_rad,mst_teff,cluster
508,0.00579,5.6,Transit,525.0,GJ 357,12.6,b,,89.12,3.93072,...,3,0.109,3,,-1.798,0.342,[Fe/H],0.34,3505.0,14.0
546,0.00692,4.4,Transit,433.0,LTT 1445 A,,b,0.19,89.4,5.35882,...,1,0.123,3,,,0.256,[Fe/H],0.28,3337.0,14.0
914,0.00761,5.4,Transit,517.0,L 98-59,,c,0.07,,3.6904,...,3,0.12,3,,,0.312,[Fe/H],0.31,3412.0,14.0
946,0.00727,3.3,Transit,409.0,L 98-59,,d,0.09,,7.4512,...,3,0.14,3,,,0.312,[Fe/H],0.31,3412.0,14.0
2037,0.00569,4.7,Transit,438.0,LHS 1140,6.16,c,0.31,89.92,3.777931,...,2,0.114,3,5.0,-2.356,0.179,[Fe/H],0.21,3216.0,14.0
3932,0.00267,3.6,Transit,400.0,TRAPPIST-1,4.25,b,0.081,89.65,1.510871,...,7,0.097,3,,-3.281,0.0802,[Fe/H],0.12,2559.0,14.0
3933,0.00434,6.45,Transit,342.0,TRAPPIST-1,2.27,c,0.083,89.67,2.421823,...,7,0.094,3,,-3.281,0.0802,[Fe/H],0.12,2559.0,14.0
4022,0.00129,4.9,Transit,288.0,TRAPPIST-1,1.143,d,0.07,89.75,4.04961,...,7,0.069,3,,-3.281,0.0802,[Fe/H],0.12,2559.0,14.0


In [189]:
exoplanet_extract[exoplanet_extract['cluster']==49.0]

Unnamed: 0,mpl_bmassj,mpl_dens,mpl_discmethod,mpl_eqt,mpl_hostname,mpl_insol,mpl_letter,mpl_orbeccen,mpl_orbincl,mpl_orbper,...,mpl_pnum,mpl_radj,mpl_status,mst_age,mst_lum,mst_mass,mst_metratio,mst_rad,mst_teff,cluster
520,0.595,0.975,Transit,1170.0,HAT-P-3,,b,,86.31,2.899738,...,1,0.911,3,2.9,,0.925,[Fe/H],0.85,5190.0,49.0
585,0.93,0.89,Transit,1345.0,NGTS-8,503.0,b,0.01,86.9,2.4997,...,1,1.09,3,12.48,,0.89,[Fe/H],0.98,5241.0,49.0
808,0.931,0.914,Transit,1311.0,WASP-2,492.0,b,,84.49,2.152175,...,1,1.081,3,,-0.313,0.895,[Fe/H],0.87,5180.0,49.0
809,0.87,0.811,Transit,1337.0,WASP-44,533.0,b,,86.21,2.423804,...,1,1.1,3,,-0.198,0.935,[Fe/H],0.91,5420.0,49.0
810,0.964,1.418,Transit,1153.0,WASP-45,294.0,b,,85.02,3.126089,...,1,0.946,3,,-0.336,0.85,[Fe/H],0.85,5150.0,49.0
822,1.166,1.031,Radial Velocity,1209.0,HD 189733,356.0,b,,85.69,2.218577,...,1,1.119,3,,-0.465,0.812,[Fe/H],0.77,5050.0,49.0
2272,0.577,0.46,Transit,1296.0,HATS-69,466.6,b,0.519,88.49,2.225258,...,1,0.945,3,8.0,-0.318,0.892,[Fe/H],0.88,5137.0,49.0
2356,0.44,0.95,Transit,1260.0,WASP-144,,b,0.0,86.9,2.278315,...,1,0.85,3,8.71,,0.81,,0.81,5200.0,49.0
2357,0.89,1.6,Transit,1200.0,WASP-145 A,,b,0.0,83.3,1.769038,...,1,0.9,3,6.99,,0.76,,0.68,4900.0,49.0
3565,0.922,0.764,Transit,1171.0,WASP-98,,b,,86.38,2.96264,...,1,1.144,3,2.7,,0.809,[Fe/H],0.74,5473.0,49.0


In [191]:
exoplanet_extract[exoplanet_extract['cluster']==47.0]

Unnamed: 0,mpl_bmassj,mpl_dens,mpl_discmethod,mpl_eqt,mpl_hostname,mpl_insol,mpl_letter,mpl_orbeccen,mpl_orbincl,mpl_orbper,...,mpl_pnum,mpl_radj,mpl_status,mst_age,mst_lum,mst_mass,mst_metratio,mst_rad,mst_teff,cluster
659,0.86,0.131,Transit,2470.0,WASP-78,,b,0.0,81.3,2.175173,...,1,2.06,3,2.8,,1.39,[Fe/H],2.35,6100.0,47.0
889,2.093,0.689,Transit,2782.0,WASP-33,,b,,86.63,1.21987,...,1,1.593,3,,,1.495,,1.44,7430.0,47.0
891,1.465,0.267,Transit,2593.0,WASP-12,,b,,83.52,1.09142,...,1,1.937,3,,,1.434,,1.66,6360.0,47.0
1200,1.183,0.243,Transit,2358.0,WASP-121,,b,0.0,87.6,1.274925,...,1,1.865,3,1.5,0.519,1.353,[Fe/H],1.46,6459.0,47.0
2270,1.45,0.374,Transit,2193.0,HATS-67,3821.0,b,0.057,79.03,1.609179,...,1,1.685,3,0.51,0.547,1.435,[Fe/H],1.44,6594.0,47.0
2804,1.47,0.266,Transit,2580.0,WASP-12,,b,0.0,83.37,1.09142,...,1,1.9,3,,0.607,1.434,[Fe/H],1.66,6360.0,47.0
3117,2.88,0.53,Transit,4050.0,KELT-9,44900.0,b,0.0,86.79,1.481124,...,1,1.891,3,0.3,1.724,2.52,[Fe/H],2.36,10170.0,47.0
3813,1.069,0.492,Transit,2520.0,WASP-19,,b,0.002,78.78,0.788839,...,1,1.392,3,,,0.904,[Fe/H],1.0,5568.0,47.0
9804,0.89,0.24,Transit,2350.0,WASP-78,,b,0.0,83.2,2.175176,...,1,1.7,3,,,1.33,,2.2,6100.0,47.0
10410,1.49,0.55,Transit,2508.0,WASP-103,,b,,86.3,0.925542,...,1,1.528,3,4.0,0.413,1.22,[Fe/H],1.44,6110.0,47.0


In [142]:
exoplanet_extract.to_pickle('clustered_exoplanets')