In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

from alg_functions import *

In [2]:
# Install astroML into the environment of the running kernel (the %pip magic
# targets the kernel's interpreter rather than whatever `pip` is on PATH).
# NOTE(review): none of the cells visible here import astroML directly --
# presumably it is needed by alg_functions or by a later cell; confirm, and
# consider pinning a version so the notebook stays reproducible.
%pip install astroML

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


# Obtain the Data

In [10]:
# Read each catalogue and tag its rows with the dwarf family it came from, so
# the label is still available once the two tables are stacked together.
red_dwarfs = pd.read_csv('datasets/red_dwarfs.csv')
red_dwarfs['type'] = 'red'
white_dwarfs = pd.read_csv('datasets/white_dwarfs.csv')
white_dwarfs['type'] = 'white'

# Row filters: the red sample keeps subclasses starting with 'K' or 'M',
# the white sample keeps rows whose subclass is exactly 'WD'.
red_subclass = red_dwarfs['subclass']
is_k_or_m = red_subclass.str.startswith('K') | red_subclass.str.startswith('M')
is_wd = white_dwarfs['subclass'] == 'WD'

# Apply the filters, then cap each family at its first 2000 matching rows.
red_dwarfs = red_dwarfs[is_k_or_m].iloc[:2000]
white_dwarfs = white_dwarfs[is_wd].iloc[:2000]

# Stack red on top of white with a fresh 0..n-1 index, then discard every row
# that has a missing value in any column.
dwarfs = pd.concat([red_dwarfs, white_dwarfs], ignore_index=True).dropna()

display(dwarfs.head())

Unnamed: 0,spectroFlux_u,spectroFlux_g,spectroFlux_r,spectroFlux_i,spectroFlux_z,class,subclass,type
0,0.690362,2.639864,6.609772,9.040903,10.89239,STAR,K1,red
1,1.696024,5.127242,11.38034,15.28857,18.13893,STAR,K1,red
2,1.93937,6.556342,14.39116,19.00653,22.24492,STAR,K1,red
3,2.038488,9.414086,19.35224,25.07453,29.21216,STAR,K1,red
4,1.535014,5.44757,11.53837,15.07731,20.65042,STAR,K1,red


In [22]:
# Each derived column is the difference between two band-flux columns:
#     new column -> (minuend column, subtrahend column)
# The dict order fixes the order in which the columns are appended to `dwarfs`.
# NOTE(review): these are linear differences of the spectroFlux_* columns, not
# magnitude-based colour indices -- confirm that this is the intended feature.
flux_difference_terms = {
    'u-g': ('spectroFlux_u', 'spectroFlux_g'),
    'g-r': ('spectroFlux_g', 'spectroFlux_r'),
    'r-i': ('spectroFlux_r', 'spectroFlux_i'),
    'i-z': ('spectroFlux_i', 'spectroFlux_z'),
    'u-r': ('spectroFlux_u', 'spectroFlux_r'),
    'r-z': ('spectroFlux_r', 'spectroFlux_z'),
    'u-z': ('spectroFlux_u', 'spectroFlux_z'),
}
for diff_col, (minuend_col, subtrahend_col) in flux_difference_terms.items():
    dwarfs[diff_col] = dwarfs[minuend_col] - dwarfs[subtrahend_col]

# Only the four adjacent-band differences are used as predictor columns.
pred_columns_dwarfs = ['u-g', 'g-r', 'r-i', 'i-z']

display(dwarfs.head())

Unnamed: 0,spectroFlux_u,spectroFlux_g,spectroFlux_r,spectroFlux_i,spectroFlux_z,class,subclass,type,u-g,g-r,r-i,i-z,u-r,r-z,u-z
0,0.690362,2.639864,6.609772,9.040903,10.89239,STAR,K1,red,-1.949502,-3.969908,-2.431131,-1.851487,-5.91941,-4.282618,-10.202028
1,1.696024,5.127242,11.38034,15.28857,18.13893,STAR,K1,red,-3.431218,-6.253098,-3.90823,-2.85036,-9.684316,-6.75859,-16.442906
2,1.93937,6.556342,14.39116,19.00653,22.24492,STAR,K1,red,-4.616972,-7.834818,-4.61537,-3.23839,-12.45179,-7.85376,-20.30555
3,2.038488,9.414086,19.35224,25.07453,29.21216,STAR,K1,red,-7.375598,-9.938154,-5.72229,-4.13763,-17.313752,-9.85992,-27.173672
4,1.535014,5.44757,11.53837,15.07731,20.65042,STAR,K1,red,-3.912556,-6.0908,-3.53894,-5.57311,-10.003356,-9.11205,-19.115406


# Algorithm 1

In [23]:
# Feature matrix of the white-dwarf rows only. The columns come from
# `pred_columns_dwarfs` (instead of a second hard-coded copy of the list) so
# that the ball is always fitted in the same feature space in which the
# evaluation cell measures distances.
A = dwarfs.loc[dwarfs['type'] == 'white', pred_columns_dwarfs].values

# Run the Frank-Wolfe routine once per tolerance, from loosest to tightest.
# Judging by the recorded output, frank_wolfe_alg_MEB itself prints the centre,
# radius, iteration count, CPU time and core-set size of each run.
# NOTE: c, r, k, chi and timer are overwritten on every pass, so after the loop
# they hold the result for the LAST epsilon (0.0001); the evaluation cell that
# follows reads `c` and `r` from that final run.
for epsilon in [0.01, 0.001, 0.0001]:
    print('epsilon: ', epsilon)
    c, r, k, chi, timer = frank_wolfe_alg_MEB(epsilon, A)
    print('---------------')

epsilon:  0.01
Center: [140.52804351 213.88566069  85.86509275  37.62491031]
Radius: 325.05716271240453
Iterations: 5
CPU time: 0.09771966934204102
Core Set Size: 4
---------------
epsilon:  0.001
Center: [140.72559632 216.15189978  87.89006649  39.14004694]
Radius: 322.55120057514233
Iterations: 11
CPU time: 0.22409629821777344
Core Set Size: 4
---------------
epsilon:  0.0001
Center: [140.74162857 216.37800544  88.09349715  39.2924501 ]
Radius: 322.3039075882181
Iterations: 17
CPU time: 0.36678552627563477
Core Set Size: 4
---------------


In [24]:
# Offset of every sample (red and white alike) from the ball centre `c`,
# taken over the predictor columns only.
offsets_from_center = dwarfs[pred_columns_dwarfs].to_numpy() - c

# Row-wise Euclidean length of those offsets.
distances = np.linalg.norm(offsets_from_center, axis=1)

# Flag the samples lying strictly inside the ball of radius `r`
# (a point exactly on the boundary counts as outside).
dwarfs['InsideBall'] = distances < r

# Kept as the cell's last expression so that its result is displayed.
measure_accuracy('red', 'white', dwarfs)

Unnamed: 0,Metric,Count
0,True Positive (TP),1999.0
1,False Negative (FN),1.0
2,True Negative (TN),426.0
3,False Positive (FP),1536.0
4,Precision,0.565488
5,Recall,0.9995
6,F1 Score,0.722313
