In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from astropy.coordinates import SkyCoord
from astropy import units as u
from time import time

from matplotlib import rc
from matplotlib.ticker import AutoMinorLocator


def font_stile(a):
    """Switch matplotlib to a serif font family with LaTeX text rendering.

    Parameters
    ----------
    a : str
        Font name used as the only entry of the ``serif`` list in the
        ``font`` rc group.

    Returns
    -------
    bool
        Always ``True``.
    """
    font_settings = dict(family='serif', serif=[a])
    rc('font', **font_settings)
    # usetex=True makes matplotlib render all text through LaTeX.
    rc('text', usetex=True)
    return True

def crossmatch_tree(coords1, coords2, radius):
    """Nearest-neighbour sky cross-match of ``coords1`` against ``coords2``.

    For every entry of ``coords1`` the closest entry of ``coords2`` is found
    with ``SkyCoord.match_to_catalog_sky``; the pair is kept as a match when
    the separation does not exceed ``radius``.

    Parameters
    ----------
    coords1, coords2 : array-like
        Coordinate values in degrees, handed to ``SkyCoord(..., frame='icrs')``.
        The notebook passes (N, 2) arrays of (ra, dec).
    radius : float
        Largest separation, in degrees, that still counts as a match
        (a separation exactly equal to ``radius`` is a match).

    Returns
    -------
    matches : list of [int, int, float]
        One ``[index_in_coords1, index_in_coords2, separation_deg]`` row per
        matched entry, in ``coords1`` order.
    no_matches : list of int
        Indices of ``coords1`` whose nearest neighbour is farther than
        ``radius``.
    time_taken : float
        Elapsed time of the call, in seconds.
    """
    # Imported locally on purpose: the notebook rebinds the global name `time`
    # to the elapsed-time float returned by this function, which would make a
    # module-level `time()` call fail on any re-run.
    from time import perf_counter

    start = perf_counter()

    coords1_sc = SkyCoord(coords1 * u.degree, frame='icrs')
    coords2_sc = SkyCoord(coords2 * u.degree, frame='icrs')

    closest_ids, closest_dists, _ = coords1_sc.match_to_catalog_sky(coords2_sc)

    # Work on plain arrays: request the separations explicitly in degrees and
    # classify all rows at once instead of looping over Angle objects.
    closest_ids = np.atleast_1d(np.asarray(closest_ids))
    dists_deg = np.atleast_1d(
        np.asarray(closest_dists.to_value(u.degree), dtype=float)
    )

    # Same comparison as a per-row `dist > radius` test (NaN counts as matched).
    too_far = dists_deg > radius
    matched_idx = np.flatnonzero(~too_far)

    matches = [
        [id1, id2, dist]
        for id1, id2, dist in zip(
            matched_idx.tolist(),
            closest_ids[matched_idx].tolist(),
            dists_deg[matched_idx].tolist(),
        )
    ]
    no_matches = np.flatnonzero(too_far).tolist()

    time_taken = perf_counter() - start
    return matches, no_matches, time_taken


In [2]:
# Global plot styling: set the base font size, then switch to the serif /
# LaTeX text configuration applied by font_stile.
plt.rcParams['font.size'] = 11
activated = font_stile('Arial')

In [3]:
# Load the first source table from CSV.
# NOTE(review): absolute, user-specific path — this cell only runs on a machine
# with this exact directory layout.
full_data_1 = pd.read_csv("/home/esteban/Desktop/ml_gaia/smaller_data_cutting_mean_magintude.csv")

In [4]:
# Load the south-pole source table from CSV.
# NOTE(review): absolute, user-specific path — this cell only runs on a machine
# with this exact directory layout.
full_data_2 = pd.read_csv("/home/esteban/Desktop/ml_gaia/south_pole_data.csv")

In [5]:
# Build the working catalogue from full_data_1 only, with a fresh 0..n-1 index.
# NOTE(review): full_data_2 (south_pole_data.csv) is loaded earlier but is not
# part of this concat — confirm that leaving it out is intentional.
full_data = pd.concat([full_data_1], ignore_index=True)

In [6]:
# Load the alerts table from CSV.
# NOTE(review): absolute, user-specific path — this cell only runs on a machine
# with this exact directory layout.
gaia_alert_data = pd.read_csv("/home/esteban/Desktop/ml_gaia/alerts.csv")

In [7]:
# Number of alerts per class label, missing labels included.
# The column name carries a leading space (" Class") and must be spelled that way.
gaia_alert_data[" Class"].value_counts(dropna=False)

unknown           13683
SN Ia              1978
QSO                 939
CV                  646
SN II               635
YSO                 209
AGN                 142
SN IIn              122
SN IIP               97
BL Lac               90
SN Ic                60
Varstar              58
SN Ib                54
ULENS                42
SN IIb               41
SLSN                 40
Nova                 35
star                 28
SN Ic-BL             24
SN Ib/c              23
SN Ia-pec            17
SN I                 15
TDE                  15
dM                   15
SN                   13
SN Ibn               12
RCrB                 11
XRB                  11
symbiotic star        7
SN Ia-CSM             6
SN Iax                5
Other                 4
galaxy                4
CCSN                  4
LBV                   4
SN IIL                3
SSO                   2
SN II?                1
SN IIn-pec            1
SN I-pec              1
dK                    1
ILRT            

In [8]:
# Keep the first five rows of the source catalogue for a quick look.
full_data_top = full_data.head()

In [9]:
# Keep the first five rows of the alerts table for a quick look.
full_data_alert_top = gaia_alert_data.head()

In [10]:
# Display the preview of the source catalogue.
full_data_top

Unnamed: 0,source_id,random_index,l,b,ra,dec,phot_g_mean_mag,bp_g,g_rp
0,760561364351045120,1503350018,181.000067,70.000004,171.800075,36.120229,18.281464,0.662739,0.735563
1,1539023155958580352,1046671611,141.525303,70.000006,183.859042,45.806575,17.931921,0.535776,0.667856
2,3972369622338023936,1793755031,244.446942,70.000014,175.23577,15.626861,17.907908,1.219429,1.099232
3,3728686337743239680,90670554,353.233926,70.000022,208.56398,13.535987,19.350208,0.397049,0.454447
4,767392698813975424,1737809286,169.993721,70.000022,173.929446,39.482336,19.130243,0.478941,0.664692


In [11]:
# Display the preview of the alerts table.
full_data_alert_top

Unnamed: 0,#Name,Date,RaDeg,DecDeg,AlertMag,HistoricMag,HistoricStdDev,Class,Published,Comment,TNSid
0,Gaia22apm,2022-02-13 05:17:54,200.58364,-6.10488,17.03,18.39,0.45,BL Lac,2022-02-14 08:16:24,known blazar brightens by ~1 mag,AT2022clw
1,Gaia22apl,2022-02-13 07:13:16,73.6391,-27.72316,18.76,,,unknown,2022-02-14 08:16:20,candidate SN near galaxy pair APMUKS(BJ) B0452...,AT2022cli
2,Gaia22apk,2022-02-12 17:20:01,197.86097,-6.4397,18.85,19.31,0.13,unknown,2022-02-14 08:16:14,"Gaia, WISE and UV source brightens by 0.4 mag ...",AT2022clv
3,Gaia22apj,2022-02-13 03:21:52,306.62464,36.94113,18.26,18.52,0.06,unknown,2022-02-14 08:16:04,red Gaia source brightens by ~0.35 mag,AT2022clu
4,Gaia22api,2022-02-12 03:11:34,318.20285,37.09541,18.33,19.11,0.04,unknown,2022-02-13 11:40:34,Flare on Gaia source,AT2022cey


In [12]:
# (rows, columns) of the source catalogue.
full_data.shape

(1982818, 9)

In [13]:
# (rows, columns) of the alerts table.
gaia_alert_data.shape

(19102, 11)

In [14]:
# (ra, dec) of every catalogue source as a two-column array for the cross-match.
s_array1 = full_data.loc[:, ["ra", "dec"]].to_numpy()

In [15]:
# Sanity check: one row per catalogue source, two coordinate columns.
print(s_array1.shape)

(1982818, 2)


In [16]:
# (ra, dec) of every alert as a two-column array for the cross-match.
# Both column names carry a leading space in the alerts table.
s_array2 = gaia_alert_data.loc[:, [" RaDeg", " DecDeg"]].to_numpy()

In [17]:
# Sanity check: one row per alert, two coordinate columns.
print(s_array2.shape)

(19102, 2)


In [None]:
# Cross-match every catalogue source against the alerts.
# Radius values noted by the author: 0.1, 0.02, 0.0006 (0.0006 is the gaia resolution).
# The elapsed time is bound to `time_taken`, not `time`: rebinding `time` would
# shadow `from time import time` and break any later call that times itself.
matches, no_matches, time_taken = crossmatch_tree(s_array1, s_array2, 0.0006)

In [None]:
# Number of matched pairs and the width of each match row.
print(np.asarray(matches).shape)

In [None]:
print(np.array(matches)[:,0]) # indices of all the cross-matched sources in the full data cuts

In [None]:
# Row positions (into full_data) of the matched catalogue sources.
# np.array(matches) is float64 because each match row also holds the float
# separation, so the index column is cast back to int before it is used for
# positional (.iloc) indexing.
matches_inside_full = np.array(matches)[:, 0].astype(int)

In [None]:
# Row positions (into gaia_alert_data) of the alert matched to each source.
# np.array(matches) is float64 because each match row also holds the float
# separation, so the index column is cast back to int before it is used for
# positional (.iloc) indexing.
matches_inside_alerts = np.array(matches)[:, 1].astype(int)

In [None]:
# Catalogue rows that have an alert within the match radius, in match order.
reduced = full_data.iloc[matches_inside_full]

In [None]:
# Alert rows matched to those catalogue rows, in the same match order
# (an alert appears once per catalogue source it was matched to).
reduced_inside_alerts = gaia_alert_data.iloc[matches_inside_alerts]

In [None]:
# Class distribution of the matched alerts, missing labels included.
reduced_inside_alerts[" Class"].value_counts(dropna=False)

In [None]:
#training_data_aux= reduced_inside_alerts.loc[reduced_inside_alerts[" Class"].isin(["SN Ia","QSO","SN II","AGN"])] 

In [None]:
#aux=training_data_aux[[" Class"]].replace({"QSO": 0, "SN Ia": 1,"SN II":2, "AGN":3 })
# Labels come from the matched alerts, photometric features from the matched
# catalogue rows; both frames follow the same match order.
aux1 = reduced_inside_alerts.loc[:, [" Class"]]
aux2 = reduced.loc[:, ["bp_g", "g_rp", "phot_g_mean_mag"]]

aux2.shape


In [None]:
# Give both frames a fresh 0..n-1 index so the column-wise concat pairs rows by
# position instead of by their unrelated original row labels.
aux1 = aux1.reset_index(drop=True)
aux2 = aux2.reset_index(drop=True)
test = pd.concat([aux2, aux1], axis=1)
test

In [None]:
# Rows with one of the four target classes form the training set; rows still
# labelled "unknown" are the ones to be classified.
is_labelled = test[" Class"].isin(["SN Ia", "QSO", "SN II", "AGN"])
is_unknown = test[" Class"].isin(["unknown"])
training_data = test.loc[is_labelled]
test_data = test.loc[is_unknown]



In [None]:
# Encode the class labels as integers for the classifier.
# training_data is a slice of `test`, and `frame[col].replace(..., inplace=True)`
# is chained assignment: it triggers SettingWithCopy/chained-assignment warnings
# and leaves the column untouched under pandas Copy-on-Write. Build an
# independent frame with the encoded column instead (safe to re-run: already
# encoded integers are left as they are).
training_data = training_data.assign(
    **{" Class": training_data[" Class"].replace({"SN Ia": 0, "QSO": 1, "SN II": 2, "AGN": 3})}
)

In [None]:
# Display the training rows after the class encoding.
training_data


In [None]:
# Feature matrix and label column written to disk next; missing values are
# replaced by 0 in both.
training_save = training_data.loc[:, ["bp_g", "g_rp", "phot_g_mean_mag"]].fillna(0)
results_training = training_data.loc[:, [" Class"]].fillna(0)

In [None]:
# Write the training features to training.csv in the current working directory, without the index column.
training_save.to_csv('training.csv', index=False)

In [None]:
# Write the training labels to classified.csv in the current working directory, without the index column.
results_training.to_csv('classified.csv', index = False)

In [None]:
# Rebind the in-memory training frames WITHOUT the fillna(0) step, so they
# differ from the CSVs written earlier wherever values are missing.
# NOTE(review): confirm that dropping the NaN fill here is intended.
training_save = training_data.loc[:, ["bp_g", "g_rp", "phot_g_mean_mag"]]
results_training = training_data.loc[:, [" Class"]]

In [None]:
# Features and (all "unknown") labels of the rows to be classified; no NaN
# fill is applied here.
test_save = test_data.loc[:, ["bp_g", "g_rp", "phot_g_mean_mag"]]
results_test = test_data.loc[:, [" Class"]]

In [None]:
# Write the unclassified feature rows to test.csv in the current working directory, without the index column.
test_save.to_csv('test.csv', index=False)

In [None]:
#onlysn=reduced_inside_alerts.loc[reduced_inside_alerts[" Class"].isin(["SN Ia"])] 
#onlysn[" Class"].value_counts(dropna=False)

In [None]:
#onlyqso=reduced_inside_alerts.loc[reduced_inside_alerts[" Class"].isin(["QSO"])] 
#onlyqso[" Class"].value_counts(dropna=False)

In [None]:
#g_rp_full = full_data[["g_rp"]].to_numpy()
#bp_g_full = full_data[["bp_g"]].to_numpy()


In [None]:
#g_rp = reduced[["g_rp"]].to_numpy()
#bp_g = reduced[["bp_g"]].to_numpy()
    

In [None]:
#onlysn2=reduced_inside_alerts.loc[reduced_inside_alerts[" Class"].isin(["SN II"])] 
#onlysn2[" Class"].value_counts(dropna=False)

In [None]:
#newframe=reduced_inside_alerts[" Class"].replace({"QSO": 0, "SN Ia": 1,"SN II":2, "AGN":3 })#.value_counts(dropna=False)