In [1]:
from __future__ import division, print_function, unicode_literals
import numpy as np

In [2]:
# True class labels for the training and the test sample
# (compared against 0 = galaxy and 1 = star further down).
truth_train, truth_test = (
    np.loadtxt(label_file)
    for label_file in ('../../data/truth_train.dat',
                       '../../data/truth_test.dat')
)

In [3]:
# TPC results: only column index 2 of each .mlz file is read
# (`oob` file -> tpc_cv, `test` file -> tpc_test).
tpc_cv, tpc_test = (
    np.loadtxt(mlz_file, usecols=(2,), unpack=True)
    for mlz_file in ('../../data/clens_tpc_oob.mlz',
                     '../../data/clens_tpc_test.mlz')
)

In [4]:
# SOMc results: only column index 2 of each .mlz file is read
# (`oob` file -> som_cv, `test` file -> som_test).
som_cv, som_test = (
    np.loadtxt(mlz_file, usecols=(2,), unpack=True)
    for mlz_file in ('../../data/clens_som_oob.mlz',
                     '../../data/clens_som_test.mlz')
)

In [5]:
# HBC results: a single file (column index 0) holds both samples back to back.
hbc_all = np.loadtxt('../../data/clens_all.hbc', usecols=(0,), unpack=True)

# The first len(truth_train) entries go to the cross-validation set,
# everything after that to the test set.
n_train = len(truth_train)
hbc_cv, hbc_test = hbc_all[:n_train], hbc_all[n_train:]

In [6]:
# Test-sample outputs of the four combination methods (WA, BoM, stacking, BMC).
wa_test, bom_test, stack_test, bmc_test = (
    np.loadtxt(result_file)
    for result_file in ('../../data/clens_test.wa',
                        '../../data/clens_test.bom',
                        '../../data/clens_test.stack',
                        '../../data/clens_test.bmc')
)

In [7]:
# read in FLUX_RADIUS and MAG_i and make a classification
def morph_class(magnitude, half_radius, cut=(0, 25, 1.0, 3.0)):
    """Flag objects lying inside a magnitude / radius box as point sources.

    Parameters
    ----------
    magnitude, half_radius : array-like
        Per-object magnitude and radius, compared element-wise against the
        bounds in ``cut``. They must broadcast against each other.
    cut : sequence of four numbers, optional
        ``(mag_lower, mag_upper, radius_lower, radius_upper)``. All four
        bounds are exclusive.

    Returns
    -------
    Integer array with 1 where the object lies strictly inside the box
    (point source) and 0 everywhere else.
    """
    mag_lower, mag_upper, r_lower, r_upper = cut[0], cut[1], cut[2], cut[3]

    in_mag_range = (magnitude > mag_lower) & (magnitude < mag_upper)
    in_radius_range = (half_radius > r_lower) & (half_radius < r_upper)
    point_source = in_mag_range & in_radius_range

    # ``np.int`` was merely an alias of the builtin ``int`` and has been
    # removed from NumPy (1.24); the builtin yields the same dtype.
    return point_source.astype(int)

# Bounds of the point-source box: magnitude (MAG_i) and radius (FLUX_RADIUS).
mag_i_lower, mag_i_upper = 17, 21.0
r_h_lower, r_h_upper = 1.4, 2.8
morph_cut = [mag_i_lower, mag_i_upper, r_h_lower, r_h_upper]

# training ("cv") sample
r_h_cv = np.loadtxt('../../data/flux_radius.train.dat')
mag_i_cv = np.loadtxt('../../data/mag_i.train.dat')
morph_cv = morph_class(mag_i_cv, r_h_cv, cut=morph_cut)

# test sample
r_h_test = np.loadtxt('../../data/flux_radius.test.dat')
mag_i_test = np.loadtxt('../../data/mag_i.test.dat')
morph_test = morph_class(mag_i_test, r_h_test, cut=morph_cut)

In [8]:
# Masks for the morphological prediction and for the true class
# (1 = star, 0 = galaxy in both arrays).
pred_star, pred_gal = morph_test == 1, morph_test == 0
true_star, true_gal = truth_test == 1, truth_test == 0

# Confusion-matrix counts; first letter = true class, second = assigned class.
morph_gs = (pred_star & true_gal).sum()   # galaxies called stars
morph_gg = (pred_gal & true_gal).sum()    # galaxies called galaxies
morph_sg = (pred_gal & true_star).sum()   # stars called galaxies
morph_ss = (pred_star & true_star).sum()  # stars called stars

# Row / column totals of the confusion matrix.
n_true_gal = morph_gg + morph_gs
n_pred_gal = morph_gg + morph_sg
n_true_star = morph_ss + morph_sg
n_pred_star = morph_ss + morph_gs

# completeness = fraction of a true class that was recovered,
# purity       = fraction of an assigned class that is correct
morph_g_comp = morph_gg / n_true_gal
morph_g_pur = morph_gg / n_pred_gal
morph_s_comp = morph_ss / n_true_star
morph_s_pur = morph_ss / n_pred_star

In [9]:
from sklearn.metrics import roc_auc_score

# Test-sample outputs of every classifier, keyed by the classifier name.
all_test = {
    'TPC': tpc_test,
    'SOM': som_test,
    'HB': hbc_test,
    'Morphology': morph_test,
    'WA': wa_test,
    'BoM': bom_test,
    'Stacking': stack_test,
    'BMC': bmc_test,
}

# Area under the ROC curve per classifier, already formatted to four decimals.
auc = {
    name: "%.4f" % roc_auc_score(truth_test, scores)
    for name, scores in all_test.items()
}

# Wrap the largest entry in bold markup. The comparison runs on the
# formatted strings, which all share the same fixed-width layout.
max_auc = max(auc, key=auc.get)
auc[max_auc] = r"\textbf{%s}" % auc[max_auc]

print(auc)

{u'BMC': u'0.9852', u'TPC': u'\\textbf{0.9870}', u'WA': u'0.9806', u'Stacking': u'0.9842', u'SOM': u'0.9683', u'HB': u'0.9403', u'BoM': u'0.9870', u'Morphology': u'0.8555'}


In [10]:
from sklearn.metrics import mean_squared_error

# Mean squared error per classifier, already formatted to four decimals.
mse = {
    name: "%.4f" % mean_squared_error(truth_test, scores)
    for name, scores in all_test.items()
}

# Wrap the smallest entry in bold markup (compared as formatted strings).
min_mse = min(mse, key=mse.get)
mse[min_mse] = r"\textbf{%s}" % mse[min_mse]

print(mse)

{u'BMC': u'\\textbf{0.0174}', u'TPC': u'0.0208', u'WA': u'0.0266', u'Stacking': u'0.0194', u'SOM': u'0.0452', u'HB': u'0.0705', u'BoM': u'0.0208', u'Morphology': u'0.0397'}


In [11]:
def calc_completeness_purity(truth, classif, mag, p_cut=0.5, bins=np.arange(16, 26, 0.5)):
    """Completeness and purity of galaxies and stars in bins of magnitude.

    Parameters
    ----------
    truth : array
        True class per object: 0 = galaxy, 1 = star.
    classif : array
        Classifier output per object. Values ``>= p_cut`` count as "star",
        values ``< p_cut`` as "galaxy" -- the same convention that
        ``find_purity_at`` uses, so an object sitting exactly on the
        threshold is no longer dropped from every count.
    mag : array
        Magnitude per object, used only for the binning.
    p_cut : float, optional
        Classification threshold.
    bins : array, optional
        Magnitude bin edges handed to ``np.histogram``.

    Returns
    -------
    g_comp_bin, g_pur_bin, s_comp_bin, s_pur_bin : arrays
        One value per magnitude bin. A bin whose denominator is zero
        (nothing to be complete or pure about) is reported as 1.
    """
    called_star = classif >= p_cut
    called_galaxy = classif < p_cut
    is_galaxy = truth == 0
    is_star = truth == 1

    def _count(mask):
        # Number of selected objects per magnitude bin.
        counts, _ = np.histogram(mag[mask], bins=bins)
        return counts

    def _fraction(part, rest):
        # part / (part + rest) per bin; empty bins (0/0) are set to 1.
        with np.errstate(divide='ignore', invalid='ignore'):
            frac = np.true_divide(part, part + rest)
        frac[~np.isfinite(frac)] = 1
        return frac

    # first letter = true class, second letter = assigned class
    gs_bin = _count(called_star & is_galaxy)
    gg_bin = _count(called_galaxy & is_galaxy)
    sg_bin = _count(called_galaxy & is_star)
    ss_bin = _count(called_star & is_star)

    g_comp_bin = _fraction(gg_bin, gs_bin)  # galaxy completeness
    g_pur_bin = _fraction(gg_bin, sg_bin)   # galaxy purity
    s_comp_bin = _fraction(ss_bin, sg_bin)  # star completeness
    s_pur_bin = _fraction(ss_bin, gs_bin)   # star purity

    return g_comp_bin, g_pur_bin, s_comp_bin, s_pur_bin

    
def find_purity_at(truth_test, clf, step=0.001, gc=None, sc=None):
    """Find the threshold that reaches a target completeness and its purity.

    Thresholds ``p`` are scanned over ``np.arange(0, 1, step)``. At each one
    an object is called a star when ``clf >= p`` and a galaxy otherwise.

    Parameters
    ----------
    truth_test : array
        True class per object: 0 = galaxy, 1 = star.
    clf : array
        Classifier output per object.
    step : float, optional
        Spacing of the scanned thresholds.
    gc : float, optional
        Target galaxy completeness. Give this to get the galaxy purity.
    sc : float, optional
        Target star completeness. Give this to get the star purity.

    Exactly one of ``gc`` and ``sc`` must be given; 0 is a valid target.

    Returns
    -------
    (threshold, purity)
        The scanned threshold whose completeness is closest to the target
        (the first one in case of ties) and the purity at that threshold.
        A ratio with an empty denominator is taken to be 1.

    Raises
    ------
    ValueError
        If neither or both of ``gc`` and ``sc`` are given.
    """
    # Test against None rather than truthiness so that a target of 0.0 is
    # accepted and "both given" is always rejected.
    if (gc is None) == (sc is None):
        raise ValueError('Specify exactly one of the gc or sc parameters.')

    want_galaxies = gc is not None
    target = gc if want_galaxies else sc

    pbin = np.arange(0, 1, step)
    pure_all = np.zeros(len(pbin))
    comp_all = np.zeros(len(pbin))

    # The truth masks do not depend on the threshold: compute them once.
    is_galaxy = truth_test == 0
    is_star = truth_test == 1

    def _ratio(part, rest):
        # part / (part + rest), defined as 1 when the denominator is empty.
        total = part + rest
        if total == 0:
            return 1
        return float(part) / total

    for i, p in enumerate(pbin):
        called_star = clf >= p
        called_galaxy = clf < p

        # first letter = true class, second letter = assigned class
        gs = (called_star & is_galaxy).sum()
        gg = (called_galaxy & is_galaxy).sum()
        sg = (called_galaxy & is_star).sum()
        ss = (called_star & is_star).sum()

        if want_galaxies:
            pure_all[i] = _ratio(gg, sg)
            comp_all[i] = _ratio(gg, gs)
        else:
            comp_all[i] = _ratio(ss, sg)
            pure_all[i] = _ratio(ss, gs)

    ibin = np.argmin(np.abs(comp_all - target))
    return pbin[ibin], pure_all[ibin]

In [12]:
# Galaxy purity of every classifier at the galaxy completeness reached by
# the morphological cut; only the purity (second return value) is kept.
g_pur1 = {
    name: "%.4f" % find_purity_at(truth_test, scores, gc=morph_g_comp, step=0.0001)[1]
    for name, scores in all_test.items()
}

# Wrap the largest entry in bold markup (compared as formatted strings).
max_g_pur1 = max(g_pur1, key=g_pur1.get)
g_pur1[max_g_pur1] = r"\textbf{%s}" % g_pur1[max_g_pur1]

print(g_pur1)

{u'BMC': u'\\textbf{0.9800}', u'TPC': u'0.9714', u'WA': u'0.9755', u'Stacking': u'0.9752', u'SOM': u'0.9125', u'HB': u'0.9219', u'BoM': u'0.9714', u'Morphology': u'0.9597'}


In [13]:
# Star purity of every classifier at the star completeness reached by the
# morphological cut; only the purity (second return value) is kept.
s_pur1 = {
    name: "%.4f" % find_purity_at(truth_test, scores, sc=morph_s_comp, step=0.0001)[1]
    for name, scores in all_test.items()
}

# Wrap the largest entry in bold markup (compared as formatted strings).
max_s_pur1 = max(s_pur1, key=s_pur1.get)
s_pur1[max_s_pur1] = r"$\textbf{%s}$" % s_pur1[max_s_pur1]

print(s_pur1)

{u'BMC': u'$\\textbf{0.9959}$', u'TPC': u'0.9838', u'WA': u'0.9926', u'Stacking': u'0.9902', u'SOM': u'0.8454', u'HB': u'0.7017', u'BoM': u'0.9838', u'Morphology': u'0.9666'}


In [14]:
# Galaxy purity of every classifier at a fixed galaxy completeness of 0.96;
# only the purity (second return value) is kept.
g_pur2 = {
    name: "%.4f" % find_purity_at(truth_test, scores, gc=0.96, step=0.0001)[1]
    for name, scores in all_test.items()
}

# Wrap the largest entry in bold markup (compared as formatted strings).
max_g_pur2 = max(g_pur2, key=g_pur2.get)
g_pur2[max_g_pur2] = r"$\textbf{%s}$" % g_pur2[max_g_pur2]

print(g_pur2)

{u'BMC': u'$\\textbf{0.9924}$', u'TPC': u'0.9918', u'WA': u'0.9872', u'Stacking': u'0.9918', u'SOM': u'0.9788', u'HB': u'0.9471', u'BoM': u'0.9918', u'Morphology': u'0.9597'}


In [15]:
# Star purity of every classifier at a fixed star completeness of 0.25;
# only the purity (second return value) is kept.
s_pur2 = {
    name: "%.4f" % find_purity_at(truth_test, scores, sc=0.25, step=0.0001)[1]
    for name, scores in all_test.items()
}

# No bold highlighting is applied to this column.
# NOTE(review): the saved table output shows Stacking and BMC tied at 1.0000,
# which may be why the highlighting was switched off here -- confirm.

In [16]:
# Table head; the two %.4f slots take the completeness values reached by the
# morphological cut.
table_head = r"""\begin{tabular}{l c c c c c c}
  Classifier & AUC & MSE &
  $p_{g}\left(c_g=%.4f\right)$ & $p_{s}\left(c_s=%.4f\right)$ &
  $p_{g}\left(c_g=0.9600\right)$ & $p_{s}\left(c_s=0.2500\right)$ \\
  \hline
""" % (morph_g_comp, morph_s_comp)

# One table line: row label left-aligned in ten characters, then six cells.
row_template = r"  %-10s & %s & %s & %s & %s & %s & %s \\" + "\n"

# (label printed in the table, key used in the metric dictionaries)
table_rows = [
    ('TPC', 'TPC'),
    ('SOMc', 'SOM'),
    ('HB', 'HB'),
    ('Morphology', 'Morphology'),
    ('WA', 'WA'),
    ('BoM', 'BoM'),
    ('Stacking', 'Stacking'),
    ('BMC', 'BMC'),
]

table_body = []
for label, key in table_rows:
    if key == 'Morphology':
        # The morphology row shows '-' for the AUC and for the two
        # fixed-completeness columns.
        cells = ('-', mse[key], g_pur1[key], s_pur1[key], '-', '-')
    else:
        cells = (auc[key], mse[key], g_pur1[key], s_pur1[key], g_pur2[key], s_pur2[key])
    table_body.append(row_template % ((label,) + cells))

output = table_head + "".join(table_body) + r"\end{tabular}"

print(output)

\begin{tabular}{l c c c c c c}
  Classifier & AUC & MSE &
  $p_{g}\left(c_g=0.9964\right)$ & $p_{s}\left(c_s=0.7145\right)$ &
  $p_{g}\left(c_g=0.9600\right)$ & $p_{s}\left(c_s=0.2500\right)$ \\
  \hline
  TPC        & \textbf{0.9870} & 0.0208 & 0.9714 & 0.9838 & 0.9918 & 0.9977 \\
  SOMc       & 0.9683 & 0.0452 & 0.9125 & 0.8454 & 0.9788 & 0.9551 \\
  HB         & 0.9403 & 0.0705 & 0.9219 & 0.7017 & 0.9471 & 0.6963 \\
  Morphology & - & 0.0397 & 0.9597 & 0.9666 & - & - \\
  WA         & 0.9806 & 0.0266 & 0.9755 & 0.9926 & 0.9872 & 0.9977 \\
  BoM        & 0.9870 & 0.0208 & 0.9714 & 0.9838 & 0.9918 & 0.9977 \\
  Stacking   & 0.9842 & 0.0194 & 0.9752 & 0.9902 & 0.9918 & 1.0000 \\
  BMC        & 0.9852 & \textbf{0.0174} & \textbf{0.9800} & $\textbf{0.9959}$ & $\textbf{0.9924}$ & 1.0000 \\
\end{tabular}


In [17]:
# Write the LaTeX table held in `output` to the tables directory.
with open('../../tables/metrics_all.tex', 'w') as tex_file:
    tex_file.write(output)