In [1]:
import numpy as np
import matplotlib.pylab as plt
import pandas as pd
import os

# Construct meaningful classes

In [2]:
# Coarse ("big") classes, each listing the fine-grained type labels it absorbs.
# Note: 'EB*' is listed twice under 'multiple_object'; this is harmless for the
# label -> big-class lookup built below.
big_class = {
    'multiple_object': ['EB*', 'EllipVar', 'Symbiotic*','SB*','DQHer', 'Nova-like','EB*betLyr','AMHer','Nova','EB*Algol','EB*WUMa','CataclyV*',
                        'DwarfNova','EB*'],
    'star': ['V*?', 'Pulsar', 'PulsV*bCep','low-mass*','post-AGB*', 'Pec*','pMS*','HotSubdwarf','PM*','*inNeb','pulsV*SX','RGB*','HB*','BYDra',
             'PulsV*RVTau', 'BlueSG*','Irregular_V*','WD*','Ae*','RedSG*','AGB*','OH/IR','Be*','Cepheid','PulsV*delSct','RotV*','PulsV*',
             'PulsV*WVir','S*','RSCVn','deltaCep','TTau*','Em*','Orion_V*','YSO','V*','C*','Mira','LPV*','Star','RRLyr'],
    'galaxy': ['LINER','Blazar','AGN','BLLac','QSO'],
    'SN': ['SNIb-pec', 'SNIb/c','SNII-pec','SN', 'SNIbn','SNIc-BL','SNI','SNIb','SNIIb','SLSN-II','SNIIP','SLSN-I','SNIc','SNIIn','SNII'],
    'other_TNS': ['Mdwarf', 'LBV','TDE','Other','CV','Varstar'],
}

# Invert the grouping: fine-grained label -> big class.
class_dict = {
    label: group
    for group, members in big_class.items()
    for label in members
}

# 'Ia' is kept as a big class of its own.
class_dict['Ia'] = 'Ia'

# Check population in original sample

In [3]:
# NOTE(review): dir_name is not used by any of the cells visible here.
dir_name = '../../data/AL_data/'

# Load the feature table; it must contain a 'type' column with the
# fine-grained class label of every row.
fname = 'data/features_42876.csv'
features = pd.read_csv(fname, index_col=False)

# Translate every fine-grained label into its big class in a single vectorized
# pass (a per-row .loc assignment is very slow on a table of this size).
features['big_class'] = features['type'].map(class_dict)

# Series.map yields NaN for labels that are absent from class_dict; fail loudly
# instead, exactly as a direct dictionary lookup would.
unmapped = features.loc[features['big_class'].isna(), 'type'].unique()
if len(unmapped) > 0:
    raise KeyError('types missing from class_dict: ' + str(list(unmapped)))

# Population of each big class; indx orders the classes by increasing count.
types, freq = np.unique(features['big_class'].values, return_counts=True)
indx = np.argsort(freq)

# One row per big class: label, number of objects and percentage of the sample.
pop = pd.DataFrame({
    'type': types,
    'n': freq,
    'perc': 100 * np.array(freq)/features.shape[0],
})

In [4]:
# Print label, count and percentage of every big class, following the order
# stored in indx (increasing number of objects).
for rank in range(pop.shape[0]):
    entry = pop.iloc[indx[rank]]
    print(entry['type'], '   ---   ', entry['n'], '  ---   ', entry['perc'])

other_TNS    ---    174   ---    0.4058214385670305
SN    ---    762   ---    1.7772180240694095
Ia    ---    1600   ---    3.7316913891221195
galaxy    ---    4254   ---    9.921634480828436
multiple_object    ---    8636   ---    20.14180427278664
star    ---    27450   ---    64.02183039462636


# Read queried sample for UncSampling

In [5]:
dirname = 'results_42876/UncSampling/queries/'

# Every entry of the directory is read as one query file.
flist = os.listdir(dirname)

tot = 0        # number of queried rows summed over all files
queries = {}   # big class -> number of queried rows summed over all files

for query_file in flist:
    # Whitespace-separated file whose first line is skipped; columns are named
    # 'mute' followed by the column names of the feature table.
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True.
    data = pd.read_csv(os.path.join(dirname, query_file), sep=r'\s+', skiprows=1,
                       names=['mute'] + list(features.keys()), index_col=False)

    # Vectorized label -> big class translation.
    data['big_class'] = data['type'].map(class_dict)

    # Series.map yields NaN for unknown labels; keep the KeyError a direct
    # dictionary lookup would raise.
    unmapped = data.loc[data['big_class'].isna(), 'type'].unique()
    if len(unmapped) > 0:
        raise KeyError('types missing from class_dict: ' + str(list(unmapped)))

    types, freq = np.unique(data['big_class'].values, return_counts=True)

    tot = tot + data.shape[0]

    # Add this file's per-class counts to the running totals.
    for big_type, n_queried in zip(types, freq):
        queries[big_type] = queries.get(big_type, 0) + n_queried

In [6]:
# Total number of rows read from all files in the UncSampling queries directory.
tot

6000

In [7]:
# List the UncSampling query totals per big class in increasing order.
# NOTE(review): the divisor 100 is hard-coded; presumably the number of query
# files read above — confirm.
labels = list(queries.keys())
totals = list(queries.values())
indx = np.argsort(totals)

for pos in indx:
    print(labels[pos], '   ---   ', totals[pos]/100)

other_TNS    ---    0.33
multiple_object    ---    5.13
SN    ---    6.57
galaxy    ---    10.25
star    ---    15.97
Ia    ---    21.75


# Read queried sample for Random Sampling

In [8]:
dirname2 = 'results_42876/RandomSampling/queries/'

# Every entry of the directory is read as one query file.
flist2 = os.listdir(dirname2)

tot2 = 0        # number of queried rows summed over all files
queries2 = {}   # big class -> number of queried rows summed over all files

for query_file2 in flist2:
    # Whitespace-separated file whose first line is skipped; columns are named
    # 'mute' followed by the column names of the feature table.
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True.
    data2 = pd.read_csv(os.path.join(dirname2, query_file2), sep=r'\s+', skiprows=1,
                        names=['mute'] + list(features.keys()))

    # Vectorized label -> big class translation.
    data2['big_class'] = data2['type'].map(class_dict)

    # Series.map yields NaN for unknown labels; keep the KeyError a direct
    # dictionary lookup would raise.
    unmapped2 = data2.loc[data2['big_class'].isna(), 'type'].unique()
    if len(unmapped2) > 0:
        raise KeyError('types missing from class_dict: ' + str(list(unmapped2)))

    types2, freq2 = np.unique(data2['big_class'].values, return_counts=True)

    tot2 = tot2 + data2.shape[0]

    # Add this file's per-class counts to the running totals.
    for big_type2, n_queried2 in zip(types2, freq2):
        queries2[big_type2] = queries2.get(big_type2, 0) + n_queried2

In [9]:
# Total number of rows read from all files in the RandomSampling queries directory.
tot2

6000

In [10]:
# List the RandomSampling query totals per big class in increasing order.
# NOTE(review): the divisor 100 is hard-coded; presumably the number of query
# files read above — confirm.
labels2 = list(queries2.keys())
totals2 = list(queries2.values())
indx2 = np.argsort(totals2)

for pos2 in indx2:
    print(labels2[pos2], '   ---   ', totals2[pos2]/100)

other_TNS    ---    0.23
SN    ---    0.95
Ia    ---    1.99
galaxy    ---    6.11
multiple_object    ---    12.54
star    ---    38.18


In [11]:
# run index -> table with the big-class breakdown (type, n, perc) of the rows
# whose 'prob_Ia' exceeds 0.5 in that run's class-probability file.
photo_Ia = {}

# NOTE(review): the number of runs is taken from len(flist2) (the
# RandomSampling file list) although UncSampling results are read here —
# confirm both strategies have the same number of runs.
for i in range(len(flist2)):
    # Despite its name, dirname3 is the path of a CSV file (loop 59 of run i).
    dirname3 = 'results_42876/UncSampling/class_prob/v' + str(i) + '/class_prob_UncSampling__loop_59.csv'
    data3 = pd.read_csv(dirname3)

    # Vectorized label -> big class translation.
    data3['big_class'] = data3['type'].map(class_dict)

    # Series.map yields NaN for unknown labels; keep the KeyError a direct
    # dictionary lookup would raise.
    unmapped3 = data3.loc[data3['big_class'].isna(), 'type'].unique()
    if len(unmapped3) > 0:
        raise KeyError('types missing from class_dict: ' + str(list(unmapped3)))

    # Keep only the rows with prob_Ia above 0.5.
    flag = data3['prob_Ia'].values > 0.5
    data4 = data3[flag]

    # Count and percentage of every big class among the selected rows.
    types, freq = np.unique(data4['big_class'].values, return_counts=True)
    photo_Ia[i] = pd.DataFrame({'type': types, 'n': freq, 'perc': 100 * freq/data4.shape[0]})


In [25]:
# Big-class labels present in the run-0 table of photo_Ia.
# NOTE(review): a label that never appears in run 0 is not part of this list —
# confirm that is intended.
big_types = photo_Ia[0]['type'].values

In [42]:
# Big-class breakdown (type, n, perc) of the rows with prob_Ia > 0.5 in run 0.
photo_Ia[0]

Unnamed: 0,type,n,perc
0,Ia,769,83.860414
1,SN,85,9.269357
2,galaxy,4,0.436205
3,multiple_object,15,1.635769
4,other_TNS,3,0.327154
5,star,41,4.471101


In [54]:
# Big-class labels present in the run-1 table of photo_Ia.
photo_Ia[1]['type'].values

array(['Ia', 'SN', 'galaxy', 'multiple_object', 'other_TNS', 'star'],
      dtype=object)

In [57]:
# big class -> [mean percentage, mean count] over the runs stored in photo_Ia.
photo_Ia_mean = {}

for big_type in big_types:
    # Running sums over the runs in which this class is present.
    # (Local names are used on purpose: assigning to `tot` here would overwrite
    # the total number of queried rows computed in an earlier cell.)
    perc_sum = 0
    n_sum = 0
    n_runs_present = 0

    print(big_type)

    for run_table in photo_Ia.values():
        present = run_table['type'] == big_type
        if present.any():
            perc_sum = perc_sum + run_table['perc'][present].values[0]
            n_sum = n_sum + run_table['n'][present].values[0]
            n_runs_present = n_runs_present + 1

    # NOTE(review): the average is taken over the runs where the class appears,
    # not over all runs, so a class missing from some runs gets a mean that is
    # higher than its all-runs average (and the percentages need not sum to
    # 100). Confirm whether dividing by len(photo_Ia) is what is wanted.
    photo_Ia_mean[big_type] = [perc_sum / n_runs_present, n_sum / n_runs_present]

Ia
SN
galaxy
multiple_object
other_TNS
star


In [58]:
# Per big class: [mean percentage, mean count], each averaged over the runs in
# which that class appears.
photo_Ia_mean

{'Ia': [82.46299109027292, 716.77],
 'SN': [8.489785028807358, 75.08],
 'galaxy': [1.9596612241373463, 17.29],
 'multiple_object': [1.8218826850504597, 17.242424242424242],
 'other_TNS': [0.3390820007552173, 3.0202020202020203],
 'star': [4.948207617834758, 46.32]}