In [13]:
import pandas as pd
import numpy as np
import os

np.random.seed(42)

In [14]:
# Lookup tables for translating numeric class codes.

# class code -> class name
# (the avocado probability columns read further down are named after these codes)
types_names = {
    90: 'Ia',
    67: '91bg',
    52: 'Iax',
    42: 'II',
    62: 'Ibc',
    95: 'SLSN',
    15: 'TDE',
    64: 'KN',
    88: 'AGN',
    92: 'RRL',
    65: 'M-dwarf',
    16: 'EB',
    53: 'Mira',
    6: 'MicroL',
    991: 'MicroLB',
    992: 'ILOT',
    993: 'CART',
    994: 'PISN',
    995: 'MLString',
}

# class code -> SNANA type code.
# Codes 62, 42 and 6 map to a nested dict {sub-key: SNANA type} instead of a
# single integer -- presumably one entry per simulation model; TODO confirm.
SNANA_types = {
    90: 11,
    62: {1: 3, 2: 13},
    42: {1: 2, 2: 12, 3: 14},
    67: 41,
    52: 43,
    64: 51,
    95: 60,
    994: 61,
    992: 62,
    993: 63,
    15: 64,
    88: 70,
    92: 80,
    65: 81,
    16: 83,
    53: 84,
    991: 90,
    6: {1: 91, 2: 93},
}

# SNANA type code -> class name
SNANA_names = {
    11: 'Ia',
    3: 'Ibc',
    13: 'Ibc',
    2: 'II',
    12: 'II',
    14: 'II',
    41: '91bg',
    43: 'Iax',
    51: 'KN',
    60: 'SLSN',
    61: 'PISN',
    62: 'ILOT',
    63: 'CART',
    64: 'TDE',
    70: 'AGN',
    80: 'RRL',
    81: 'M-dwarf',
    83: 'EB',
    84: 'Mira',
    90: 'MicroLB',
    91: 'MicroL',
    93: 'MicroL',
}

In [10]:
output_root = '/media2/RESSPECT2/clean_output/'

# folders created inside every <field>/v<version>/ directory;
# the empty string stands for the version directory itself
version_subdirs = ['', 'cospar/', 'fitres/', 'M0DIF/', 'posteriors/',
                   'posteriors/csv/', 'posteriors/pkl', 'samples/',
                   'stan_input/', 'stan_summary/']

for field in ['DDF', 'WFD']:
    field_dir = output_root + field + '/'

    for version in range(6):
        version_dir = field_dir + 'v' + str(version) + '/'

        # full directory structure for this field/version pair
        dir_list = [field_dir] + [version_dir + sub for sub in version_subdirs]

        # create only what is not there yet
        for name in dir_list:
            if os.path.isdir(name):
                continue
            os.makedirs(name)


In [15]:
# load the PLAsTiCC test metadata table (zenodo release)
fname = '/media/RESSPECT/data/PLAsTiCC/PLAsTiCC_zenodo/plasticc_test_metadata.csv'
test_metadata = pd.read_csv(fname)

# boolean mask: True where the 'ddf_bool' column equals 1
ddf_flag = np.equal(test_metadata['ddf_bool'].values, 1)

# split the object ids into the two fields using the mask
all_object_ids = test_metadata['object_id'].values
ids_ddf = all_object_ids[ddf_flag]
ids_wfd = all_object_ids[np.logical_not(ddf_flag)]

# Create perfect samples

## For DDF

In [7]:
# read all Ias in DDF
# sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True
salt2_Ia_DDF = pd.read_csv('../SALT2_fit/Ia/results/master_fitres.fitres', comment='#', sep=r'\s+')
salt2_Ia_DDF['zHD'] = salt2_Ia_DDF['SIM_ZCMB']          # SALT2mu requires a zHD column

# choose sample size
nobjs = 6000

# choose number of versions of the same sample to generate
# NOTE: with v = 0 the range(1, v) below is empty, so no sample is drawn here
v = 0

for i in range(1, v):
    # draw nobjs distinct rows (no replacement)
    perfect_Ia_DDF = salt2_Ia_DDF.sample(n=nobjs, replace=False)
    #perfect_Ia_DDF.to_csv(data_dir + 'DDF/v' + str(i) +  '/samples/perfect' + \
    #                      str(nobjs) + '.csv', sep=' ', index=False)

## For WFD

In [8]:
# read all Ias in WFD
# one variable for the directory, used both to list and to read the files,
# so the two paths cannot get out of sync
results_dir_Ia = '../SALT2_fit/Ia/results/'

# entries that are not WFD fit results: the DDF master file, the folder of
# temporary SALT files and the notebook checkpoints
skip_Ia = {'master_fitres.fitres', 'salt3', '.ipynb_checkpoints'}

# Filtering (instead of list.remove) does not fail when an entry is absent.
# sorted() fixes the concatenation order: os.listdir returns entries in
# arbitrary order, which would make the seeded sampling below irreproducible.
fnames_Ia = sorted(name for name in os.listdir(results_dir_Ia)
                   if name not in skip_Ia)

salt2_WFD = []

for name in fnames_Ia:
    # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True
    fitres_temp = pd.read_csv(results_dir_Ia + name, sep=r'\s+', comment='#')
    fitres_temp['zHD'] = fitres_temp['SIM_ZCMB']
    salt2_WFD.append(fitres_temp)

salt2_Ia_WFD = pd.concat(salt2_WFD, ignore_index=True)

# choose sample size
nobjs = 6000

# choose number of versions of the same sample to generate
v = 6

for i in range(v):
    # draw nobjs distinct rows (no replacement)
    perfect_Ia_WFD = salt2_Ia_WFD.sample(n=nobjs, replace=False)
    #perfect_Ia_WFD.to_csv(data_dir + 'WFD/v' + str(i) + '/samples/perfect' + \
    #                      str(nobjs) + '.csv', sep=' ', index=False)

FileNotFoundError: [Errno 2] File Ia/results/master_fitres_77.fitres does not exist: 'Ia/results/master_fitres_77.fitres'

# Create Random samples

## For DDF

In [17]:
# list of classes surviving SALT2 fit
surv_class_DDF = ['91bg', 'AGN', 'CART', 'Ia', 'Iax', 'Ibc', 'II', 'TDE']

# read all SALT2 fit results for DDF
# NOTE(review): these paths are relative to the working directory, while the
# "perfect" cells read from '../SALT2_fit/...' -- confirm which one is intended
all_DDF = []
for obj_type in surv_class_DDF:
    # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True
    data_temp = pd.read_csv(obj_type + '/results/master_fitres.fitres', comment='#', sep=r'\s+')
    data_temp['zHD'] = data_temp['SIM_ZCMB']
    data_temp = data_temp.fillna(-99)
    all_DDF.append(data_temp)

# the second fillna covers the gaps concat creates for columns that are not
# present in every class file
all_surv_DDF = pd.concat(all_DDF, ignore_index=True).fillna(-99)

# choose sample size
nobjs = 6000

# choose number of versions of the same sample to generate
v = 2

for i in range(1, v):
    # draw nobjs distinct rows (no replacement) from all surviving classes
    random_DDF = all_surv_DDF.sample(n=nobjs, replace=False)
    #random_DDF.to_csv(data_dir + 'DDF/v' + str(i) + '/samples/random' + \
    #                  str(nobjs) + '.csv', sep=' ', index=False)

## For WFD

In [12]:
# list of classes surviving SALT2 fit
surv_class_WFD = ['91bg', 'AGN', 'CART', 'Ia', 'Iax', 'Ibc', 'II', 'TDE', 'ILOT', 'PISN', 'SLSN']

# entries of each results folder that are not WFD fit results:
# directory of temporary SALT files, the DDF file and notebook checkpoints
skip_WFD = {'salt3', 'master_fitres.fitres', '.ipynb_checkpoints'}

# read all SALT2 fit results for WFD
all_WFD = []
for obj_type in surv_class_WFD:
    # Filtering (instead of list.remove) does not fail when an entry is absent.
    # sorted() fixes the read order, since os.listdir order is arbitrary and
    # the seeded sampling below depends on row order.
    flist = sorted(name for name in os.listdir(obj_type + '/results/')
                   if name not in skip_WFD)

    for name in flist:
        # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True
        data_temp = pd.read_csv(obj_type + '/results/' + name, comment='#', sep=r'\s+')
        data_temp['zHD'] = data_temp['SIM_ZCMB']
        data_temp = data_temp.fillna(-99)
        all_WFD.append(data_temp)

# the second fillna covers the gaps concat creates for columns that are not
# present in every file
all_surv_WFD = pd.concat(all_WFD, ignore_index=True).fillna(-99)

# choose sample size
nobjs = 6000

# choose number of versions of the same sample to generate
v = 7

for i in range(6, v):
    # draw nobjs distinct rows (no replacement) from all surviving classes
    random_WFD = all_surv_WFD.sample(n=nobjs, replace=False)
    # NOTE(review): the file name below says 'perfect' although this is the
    # random sample (the DDF cell uses 'random') -- looks like a copy-paste slip
    #random_WFD.to_csv(data_dir + 'WFD/v' + str(i) + '/samples/perfect' + \
    #                  str(nobjs) + '.csv', sep=' ', index=False)

# Create Fiducial samples

## For DDF

In [16]:
# read results from avocado
# NOTE(review): data_dir is not defined in any visible cell; it must exist in
# the kernel before this cell runs
fname_DDF = data_dir + 'DDF/avocado/avocado_DDF.csv'
# the file's own header row is skipped and replaced by these column names:
# the object id followed by one column per class code
avocado_DDF = pd.read_csv(fname_DDF, names=['object_id','6','15','16','42','52','53','62','64','65','67','88',
                                           '90','92','95'], skiprows=1)

# determine final classification: for every object take the class column that
# comes last in an ascending argsort of its row (i.e. the largest value).
# Done on the whole table at once instead of one .iloc row at a time.
class_values_DDF = avocado_DDF.iloc[:, 1:].values
class_codes_DDF = [int(col) for col in avocado_DDF.columns[1:]]
best_col_DDF = np.argsort(class_values_DDF, axis=1)[:, -1]
class_final_DDF = np.array([types_names[class_codes_DDF[indx]] for indx in best_col_DDF])

# get photometrically classified Ia
flag_class_Ia_DDF = class_final_DDF == 'Ia'
avocado_DDF_Ia = avocado_DDF[flag_class_Ia_DDF]

# get SALT2 fit for objs photometrically classified as Ia
# (isin replaces a per-row scan of the whole object_id array)
avocado_DDF_Ia_fitres_flag = all_surv_DDF['CID'].isin(avocado_DDF_Ia['object_id']).values
all_avocado_DDF_Ia = all_surv_DDF[avocado_DDF_Ia_fitres_flag]

# choose sample size
nobjs = 6000

# choose number of versions of the same sample to generate
v = 2

for i in range(1, v):
    # draw nobjs distinct rows (no replacement)
    fiducial_DDF = all_avocado_DDF_Ia.sample(n=nobjs, replace=False)
    #fiducial_DDF.to_csv(data_dir + 'DDF/v' + str(i) + '/samples/fiducial' + \
    #                  str(nobjs) + '.csv', sep=' ', index=False)

## For WFD

In [23]:
# read results from avocado
# NOTE(review): data_dir is not defined in any visible cell; it must exist in
# the kernel before this cell runs
fname_WFD =  data_dir + 'WFD/avocado/avocado_WFD.csv'
# the file's own header row is skipped and replaced by these column names:
# the object id followed by one column per class code
avocado_WFD = pd.read_csv(fname_WFD, names=['object_id','6','15','16','42','52','53','62','64','65','67','88',
                                           '90','92','95'], skiprows=1)

# determine final classification: for every object take the class column that
# comes last in an ascending argsort of its row (i.e. the largest value).
# Done on the whole table at once instead of one .iloc row at a time.
class_values_WFD = avocado_WFD.iloc[:, 1:].values
class_codes_WFD = [int(col) for col in avocado_WFD.columns[1:]]
best_col_WFD = np.argsort(class_values_WFD, axis=1)[:, -1]
class_final_WFD = np.array([types_names[class_codes_WFD[indx]] for indx in best_col_WFD])

# get photometrically classified Ia
flag_class_Ia_WFD = class_final_WFD == 'Ia'
avocado_WFD_Ia = avocado_WFD[flag_class_Ia_WFD]

# get SALT2 fit for objs photometrically classified as Ia
# (isin replaces a per-row scan of the whole object_id array)
avocado_WFD_Ia_fitres_flag = all_surv_WFD['CID'].isin(avocado_WFD_Ia['object_id']).values
all_avocado_WFD_Ia = all_surv_WFD[avocado_WFD_Ia_fitres_flag]

# choose sample size
nobjs = 3000

# choose number of versions of the same sample to generate
v = 7

for i in range(6, v):
    # draw nobjs distinct rows (no replacement)
    fiducial_WFD = all_avocado_WFD_Ia.sample(n=nobjs, replace=False)
    # NOTE(review): the file name below says 'perfect' although this is the
    # fiducial sample (the DDF cell uses 'fiducial') -- looks like a copy-paste slip
    #fiducial_WFD.to_csv(data_dir + 'WFD/v' + str(i) + '/samples/perfect' + \
    #                  str(nobjs) + '.csv', sep=' ', index=False)

# Create single contaminant samples

## For DDF

In [22]:
# levels of contamination (fraction of the sample made of each contaminant)
cont_DDF = {'II': [0.28, 0.25, 0.1, 0.05, 0.02, 0.01],
            'Ibc': [0.05, 0.02, 0.01],
            'Iax': [0.14, 0.1, 0.05, 0.02, 0.01],
            'CART': [0.009], 
            '91bg': [0.002],
            'AGN': [0.001]}

# class name -> label used in the output file names
complete_names ={'II': 'SNII', 'Ibc': 'SNIbc', 'Iax': 'SNIax', 'CART':'CART',
                 '91bg':'SNIa-91bg', 'AGN':'AGN'}

# choose sample size
nobjs = 3000

# choose number of versions of the same sample to generate
v = 6

# read all contaminants surviving SALT2 fit -- once per class, since the
# files do not depend on the version being generated
cont_fitres_DDF = {}
for obj_class in cont_DDF:
    # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True
    sample_cont = pd.read_csv(obj_class + '/results/master_fitres.fitres', comment='#',
                              sep=r'\s+')
    sample_cont['zHD'] = sample_cont['SIM_ZCMB']
    cont_fitres_DDF[obj_class] = sample_cont

# NOTE(review): data_dir is not defined in any visible cell; it must exist in
# the kernel before this cell runs. salt2_Ia_DDF comes from the
# "perfect samples" DDF cell.
for i in range(1, v):
    for obj_class in cont_DDF:
        sample_cont = cont_fitres_DDF[obj_class]

        for perc in cont_DDF[obj_class]:
            # round instead of truncating: e.g. 0.009 * 3000 evaluates to just
            # under 27 in floating point, so int() alone would give 26.
            # The Ia count is the complement, so the total is exactly nobjs.
            n_cont = int(round(perc * nobjs))
            n_Ia = nobjs - n_cont

            Ia_temp = salt2_Ia_DDF.sample(n=n_Ia, replace=False)
            cont_temp = sample_cont.sample(n=n_cont, replace=False)
            sample_final = pd.concat([Ia_temp, cont_temp], ignore_index=True)
            sample_final = sample_final.fillna(-99)

            # percentages shown in the file name: whole numbers for the
            # classes with >= 1% levels, one decimal for the sub-percent ones
            if obj_class not in ['CART', '91bg', 'AGN']:
                pct_Ia = int(100 - 100 * perc)
                pct_cont = int(100 * perc)
            else:
                pct_Ia = round(100 - 100 * perc, 1)
                pct_cont = round(100 * perc, 1)

            out_name = data_dir + 'DDF/v' + str(i) + '/samples/' + str(pct_Ia) + \
                       'SNIa' + str(pct_cont) + complete_names[obj_class] + '.csv'
            sample_final.to_csv(out_name, sep=' ', index=False)

## For WFD

In [96]:
# levels of contamination (fraction of the sample made of each contaminant)
cont_WFD = {'II': [0.28, 0.25, 0.1, 0.05, 0.02, 0.01],
            'Ibc': [0.1, 0.05, 0.02, 0.01],
            'Iax': [0.25, 0.1, 0.05, 0.02, 0.01],
            '91bg': [0.05, 0.02, 0.01],
            'AGN': [0.05, 0.02, 0.01],
            'TDE': [0.004],
            'CART': [0.003]}

# class name -> label used in the output file names
complete_names ={'II': 'SNII', 'Ibc': 'SNIbc', 'Iax': 'SNIax', 'CART':'CART',
                 '91bg':'SNIa-91bg', 'AGN':'AGN', 'TDE':'TDE'}

# choose sample size
nobjs = 3000

# choose number of versions of the same sample to generate
v = 7

# entries of each results folder that are not WFD fit results
skip_cont_WFD = {'salt3', 'master_fitres.fitres', '.ipynb_checkpoints'}

# read all contaminants surviving SALT2 fit -- once per class, since the
# files do not depend on the version being generated
cont_fitres_WFD = {}
for obj_class in cont_WFD:
    # Filtering (instead of list.remove) does not fail when an entry is absent.
    # sorted() fixes the read order, since os.listdir order is arbitrary and
    # the seeded sampling below depends on row order.
    flist = sorted(name for name in os.listdir(obj_class + '/results/')
                   if name not in skip_cont_WFD)

    sample_cont = []
    for name in flist:
        # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True
        temp_cont = pd.read_csv(obj_class + '/results/' + name, comment='#',
                                sep=r'\s+')
        temp_cont['zHD'] = temp_cont['SIM_ZCMB']
        sample_cont.append(temp_cont)

    cont_fitres_WFD[obj_class] = pd.concat(sample_cont, ignore_index=True)

# salt2_Ia_WFD comes from the "perfect samples" WFD cell
for i in range(6, v):
    for obj_class in cont_WFD:
        sample_cont2 = cont_fitres_WFD[obj_class]

        for perc in cont_WFD[obj_class]:
            # The sample size is nobjs (the original used an undefined name, ntot).
            # round instead of truncating the float product, and take the Ia
            # count as the complement so the total is exactly nobjs.
            n_cont = int(round(perc * nobjs))
            n_Ia = nobjs - n_cont

            Ia_temp2 = salt2_Ia_WFD.sample(n=n_Ia, replace=False)
            cont_temp2 = sample_cont2.sample(n=n_cont, replace=False)
            sample_final = pd.concat([Ia_temp2, cont_temp2], ignore_index=True)
            sample_final = sample_final.fillna(-99)

            #sample_final.to_csv(data_dir + 'WFD/v' + str(i) + '/samples/' + str(int(100 - 100 * perc)) + \
            #              'SNIa' + str(int(100 * perc)) + complete_names[obj_class] + '.csv', 
            #              sep=' ', index=False)