In [1]:
# Root data directory, relative to the notebook's working directory.
DATA_PATH = '../data/'
# Sub-directory holding the light-curve files; trailing slash is kept so
# file names can be appended by plain string concatenation.
LIGHTCURVES_PATH = '{}lightcurves/'.format(DATA_PATH)

In [2]:
import pandas as pd
import inputs2

In [3]:
def unique_ids_list(df_lcs):
    """Return the distinct values of the 'ID' index level as a list of strings.

    Parameters
    ----------
    df_lcs : pandas.DataFrame
        Frame whose index (or one level of its MultiIndex) is named 'ID'.

    Returns
    -------
    list of str
        Unique IDs in order of first appearance.
    """
    unique_ids = df_lcs.index.get_level_values('ID').unique()
    # Index.format() is deprecated in recent pandas; astype(str) is the
    # documented replacement and yields the same strings for ID labels.
    return unique_ids.astype(str).tolist()

def print_num_ids_shape(df_lcs):
    """Print how many distinct IDs `df_lcs` contains, together with its shape.

    Parameters
    ----------
    df_lcs : pandas.DataFrame
        Frame with an index level named 'ID'.
    """
    num_ids = len(unique_ids_list(df_lcs))
    summary = 'Num IDs: {}  Shape: {}'.format(num_ids, df_lcs.shape)
    print(summary)

#### Load Transient Catalog

In [4]:
# Load the transient catalog through the project helper module `inputs2`.
# The head() output shown in the next cell has an 'ID' index and a 'class' column.
df_cat = inputs2.load_transient_catalog()

In [5]:
# Preview the first catalog rows (transient ID -> class label).
df_cat.head()

Unnamed: 0_level_0,class
ID,Unnamed: 1_level_1
TranID1611101400424116462,Flare
TranID1611101210274176502,HPM
TranID1611101150154137989,HPM
TranID1611091630244128695,HPM
TranID1611091600294129774,CV


#### Load Transient Lightcurves

In [7]:
# Read the cleaned light curves from the light-curve directory.
# NOTE: pickle files execute arbitrary code on load; only read files you trust.
indir = LIGHTCURVES_PATH
filename = 'transient_lightcurves_clean.pickle'
filepath = indir + filename
df_lcs = pd.read_pickle(filepath)

# Report the number of distinct transients and the frame shape.
print_num_ids_shape(df_lcs)

Num IDs: 4869  Shape: (440469, 3)


##### Check that all Lightcurves TransientIDs are in Catalog

In [8]:
# Every transient ID present in the light curves must also appear in the catalog.
transID_cat_list = df_cat.index.unique()
lcs_ids = df_lcs.index.get_level_values('ID').unique()
all_exist = all(lcs_id in transID_cat_list for lcs_id in lcs_ids)
print('All exist:', all_exist)

All exist: True


##### Check that all Catalog TransientIDs are in Lightcurves

In [9]:
# Every catalog transient ID should have at least one light-curve row.
# The IDs being checked come from the catalog; the reference set comes from
# the light curves. (The earlier version of this cell had the two swapped, so
# it repeated the light-curves -> catalog check and could never report a
# catalog transient without a light curve. Re-run to refresh the output.)
catalog_ids = df_cat.index.unique()
transID_lcs_list = df_lcs.index.get_level_values('ID').unique()

# Vectorised membership test; the missing list keeps catalog order.
lcs_missing_transientID_list = catalog_ids[~catalog_ids.isin(transID_lcs_list)].tolist()
all_exist = len(lcs_missing_transientID_list) == 0

print('All exist:', str(all_exist))
print('Missing: {} Transients'.format(len(lcs_missing_transientID_list)))

All exist: True
Missing: 0 Transients


##### Merge dataframes with inner join

In [10]:
# Per-transient count of non-null values in each light-curve column
# (Mag, Magerr, MJD), grouped on the first index level.
# `axis=0` is the default and deprecated as a groupby argument, so it is omitted;
# groupby/count and join already return new frames, so no defensive copies are needed.
obs_counts = df_lcs.groupby(level=0).count()

# Inner join keeps only transients present in both catalog and light curves.
# The 'Mag' count is used as the number of observations per transient.
df_merge = (
    df_cat
    .join(obs_counts, how='inner')
    .rename(columns={'Mag': 'ObsCount'})
)

In [11]:
# Sanity check: (number of transients after the inner join, number of columns).
print(df_merge.shape)

(4869, 4)


##### Count number of objects with at least 5 observations

In [12]:
# Keep only transients whose light curve has at least 5 observations.
has_min_obs = df_merge['ObsCount'] >= 5
df_merge_filtered = df_merge[has_min_obs]

# Number of transients that pass the filter.
df_merge_filtered.shape[0]

4269

##### Top 20 count of transients by class (with at least 5 observations)

In [13]:
# Number of transients per class among those with at least 5 observations.
class_counts = df_merge_filtered[['class', 'ObsCount']].groupby('class').count()
df = (
    class_counts
    .rename(columns={'ObsCount': 'ObjCount'})
    .sort_values('ObjCount', ascending=False)
)

# Show the 20 most populated classes as a single wide row.
df.head(20).transpose()

class,SN,CV,AGN,HPM,Blazar,SN?,Flare,AGN?,Unknown,CV?,Var,Ast?,SN/CV,SN/AGN,YSO,Mira,Blazar?,Flare?,Var?,Ast
ObjCount,1293,862,425,306,237,236,207,130,114,55,47,31,28,23,21,20,19,18,12,12


##### Describe observation count for transients with at least 5 observations

In [14]:
# Select light-curve rows belonging to transients that passed the >= 5 filter.
has_min_obs = df_lcs.index.get_level_values('ID').isin(df_merge_filtered.index)

# Summary statistics of the per-transient observation counts.
# `axis=0` is the default and deprecated as a groupby argument, so it is omitted.
df_lcs[has_min_obs].groupby(level=0).count().describe()

Unnamed: 0,Mag,Magerr,MJD
count,4269.0,4269.0,4269.0
mean,102.81026,102.81026,102.81026
std,113.786057,113.786057,113.786057
min,5.0,5.0,5.0
25%,14.0,14.0,14.0
50%,48.0,48.0,48.0
75%,166.0,166.0,166.0
max,564.0,564.0,564.0
