In [6]:
# Root data directory (relative to this notebook) and the lightcurves folder inside it.
# Both are plain strings ending in '/' so file names can be appended directly.
DATA_PATH = '../../data/'
LIGHTCURVES_PATH = '{}lightcurves/'.format(DATA_PATH)

In [7]:
import pandas as pd

Load Transient Catalog

In [8]:
# Load the pickled transient catalog and print the shape of its distinct TransientID array.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
indir = DATA_PATH
filename = 'transient_catalog.pickle'
filepath = indir + filename
df_cat = pd.read_pickle(filepath)
print(df_cat['TransientID'].unique().shape)

(5539,)


Load Transient Lightcurves

In [10]:
# Load the pickled lightcurve observations and print the shape of their distinct TransientID array.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
indir = LIGHTCURVES_PATH
filename = 'transient_lightcurves.pickle'
filepath = indir + filename
df_lcs = pd.read_pickle(filepath)
print(df_lcs['TransientID'].unique().shape)

(4984,)


Check that all lightcurve TransientIDs are in the catalog

In [11]:
# Verify that every TransientID found in the lightcurves table also appears in
# the catalog. A single vectorized membership test replaces the per-ID Python
# loop, which rescanned the whole catalog ID array once per lightcurve ID.
transID_cat_list = df_cat.TransientID.unique()
# bool(...) turns the numpy boolean into a plain Python bool, as the loop produced.
all_exist = bool(df_lcs.TransientID.isin(transID_cat_list).all())
print('All exist:', str(all_exist))

All exist: True


Check that all catalog TransientIDs are in the lightcurves

In [12]:
# Verify the reverse direction: find the catalog TransientIDs that have no
# lightcurve rows. A vectorized membership mask replaces the per-ID Python loop,
# which rescanned the whole lightcurve ID array once per catalog ID.
transID_lcs_list = df_lcs.TransientID.unique()
# drop_duplicates keeps first-appearance order, matching iteration over unique().
cat_ids = df_cat.TransientID.drop_duplicates()
lcs_missing_transientID_list = cat_ids[~cat_ids.isin(transID_lcs_list)].tolist()
# Every catalog ID has a lightcurve exactly when nothing is missing.
all_exist = not lcs_missing_transientID_list
print('All exist:', str(all_exist))
print('Missing: {} Transients'.format(len(lcs_missing_transientID_list)))

All exist: False
Missing: 555 Transients


Merge dataframes with inner join

In [13]:
# Build one row per transient that has lightcurve data, with its class label and
# its number of observations.
#
# ObsCount is the count of non-null 'Mag' values per TransientID.
obs_counts = (
    df_lcs.groupby('TransientID', as_index=False)[['Mag']]
    .count()
    .rename(columns={'Mag': 'ObsCount'})
)
# Join on an explicit key so the result never depends on which other column
# names the two tables happen to share. merge() and groupby() do not modify
# their inputs, so no defensive copies are needed.
df_merge = df_cat[['TransientID', 'Classification']].merge(
    obs_counts, on='TransientID', how='inner'
)
df_merge = df_merge[['TransientID', 'Classification', 'ObsCount']]

In [14]:
# Sanity check: (row count, column count) of the merged table.
print((len(df_merge.index), len(df_merge.columns)))

(4984, 3)


Top 10 count of transients by class

In [15]:
# Number of transients per classification, largest classes first; show the ten
# largest as a single wide row.
df = (
    df_merge.groupby('Classification')[['ObsCount']]
    .count()
    .rename(columns={'ObsCount': 'ObjCount'})
    .sort_values('ObjCount', ascending=False)
)
df.head(10).T

Classification,SN,CV,HPM,AGN,SN?,Blazar,Flare,Unknown,AGN?,CV?
ObjCount,1539,943,436,429,294,239,215,188,133,68


Describe the number of observations per transient

In [16]:
# Earliest observation time (MJD) across all lightcurves. Taking the column
# minimum avoids sorting the entire frame just to read its first value.
df_lcs['MJD'].min()

53464.153988194696

In [17]:
# Count the non-null values of each column per transient, then summarise the
# distribution of those per-transient counts.
obs_per_transient = df_lcs.groupby('TransientID').count()
obs_per_transient.describe()

Unnamed: 0,Mag,Magerr,MJD
count,4984.0,4984.0,4984.0
mean,90.584671,90.584671,90.584671
std,112.330606,112.330606,112.330606
min,1.0,1.0,1.0
25%,9.0,9.0,9.0
50%,35.0,35.0,35.0
75%,140.0,140.0,140.0
max,880.0,880.0,880.0


Count number of objects with at least 5 observations

In [18]:
# Keep only transients with five or more observations and report how many remain.
has_min_obs = df_merge['ObsCount'] >= 5
df_merge_filtered = df_merge.loc[has_min_obs]
len(df_merge_filtered)

4384

Top 20 count of transients by class (with at least 5 observations)

In [19]:
# Number of transients per classification among those that passed the
# observation-count filter, largest classes first; show the twenty largest as a
# single wide row.
df = (
    df_merge_filtered.groupby('Classification')[['ObsCount']]
    .count()
    .rename(columns={'ObsCount': 'ObjCount'})
    .sort_values('ObjCount', ascending=False)
)
df.head(20).T

Classification,SN,CV,AGN,HPM,SN?,Blazar,Flare,AGN?,Unknown,CV?,Var,Ast?,SN/CV,SN/AGN,YSO,Mira,Blazar?,Flare?,Var?,Ast
ObjCount,1295,862,427,412,239,237,207,132,114,55,47,31,28,23,21,20,19,18,12,12


Describe observation count for transients with at least 5 observations

In [20]:
# Restrict the lightcurve rows to transients that passed the observation-count
# filter, then summarise the distribution of per-transient counts.
kept_rows = df_lcs.TransientID.isin(df_merge_filtered.TransientID)
df_lcs.loc[kept_rows].groupby('TransientID').count().describe()

Unnamed: 0,Mag,Magerr,MJD
count,4384.0,4384.0,4384.0
mean,102.623631,102.623631,102.623631
std,114.634489,114.634489,114.634489
min,5.0,5.0,5.0
25%,15.0,15.0,15.0
50%,48.0,48.0,48.0
75%,163.25,163.25,163.25
max,880.0,880.0,880.0
