In [182]:
import pandas as pd
import numpy as np

I have three 4 files with lightcurves:

1.-lasair_bts_batch_0.csv (ids from bts)

2.-lasair_bts_batch_1.csv (more ids from bts)

3.-lasair_tns_csv (ids from tns)

4.-yse_lcs.csv (from yse dataset)

They all have more or less the same fields, except for objId. While the objects from bts and tns have the ZTF ID, the ones from YSE have the IAU ID. So what I have to do is:

1.- Concatenate files

2.- Add IAU IDs missing

3.- Drop duplicates + sanity check

4.- Go over the criteria I had previously decided on and filter accordingly

5.- Filter the metadata file again if necessary

6.- Ensure that IAU ID given is unique

In [184]:
meta = pd.read_csv('../data/real/scrap/real_metadata.csv')
bts_0 = pd.read_csv('../data/real/scrap/lasair_bts_batch_0_lcs.csv')
bts_1 = pd.read_csv('../data/real/scrap/lasair_bts_batch_1_lcs.csv')
tns = pd.read_csv('../data/real/scrap/lasair_tns_lcs.csv')
yse = pd.read_csv('../data/real/scrap/yse_lcs.csv')

In [185]:
iauid_missing = pd.concat([bts_0,bts_1,tns],ignore_index=True)
print(iauid_missing.shape)
print(iauid_missing.objId.unique().shape)
iauid_missing.head()

(267879, 7)
(7643,)


Unnamed: 0,jd,magpsf,sigmapsf,magzpsci,fid,isdiffpos,objId
0,2459851.0,17.5986,0.169676,24.837601,2,f,ZTF17aabtvsy
1,2459851.0,18.264799,0.192368,24.7288,2,t,ZTF17aabtvsy
2,2459865.0,18.349199,0.130138,26.0732,1,t,ZTF17aabtvsy
3,2459865.0,18.296801,0.114468,26.1078,2,t,ZTF17aabtvsy
4,2459867.0,18.238001,0.099298,26.124001,1,t,ZTF17aabtvsy


First, let's drop whatever is not in the metadata file

In [186]:
iauid_missing = iauid_missing[iauid_missing['objId'].isin(meta['ZTFID'].unique())]

In [187]:
iauid_missing.objId.unique().shape

(7626,)

In [188]:
meta_ids = meta[meta['ZTFID'].isin(iauid_missing.objId.unique())][['ZTFID','IAUID']]
meta_ids.shape

(7626, 2)

In [189]:
iauid_missing.rename(columns={'objId':'ZTFID'},inplace=True)

In [190]:
iauid_missing.head()

Unnamed: 0,jd,magpsf,sigmapsf,magzpsci,fid,isdiffpos,ZTFID
0,2459851.0,17.5986,0.169676,24.837601,2,f,ZTF17aabtvsy
1,2459851.0,18.264799,0.192368,24.7288,2,t,ZTF17aabtvsy
2,2459865.0,18.349199,0.130138,26.0732,1,t,ZTF17aabtvsy
3,2459865.0,18.296801,0.114468,26.1078,2,t,ZTF17aabtvsy
4,2459867.0,18.238001,0.099298,26.124001,1,t,ZTF17aabtvsy


In [191]:
with_iauid = pd.merge(iauid_missing,meta_ids, on='ZTFID', how='left')
print(with_iauid.shape)
with_iauid.head()

(267184, 8)


Unnamed: 0,jd,magpsf,sigmapsf,magzpsci,fid,isdiffpos,ZTFID,IAUID
0,2459851.0,17.5986,0.169676,24.837601,2,f,ZTF17aabtvsy,SN2022yei
1,2459851.0,18.264799,0.192368,24.7288,2,t,ZTF17aabtvsy,SN2022yei
2,2459865.0,18.349199,0.130138,26.0732,1,t,ZTF17aabtvsy,SN2022yei
3,2459865.0,18.296801,0.114468,26.1078,2,t,ZTF17aabtvsy,SN2022yei
4,2459867.0,18.238001,0.099298,26.124001,1,t,ZTF17aabtvsy,SN2022yei


In [192]:
with_iauid.IAUID.unique().shape

(7626,)

In [193]:
with_iauid['mjd']=with_iauid['jd'] - 2400000.5

In [194]:
yse.head()

Unnamed: 0,mjd,magpsf,sigmapsf,fid,objId,magzpsci,isdiffpos
0,58676.172,17.042,0.018,1,SN2019lbi,27.5,t
1,58676.192,17.116,0.018,2,SN2019lbi,27.5,t
2,58679.187,17.1,0.023,1,SN2019lbi,27.5,t
3,58679.191,17.156,0.023,2,SN2019lbi,27.5,t
4,58797.545,18.021,0.042,2,SN2019lbi,27.5,t


again, we drop everything not in metadata

In [195]:
yse = yse[yse['objId'].isin(meta['IAUID'].unique())]

In [196]:
yse.objId.unique().shape

(1698,)

In [197]:
yse['jd']=yse['mjd'] + 2400000.5

In [198]:
yse.rename(columns={'objId':'IAUID'},inplace=True)

In [214]:
yse_ids=list(yse.IAUID.unique())
len(yse_ids)

1698

In [203]:
lcs = pd.concat([with_iauid,yse],ignore_index=True)

In [204]:
lcs.IAUID.unique().shape

(9230,)

In [205]:
lcs = lcs.sort_values(by=['IAUID','mjd'])
lcs.head()

Unnamed: 0,jd,magpsf,sigmapsf,magzpsci,fid,isdiffpos,ZTFID,IAUID,mjd
27200,2458637.0,18.554399,0.086457,25.9904,2,t,ZTF19aawsday,AT2019hbr,58636.423484
27201,2458642.0,18.8578,0.093859,26.067801,1,t,ZTF19aawsday,AT2019hbr,58641.446678
27202,2458645.0,18.862499,0.099435,26.1015,1,t,ZTF19aawsday,AT2019hbr,58644.448565
27203,2458648.0,19.066799,0.114149,26.039801,1,t,ZTF19aawsday,AT2019hbr,58647.472512
27204,2458651.0,18.4027,0.086108,26.158701,2,t,ZTF19aawsday,AT2019hbr,58650.437442


In [206]:
meta.IAUID.unique().shape

(9278,)

In [207]:
meta = meta[meta.IAUID.isin(lcs.IAUID.unique())]
meta.shape

(9230, 29)

Now I need to go over the criteria I had previously decided on and filter accordingly. 3 things:

1.-at least 3 det points per band

2.-peak mag of at least 19 in at least one band

3.-not appear in lowe gal latitudes (|b| >=7 deg)

In [208]:
#1.- 3 points per band
fid_counts = lcs.groupby(by=['IAUID','fid']).fid.count()
filtered_fid = fid_counts[fid_counts >= 3].reset_index(name='fid_count')
valid_fid_ids = filtered_fid.IAUID.unique()
valid_fid_ids.shape

(9010,)

In [209]:
lcs = lcs[lcs.IAUID.isin(valid_fid_ids)]
lcs.IAUID.unique().shape

(9010,)

In [210]:
meta = meta[meta.IAUID.isin(valid_fid_ids)]
meta.IAUID.unique().shape

(9010,)

In [216]:
meta[meta.IAUID.isin(yse_ids)].shape

(1615, 29)

In [217]:
#2.-peak mag of at least 19 in at least one band
peak=lcs.groupby(by='IAUID').magpsf.min().reset_index()
peak.shape

(9010, 2)

In [218]:
bright_enough=peak[peak.magpsf<=19]
bright_enough.shape

(7214, 2)

In [219]:
lcs = lcs[lcs.IAUID.isin(bright_enough.IAUID)]
lcs.IAUID.unique().shape

(7214,)

In [220]:
meta = meta[meta.IAUID.isin(bright_enough.IAUID)]
meta.IAUID.unique().shape

(7214,)

In [221]:
meta[meta.IAUID.isin(yse_ids)].shape

(219, 29)

In [222]:
#3.-not appear in lowe gal latitudes (|b| >=7 deg)
from astropy.coordinates import SkyCoord
def galactic_lat(ra, dec):
    c=SkyCoord(ra, dec, unit='deg')
    b=c.galactic.b.degree
    return b

In [223]:
meta.keys()

Index(['hostname', 'hostredshift', 'reporting_groups', 'data_sources',
       'classifying_groups', 'associated_groups', 'ZTFID', 'peakt', 'peakfilt',
       'peakmag', 'peakabs', 'duration', 'rise', 'fade', 'hostabs', 'hostcol',
       'b', 'A_V', 'IAUID', 'Confidence', 'p_SNII', 'p_SNIa', 'p_SNIbc',
       'redshift', 'RA_deg', 'Dec_deg', 'subtype', 'type', 'type_code'],
      dtype='object')

In [238]:
meta['gal_lat']=meta.apply(lambda row : galactic_lat(row['RA_deg'],row['Dec_deg']),axis=1)

In [239]:
meta[['RA_deg','Dec_deg','b','gal_lat']]

KeyError: "['b'] not in index"

In [234]:
meta=meta.drop(labels='b',axis=1)
meta.keys()

KeyError: "['b'] not found in axis"

In [240]:
meta.rename(columns={'gal_lat':'b'},inplace=True)
meta.keys()

Index(['hostname', 'hostredshift', 'reporting_groups', 'data_sources',
       'classifying_groups', 'associated_groups', 'ZTFID', 'peakt', 'peakfilt',
       'peakmag', 'peakabs', 'duration', 'rise', 'fade', 'hostabs', 'hostcol',
       'A_V', 'IAUID', 'Confidence', 'p_SNII', 'p_SNIa', 'p_SNIbc', 'redshift',
       'RA_deg', 'Dec_deg', 'subtype', 'type', 'type_code', 'b'],
      dtype='object')

In [241]:
meta=meta[meta.b.abs()>=7]
meta.shape

(7202, 29)

In [242]:
meta[meta.IAUID.isin(yse_ids)].shape

(219, 29)

In [243]:
lcs = lcs[lcs.IAUID.isin(meta.IAUID.unique())]
lcs.IAUID.unique().shape

(7202,)

In [244]:
lcs = lcs.sort_values(by=['IAUID','mjd'])
lcs.head()

Unnamed: 0,jd,magpsf,sigmapsf,magzpsci,fid,isdiffpos,ZTFID,IAUID,mjd
27200,2458637.0,18.554399,0.086457,25.9904,2,t,ZTF19aawsday,AT2019hbr,58636.423484
27201,2458642.0,18.8578,0.093859,26.067801,1,t,ZTF19aawsday,AT2019hbr,58641.446678
27202,2458645.0,18.862499,0.099435,26.1015,1,t,ZTF19aawsday,AT2019hbr,58644.448565
27203,2458648.0,19.066799,0.114149,26.039801,1,t,ZTF19aawsday,AT2019hbr,58647.472512
27204,2458651.0,18.4027,0.086108,26.158701,2,t,ZTF19aawsday,AT2019hbr,58650.437442


In [245]:
meta = meta.sort_values(by='IAUID')
meta['IAUID'].head()

6554    AT2019hbr
6701    AT2019umt
6455     AT2019ye
6736    AT2019zri
6751     AT2020pv
Name: IAUID, dtype: object

alright let's save and get some stats

In [248]:
yse_rem=meta[meta.IAUID.isin(yse_ids)]

In [259]:
yse_rem[(yse_rem.type=='SLSN') ].IAUID

Series([], Name: IAUID, dtype: object)

In [179]:
meta.to_csv('../data/real/real_meta_clean.csv')
lcs.to_csv('../data/real/real_lcs_clean.csv')

In [181]:
meta.groupby(by='type').IAUID.count()

type
II      1318
Ia      5359
Ib/c     468
SLSN      57
Name: IAUID, dtype: int64