# Read the Data

The code below reads the gzipped pickle files containing the simulation data into pandas data frames.

In [1]:
import gzip
import pickle
import pandas as pd
import numpy as np

# Keys of the per-filter entries in each supernova record, one character per filter
YSE_FILTERS = 'grizXY' # XY are the ZTF g and r bands, respectively


def read_data(filename):
    """Load a gzip-compressed pickle file and split it into features and labels.

    The unpickled object is handed to ``to_dataframe``; the resulting ``type``
    column is turned into the label table and removed from the feature table.

    Parameters
    ----------
    filename : str
        Path of the gzipped pickle file to read.

    Returns
    -------
    X : pandas.DataFrame
        The converted data without the ``type`` column.
    y : pandas.DataFrame
        Dummy encoding of the boolean ``type == 0`` with prefix ``'SNIa'``
        and the first level dropped (presumably type 0 marks a type Ia
        supernova -- confirm against the simulation documentation).
    """
    # NOTE: unpickling can execute arbitrary code; only open trusted files.
    with gzip.open(filename, 'rb') as handle:
        raw = pickle.load(handle)

    frame = to_dataframe(raw)

    is_type_zero = frame.type == 0
    labels = pd.get_dummies(is_type_zero, prefix='SNIa', drop_first=True)
    features = frame.drop(columns=['type'])

    return features, labels


def to_dataframe(data, filters=None):
    """Convert a dictionary of supernova records into a pandas dataframe.

    Every value of ``data`` must be a dictionary holding one entry per filter
    (itself a mapping with ``'mjd'``, ``'fluxcal'`` and ``'fluxcalerr'``
    sequences) plus a ``'header'`` dictionary.  The per-filter sequences are
    converted to numpy arrays and stored under ``mjd_<filt>``,
    ``fluxcal_<filt>`` and ``fluxcalerr_<filt>``; every header entry becomes
    a column of its own, overriding an existing key of the same name.  Any
    other key of a record is carried over unchanged.

    The input is left untouched: each row is assembled in a fresh dictionary,
    so the function can safely be called more than once on the same ``data``.

    Parameters
    ----------
    data : dict
        Maps an identifier to the record dictionary described above.
    filters : iterable of str, optional
        Filter keys to unpack from each record.  Defaults to ``YSE_FILTERS``.

    Returns
    -------
    pandas.DataFrame
        One row per key of ``data`` (the keys form the index).

    Raises
    ------
    KeyError
        If a record lacks one of the filters, one of the three per-filter
        fields, or the ``'header'`` entry.
    """
    if filters is None:
        filters = YSE_FILTERS
    # Materialise once: the filters are walked for every record.
    filters = tuple(filters)
    # Keys that are unpacked into other columns instead of being copied.
    # A set is required here: ``key in 'grizXY'`` would be a substring test.
    unpacked = set(filters) | {'header'}

    rows = {}
    for idx in data:
        sn = data[idx]
        # Start from the keys that are kept as they are, so the derived
        # columns end up after them, as they did with in-place edits.
        row = {key: value for key, value in sn.items() if key not in unpacked}
        for filt in filters:
            band = sn[filt]
            row['mjd_%s' % filt] = np.array(band['mjd'])
            row['fluxcal_%s' % filt] = np.array(band['fluxcal'])
            row['fluxcalerr_%s' % filt] = np.array(band['fluxcalerr'])
        row.update(sn['header'])
        rows[idx] = row

    return pd.DataFrame.from_dict(rows, orient='index')

Now you can read the data using the following line. These are pretty big files, so loading them might be a bit slow. I can try to condense them in the future...

In [None]:
# Load the feature table X and the label table y from the gzipped pickle file
X, y = read_data('yse_ztf_YOUNG.pkl.gz')

`X` contains the data itself. You can take a look at which IDs are present in `X` by using:

In [None]:
# Display the first 100 index labels of X
X.index[:100]

In [None]:
# Get one supernova - the first row of X
X.iloc[0]

# Plot the Lightcurves