In [167]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import progressbar

In [43]:
# Load the pickled table from the data directory and list the columns that are
# kept for every row of a light curve.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle("../data/sne_light_curves.pkl")
features = [
    "oid",
    "mjd",
    "magap",
    "sigmagap",
    "fid",
]

In [44]:
# Select one object's rows and the feature columns in a single .loc step and
# take an explicit copy, so the in-place sort applied to `lc` afterwards works
# on an independent frame instead of a slice of `df` (SettingWithCopyWarning).
lc = df.loc[df.oid == 'ZTF18acqyvag', features].copy()

In [45]:
# Order the rows by ascending mjd. The sorted result is rebound rather than
# sorted with inplace=True, so a frame sliced out of `df` is never mutated.
lc = lc.sort_values('mjd')

In [46]:
# Differences between consecutive mjd values (one entry fewer than rows in lc).
dt = np.diff(lc["mjd"].to_numpy())

In [47]:
# Prepend a zero so dt has exactly one entry per row of lc (first row: gap 0).
# dt is rebuilt from lc.mjd here instead of from the previous dt, so running
# this cell more than once cannot stack additional leading zeros.
dt = np.insert(np.diff(lc.mjd), 0, 0, axis=0)

In [70]:
# Build one frame per object with columns oid, magap, sigmagap, fid, dt,
# keeping only objects that have more than 5 rows.
lcs = []
# groupby(sort=False) yields the objects in order of first appearance -- the
# same order as df.oid.unique() -- without re-scanning the full table with a
# boolean mask for every object.
for oid, group in df.groupby("oid", sort=False):
    if len(group) <= 5:
        continue

    # sort_values returns a new frame, so nothing below mutates `df`.
    lc = group[features].sort_values("mjd")

    # Gap to the previous row's mjd; the first row gets a gap of 0.
    dt = np.insert(np.diff(lc["mjd"]), 0, 0, axis=0)

    # Replace the absolute mjd column by the relative dt column (appended last).
    lc = lc.drop(columns="mjd").assign(dt=dt)

    lcs.append(lc)

In [205]:
# Hold out 20% of the per-object frames as the test set; the seed is fixed so
# the split is repeatable.
X_train, X_test = train_test_split(
    lcs,
    test_size=0.2,
    random_state=42,
)

In [206]:
# Carve the validation set out of the remaining training frames: 12.5% of what
# is left after the 20% test split, i.e. roughly 10% of all frames.
# NOTE(review): X_train is rebound here, so re-running only this cell shrinks
# the training set again.
X_train, X_val = train_test_split(
    X_train,
    test_size=0.125,
    random_state=42,
)

In [223]:
def prepare_lc(lc):
    """Convert one light-curve frame into a one-element list with its feature matrix.

    Parameters
    ----------
    lc : pandas.DataFrame
        Must contain the columns "dt", "magap", "sigmagap" and "fid". Any
        other column (for example "oid") is ignored and need not be present.

    Returns
    -------
    list of numpy.ndarray
        A list holding a single array of shape (len(lc), 4) whose columns are
        ordered dt, magap, sigmagap, fid. The list wrapper lets callers pass
        the result straight to ``list.extend``.

    Raises
    ------
    KeyError
        If one of the four feature columns is missing.
    """
    # Selecting the feature columns already leaves every other column behind,
    # so no separate drop of "oid" is needed; `lc` itself is not modified.
    feature_matrix = lc[["dt", "magap", "sigmagap", "fid"]].to_numpy()
    return [feature_matrix]

In [208]:
def slice_lc(lc, min_length=4):
    """Expand one light-curve frame into all of its leading prefixes.

    Parameters
    ----------
    lc : pandas.DataFrame
        Must contain the columns "dt", "magap", "sigmagap" and "fid". Any
        other column (for example "oid") is ignored and need not be present.
    min_length : int, optional
        Number of rows in the shortest prefix returned. Defaults to 4.

    Returns
    -------
    list of numpy.ndarray
        Prefixes of the (len(lc), 4) feature matrix (columns ordered dt,
        magap, sigmagap, fid) with min_length, min_length + 1, ..., len(lc)
        rows. Empty when the frame has fewer than ``min_length`` rows. The
        prefixes are slices of one shared array, not independent copies.

    Raises
    ------
    ValueError
        If ``min_length`` is smaller than 1.
    KeyError
        If one of the four feature columns is missing.
    """
    if min_length < 1:
        raise ValueError("min_length must be at least 1")

    # Selecting the feature columns already leaves every other column behind,
    # so no separate drop of "oid" is needed; `lc` itself is not modified.
    numpy_lc = lc[["dt", "magap", "sigmagap", "fid"]].to_numpy()
    return [numpy_lc[:end] for end in range(min_length, len(numpy_lc) + 1)]

In [209]:
# Expand every training frame into its prefixes and collect them in one list.
sliced_lcs = []
bar = progressbar.ProgressBar(max_value=len(X_train))
bar.start()
for i, lc in enumerate(X_train):
    sliced_lcs.extend(slice_lc(lc))
    bar.update(i + 1)
# Finalize the bar: without finish() the last rendered state can lag behind
# the real count (the recorded output of this cell stops short of 100%).
bar.finish()

 97% (1518 of 1557) |################### | Elapsed Time: 0:00:02 ETA:   0:00:00

In [224]:
# Convert every test frame into its feature matrix (one array per frame).
x_test_prepared = []
bar = progressbar.ProgressBar(max_value=len(X_test))
bar.start()
for i, lc in enumerate(X_test):
    x_test_prepared.extend(prepare_lc(lc))
    bar.update(i + 1)
# Finalize the bar: without finish() the last rendered state can lag behind
# the real count (the recorded output of this cell stops short of 100%).
bar.finish()

 98% (441 of 446) |##################### | Elapsed Time: 0:00:00 ETA:   0:00:00

In [225]:
# Convert every validation frame into its feature matrix (one array per frame).
x_val_prepared = []
bar = progressbar.ProgressBar(max_value=len(X_val))
bar.start()
for i, lc in enumerate(X_val):
    x_val_prepared.extend(prepare_lc(lc))
    bar.update(i + 1)
# Finalize the bar: without finish() the last rendered state can lag behind
# the real count (the recorded output of this cell stops short of 100%).
bar.finish()

 97% (218 of 223) |##################### | Elapsed Time: 0:00:00 ETA:   0:00:00

In [234]:
# Stack the training prefixes into one float32 array. No maxlen is given, so
# shorter sequences are filled with -1 up to the longest one, using
# pad_sequences' default padding/truncating settings.
padded_lcs = tf.keras.preprocessing.sequence.pad_sequences(
    sliced_lcs,
    dtype="float32",
    value=-1,
)

In [235]:
# Pad the test and validation sequences with the same settings (float32, -1).
# NOTE(review): no maxlen is passed, so each array is padded to the longest
# sequence of its own split -- confirm the differing lengths are intended.
padded_x_test = tf.keras.preprocessing.sequence.pad_sequences(
    x_test_prepared,
    dtype="float32",
    value=-1,
)
padded_x_val = tf.keras.preprocessing.sequence.pad_sequences(
    x_val_prepared,
    dtype="float32",
    value=-1,
)

In [236]:
# Persist the padded training array; np.save appends ".npy" to the given path.
np.save(file="../data/padded_x_train", arr=padded_lcs)

In [237]:
# Persist the padded test and validation arrays next to the training array;
# np.save appends ".npy" to each given path.
np.save(file="../data/padded_x_test", arr=padded_x_test)
np.save(file="../data/padded_x_val", arr=padded_x_val)