In [1]:
import json
import os
import pandas as pd
import numpy as np

In [2]:
# Split configuration. The test split receives whatever is left after the
# training and validation fractions have been taken.
TRAIN_FRAC = 0.8
VAL_FRAC = 0.1
SEED = 42  # fixed seed so the shuffle, and therefore the split, is reproducible

# NOTE(review): the displayed head contains an 'Unnamed: 0' column, which looks
# like a saved index; consider `index_col=0` once nothing downstream relies on it.
df = pd.read_csv('./label/labels.csv')

# frac=1 samples every row without replacement, i.e. a full row shuffle.
# NOTE(review): the first rows share the 'kplr000757450' name prefix; if those are
# several recordings of one object, a purely row-wise split may place the same
# object in more than one split -- confirm whether a grouped split is needed.
shuffled_df = df.sample(frac=1, random_state=SEED)

N = shuffled_df.shape[0]

n_train = int(TRAIN_FRAC * N)
n_val = int(VAL_FRAC * N)
n_test = N - n_train - n_val  # remainder, so the three sizes always add up to N

print(N, n_train, n_val, n_test)
df.head()

43054 34443 4305 4306


Unnamed: 0,name,label
0,kplr000757450-2009350155506_llc,1
1,kplr000757450-2010078095331_llc,1
2,kplr000757450-2010174085026_llc,1
3,kplr000757450-2010265121752_llc,1
4,kplr000757450-2010355172524_llc,1


In [3]:
# Pull both columns out once, in shuffled order, then cut them at the same
# boundaries so file names and labels stay aligned.
all_fnames = shuffled_df['name'].values
all_labels = shuffled_df['label'].values

val_start = n_train           # first row of the validation split
test_start = n_train + n_val  # first row of the test split

train_fnames, train_labels = all_fnames[:val_start], all_labels[:val_start]
val_fnames, val_labels = all_fnames[val_start:test_start], all_labels[val_start:test_start]
test_fnames, test_labels = all_fnames[test_start:], all_labels[test_start:]

In [4]:
import tensorflow as tf

# One dataset of (file name, label) pairs per split, built in train/val/test order.
ds_train, ds_val, ds_test = (
    tf.data.Dataset.from_tensor_slices(split)
    for split in (
        (train_fnames, train_labels),
        (val_fnames, val_labels),
        (test_fnames, test_labels),
    )
)




In [5]:
def _read_series(path):
    """Read a comma-separated text file and parse its fields as numbers.

    The whole file is read as one string, split on ',' and converted with
    ``tf.strings.to_number`` (float32 by default).
    """
    raw = tf.io.read_file(path)
    fields = tf.strings.split(raw, sep=',')
    return tf.strings.to_number(fields)


def preprocesar_datos(name, label, data_dir="./data/"):
    """Load the flux and power series of one sample.

    Intended for ``tf.data.Dataset.map`` over ``(name, label)`` elements.

    Args:
        name: Sample directory name (string tensor); the files
            ``<data_dir><name>/flux.csv`` and ``<data_dir><name>/power.csv``
            are read.
        label: Target value, returned unchanged.
        data_dir: Path prefix prepended verbatim to ``name``, so it must end
            with a path separator. Defaults to ``"./data/"``.

    Returns:
        ``((flux, power), label)`` where ``flux`` and ``power`` are the parsed
        numeric tensors.
    """
    flux = _read_series(data_dir + name + "/flux.csv")
    power = _read_series(data_dir + name + "/power.csv")
    return (flux, power), label

In [6]:
BATCH_SIZE_TRAIN = 100
BATCH_SIZE_VAL = 100

# Each element triggers two file reads, so decode samples in parallel and
# prefetch batches to overlap I/O with training. Parallel `map` keeps element
# order by default, so batch contents are unchanged.
train_batch = (
    ds_train
    .map(preprocesar_datos, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE_TRAIN)
    .prefetch(tf.data.AUTOTUNE)
)
val_batch = (
    ds_val
    .map(preprocesar_datos, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE_VAL)
    .prefetch(tf.data.AUTOTUNE)
)

# Rebuilt from the raw arrays rather than from `ds_test` itself: reassigning
# `ds_test = ds_test.map(...)` would, on a second run of this cell, try to
# preprocess elements that have already been preprocessed.
ds_test = (
    tf.data.Dataset.from_tensor_slices((test_fnames, test_labels))
    .map(preprocesar_datos, num_parallel_calls=tf.data.AUTOTUNE)
)

In [7]:
from tensorflow.keras import layers, Model
# Model definition: two convolutional branches (flux and power) whose flattened
# features are concatenated and fed to a dense sigmoid head with a single output.

def _conv_branch(branch_input):
    """Build one convolutional feature extractor on top of ``branch_input``.

    Applies two Conv1D(32 filters, kernel size 10, ReLU) + MaxPool1D(3) stages,
    flattens the result, and wraps the branch in its own ``Model``.
    """
    features = branch_input
    for _ in range(2):
        features = layers.Conv1D(32, 10, activation='relu')(features)
        features = layers.MaxPool1D(3)(features)
    features = layers.Flatten()(features)
    return Model(inputs=branch_input, outputs=features)


# Inputs: sequences of length 4000 (flux) and 5000 (power), one channel each
inputFlux = layers.Input(shape=(4000, 1))
inputPower = layers.Input(shape=(5000, 1))

# Branches are built flux first, then power, so auto-generated layer names
# follow that order.
x = _conv_branch(inputFlux)
y = _conv_branch(inputPower)

combined = layers.concatenate([x.output, y.output])

# Dense head: 100 -> 10 -> 1 units, all with sigmoid activation
z = combined
for units in (100, 10, 1):
    z = layers.Dense(units, activation='sigmoid')(z)

model = Model(inputs=[x.input, y.input], outputs=z)
model.summary()



Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 4000, 1)]            0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 5000, 1)]            0         []                            
                                                                                                  
 conv1d (Conv1D)             (None, 3991, 32)             352       ['input_1[0][0]']             
                                                                                                  
 conv1d_2 (Conv1D)           (None, 4991, 32)             352       ['input_2[0][0]']             
                                                                                          

In [8]:
from datetime import datetime
from tensorflow import keras
# Compile the model: binary cross-entropy loss, Adam optimizer, and the metrics
# tracked during training and validation.
training_metrics = [
    keras.metrics.BinaryAccuracy(),
    keras.metrics.Recall(),
    keras.metrics.Precision(),
    keras.metrics.AUC(num_thresholds=25),
]
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=training_metrics,
)

# Directory where the TensorBoard training logs are written
logdir = "./Logs/A/"
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

# Save the model only when validation binary accuracy improves (higher is better)
model_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="./Models/A/best_model.keras",
    monitor="val_binary_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1,
)

In [9]:
# Train for 50 epochs, validating after each one; the callbacks log to
# TensorBoard and checkpoint the best model seen so far.
training_history = model.fit(
    train_batch,
    validation_data=val_batch,
    epochs=50,
    callbacks=[tensorboard_callback, model_checkpoint],
    verbose=1,  # per-epoch progress output is shown; set to 0 to rely on TensorBoard only
)

Epoch 1/50

Epoch 1: val_binary_accuracy improved from -inf to 0.74564, saving model to ./Models/A\best_model.keras
Epoch 2/50
Epoch 2: val_binary_accuracy improved from 0.74564 to 0.77003, saving model to ./Models/A\best_model.keras
Epoch 3/50
Epoch 3: val_binary_accuracy improved from 0.77003 to 0.78374, saving model to ./Models/A\best_model.keras
Epoch 4/50
Epoch 4: val_binary_accuracy improved from 0.78374 to 0.79233, saving model to ./Models/A\best_model.keras
Epoch 5/50
Epoch 5: val_binary_accuracy improved from 0.79233 to 0.79675, saving model to ./Models/A\best_model.keras
Epoch 6/50
Epoch 6: val_binary_accuracy improved from 0.79675 to 0.79907, saving model to ./Models/A\best_model.keras
Epoch 7/50
Epoch 7: val_binary_accuracy improved from 0.79907 to 0.80232, saving model to ./Models/A\best_model.keras
Epoch 8/50
Epoch 8: val_binary_accuracy improved from 0.80232 to 0.80302, saving model to ./Models/A\best_model.keras
Epoch 9/50
Epoch 9: val_binary_accuracy improved from 0.80