### Load ground truth

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

folder = "../../KOI_Data/"

# Split assignment: the "Set" column marks each row as Train / Test / Unlabeled.
df_sets = pd.read_csv(folder+"/koi_sets_unb.csv") 
mask_train = (df_sets["Set"] == "Train").values
mask_test = (df_sets["Set"] == "Test").values
mask_unlabeled = (df_sets["Set"] == "Unlabeled").values

# The boolean masks from df_sets are applied positionally to df_meta, so both
# CSV files are assumed to be row-aligned -- TODO confirm.
df_meta = pd.read_csv(folder+"/koi_metadata.csv")
df_meta_train = df_meta[mask_train]
df_meta_test = df_meta[mask_test]
# Fixed: the unlabeled subset was previously selected with mask_test.
df_meta_unb = df_meta[mask_unlabeled]

# Binary target: 1 when the disposition is CONFIRMED, 0 otherwise.
y_train = ((df_meta_train["NExScI Disposition"]=="CONFIRMED")*1).values
# Fixed: the test labels were previously computed from df_meta_train, which
# made y_test (and therefore N_test) a copy of the training labels.
y_test = ((df_meta_test["NExScI Disposition"]=="CONFIRMED")*1).values

N_train = y_train.shape[0]
N_test = y_test.shape[0]
y_train

array([0, 0, 0, ..., 1, 1, 0])

### Simulate data (light curves)

In [2]:
import numpy as np

T = 70000  # number of time steps in each simulated light curve

# Placeholder data: uniform random values in [0, 1) standing in for real light
# curves. A seeded, local RandomState makes the arrays identical on every run
# without touching NumPy's global random state.
SIM_SEED = 0
rng = np.random.RandomState(SIM_SEED)

# NOTE: each array is float64 with shape (n_samples, T), so memory use grows
# quickly with the number of samples.
X_train = rng.rand(N_train,T)
X_test = rng.rand(N_test,T)
X_train.shape

(4692, 70000)

In [11]:
import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Input,Conv1D,Dense,Flatten, MaxPool1D, Reshape,UpSampling1D
from keras.layers import GlobalAveragePooling1D,GlobalMaxPool1D, TimeDistributed, GRU,LSTM

#def Conv1DTranspose(input_tensor, filters, kernel_size, strides=2, padding='same',activation='linear'):
#    x = Lambda(lambda x: K.expand_dims(x, axis=2))(input_tensor)
#    x = Conv2DTranspose(filters=filters, kernel_size=(kernel_size, 1), strides=(strides, 1), padding=padding,activation=activation)(x)
#    x = Lambda(lambda x: K.squeeze(x, axis=2))(x)
#    return x

In [4]:
# The encoders in this notebook take Input(shape=(input_dim, 1)), so the 2-D
# (samples, time) arrays need a trailing axis of size 1.
# The ndim guard keeps this cell idempotent: re-running it does not keep
# stacking extra axes onto arrays that were already expanded.
if X_train.ndim == 2:
    X_train = np.expand_dims(X_train,axis=-1)
if X_test.ndim == 2:
    X_test = np.expand_dims(X_test,axis=-1)

### Fully convolutional

In [5]:
# TODO: add dropout or batch normalization (BN)?

def encoder_model(input_dim, latent_dim, L=1, filters=8, kernel_s=10, pool=5):
    """Build the fully convolutional encoder.

    The network stacks convolutional stages, each made of two 'same'-padded
    ReLU Conv1D layers followed by MaxPool1D(pool). The first stage uses
    `filters` channels and every further stage doubles that number. The last
    feature map is flattened and passed through Dense(512, relu) and a linear
    Dense(latent_dim).

    Note: other cells of this notebook define a function with this same name;
    whichever cell was executed last is the one in effect.

    Parameters
    ----------
    input_dim : length of the input sequence (the input has one channel).
    latent_dim : size of the latent code.
    L : number of convolutional stages; at least one stage is always built.
    filters : channels of the first stage.
    kernel_s : Conv1D kernel size.
    pool : max-pooling factor applied after each stage.

    Returns
    -------
    (Model, redim_shape) where redim_shape is the (steps, channels) shape of
    the last convolutional feature map, before flattening.
    """
    it = Input(shape=(input_dim, 1))

    x = it
    n_filters = filters
    # max(L, 1): one stage is built even when L < 1.
    for stage in range(max(L, 1)):
        if stage > 0:
            n_filters = int(n_filters * 2)
        x = Conv1D(n_filters, kernel_s, strides=1, padding='same', activation='relu')(x)
        x = Conv1D(n_filters, kernel_s, strides=1, padding='same', activation='relu')(x)
        x = MaxPool1D(pool)(x)

    # Alternative to Flatten: d1 = GlobalMaxPool1D()(x)
    redim_shape = (int(x.shape[1]), int(x.shape[2]))
    flat = Flatten()(x)
    hidden = Dense(512, activation='relu')(flat)
    out_latent = Dense(latent_dim, activation='linear')(hidden)
    return Model(inputs=it, outputs=out_latent), redim_shape

def decoder_model(input_dim, redim_shape, L=1, filters=8, kernel_s=10, pool=5):
    """Build the convolutional decoder that maps a latent code to a sequence.

    The latent vector goes through Dense(512, relu) and a ReLU Dense layer
    with prod(redim_shape) units, is reshaped to `redim_shape`, and then
    passes through L stages of UpSampling1D(pool) followed by two
    'same'-padded ReLU Conv1D layers. The channel count starts at
    filters * 2**(L-1) and is halved after every stage. A final linear
    Conv1D with one filter produces the output.

    Parameters
    ----------
    input_dim : size of the latent code fed to the decoder.
    redim_shape : (steps, channels) shape the dense output is reshaped to.
    L : number of upsampling stages.
    filters : base number of channels (the last stage's width when the
        halving is exact).
    kernel_s : Conv1D kernel size.
    pool : upsampling factor of each stage.

    Returns
    -------
    Model mapping (input_dim,) vectors to the decoded sequence.
    """
    it = Input(shape=(input_dim,))
    x = Dense(512, activation='relu')(it)
    x = Dense(int(np.prod(redim_shape)), activation='relu')(x)
    x = Reshape(redim_shape)(x)

    # Pre-compute the channel width of every stage: widest first, halved each time.
    stage_widths = []
    width = int(filters * 2 ** (L - 1))
    for _ in range(L):
        stage_widths.append(width)
        width = int(width / 2)

    for width in stage_widths:
        x = UpSampling1D(pool)(x)
        x = Conv1D(width, kernel_s, strides=1, padding='same', activation='relu')(x)
        x = Conv1D(width, kernel_s, strides=1, padding='same', activation='relu')(x)

    out_x = Conv1D(1, kernel_s, strides=1, padding='same', activation='linear')(x)
    return Model(inputs=it, outputs=out_x)

### convolutional and then rnn (as text)

In [20]:
def encoder_model(input_dim, latent_dim, L1=1, filters=8, kernel_s=10, pool=5, L2=1, units=32):
    """Build an encoder made of a convolutional front end followed by GRUs.

    Convolutional part: stages of two 'same'-padded ReLU Conv1D layers plus
    MaxPool1D(pool); the first stage has `filters` channels and each further
    stage doubles them. Recurrent part: L2 sequence-returning GRU layers whose
    size starts at `units` and doubles per layer, then one GRU that returns
    only its last state, then Dense(512, relu) and a linear Dense(latent_dim).

    Note: other cells of this notebook define a function with this same name;
    whichever cell was executed last is the one in effect.

    Parameters
    ----------
    input_dim : length of the input sequence (the input has one channel).
    latent_dim : size of the latent code.
    L1 : number of convolutional stages; at least one stage is always built.
    filters, kernel_s, pool : Conv1D channels / kernel size and pooling factor.
    L2 : number of stacked sequence-returning GRU layers.
    units : size of the first GRU layer.

    Returns
    -------
    (Model, redim_shape) with redim_shape = (steps after the convolutional
    part, feature size of the last sequence-returning layer).
    """
    it = Input(shape=(input_dim, 1))

    x = it
    n_filters = filters
    # max(L1, 1): one convolutional stage is built even when L1 < 1.
    for stage in range(max(L1, 1)):
        if stage > 0:
            n_filters = int(n_filters * 2)
        x = Conv1D(n_filters, kernel_s, strides=1, padding='same', activation='relu')(x)
        x = Conv1D(n_filters, kernel_s, strides=1, padding='same', activation='relu')(x)
        x = MaxPool1D(pool)(x)

    n_steps = x.shape[1]

    gru_units = units
    for _ in range(L2):
        x = GRU(gru_units, return_sequences=True)(x)
        gru_units = int(gru_units * 2)

    redim_shape = (int(n_steps), int(x.shape[2]))

    # Open question: use global max pooling instead of a final GRU?
    # d1 = GlobalMaxPool1D()(x)
    # gru_units was doubled once more after the last layer, hence the halving.
    summary_state = GRU(int(gru_units / 2), return_sequences=False)(x)
    hidden = Dense(512, activation='relu')(summary_state)
    out_latent = Dense(latent_dim, activation='linear')(hidden)
    return Model(inputs=it, outputs=out_latent), redim_shape

# TODO: the matching decoder for this encoder is still missing.

### Hierarchical RNN

In [25]:
def encoder_model(input_dim, latent_dim, L=1, units=8, splits=10):
    """Build a two-level (hierarchical) GRU encoder.

    The input sequence is cut into `splits` consecutive windows of equal
    length. First level: L sequence-returning GRU layers (sizes units,
    2*units, ...) are applied to every window through TimeDistributed,
    followed by a GRU that keeps only the last state of each window.
    Second level: L sequence-returning GRU layers (same size schedule) run
    over the sequence of window summaries, followed by a GRU returning its
    last state, Dense(512, relu) and a linear Dense(latent_dim).

    Note: other cells of this notebook define a function with this same name;
    whichever cell was executed last is the one in effect.

    Parameters
    ----------
    input_dim : length of the input sequence (the input has one channel).
    latent_dim : size of the latent code.
    L : number of stacked sequence-returning GRU layers at each level.
    units : size of the first GRU layer at each level.
    splits : number of windows the sequence is divided into (default 10,
        the value that used to be hard-coded).

    Returns
    -------
    (Model, redim_shape) with redim_shape = (splits, feature size of the
    last sequence-returning layer).

    Raises
    ------
    ValueError
        If `splits` is not positive or does not divide `input_dim` exactly.
        Previously the window length was silently truncated and the failure
        only surfaced later, inside the Reshape layer.
    """
    # Validate before building any layer so the error message is explicit.
    if splits <= 0 or input_dim % splits != 0:
        raise ValueError(
            "input_dim (%r) must be a positive multiple of splits (%r)"
            % (input_dim, splits)
        )
    T_w = input_dim // splits  # window length

    it = Input(shape=(input_dim,1))
    f1 = Reshape([splits, T_w, 1])(it)
    
    # First level: GRUs applied independently inside every window.
    aux_units = units
    for _ in range(L):
        f1 = TimeDistributed(GRU(aux_units, return_sequences=True))(f1)
        aux_units = int(aux_units*2)
    # Collapse every window to a single vector.
    # NOTE(review): this uses units/2, whereas the second level below uses
    # aux_units/2 (the size of its last stacked layer) -- confirm whether the
    # narrower bottleneck here is intentional.
    f1 = TimeDistributed(GRU(int(units/2), return_sequences=False))(f1)
    
    # Second level: GRUs over the sequence of window summaries.
    aux_units = units
    for _ in range(L):
        f1 = GRU(aux_units, return_sequences=True)(f1)
        aux_units = int(aux_units*2)

    redim_shape = (splits, int(f1.shape[2]))
    # aux_units was doubled once more after the last layer, hence the halving.
    d1 = GRU(int(aux_units/2), return_sequences=False)(f1)
    d1 = Dense(512, activation='relu')(d1)
    out_latent= Dense(latent_dim, activation='linear')(d1)
    return Model(inputs=it, outputs=out_latent), redim_shape

### Dynamic K-max pooling

In [24]:
from keras.engine import Layer, InputSpec
import tensorflow as tf

class KMaxPooling(Layer):
    """
    K-max pooling layer that extracts the k-highest activations from a sequence (2nd dimension). TensorFlow backend.

    Input is a 3-D tensor (enforced through InputSpec(ndim=3)); the output
    keeps the first and last dimensions and has length k along the 2nd one.
    For every channel the k values are returned in descending order
    (tf.nn.top_k with sorted=True), not in their original sequence order.
    """
    def __init__(self, k=1, **kwargs):
        super().__init__(**kwargs)
        self.input_spec = InputSpec(ndim=3)
        self.k = k

    def compute_output_shape(self, input_shape):
        # Only the pooled (2nd) dimension changes: it becomes k.
        return (input_shape[0], self.k, input_shape[2])

    def call(self, inputs):
        # swap last two dimensions since top_k will be applied along the last dimension
        shifted_input = tf.transpose(inputs, [0, 2, 1])
        
        # extract top_k, returns two tensors [values, indices]; keep the values.
        # Fixed: this result used to be overwritten by the k *smallest* values
        # (-top_k(-x)), which contradicted the layer's name and docstring.
        top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0]
        
        # swap the dimensions back
        return tf.transpose(top_k, [0, 2, 1])

    def get_config(self):
        # Include k so the layer can be re-created from its config.
        config = super().get_config()
        config['k'] = self.k
        return config
    
import math
def K_l(L, seq_len, k_top, layer=1):
    """Dynamic k for k-max pooling at a given layer.

    Computes ceil(seq_len * (L - layer) / L) and returns it, unless it does
    not exceed `k_top`, in which case `k_top` is returned.

    Parameters
    ----------
    L : total number of layers.
    seq_len : length of the input sequence.
    k_top : lower bound for the returned k.
    layer : index of the current layer (the callers here count from 1).
    """
    dynamic_k = math.ceil(seq_len * (L - layer) / L)
    if dynamic_k > k_top:
        return dynamic_k
    return k_top

In [17]:
def encoder_model(input_dim, latent_dim, L=1, filters=8, kernel_s=10, pool=5, K_top=500):
    """Build a convolutional encoder that uses dynamic k-max pooling.

    Each stage is two 'same'-padded ReLU Conv1D layers followed by
    KMaxPooling, whose k comes from K_l(L, input_dim, K_top, layer) with
    layers counted from 1. The first stage has `filters` channels and every
    further stage doubles them. The last feature map is flattened and passed
    through Dense(512, relu) and a linear Dense(latent_dim).

    Note: other cells of this notebook define a function with this same name;
    whichever cell was executed last is the one in effect.

    Parameters
    ----------
    input_dim : length of the input sequence (the input has one channel).
    latent_dim : size of the latent code.
    L : number of stages; at least one stage is always built.
    filters : channels of the first stage.
    kernel_s : Conv1D kernel size.
    pool : accepted but not used by this variant.
    K_top : lower bound for k passed to K_l. Author's idea: it could be the
        shortest period among the data, i.e. the final length of the
        convolutional encoding.

    Returns
    -------
    (Model, redim_shape) where redim_shape is the (steps, channels) shape of
    the last pooled feature map, before flattening.
    """
    it = Input(shape=(input_dim, 1))

    x = it
    n_filters = filters
    # Layers are numbered from 1; max(L, 1) builds one stage even when L < 1.
    for layer in range(1, max(L, 1) + 1):
        if layer > 1:
            n_filters = int(n_filters * 2)
        x = Conv1D(n_filters, kernel_s, strides=1, padding='same', activation='relu')(x)
        x = Conv1D(n_filters, kernel_s, strides=1, padding='same', activation='relu')(x)
        # Open question from the author: how would this pooling be decoded?
        x = KMaxPooling(k=K_l(L, input_dim, K_top, layer=layer))(x)

    # Alternative: d1 = GlobalMaxPool1D()(x) -- its inverse would be a RepeatVector.
    redim_shape = (int(x.shape[1]), int(x.shape[2]))
    flat = Flatten()(x)
    hidden = Dense(512, activation='relu')(flat)
    out_latent = Dense(latent_dim, activation='linear')(hidden)
    return Model(inputs=it, outputs=out_latent), redim_shape

### build Encoder

In [26]:
# Alternative encoder variants. Each call only matches the encoder_model
# definition with the corresponding signature, which must be the one that was
# executed last.
#encoder, redim_shape = encoder_model(T, 32, L=3, filters=8, pool=5)
#encoder, redim_shape = encoder_model(T, 32, L1=3, filters=8, pool=5,L2=3, units=32)
#encoder, redim_shape = encoder_model(T, 32, L=3, filters=8, pool=5, K_top=100)

# Active variant: the encoder_model signature that takes (L, units).
encoder, redim_shape = encoder_model(input_dim=T, latent_dim=32, L=3, units=8)

encoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 70000, 1)          0         
_________________________________________________________________
reshape_9 (Reshape)          (None, 10, 7000, 1)       0         
_________________________________________________________________
time_distributed_16 (TimeDis (None, 10, 7000, 8)       240       
_________________________________________________________________
time_distributed_17 (TimeDis (None, 10, 7000, 16)      1200      
_________________________________________________________________
time_distributed_18 (TimeDis (None, 10, 7000, 32)      4704      
_________________________________________________________________
time_distributed_19 (TimeDis (None, 10, 4)             444       
_________________________________________________________________
gru_30 (GRU)                 (None, 10, 8)             312       
__________

### build Decoder

In [21]:
# The decoder takes the 32-dimensional latent code and starts from the
# redim_shape reported by the encoder.
decoder = decoder_model(input_dim=32, redim_shape=redim_shape, L=3, filters=8, pool=5)
decoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 32)                0         
_________________________________________________________________
dense_17 (Dense)             (None, 512)               16896     
_________________________________________________________________
dense_18 (Dense)             (None, 17920)             9192960   
_________________________________________________________________
reshape_7 (Reshape)          (None, 560, 32)           0         
_________________________________________________________________
up_sampling1d_20 (UpSampling (None, 2800, 32)          0         
_________________________________________________________________
conv1d_56 (Conv1D)           (None, 2800, 32)          10272     
_________________________________________________________________
conv1d_57 (Conv1D)           (None, 2800, 32)          10272     
__________

### build Autoencoder

In [28]:
# Chain encoder and decoder into one end-to-end model over (T, 1) inputs.
it = Input(shape=(T, 1))
out = decoder(encoder(it))
autoencoder = Model(inputs=it, outputs=out)
autoencoder.summary()

NameError: name 'decoder' is not defined

In [27]:
# A custom loss may be needed here (focal loss?); plain MSE is used for now.
autoencoder.compile(optimizer='adam', loss='mse')

NameError: name 'autoencoder' is not defined

In [25]:
def train_model(model,X,y,epochs=1,batch_size=32):
    """Fit `model` on (X, y) and return whatever `model.fit` returns.

    A fixed validation_split of 0.2 is passed to `fit`; `epochs` and
    `batch_size` are forwarded unchanged.
    """
    fit_options = dict(epochs=epochs, batch_size=batch_size, validation_split=0.2)
    history = model.fit(X, y, **fit_options)
    return history
    

In [None]:
# Autoencoder training: the input doubles as the reconstruction target.
train_model(model=autoencoder, X=X_train, y=X_train, batch_size=256)

Train on 3753 samples, validate on 939 samples
Epoch 1/1


In [None]:
# Keep the reconstructions in a variable instead of discarding them after the
# cell runs, and display only their shape rather than the array itself.
X_train_rec = autoencoder.predict(X_train)
X_train_rec.shape

In [4]:
from evaluation import calculate_metrics
# Baseline: score a constant prediction of 1 (the value y_train uses for
# CONFIRMED) for every training sample.
aux = calculate_metrics(y_train, np.ones(shape=(N_train, 1)))

                F1 macro  F1 micro    F1 raw  F1 weighted  Precision raw  \
False Positive  0.274583  0.274583  0.000000     0.207868       0.000000   
Confirmed       0.274583  0.274583  0.549165     0.207868       0.378517   

                Recall raw  
False Positive         0.0  
Confirmed              1.0  


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
