In [1]:
import tensorflow as tf
from tensorflow import keras

from keras.layers import Conv1D , Dropout , Flatten , MaxPooling1D, Dense, Input, BatchNormalization
from keras.layers.core import Lambda
from keras.models import Model , load_model

import numpy as np
import matplotlib.pyplot as plt
import random
import h5py

In [2]:
def ApogeeDR14GaiaDR2(dim_t, dim_n):
    """
    Build the (uncompiled) three-input Keras model used in this notebook.

    The graph has two branches:
      * a 1-D convolutional branch that reduces the spectrum to one scalar
        through a softplus output (layer "pseudo-lum");
      * a small dense branch that reduces three auxiliary features to one
        scalar through a tanh output (layer "offset").
    The first scalar is multiplied by 10 ** (-0.2 * K_mag) (layer "parallax")
    and the second scalar is added to that product
    (layer "sum-parallax-offset"), which is the model's only output.

    INPUT:
    dim_t - number of time steps of spectrum
    dim_n - number of features of spectrum

    OUTPUT:
    keras Model taking [spectrum, K magnitude, offset features]
    """
    n_magnitudes = 1  # number of corrected magnitudes per example
    n_outputs = 1     # number of final outputs per example

    def _scale_by_magnitude(tensors):
        # tensors = [pseudo-luminosity, magnitude] -> lum * 10 ** (-0.2 * mag)
        pseudo_lum, magnitude = tensors
        exponent = tf.math.multiply(-0.2, magnitude)
        return tf.math.multiply(pseudo_lum, tf.math.pow(10., exponent))

    def _add_offset(tensors):
        # tensors = [parallax, offset] -> element-wise sum
        base, correction = tensors
        return tf.math.add(base, correction)

    # --- Spectrum to pseudo-luminosity -----------------------------------
    spectrum_in = Input(shape=(dim_t, dim_n), name="pseudo-lum-input")
    magnitude_in = Input(shape=(n_magnitudes,), name="K_mag")

    features = spectrum_in
    # Two Conv -> BatchNorm -> MaxPool stages with 2 and then 4 filters.
    for n_filters in (2, 4):
        features = Conv1D(filters=n_filters, kernel_size=3, activation='relu')(features)
        features = BatchNormalization()(features)
        features = MaxPooling1D(pool_size=2)(features)

    features = Flatten()(features)
    for width in (128, 64, 32):
        features = Dense(width, activation='relu')(features)
    pseudo_lum = Dense(n_outputs, activation='softplus', name="pseudo-lum")(features)

    # --- Offset correction (optimization) --------------------------------
    offset_in = Input(shape=(3,), name="offset-input")
    hidden = offset_in
    for width in (64, 32):
        hidden = Dense(width, activation='relu')(hidden)
    offset = Dense(n_outputs, activation='tanh', name="offset")(hidden)

    # --- Combine the branches --------------------------------------------
    parallax = Lambda(_scale_by_magnitude, name='parallax')([pseudo_lum, magnitude_in])
    parallax_plus_offset = Lambda(_add_offset, name="sum-parallax-offset")([parallax, offset])

    # --- Model setup ------------------------------------------------------
    return Model(inputs=[spectrum_in, magnitude_in, offset_in],
                 outputs=[parallax_plus_offset])

#### Data load 

In [None]:
# Load the data
# (commented-out alternative data directory)
#path_local_data = '/home/anell/Desktop/Bovy/AnellExercises/Fits_files'
path_local_data = '/home/bapanes/Research-Now/local/astronn-local/apo-gaia/'
h5_path = f'{path_local_data}/apogeedr14_gaiadr2_with_spectrum_probando_rendimiento_1.h5'

# Datasets pulled from the HDF5 file, in read order.
dataset_keys = (
    'parallax',
    'parallax_err',
    'spectra',
    'corrected_magnitude_K',
    'bp_rp',
    'phot_g_mean_mag',
    'NN_teff',
    'APOGEE_ID',
    'SNR',
    'Fe/H',
    'Path_spectra',
)

# Copy every dataset into memory as a NumPy array while the file is open.
with h5py.File(h5_path, 'r') as F:
    columns = {key: np.array(F[key]) for key in dataset_keys}

# Bind each array to the name the rest of the notebook uses.
parallax = columns['parallax']
parallax_error = columns['parallax_err']
spectra = columns['spectra']
Kmag = columns['corrected_magnitude_K']
bp_rp = columns['bp_rp']
Gmag = columns['phot_g_mean_mag']
teff = columns['NN_teff']
apogee_id = columns['APOGEE_ID']
snr = columns['SNR']
fe_h = columns['Fe/H']
path_spectra = columns['Path_spectra']

In [4]:
# Shapes of the raw target, spectra and K-magnitude arrays.
tuple(raw_array.shape for raw_array in (parallax, spectra, Kmag))

((57696,), (57696, 7514), (57696,))

In [5]:
def _standardize(values):
    """Return (z-scored values, mean, standard deviation) for `values`."""
    std = np.std(values)
    mean = np.mean(values)
    return (values - mean) / std, mean, std


# Define the arrays that feed the network and give each a trailing axis.
X = spectra[:, :, np.newaxis]
Y = parallax[:, np.newaxis]
K_mag = Kmag[:, np.newaxis]

# Normalize Gmag, the colour (G_bp - G_rp) and teff to zero mean / unit std.
Gmag_norm, Gmag_mean, Gmag_std = _standardize(Gmag)
bp_rp_norm, bp_rp_mean, bp_rp_std = _standardize(bp_rp)
teff_norm, teff_mean, teff_std = _standardize(teff)

# Turn each normalized feature into a column vector ...
G_mag = Gmag_norm[:, np.newaxis]
Bp_Rp = bp_rp_norm[:, np.newaxis]
Teff = teff_norm[:, np.newaxis]

# ... and place the three columns side by side as the offset-branch input.
X_offset = np.concatenate((G_mag, Bp_Rp, Teff), axis=1)

In [6]:
# Shapes of the prepared network inputs and target.
print(*(prepared.shape for prepared in (X, Y, K_mag, X_offset)))

(57696, 7514, 1) (57696, 1) (57696, 1) (57696, 3)


#### SNR cut

In [7]:
# Minimum SNR for an example to be placed in the high-SNR group.
SNR_THRESHOLD = 200

# Vectorised split: one boolean mask instead of a Python-level loop over
# every element of `snr`. Entries that fail the comparison (including NaN)
# land in the low-SNR group, exactly as in an if/else on `snr[i] >= 200`.
is_high_snr = snr >= SNR_THRESHOLD

# .tolist() keeps both results as plain lists of Python ints.
high_snr_idx = np.flatnonzero(is_high_snr).tolist()
low_snr_idx = np.flatnonzero(~is_high_snr).tolist()

In [8]:
# Number of examples in the high-SNR and low-SNR groups.
n_high_snr, n_low_snr = len(high_snr_idx), len(low_snr_idx)
print(n_high_snr, n_low_snr)

25913 31783


In [9]:
# Training set = the rows listed in high_snr_idx, taken from every input
# array and from the target.
X_train, Y_train, K_mag_train, X_offset_train = (
    full_array[high_snr_idx] for full_array in (X, Y, K_mag, X_offset)
)

#### Model

In [27]:
# The two trailing axes of X_train give the spectrum length and the number
# of features per step.
n_timesteps, n_features = X_train.shape[1:]

# Build the network and print its layer table.
Global_model = ApogeeDR14GaiaDR2(dim_t=n_timesteps, dim_n=n_features)
Global_model.summary()

Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
pseudo-lum-input (InputLayer)   (None, 7514, 1)      0                                            
__________________________________________________________________________________________________
conv1d_14 (Conv1D)              (None, 7512, 2)      8           pseudo-lum-input[0][0]           
__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 7512, 2)      8           conv1d_14[0][0]                  
__________________________________________________________________________________________________
max_pooling1d_11 (MaxPooling1D) (None, 3756, 2)      0           batch_normalization_5[0][0]      
____________________________________________________________________________________________

In [28]:
# Build a new model instance so this cell always trains from newly
# initialised weights, then compile it with Adam and an MSE loss.
Global_model = ApogeeDR14GaiaDR2(n_timesteps, n_features)
Global_model.compile(optimizer='adam', loss='mse', metrics=['mse'])

# shuffle=True reshuffles all samples every epoch. The previous value,
# "batch", only shuffles in batch-sized chunks; that mode exists for
# HDF5-backed inputs, whereas the training data here are NumPy arrays
# already held in memory.
# The History object is kept so the per-epoch loss can be inspected later
# instead of being echoed as a bare repr.
training_history = Global_model.fit(
    [X_train, K_mag_train, X_offset_train],
    Y_train,
    epochs=10,
    batch_size=128,
    verbose=1,
    shuffle=True,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f240456de10>

In [None]:
# Run the fitted model on the same inputs it was trained on.
train_inputs = [X_train, K_mag_train, X_offset_train]
pred = Global_model.predict(train_inputs)

In [None]:
# Alias the training targets for comparison against `pred`.
target = Y_train

In [None]:
# Predicted vs. target values, drawn on an explicit Axes object.
fig, ax = plt.subplots()
ax.scatter(target, pred, c="g", alpha=0.5)

ax.set_xlabel("target")
ax.set_ylabel("pred")

# Show only the 0-100 range on both axes.
ax.set_ylim(0, 100)
ax.set_xlim(0, 100)
plt.show()