In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
from astropy.io import fits
import matplotlib.pyplot as plt
from keras.layers import Conv1D , Dropout , Flatten , MaxPooling1D, Dense, Input
from keras.layers.core import Lambda
from keras.models import Model , load_model
import random
import h5py

### Descarga y distribución de datos

In [2]:
# Load the data.
# NOTE(review): this is an absolute, machine-specific path; adjust it before running elsewhere.
path_local_data = '/home/anell/Desktop/Bovy/AnellExercises/Fits_files'
# Open the file explicitly read-only: this cell only reads datasets, and relying on
# h5py's implicit default mode raises a deprecation warning on h5py < 3.
with h5py.File(f'{path_local_data}/apogeedr14_gaiadr2_with_spectrum_probando_rendimiento_1.h5', 'r') as F:
    # Copy each dataset into an in-memory NumPy array so it remains usable
    # after the file is closed at the end of the `with` block.
    parallax = np.array(F['parallax'])
    parallax_error = np.array(F['parallax_err'])
    spectra = np.array(F['spectra'])
    Kmag = np.array(F['corrected_magnitude_K'])
    bp_rp = np.array(F['bp_rp'])
    Gmag = np.array(F['phot_g_mean_mag'])
    teff = np.array(F['NN_teff'])
    apogee_id = np.array(F['APOGEE_ID'])
    snr = np.array(F['SNR'])
    fe_h = np.array(F['Fe/H'])
    path_spectra = np.array(F['Path_spectra'])

  This is separate from the ipykernel package so we can avoid doing imports until


In [3]:
# Sanity check: shapes of the freshly loaded arrays.
tuple(arr.shape for arr in (parallax, spectra, Kmag))

((57696,), (57696, 7514), (57696,))

In [4]:
# Keep only the rows whose SNR value is strictly greater than 20.
# The row indices are found with one vectorised comparison instead of a
# Python-level loop; `cut_snr` is still a plain list of integer indices.
cut_snr = np.flatnonzero(np.asarray(snr) > 20).tolist()

# Apply the same row selection to every per-star array so they stay aligned.
parallax = parallax[cut_snr]
parallax_error = parallax_error[cut_snr]
spectra = spectra[cut_snr]
Kmag = Kmag[cut_snr]
bp_rp = bp_rp[cut_snr]
Gmag = Gmag[cut_snr]
teff = teff[cut_snr]
apogee_id = apogee_id[cut_snr]
snr = snr[cut_snr]
fe_h = fe_h[cut_snr]
path_spectra = path_spectra[cut_snr]

In [5]:
# Sanity check: shapes after the SNR cut.
tuple(arr.shape for arr in (parallax, spectra, Kmag))

((57565,), (57565, 7514), (57565,))

In [6]:
# Standardise Gmag, the colour (G_bp - G_rp) and teff: subtract the mean, divide by the std.
Gmag_std = np.std(Gmag)
Gmag_mean = np.mean(Gmag)
Gmag_norm = (Gmag - Gmag_mean) / Gmag_std

bp_rp_std = np.std(bp_rp)
bp_rp_mean = np.mean(bp_rp)
bp_rp_norm = (bp_rp - bp_rp_mean) / bp_rp_std

teff_std = np.std(teff)
teff_mean = np.mean(teff)
teff_norm = (teff - teff_mean) / teff_std


# Define the variables that will be fed to the network and add the extra axis each needs.
# Below, N = len(spectra) and L = spectra.shape[1].
X = np.expand_dims(spectra, axis=2)                # (N, L, 1)
Y = np.expand_dims(parallax, axis=1)               # (N, 1)
Y_error = np.expand_dims(parallax_error, axis=1)   # (N, 1)
K_mag = np.expand_dims(Kmag, axis=1)               # (N, 1)
G_mag = np.expand_dims(Gmag_norm, axis=1)          # (N, 1)
Bp_Rp = np.expand_dims(bp_rp_norm, axis=1)         # (N, 1)
Teff = np.expand_dims(teff_norm, axis=1)           # (N, 1)
Snr = np.expand_dims(snr, axis=1)                  # (N, 1)
FeH = np.expand_dims(fe_h, axis=1)                 # (N, 1)
Teff_without_norm = np.expand_dims(teff, axis=1)   # (N, 1)

# Columns: standardised Gmag, standardised colour, standardised teff.
X_offset = np.concatenate((G_mag, Bp_Rp, Teff), axis=1)   # (N, 3)

# Shuffle the sample. The seed is fixed so the permutation is reproducible.
idx = list(range(len(X)))
random.seed(20)
random.shuffle(idx)

X = X[idx]                                       # shape: (N, L, 1)
Y = Y[idx]                                       # shape: (N, 1)
# Y_error must follow the same permutation as Y; it was previously left in the
# pre-shuffle order, which misaligned each error with its parallax.
Y_error = Y_error[idx]                           # shape: (N, 1)
K_mag = K_mag[idx]                               # shape: (N, 1)
X_offset = X_offset[idx]                         # shape: (N, 3)
SNR = Snr[idx]                                   # shape: (N, 1); `Snr` keeps the pre-shuffle order
FeH = FeH[idx]                                   # shape: (N, 1)
Teff_without_norm = Teff_without_norm[idx]       # shape: (N, 1)
# NOTE(review): G_mag, Bp_Rp and Teff are NOT shuffled here; only their
# concatenation X_offset is. Do not pair them row-wise with the shuffled arrays.

In [7]:
# Sanity check: shapes of the network inputs/targets next to the raw parallax array.
tuple(arr.shape for arr in (X, Y, parallax))

((57565, 7514, 1), (57565, 1), (57565,))

In [8]:
# Model definition
def ApogeeDR14GaiaDR2(dim_t, dim_n):
    """
    Build the three-input Keras model that outputs a single parallax value.

    The model has two branches whose outputs are added:
      * a spectrum branch (Conv1D -> MaxPooling1D -> Flatten -> Dense stack)
        ending in one softplus unit, which is then multiplied by
        10 ** (-0.2 * magnitude) using the magnitude input;
      * an offset branch (three Dense layers, tanh output) fed by a
        3-feature input.

    INPUT:
    dim_t - length of the spectrum axis (first dimension of one spectrum)
    dim_n - number of features per spectrum position (second dimension)

    OUTPUT:
    An uncompiled keras Model taking [spectra, magnitude, offset features]
    and returning the offset-corrected parallax.
    """
    n_mag_inputs = 1  # one magnitude value per example
    n_outputs = 1     # one scalar produced by each branch

    spectra_in = Input(shape=(dim_t, dim_n))
    mag_in = Input(shape=(n_mag_inputs,), name="ApparentMagnitude-input")

    # --- Spectrum -> luminosity branch ---
    hidden = Conv1D(filters=4, kernel_size=2, activation='relu')(spectra_in)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    hidden = Flatten()(hidden)
    # Fully connected stack, alternating relu / tanh activations.
    dense_stack = (
        (164, 'relu'),
        (164, 'tanh'),
        (64, 'relu'),
        (64, 'tanh'),
        (32, 'relu'),
    )
    for width, activation in dense_stack:
        hidden = Dense(width, activation=activation)(hidden)
    # softplus keeps this branch's output strictly positive.
    luminosity = Dense(n_outputs, activation='softplus')(hidden)

    # parallax = luminosity * 10 ** (-0.2 * magnitude)
    parallax_raw = Lambda(
        lambda pair: tf.math.multiply(
            pair[0], tf.math.pow(10., tf.math.multiply(-0.2, pair[1]))
        ),
        name='luminosity-to-parallax',
    )([luminosity, mag_in])

    # --- Offset correction branch ---
    offset_in = Input(shape=(3,), name="Offset-input")
    offset = Dense(64, activation='relu')(offset_in)
    offset = Dense(32, activation='relu')(offset)
    # tanh bounds the additive offset to the open interval (-1, 1).
    offset = Dense(n_outputs, activation='tanh')(offset)

    parallax_corrected = Lambda(
        lambda pair: tf.math.add(pair[0], pair[1]),
        name="Sum-parallax-offset",
    )([parallax_raw, offset])

    # --- Assemble the model ---
    return Model(
        inputs=[spectra_in, mag_in, offset_in],
        outputs=[parallax_corrected],
    )

In [9]:
# Build the training set.
SNR_GOOD_THRESHOLD = 200   # rows with SNR >= this value count as "good"
N_TRAIN_GOOD = 4000        # maximum number of good-SNR rows in the training set
N_TRAIN_BAD = 2000         # maximum number of remaining rows in the training set

# Split the row indices by SNR in one vectorised pass. Rows that fail the
# comparison (including NaN) go to the "bad" list, as in an if/else split.
is_good_snr = np.ravel(SNR) >= SNR_GOOD_THRESHOLD
good_idx_snr = np.flatnonzero(is_good_snr).tolist()
bad_idx_snr = np.flatnonzero(~is_good_snr).tolist()

# TRAINING SET
# First N_TRAIN_GOOD good rows followed by the first N_TRAIN_BAD bad rows.
# Slicing the index lists before indexing avoids copying the whole
# good/bad subset of the (large) spectra array just to keep its head.
train_idx = np.array(good_idx_snr[:N_TRAIN_GOOD] + bad_idx_snr[:N_TRAIN_BAD], dtype=int)

# Shuffle with a fixed seed so good and bad rows are interleaved reproducibly.
idx_random = list(range(len(train_idx)))
random.seed(1000)
random.shuffle(idx_random)
train_idx = train_idx[idx_random]

# Gather every training array with the same final index vector so rows stay aligned.
X_train_snr = X[train_idx]
Y_train_snr = Y[train_idx]
K_mag_train_snr = K_mag[train_idx]
X_offset_train_snr = X_offset[train_idx]

#SET DE VALIDACIÓN
#X_val_snr = np.concatenate((X[good_idx_snr][4000:5500],X[bad_idx_snr][2000:2500]),axis = 0)
#Y_val_snr = np.concatenate((Y[good_idx_snr][4000:5500],Y[bad_idx_snr][2000:2500]),axis = 0)
#K_mag_val_snr = np.concatenate((K_mag[good_idx_snr][4000:5500],K_mag[bad_idx_snr][2000:2500]),axis = 0)
#X_offset_val_snr = np.concatenate((X_offset[good_idx_snr][4000:5500],X_offset[bad_idx_snr][2000:2500]),axis = 0)

#idx_random_val = []
#for i in range(len(X_val_snr)):
#    idx_random_val.append(i)
#random.seed(5000)
#random.shuffle(idx_random_val)

#X_val_snr = X_val_snr[idx_random_val]
#Y_val_snr = Y_val_snr[idx_random_val]
#K_mag_val_snr = K_mag_val_snr[idx_random_val]
#X_offset_val_snr = X_offset_val_snr[idx_random_val]

In [10]:
# The model's spectrum input shape is taken from axes 1 and 2 of the training spectra.
n_timesteps, n_features = X_train_snr.shape[1:3]

Global_model = ApogeeDR14GaiaDR2(n_timesteps, n_features)

# Print the layer-by-layer summary of the freshly built model.
Global_model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 7514, 1)]    0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 7513, 4)      12          input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling1d (MaxPooling1D)    (None, 3756, 4)      0           conv1d[0][0]                     
__________________________________________________________________________________________________
flatten (Flatten)               (None, 15024)        0           max_pooling1d[0][0]              
_______________________________________________________________________________________

In [11]:
# Author's note: "I stopped the training early so that it would not overfit."
# NOTE(review): no early-stopping callback or validation data is passed below;
# the call asks for a fixed 300 epochs, so any early stop was presumably manual - confirm.
train_inputs = [X_train_snr, K_mag_train_snr, X_offset_train_snr]

Global_model.compile(loss='mse', optimizer='adam', metrics=['mse'])
Global_model.fit(
    train_inputs,
    Y_train_snr,
    batch_size=128,
    epochs=300,
    verbose=1,
)


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300
Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 1

Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300
Epoch 198/300
Epoch 199/300
Epoch 200/300
Epoch 201/300
Epoch 202/300
Epoch 203/300
Epoch 204/300
Epoch 205/300
Epoch 206/300
Epoch 207/300
Epoch 208/300
Epoch 209/300
Epoch 210/300
Epoch 211/300
Epoch 212/300
Epoch 213/300
Epoch 214/300
Epoch 215/300
Epoch 216/300
Epoch 217/300
Epoch 218/300
Epoch 219/300
Epoch 220/300
Epoch 221/300
Epoch 222/300
Epoch 223/300
Epoch 224/300
Epoch 225/300
Epoch 226/300
Epoch 227/300
Epoch 228/300
Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300
Epoch 234/300
Epoch 235/300
Epoch 236/300
Epoch 237/300
Epoch 238/300
Epoch 

Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


<tensorflow.python.keras.callbacks.History at 0x7f0b5c616990>

In [18]:
# Save the trained model to an HDF5 file in the current working directory.
model_filename = 'probando_rendimiento_1_SNR(4000-2000).h5'
Global_model.save(model_filename)

In [None]:
#Global_model.compile(optimizer='adam', loss='mse', metrics=['mse'])

#path_local = "/home/anell/Desktop/AnellExercises/Models/"

#callbacks = [ModelCheckpoint(f'{path_local}/model_probando_rendimiento_1_SNR(4000-2000)_2.h5', verbose=1, save_best_only=True)]

#Global_model.fit([X_train_snr,K_mag_train_snr,X_offset_train_snr], Y_train_snr, batch_size=128, #shuffle='batch', 
#                 epochs=300, callbacks=callbacks, 
#                 validation_data=([X_val_snr,K_mag_val_snr,X_offset_val_snr], Y_val_snr))