In [1]:
import os
import sys
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Avoids warnings: must be set before TensorFlow is imported to take effect.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
import tensorflow as tf

import ScalableLib.classifier.Multiband as multiband

2024-09-03 19:40:34.979745: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-03 19:40:34.979771: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-03 19:40:34.980820: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


The filtering of the times must be done in the spine, not in each band. Split all the times, then select the best one.

In [2]:
# Check that the system recognises the GPU, expose only the selected one to
# TensorFlow, and enable on-demand memory growth for it.
device = 1  # index into the list of physical GPUs reported by TensorFlow
devices = tf.config.list_physical_devices('GPU')
if device >= len(devices):
    # Fail with an actionable message instead of a bare IndexError.
    raise RuntimeError(
        "GPU index {} requested but TensorFlow only sees {} GPU(s)".format(
            device, len(devices)
        )
    )
tf.config.set_visible_devices(devices[device], 'GPU')
tf.config.experimental.set_memory_growth(device=devices[device], enable=True)

# 'device_name' is not guaranteed to be reported; fall back to the
# PhysicalDevice name so the cell still tells the reader what was selected.
device_details = tf.config.experimental.get_device_details(devices[device])
device_name = device_details.get('device_name', devices[device].name)
print("Using {}".format(device_name))

Using NVIDIA GeForce RTX 2080 Ti


Find the different folds and train a model using the stored data.

In [3]:
# Locate every fold directory written by the record-creation step for the
# chosen survey. The survey name is injected into the pattern so that changing
# `survey` is enough to point the notebook at another dataset.
survey = 'Gaia'
path = '../../02_CreateRecords/{}/Folds/*'.format(survey)
# Lexicographic order keeps the folds in a deterministic sequence.
folds = sorted(glob(path))
folds

['../../02_CreateRecords/Gaia/Folds/Fold_1',
 '../../02_CreateRecords/Gaia/Folds/Fold_2',
 '../../02_CreateRecords/Gaia/Folds/Fold_3',
 '../../02_CreateRecords/Gaia/Folds/Fold_4',
 '../../02_CreateRecords/Gaia/Folds/Fold_5',
 '../../02_CreateRecords/Gaia/Folds/Fold_6',
 '../../02_CreateRecords/Gaia/Folds/Fold_7']

Create the results folder.

In [4]:
# Create the results folder; this is a no-op when it already exists, so the
# cell can be re-run safely.
os.makedirs('./Results', exist_ok=True)


Define the arguments for all the models.

In [5]:
# Settings forwarded under 'callbacks_args'
# (presumably early-stopping options; confirm against ScalableLib).
callbacks_args = {
    'patience': 20,
    'mode': 'max',
    'restore_best_weights': True,
    'min_delta': 0.001,
}

# Weight of each output in the loss, forwarded under 'loss_weights_central'.
loss_weights = {
    'Class': 300.0,
    'T_eff': 20.0,
    'Radius': 1e0,
}

# Arguments shared by all the models.
train_args = {
    'hidden_size_bands': [128] * 3,
    'hidden_size_central': [128] * 2,
    'fc_layers_bands': [128] * 3,
    'fc_layers_central': [128] * 3,  # Neurons of each layer
    'regression_size': [128] * 2,  # Each element is a layer with that size
    'buffer_size': 10000,
    'epochs': 1000,
    'num_threads': 7,
    'batch_size': 512,
    'dropout': 0.40,
    'lr': [[1e-3, 1e-3], 0.5e-3],  # [[band1, band2], central]
    'val_steps': 50,
    'max_to_keep': 0,  # Not used
    'steps_wait': 500,
    'use_class_weights': False,  # Not used
    'mode': 'classifier+regression',
}

# Model-specific arguments. The status notes ("Working", "Not used", ...) are
# the author's own annotations about each option.
train_args_specific = {
    'phys_params': ['T_eff', 'Radius'],
    'use_output_bands': True,  # Working
    'use_output_central': False,  # Not used
    'use_common_layers': False,  # NOT working
    'bidirectional_central': False,  # Working
    'bidirectional_band': False,  # Not working
    'layer_norm_params': None,  # Used to normalize common layers
    'use_gated_common': False,  # Working
    'l1': 0.0,
    'l2': 0.0,
    'N_skip': 8,  # Cannot be greater than the number of timesteps
    'use_raw_input_central': True,
    'train_steps_central': 2,
    'print_report': True,
    'loss_weights_central': loss_weights,
    'callbacks_args': callbacks_args,
}



In [6]:
# Train one model per fold. `new` stays bound to the last fold's network after
# the loop, which is what the inspection cells further down operate on.
for fold in folds:
    # Reset the global Keras state left behind by the previous fold's model.
    tf.keras.backend.clear_session()

    # Derive this fold's results directory from its records directory, e.g.
    # '../../02_CreateRecords/Gaia/Folds/Fold_1' -> '.././Gaia/Results/Fold_1'.
    path_results_fold = fold.replace('../02_CreateRecords', '.').replace('/Folds/', '/Results/')
    # makedirs creates missing parent directories too and tolerates re-runs.
    os.makedirs(path_results_fold, exist_ok=True)

    # Build a fresh argument dict for this fold instead of rebinding the shared
    # `train_args` / mutating `train_args_specific`, so re-running this cell
    # never depends on state left over from a previous fold.
    fold_args = {
        **train_args,
        **train_args_specific,
        'save_dir': path_results_fold,
        'metadata_pre_path': os.path.join(fold, 'metadata_preprocess.json'),
        'path_scalers': os.path.join(fold, 'scalers'),
    }

    # Glob patterns for the train/validation/test records of this fold.
    train_files = os.path.join(fold, 'train/*.tfrecord')
    val_files = os.path.join(fold, 'val/*.tfrecord')
    test_files = os.path.join(fold, 'test/*.tfrecord')

    new = multiband.Network()
    new.train(fold_args, train_files, val_files, test_files)
    new.train_loop()


.././Gaia/Results/Fold_1/Models/20240903-1940
Start training


I0000 00:00:1725406856.932022 1362404 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Early Stopping
              precision    recall  f1-score   support

         CEP       0.67      0.74      0.70      1166
  DSCT_SXPHE       0.80      0.62      0.70       680
     MIRA_SR       1.00      1.00      1.00      8000
        RRAB       0.87      0.90      0.89      8000
         RRC       0.78      0.73      0.76      3451
       T2CEP       0.58      0.44      0.50       236

    accuracy                           0.89     21533
   macro avg       0.78      0.74      0.76     21533
weighted avg       0.89      0.89      0.89     21533

{'R2': {'T_eff': 0.8498881371206601, 'Radius': 0.411020652321615}, 'RMSE': {'T_eff': 482.62323, 'Radius': 33.48507}}
.././Gaia/Results/Fold_2/Models/20240903-2310
Start training
Early Stopping
              precision    recall  f1-score   support

         CEP       0.65      0.75      0.70      1166
  DSCT_SXPHE       0.81      0.59      0.69       680
     MIRA_SR       1.00      1.00      1.00      8000
        RRAB       0.87      0.9

In [7]:
# Check that no 'Radius' target value in the training set is NaN.
# Prints a message and stops at the first offending batch; silent otherwise.
for batch in new.dataset_train:
    if np.isnan(batch[1]['Radius'].numpy()).any():
        print('Found NaN!')
        break

In [8]:
# Check that the central model does not predict NaN for 'Radius' on any
# training batch; stop at the first batch where it does.
for batch in new.dataset_train:
    r_pred = new.model_central(batch[0])['Radius'].numpy()
    if np.isnan(r_pred).any():
        print('Found NaN!')
        break

Some batches contain no Radius values.

In [9]:
# For every training batch, record the fraction of 'Radius' targets below -90
# (presumably the placeholder used when no Radius is available; TODO confirm)
# and report batches in which every value is below that threshold.
vals = []
for batch in new.dataset_train:
    val = batch[1]['Radius'].numpy()
    val2 = np.sum(val < -90) / val.shape[0]
    vals.append(val2)
    if val2 == 1:
        # The whole batch is below the threshold, i.e. it has no usable Radius.
        print('Found a batch with no Radius values!')


In [10]:
# Fraction of 'Radius' targets below -90 in the last batch left over from the
# loop in the previous cell (`batch` and `val` are that loop's variables).
np.sum(batch[1]['Radius'] < -90) / len(val)

0.87

In [11]:
# Inspect the metric objects attached to the central model's compiled loss.
new.model_central.compiled_loss.metrics

[<keras.src.metrics.base_metric.Mean at 0x7f5ffc521540>,
 <keras.src.metrics.base_metric.Mean at 0x7f5f82a5ab00>,
 <keras.src.metrics.base_metric.Mean at 0x7f608016a290>,
 <keras.src.metrics.base_metric.Mean at 0x7f5fbe4b9c00>]