In [1]:
import sys
import os
from glob import glob

import matplotlib.pyplot as plt
import pandas as pd

# Avoids warnings
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
import tensorflow as tf

import ScalableLib.classifier.Multiband as multiband

2024-08-28 09:23:26.594974: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-28 09:23:26.595008: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-28 09:23:26.600715: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Select one GPU for this session and report which one is in use.
device = 0  # index into the list of GPUs reported by TensorFlow
devices = tf.config.experimental.list_physical_devices('GPU')

# Fail with a readable message instead of a bare IndexError when the
# requested GPU does not exist (no GPU detected, or index out of range).
if device >= len(devices):
    raise RuntimeError(
        "GPU index {} requested, but TensorFlow detected {} GPU(s)".format(device, len(devices))
    )

# Make only the chosen GPU visible and turn on memory growth for it.
tf.config.set_visible_devices(devices[device], 'GPU')
tf.config.experimental.set_memory_growth(device=devices[device], enable=True)

device_name = tf.config.experimental.get_device_details(devices[device])['device_name']
print("Using {}".format(device_name))

Using NVIDIA GeForce RTX 3080


Find the different folds and train a model using the stored data.

In [3]:
# Collect the fold directories stored for the chosen survey.
survey = 'ZTF'
path = os.path.join('../../02_CreateRecords/', survey, 'Folds/Fold_*')
# Plain string ordering of the matched paths.
folds = sorted(glob(path))
folds

['../../02_CreateRecords/ZTF/Folds/Fold_1',
 '../../02_CreateRecords/ZTF/Folds/Fold_2',
 '../../02_CreateRecords/ZTF/Folds/Fold_3',
 '../../02_CreateRecords/ZTF/Folds/Fold_4',
 '../../02_CreateRecords/ZTF/Folds/Fold_5',
 '../../02_CreateRecords/ZTF/Folds/Fold_6',
 '../../02_CreateRecords/ZTF/Folds/Fold_7']

Create the results folder.

In [4]:
# Create the output directory. exist_ok=True makes the cell safe to re-run
# and avoids the race between checking for the folder and creating it.
os.makedirs('./Results', exist_ok=True)


Define the arguments for all the models.

In [5]:
# Helper dictionaries that are embedded (by reference) in
# train_args_specific further down.
loss_weights = {'Class': 1.0}

callbacks_args = {
    'patience': 20,
    'mode': 'max',
    'restore_best_weights': True,
    'min_delta': 0.001,
}

# General arguments passed to the model for every fold.
train_args = {
    'hidden_size_bands': [128] * 3,
    'hidden_size_central': [128] * 2,
    'fc_layers_bands': [128] * 3,
    'fc_layers_central': [128] * 3,  # Neurons of each layer
    'regression_size': [128] * 2,  # Each element is a layer with that size
    'buffer_size': 10000,
    'epochs': 1000,
    'num_threads': 7,
    'batch_size': 128,
    'dropout': 0.30,
    'lr': [[5e-3, 5e-3], 2.5e-3],  # [[band1, band2], central]
    'val_steps': 50,
    'max_to_keep': 0,  # Not used
    'steps_wait': 500,
    'use_class_weights': True,  # Not used as intended, for initialization
    'mode': 'classifier',
}

# Additional arguments; the trailing notes are the author's status
# remarks for each option.
train_args_specific = {
    'phys_params': [],
    'use_output_bands': True,  # Working
    'use_output_central': False,  # Not used
    'use_common_layers': False,  # NOT working
    'bidirectional_central': False,  # Working
    'bidirectional_band': False,  # Not working
    'layer_norm_params': None,  # Used to normalize common layers
    'use_gated_common': False,  # Working
    'l1': 0.0,
    'l2': 0.0,
    'N_skip': 3,  # Cannot be greater than the number of timesteps
    'use_raw_input_central': False,
    'train_steps_central': 2,
    'print_report': True,
    'loss_weights_central': loss_weights,
    'callbacks_args': callbacks_args,
}



In [None]:
# Train one model per fold, visiting the folds in reverse order.
for fold in folds[::-1]:
    # Clear Keras session state before working on this fold.
    tf.keras.backend.clear_session()

    # Input directory of this fold (with trailing separator).
    base_dir = fold+'/'

    # Output directory: ./Results/<fold folder name>. It is built from the
    # fold's basename rather than by string replacement on the input path,
    # so an unexpected prefix cannot silently produce a wrong location.
    # makedirs also creates ./Results when it is missing and does not fail
    # if the folder already exists (safe to re-run).
    path_results_fold = os.path.join('./Results', os.path.basename(fold))
    os.makedirs(path_results_fold, exist_ok=True)

    # Fold-dependent entries.
    train_args_specific['save_dir'] = path_results_fold
    train_args_specific['metadata_pre_path'] = base_dir+'metadata_preprocess.json'
    train_args_specific['path_scalers'] = os.path.join(fold, 'scalers.pkl')

    # Merge general and specific arguments into a new dict; keys from
    # train_args_specific take precedence. Note that this rebinds the
    # notebook-level name `train_args` on every iteration.
    train_args = {**train_args, **train_args_specific}

    # Glob patterns for the TFRecord files of each split.
    train_files = base_dir+'train/*.tfrecord'
    val_files = base_dir+'val/*.tfrecord'
    test_files = base_dir+'test/*.tfrecord'

    # NOTE(review): presumably train() configures the model and datasets and
    # train_loop() runs the optimisation; confirm against ScalableLib.
    new = multiband.Network()
    new.train(train_args, train_files, val_files, test_files)
    new.train_loop()

./Results/Fold_7/Models/20240828-0924
Start training


I0000 00:00:1724851471.984069  618524 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Early Stopping
              precision    recall  f1-score   support

         AGN       0.64      0.58      0.61       484
      Blazar       0.75      0.55      0.63       218
     CV/Nova       0.79      0.83      0.81       152
           E       0.93      0.94      0.93      2000
         LPV       0.99      1.00      1.00      1863
         QSO       0.88      0.92      0.90      2000
         RRL       0.94      0.93      0.93      2000
        SNIa       0.98      0.98      0.98       101
         YSO       0.85      0.73      0.79       159

    accuracy                           0.91      8977
   macro avg       0.86      0.83      0.84      8977
weighted avg       0.91      0.91      0.91      8977

./Results/Fold_6/Models/20240828-2030
Start training
Early Stopping
              precision    recall  f1-score   support

         AGN       0.67      0.60      0.63       484
      Blazar       0.74      0.56      0.64       218
     CV/Nova       0.81      0.84      0.82      