In [1]:
import os
from glob import glob

# Reduce TensorFlow's C++ log noise (level "1" hides INFO messages); must be set before importing tensorflow
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
import tensorflow as tf
import numpy as np
import pandas as pd

from ScalableLib.mixture.layers import create_models
from ScalableLib.classifier import Multiband as multiband

2024-10-02 14:40:17.706468: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-02 14:40:17.706502: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-02 14:40:17.707579: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Select a single GPU for TensorFlow and report which one is in use.
device = 0  # index into the list of GPUs reported by TensorFlow
devices = tf.config.experimental.list_physical_devices('GPU')
# Fail with an explicit message instead of a bare IndexError when the
# requested GPU is not available (e.g. no GPU visible to TensorFlow).
if device >= len(devices):
    raise RuntimeError(
        "GPU index {} requested but TensorFlow only reports {} GPU(s)".format(device, len(devices))
    )
# Expose only the chosen GPU and let its memory allocation grow on demand.
tf.config.set_visible_devices(devices[device], 'GPU')
tf.config.experimental.set_memory_growth(device=devices[device], enable=True)

# The details dict may lack 'device_name'; fall back to the
# PhysicalDevice name so the report never raises a KeyError.
device_name = tf.config.experimental.get_device_details(devices[device]).get('device_name', devices[device].name)
print("Using {}".format(device_name))

Using NVIDIA GeForce RTX 3080


Find the different folds and train a model using the stored data.

In [3]:
# Survey whose fold directories are searched.
survey = 'Gaia'
# Glob pattern matching every fold directory of that survey.
path = os.path.join('../../02_CreateRecords/', survey, 'Folds/Fold_*')
# Matching directories in lexicographic order.
folds = sorted(glob(path))
folds

['../../02_CreateRecords/Gaia/Folds/Fold_1',
 '../../02_CreateRecords/Gaia/Folds/Fold_2',
 '../../02_CreateRecords/Gaia/Folds/Fold_3',
 '../../02_CreateRecords/Gaia/Folds/Fold_4',
 '../../02_CreateRecords/Gaia/Folds/Fold_5',
 '../../02_CreateRecords/Gaia/Folds/Fold_6',
 '../../02_CreateRecords/Gaia/Folds/Fold_7']

Create the folder where the results will be stored.

In [4]:
# Create the top-level results folder; exist_ok makes re-running the cell a
# no-op and avoids the check-then-create race of exists() + mkdir().
os.makedirs('./Results', exist_ok=True)


Define the arguments for all the models.

In [5]:
# Weight of each output in the loss; referenced by train_args_specific below.
loss_weights = {'Class': 1.0}

# Early-stopping settings; referenced by train_args_specific below.
callbacks_args = {
    'patience': 20,
    'mode': 'max',
    'restore_best_weights': True,
    'min_delta': 0.001,
}

# Arguments shared by all the models.
train_args = {
    'hidden_size_bands': [128, 128, 128],
    'hidden_size_central': [128, 128],
    'fc_layers_bands': [128, 128, 128],
    'fc_layers_central': [128, 128, 128],  # Neurons of each layer
    'regression_size': [128, 128],  # Each element is a layer with that size
    'buffer_size': 10000,
    'epochs': 1000,
    'num_threads': 7,
    'batch_size': 512,
    'dropout': 0.40,
    'lr': [[1e-3, 1e-3], 0.5e-3],  # [[band1, band2], central]
    'val_steps': 50,
    'max_to_keep': 0,  # Not used
    'steps_wait': 500,
    'use_class_weights': False,  # Not used
    'mode': 'classifier',
}

# Additional arguments; the trailing status notes are the author's own.
train_args_specific = {
    'phys_params': [],
    'use_output_bands': True,  # Working
    'use_output_central': False,  # Not used
    'use_common_layers': False,  # NOT working
    'bidirectional_central': False,  # Working
    'bidirectional_band': False,  # Not working
    'layer_norm_params': None,  # Used to normalize common layers
    'use_gated_common': False,  # Working
    'l1': 0.0,
    'l2': 0.0,
    'N_skip': 8,  # Cannot be greater than the number of timesteps
    'use_raw_input_central': True,
    'train_steps_central': 2,
    'print_report': True,
    'loss_weights_central': loss_weights,
    'callbacks_args': callbacks_args,
}



In [6]:
# For every fold: train the multiband classifier, build the mixture model from
# it, fit that model, collect its 'Sauce_' layer scales and store the test
# predictions of the fold.
sauces = []  # one {layer name: softmax of the layer's scale} dict per fold
for fold in folds:
    # Reset Keras' global state before building the models of this fold.
    tf.keras.backend.clear_session()
    # Set the fold path
    base_dir = fold+'/'

    # Set the save path for this fold ('.../<survey>/Folds/Fold_k' -> './Results/Fold_k').
    # makedirs(exist_ok=True) also works when './Results' is missing or the
    # fold folder already exists.
    path_results_fold = fold.replace('../../02_CreateRecords/'+survey+'/', './').replace('/Folds/', '/Results/')
    os.makedirs(path_results_fold, exist_ok=True)

    train_args_specific['save_dir'] = path_results_fold
    train_args_specific['metadata_pre_path'] = base_dir+'metadata_preprocess.json'
    train_args_specific['path_scalers'] =  os.path.join(fold,'scalers.pkl')
    # Define the train args: the fold-specific entries override the shared ones.
    train_args = {**train_args, **train_args_specific}

    train_files = base_dir+'train/*.tfrecord'
    val_files = base_dir+'val/*.tfrecord'
    test_files = base_dir+'test/*.tfrecord'

    new = multiband.Network()
    new.train(train_args, train_files, val_files, test_files)
    # Create the Mixture of Experts model, from the original multiband model
    model_base = create_models(new)
    model = model_base.creat_split_models(train_args)

    # Define the early stopping callback.
    # NOTE(review): 'accuracy' is the training metric although validation data
    # is passed to fit(); confirm whether a 'val_'-prefixed metric was intended.
    es = tf.keras.callbacks.EarlyStopping(monitor='accuracy', **callbacks_args)
    # Fit the model to the original data
    model.fit(new.dataset_train, validation_data=new.dataset_val, epochs=300,callbacks = [es] )

    # Store the alpha coefficients per fold
    sauces_ = {i.name:i for i in model.layers if 'Sauce_' in i.name}
    scales = {key:tf.nn.softmax(sauces_[key].scale).numpy() for key in sauces_.keys()}
    sauces.append(scales)
    # Test: predict batch by batch and map class indices back through new.trans
    batch_frames = []
    for batch in new.dataset_test:
        prediction = model(batch[0])
        y_pred = prediction['Class'].numpy().argmax(axis=1)
        y_pred = [new.trans[i] for i in y_pred]
        ID = batch[0]['ID'].numpy()

        y_true = batch[1]['Class'].numpy().argmax(axis=1)
        y_true = [new.trans[i] for i in y_true]

        # Column order must match the data order: 'Class' holds the true label
        # and 'Pred' the predicted one (they were previously swapped).
        df = pd.DataFrame(np.array([ID, y_true, y_pred]).transpose(), columns=['ID', 'Class', 'Pred'])
        # IDs are decoded from bytes to text before being written out.
        df.ID = df.ID.str.decode('UTF-8')
        batch_frames.append(df)
    dfs = pd.concat(batch_frames, axis=0)
    dfs.to_csv(path_results_fold+'/Classification_test.dat', index=False, index_label=False)

# NOTE(review): this runs after the loop, so the table with the coefficients of
# all folds is written inside the last fold's folder; confirm that is intended.
pd.DataFrame(sauces).to_csv(path_results_fold+'/sauces.dat', index=False, index_label=False)

./Results/Fold_1/Models/20241002-1440
Epoch 1/300


I0000 00:00:1727894461.234394   30155 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
./Results/Fold_2/Models/20241002-1450
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoc