## DRAGON *DualFinder*: An Instance of the DRAGON CNN Architecture Specialized for Dual AGN Detection.
### Authors: Isaac Moskowitz and Jeremy Ng
### Collaborators: C. Meg Urry (PI), Aritra Ghosh. 
#### Began June 7, 2024


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import torch.nn as nn
import logging
import pandas as pd
import csv
import sys
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.model_selection import cross_val_score 
from keras import backend as K
import os
from os.path import exists
#import tensorflow_addons as tfa
import shutil
#from dual_finder import DualFinder, loadModelClass
%load_ext autoreload
%reload_ext autoreload
%autoreload 2

# Report which compute devices TensorFlow can see before any training starts.
visible_gpus = tf.config.list_physical_devices('GPU')
gpu = bool(visible_gpus)  # True when at least one GPU device is listed
print("GPU is", "available" if gpu else "NOT AVAILABLE")
print(tf.config.list_physical_devices())

# device_lib gives the more detailed per-device listing.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

# Timestamped INFO-level logging for the rest of the notebook.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
from astropy.io import fits

# Make the project's sub-package directories importable as top-level modules
# (create_cnn, train_cnn, process_data, ...). The paths are relative, so the
# notebook must be run from the directory that contains DRAGON_Dual_Finder/.
#
# Each directory is appended only if it is not already on sys.path, so that
# re-running this cell does not grow sys.path with duplicate entries.
_DUAL_FINDER_MODULE_DIRS = (
    "DRAGON_Dual_Finder/dual_finder/cnn/",
    "DRAGON_Dual_Finder/dual_finder/optimize/",
    "DRAGON_Dual_Finder/dual_finder/preprocess_data/",
    "DRAGON_Dual_Finder/dual_finder/visualize/",
)
for _module_dir in _DUAL_FINDER_MODULE_DIRS:
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)
from create_cnn import ModelCreator
from load_model import loadModelClass
from train_cnn import DualFinder
from extract_feature_maps import FeatureExtractor
from fits_utils import plot_dataset_sample
from process_data import make_datasets_other_bands, create_dataset
from optimize_hyperparameters import OptimizeHyperparameters
from visualize_performance import load_training_history, plot_training_progress, plot_grouped_training_progress, VisualizeOptimization

2024-06-14 09:48:31.735435: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-14 09:48:32.532198: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-14 09:48:32.532245: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-14 09:48:32.537731: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-14 09:48:32.940995: I tensorflow/core/platform/cpu_feature_g

GPU is available
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 12883915840427824154
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 32647086080
locality {
  bus_id: 1
  links {
  }
}
incarnation: 10116416161925150891
physical_device_desc: "device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:18:00.0, compute capability: 7.0"
xla_global_id: 416903419
]


2024-06-14 09:48:55.773654: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /device:GPU:0 with 31134 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:18:00.0, compute capability: 7.0


In [2]:
# Directories holding the training images, one per class. All paths are
# relative to the notebook's working directory.
stellar_filepath = "data_preprocessing/training_datasets/stellar_dataset/train_data/"
offset_AGN_filepath = 'data_preprocessing/training_datasets/offset_AGN_datasets/train_data/'
dual_AGN_filepath = "data_preprocessing/training_datasets/dual_AGN_datasets/train_data/"
single_AGN_filepath = "data_preprocessing/training_datasets/single_AGN_datasets/confirmed_single_AGN/"
empty_sky_filepath = "data_preprocessing/training_datasets/empty_space_dataset/empty_space_train_data/"

# Load every class and split into train / validation / test. create_dataset
# returns three items, each unpacked below as (images, labels, filepaths).
dataset_splits = create_dataset(
    empty_sky_filepath=empty_sky_filepath,
    dual_image_filepath=dual_AGN_filepath,
    stellar_filepath=stellar_filepath,
    offset_image_filepath=offset_AGN_filepath,
    single_image_filepath=single_AGN_filepath,
)
train_data_labels, val_data_labels, test_data_labels = dataset_splits

(train_dataset, train_labels, train_filepaths) = train_data_labels
(val_dataset, val_labels, val_filepaths) = val_data_labels
(test_dataset, test_labels, test_filepaths) = test_data_labels


2024-06-14 09:48:57,505 - INFO - Loading images from data_preprocessing/training_datasets/empty_space_dataset/empty_space_train_data/ with label empty_sky...
100%|██████████| 9215/9215 [00:39<00:00, 230.45it/s]
0it [00:00, ?it/s]2024-06-14 09:49:38.073264: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31134 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:18:00.0, compute capability: 7.0
9215it [00:55, 165.57it/s]
2024-06-14 09:50:35,293 - INFO - Loading images from data_preprocessing/training_datasets/single_AGN_datasets/confirmed_single_AGN/ with label single_AGN...


Loaded 36860 images with 36860 labels from data_preprocessing/training_datasets/empty_space_dataset/empty_space_train_data/


100%|██████████| 2000/2000 [00:08<00:00, 226.17it/s]
2000it [00:20, 99.98it/s] 
2024-06-14 09:51:04,775 - INFO - Loading images from data_preprocessing/training_datasets/dual_AGN_datasets/train_data/ with label dual_AGN...


Loaded 12000 images with 12000 labels from data_preprocessing/training_datasets/single_AGN_datasets/confirmed_single_AGN/
Length of single AGN images: 12000


100%|██████████| 35492/35492 [01:43<00:00, 344.56it/s]
2024-06-14 09:52:49,437 - INFO - expanding dims
2024-06-14 09:52:49,438 - INFO - Loading images from data_preprocessing/training_datasets/offset_AGN_datasets/train_data/ with label offset_AGN...


Loaded 35492 images with 35492 labels from data_preprocessing/training_datasets/dual_AGN_datasets/train_data/


100%|██████████| 28756/28756 [01:42<00:00, 279.52it/s]
2024-06-14 09:54:33,427 - INFO - expanding dims
2024-06-14 09:54:33,428 - INFO - Loading images from data_preprocessing/training_datasets/stellar_dataset/train_data/ with label star_AGN_align...


Loaded 28756 images with 28756 labels from data_preprocessing/training_datasets/offset_AGN_datasets/train_data/


100%|██████████| 28319/28319 [01:48<00:00, 261.61it/s]
2024-06-14 09:56:22,900 - INFO - expanding dims


Loaded 28319 images with 28319 labels from data_preprocessing/training_datasets/stellar_dataset/train_data/
Length of stellar images: (28319, 94, 94)
Total images: (141427, 94, 94, 1)
Total labels: (141427,)
Total filepaths: (141427,)
Train_dataset: (91927, 94, 94, 1)
Train_labels: (91927,)
Train_filepaths: (91927,)
Val_dataset: (28285, 94, 94, 1)
Val_labels: (28285,)
Val_filepaths: (28285,)
Test_dataset: (21215, 94, 94, 1)
Test_labels: (21215,)
Test_filepaths: (21215,)


In [3]:
# Quick look at the validation split: image array shape, then label array shape.
for val_array in (val_dataset, val_labels):
    print(np.shape(val_array))

(28285, 94, 94, 1)
(28285,)


In [None]:
# --- Training configuration -------------------------------------------------
model_type = 'dualfinder'
num_classes = 5                # one per class directory passed to create_dataset
image_shape = (94, 94, 1)      # matches the per-image shape of the loaded datasets
epoch = 20
batch_size = 64
init_learning_rate = 1e-5
# NOTE(review): the meaning of importance_score is defined inside DualFinder — confirm there.
importance_score = [1.0, 1.0]

# --- Build and train --------------------------------------------------------
# Arguments are passed positionally, in the order DualFinder's constructor
# expects; do not reorder them.
dual_finder_instance = DualFinder(
    train_dataset,
    val_dataset,
    image_shape,
    train_labels,
    val_labels,
    epoch,
    batch_size,
    init_learning_rate,
    num_classes,
    model_type,
    importance_score,
    display_architecture=True,
)
history, dual_finder_model = dual_finder_instance.trainCNN(save_feature_maps=False)

['dual_AGN' 'offset_AGN' 'offset_AGN' ... 'star_AGN_align' 'dual_AGN'
 'empty_sky']
['offset_AGN' 'offset_AGN' 'offset_AGN' ... 'empty_sky' 'offset_AGN'
 'dual_AGN']
Converting to list
Converting to list
train_labels shape: (91927, 5)
val_labels shape: (28285, 5)
Unique train labels: [0. 1.]
Unique val labels: [0. 1.]
5
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_1 (Rescaling)     (None, 94, 94, 1)         0         
                                                                 
 conv2d_7 (Conv2D)           (None, 94, 94, 64)        640       
                                                                 
 dropout_8 (Dropout)         (None, 94, 94, 64)        0         
                                                                 
 batch_normalization_7 (Bat  (None, 94, 94, 64)        256       
 chNormalization)                                             

2024-06-14 10:19:41,930 - INFO - 'save_feature_maps' == False, NOT saving feature maps


train_images shape: (91927, 94, 94, 1)
train_labels shape: (91927, 5)
val_images shape: (28285, 94, 94, 1)
val_labels shape: (28285, 5)
Epoch 1/20


2024-06-14 10:19:47.609402: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_1/dropout_8/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer




2024-06-14 10:22:07,352 - INFO - Assets written to: ../saved_dual_finder_models/_checkpoint_training/assets


Epoch 2/20


2024-06-14 10:24:23,426 - INFO - Assets written to: ../saved_dual_finder_models/_checkpoint_training/assets


Epoch 3/20


2024-06-14 10:26:39,835 - INFO - Assets written to: ../saved_dual_finder_models/_checkpoint_training/assets


Epoch 4/20


2024-06-14 10:28:55,809 - INFO - Assets written to: ../saved_dual_finder_models/_checkpoint_training/assets


Epoch 5/20


2024-06-14 10:31:12,240 - INFO - Assets written to: ../saved_dual_finder_models/_checkpoint_training/assets


Epoch 6/20


2024-06-14 10:33:28,255 - INFO - Assets written to: ../saved_dual_finder_models/_checkpoint_training/assets


Epoch 7/20


2024-06-14 10:35:44,600 - INFO - Assets written to: ../saved_dual_finder_models/_checkpoint_training/assets


Epoch 8/20


2024-06-14 10:38:00,673 - INFO - Assets written to: ../saved_dual_finder_models/_checkpoint_training/assets


Epoch 9/20


2024-06-14 10:40:17,168 - INFO - Assets written to: ../saved_dual_finder_models/_checkpoint_training/assets


Epoch 10/20


2024-06-14 10:42:33,201 - INFO - Assets written to: ../saved_dual_finder_models/_checkpoint_training/assets


Epoch 11/20

In [None]:
# Pull the per-epoch training metrics out of the history object returned by
# trainCNN (its .history attribute maps metric names to per-epoch lists).
accuracy = history.history['accuracy']
loss = history.history['loss']
precision = history.history['precision']
recall = history.history['recall']
f1score = history.history['f1_score']

# Matching validation metrics.
val_accuracy = history.history['val_accuracy']
val_loss = history.history['val_loss']
val_precision = history.history['val_precision']
val_recall = history.history['val_recall']
val_f1score = history.history['val_f1_score']

# Build the x-axis from the number of epochs actually recorded rather than the
# configured `epoch` count: if training stops early, the two differ and the
# x/y lengths passed to the plotting helper would no longer match.
training_epochs = np.arange(len(loss))

# Create the output directory if needed; exist_ok avoids the separate
# check-then-create step.
fig_save_filepath = 'DRAGON_Dual_Finder/saved_training_figures/'
os.makedirs(fig_save_filepath, exist_ok=True)

fig, ax = plot_training_progress(
    loss,
    accuracy,
    training_epochs,
    save_filepath=fig_save_filepath,
    training_run="DRAGON DualFinder",
    recall=recall,
    precision=precision,
    f1_score=f1score,
    val_loss=val_loss,
    val_acc=val_accuracy,
    val_recall=val_recall,
    val_precision=val_precision,
    val_f1_score=val_f1score,
)

In [None]:
# encode_labels takes two label arrays and returns two results; the test
# labels are passed for both and only the first result is kept.
eval_labels_encoded, _ = dual_finder_instance.encode_labels(
    test_labels,
    test_labels,
)

# Score the trained model on the held-out test split (the result is shown as
# the cell output).
dual_finder_model.evaluate(
    test_dataset,
    eval_labels_encoded,
    batch_size=batch_size,
    verbose=1,
)

In [None]:
# Scratch check: character count of the shared dataset root prefix.
# NOTE(review): presumably used to work out how many leading characters to
# strip from the stored file paths — confirm, or delete this cell.
len("data_preprocessing/training_datasets/")

In [None]:
# Run the trained model over the held-out test images, passing the matching
# file paths alongside them.
# NOTE(review): what predict returns or writes is defined in
# train_cnn.DualFinder — confirm there.
dual_finder_instance.predict(dual_finder_model, test_dataset, test_filepaths)