# This notebook checks the feature arrays loaded from .npy files for invalid values (NaN or inf)

In [2]:
from load_MWA_data import get_files_list, load_feature_datasets

# Root directory under which every candidate set used below is stored.
base_dir = '/home/isaaccolleran/Documents/sgan/'

# One sub-directory per candidate set: labelled training, validation, unlabelled.
path_to_data = base_dir + 'MWA_cands/'
path_to_validation = base_dir + 'MWA_validation/'
path_to_unlabelled = base_dir + 'MWA_unlabelled_cands/'

# File lists and their labels, read from the CSV named in each call.
pfd_files, pfd_labels = get_files_list(path_to_data, 'training_labels.csv')
validation_files, validation_labels = get_files_list(
    path_to_validation, 'validation_labels.csv'
)
# NOTE(review): the unlabelled set is also read via 'training_labels.csv' -
# confirm this is the intended file name for that directory.
unlabelled_files, unlabelled_labels = get_files_list(
    path_to_unlabelled, 'training_labels.csv'
)

# Four feature arrays per candidate set, unpacked in the order returned by
# load_feature_datasets: DM curve, freq-phase, pulse profile, time-phase.
(
    dm_curve_data,
    freq_phase_data,
    pulse_profile_data,
    time_phase_data,
) = load_feature_datasets(pfd_files)
(
    validation_dm_curve_data,
    validation_freq_phase_data,
    validation_pulse_profile_data,
    validation_time_phase_data,
) = load_feature_datasets(validation_files)
(
    unlabelled_dm_curve_data,
    unlabelled_freq_phase_data,
    unlabelled_pulse_profile_data,
    unlabelled_time_phase_data,
) = load_feature_datasets(unlabelled_files)

# [data, labels] pairs for the DM-curve feature of each candidate set.
dm_curve_dataset = [dm_curve_data, pfd_labels]
dm_curve_validation_dataset = [validation_dm_curve_data, validation_labels]
dm_curve_unlabelled_dataset = [unlabelled_dm_curve_data, unlabelled_labels]

In [3]:
# Shapes of the whole arrays first, then of a single candidate (index 0).
full_shapes = (dm_curve_data.shape, freq_phase_data.shape)
single_shapes = (dm_curve_data[0].shape, freq_phase_data[0].shape)

print(*full_shapes)

print(*single_shapes)

(450, 60, 1) (450, 48, 48, 1)
(60, 1) (48, 48, 1)


In [9]:
import numpy as np

# Screen every labelled training candidate for non-finite values (inf or NaN)
# in each of its four feature arrays and report which features are affected.
labelled_features = {
    'dm curve': dm_curve_data,
    'freq-phase': freq_phase_data,
    'pulse profile': pulse_profile_data,
    'time-phase': time_phase_data,
}

for idx, name in enumerate(pfd_files):
    # np.isfinite is False for +/-inf and for NaN, so one test covers both.
    bad_features = [
        label
        for label, data in labelled_features.items()
        if not np.all(np.isfinite(data[idx,]))
    ]

    # Report the candidate once, listing every feature that failed the check.
    if bad_features:
        print('bad candidate', idx, name, bad_features)



In [10]:
# Screen every validation candidate for non-finite values (inf or NaN) in
# each of its four feature arrays and report which features are affected.
validation_features = {
    'dm curve': validation_dm_curve_data,
    'freq-phase': validation_freq_phase_data,
    'pulse profile': validation_pulse_profile_data,
    'time-phase': validation_time_phase_data,
}

for idx, name in enumerate(validation_files):
    # np.isfinite is False for +/-inf and for NaN, so one test covers both.
    bad_features = [
        label
        for label, data in validation_features.items()
        if not np.all(np.isfinite(data[idx,]))
    ]

    # Report the candidate once, listing every feature that failed the check.
    if bad_features:
        print('bad candidate', idx, name, bad_features)


In [11]:
# Screen every unlabelled candidate for non-finite values (inf or NaN) in
# each of its four feature arrays and report which features are affected.
unlabelled_features = {
    'dm curve': unlabelled_dm_curve_data,
    'freq-phase': unlabelled_freq_phase_data,
    'pulse profile': unlabelled_pulse_profile_data,
    'time-phase': unlabelled_time_phase_data,
}

for idx, name in enumerate(unlabelled_files):
    # np.isfinite is False for +/-inf and for NaN, so one test covers both.
    bad_features = [
        label
        for label, data in unlabelled_features.items()
        if not np.all(np.isfinite(data[idx,]))
    ]

    # Report the candidate once, listing every feature that failed the check.
    if bad_features:
        print('bad candidate', idx, name, bad_features)


In [15]:
from classifiers import Train_SGAN_DM_Curve, Train_SGAN_Freq_Phase, Train_SGAN_Time_Phase, Train_SGAN_Pulse_Profile

# Batch size handed to every trainer constructed below.
batch_size = 16

# One trainer per feature type. Each receives (data, labels) for the labelled,
# validation and unlabelled sets, in that order, followed by the batch size.
dm_curve_instance = Train_SGAN_DM_Curve(
    dm_curve_data, pfd_labels,
    validation_dm_curve_data, validation_labels,
    unlabelled_dm_curve_data, unlabelled_labels,
    batch_size,
)
pulse_profile_instance = Train_SGAN_Pulse_Profile(
    pulse_profile_data, pfd_labels,
    validation_pulse_profile_data, validation_labels,
    unlabelled_pulse_profile_data, unlabelled_labels,
    batch_size,
)
freq_phase_instance = Train_SGAN_Freq_Phase(
    freq_phase_data, pfd_labels,
    validation_freq_phase_data, validation_labels,
    unlabelled_freq_phase_data, unlabelled_labels,
    batch_size,
)
time_phase_instance = Train_SGAN_Time_Phase(
    time_phase_data, pfd_labels,
    validation_time_phase_data, validation_labels,
    unlabelled_time_phase_data, unlabelled_labels,
    batch_size,
)


# retraining freq_phase model ##################

# Only the freq-phase trainer is run here. To retrain the time-phase model
# instead, point `trainer` at time_phase_instance.
trainer = freq_phase_instance

# Build the models from the trainer, wire them into the combined GAN, then
# train for 25 epochs.
d_model, c_model = trainer.define_discriminator()
generator = trainer.define_generator()
gan = trainer.define_gan(generator, d_model)
trainer.train(generator, d_model, c_model, gan, n_epochs=25)

2021-09-29 11:16:35.458650: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-29 11:16:35.458678: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-29 11:16:36.997034: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-09-29 11:16:36.997745: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-09-29 11:16:37.020564: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2021-09-29 11:16:37.020612: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: cira-fishbowl-4
2021-09-29 11:16:37.020624: I tensorflow/str

batch per epoch is 252
n_epochs=25, n_batch=16, 1/2=8, b/e=252, steps=6300


2021-09-29 11:16:37.569695: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-09-29 11:16:37.590385: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3193555000 Hz


>1, c[0.683,62], d[0.452,1.024], g[0.111]
>2, c[0.819,25], d[0.334,1.032], g[0.055]
>3, c[0.650,62], d[0.376,1.056], g[0.090]
>4, c[0.627,75], d[0.339,1.083], g[0.052]
>5, c[0.702,62], d[0.148,1.085], g[0.014]
>6, c[0.897,50], d[0.316,1.028], g[0.067]
>7, c[0.522,75], d[0.382,1.086], g[0.103]
>8, c[0.936,25], d[0.393,1.022], g[0.187]
>9, c[0.650,62], d[0.360,1.034], g[0.182]
>10, c[0.790,25], d[0.339,1.031], g[0.185]
>11, c[0.685,25], d[0.248,0.454], g[0.006]
>12, c[0.864,25], d[0.127,1.499], g[0.077]
>13, c[0.886,25], d[0.312,1.009], g[0.299]
>14, c[0.687,50], d[0.357,0.990], g[0.318]
>15, c[0.673,50], d[0.240,0.977], g[0.286]
>16, c[0.667,62], d[0.098,1.018], g[0.290]
>17, c[0.723,50], d[0.224,0.973], g[0.395]
>18, c[0.705,38], d[0.176,0.967], g[0.686]
>19, c[0.657,75], d[0.018,0.957], g[1.414]
>20, c[0.859,38], d[0.469,0.826], g[1.434]
>21, c[0.727,38], d[0.266,1.154], g[1.138]
>22, c[0.634,62], d[0.274,0.745], g[3.350]
>23, c[0.623,75], d[0.593,0.715], g[3.633]
>24, c[0.732,75], d[