In [18]:
import pandas as pd
import numpy as np
from os import listdir, makedirs
from os.path import join, isfile, basename

# Info tables, one per class.
src_info = '/data01/homes/dipiano/E4/crab/sim/merged_data.dat'
bkg_info = '/data01/homes/dipiano/E4/background/sim/merged_data.dat'

# Directory holding the data files of each class.
src_dataset_path = '/data01/homes/dipiano/E4/crab/sim'
bkg_dataset_path = '/data01/homes/dipiano/E4/background/sim'
datapath = {'SRC': src_dataset_path, 'BKG': bkg_dataset_path}

# Per-class list of data files, filled in by the loop below.
datafiles = {'SRC': [], 'BKG': []}
classes = datafiles.keys()
print(f"Classes: {classes}\n")
for k in classes:
    print(f"Class {k} data from: {datapath[k]}")
    found = []
    for entry in listdir(datapath[k]):
        full_path = join(datapath[k], entry)
        # Keep regular files whose name contains '.fits'.
        if '.fits' in entry and isfile(full_path):
            found.append(full_path)
    found.sort()
    datafiles[k] = found

print(f"\nSRC dataset size: {len(datafiles['SRC'])}")
print(f"BKG dataset size: {len(datafiles['BKG'])}")

Classes: dict_keys(['SRC', 'BKG'])

Class SRC data from: /data01/homes/dipiano/E4/crab/sim
Class BKG data from: /data01/homes/dipiano/E4/background/sim

SRC dataset size: 50000
BKG dataset size: 50000


In [19]:
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from astropy.table import Table
from scipy.ndimage import gaussian_filter

# extract data utility
def extract_heatmap(data, trange, smoothing, nbins, save=False, save_name=None):
    """Build a smoothed 2D count map of RA/DEC within a time window.

    Parameters
    ----------
    data : table-like with 'TIME', 'RA' and 'DEC' columns
        Indexed with a boolean mask and read via ``.to_numpy()``, so a
        pandas DataFrame is expected.
    trange : sequence of two numbers
        Inclusive ``[start, stop]`` window applied to the 'TIME' column.
    smoothing : scalar or sequence of scalars
        ``sigma`` passed to ``scipy.ndimage.gaussian_filter``.
    nbins : int or [int, int]
        ``bins`` passed to ``numpy.histogram2d``.
    save : bool
        When True (and ``save_name`` is given) the map is written with ``np.save``.
    save_name : str or None
        Destination of the saved array.

    Returns
    -------
    numpy.ndarray
        The smoothed histogram, transposed so that DEC runs along the first
        axis and RA along the second.

    Notes
    -----
    No explicit ``range`` is given to ``histogram2d``, so the bin edges are
    derived from the selected rows of each call.
    """
    in_window = (data['TIME'] >= trange[0]) & (data['TIME'] <= trange[1])
    selected = data[in_window]
    ra = selected['RA'].to_numpy()
    dec = selected['DEC'].to_numpy()
    counts, _, _ = np.histogram2d(ra, dec, bins=nbins)
    # Transpose before saving so the array on disk has the same orientation
    # as the one handed back to the caller.
    heatmap = gaussian_filter(counts, sigma=smoothing).T
    if save and save_name is not None:
        np.save(save_name, heatmap, allow_pickle=True, fix_imports=True)
    return heatmap

# normalise heatmap
def normalise_heatmap(heatmap, save=False, save_name=None):
    """Min-max rescale a map to the [0, 1] interval.

    Parameters
    ----------
    heatmap : array-like
        Map to rescale.
    save : bool
        When True (and ``save_name`` is given) the result is written with ``np.save``.
    save_name : str or None
        Destination of the saved array.

    Returns
    -------
    numpy.ndarray
        ``(heatmap - min) / (max - min)``. A constant map (``max == min``)
        has no dynamic range; it is returned as all zeros rather than the
        NaN array a plain division would yield.
    """
    min_value = np.min(heatmap)
    max_value = np.max(heatmap)
    value_range = max_value - min_value
    if value_range == 0:
        # Avoid 0/0: a flat map carries no contrast to rescale.
        heatmap = np.zeros_like(heatmap, dtype=float)
    else:
        heatmap = (heatmap - min_value) / value_range
    if save and save_name is not None:
        np.save(save_name, heatmap, allow_pickle=True, fix_imports=True)
    return heatmap

# plot heatmap
def plot_heatmap(heatmap, title='heatmap', norm='linear', show=False, save=False, save_name=None):
    """Draw a map with ``imshow`` and optionally save and/or show it.

    Parameters
    ----------
    heatmap : 2D array-like
        Image to draw. The colour scale is fixed to the [0, 1] interval.
    title : str
        Figure title.
    norm : {'linear', 'log'}
        Colour scaling: linear, or symmetric-log with a linear threshold of 1.
    show : bool
        Call ``plt.show()`` before closing the figure.
    save : bool
        When True (and ``save_name`` is given) the figure is written with ``plt.savefig``.
    save_name : str or None
        Destination of the saved figure.

    Raises
    ------
    ValueError
        If ``norm`` is neither 'linear' nor 'log'.
    """
    # SymLogNorm is not imported by the visible notebook cells, so bring it
    # into scope here.
    from matplotlib.colors import SymLogNorm

    if norm not in ('linear', 'log'):
        raise ValueError(f"norm must be 'linear' or 'log', got {norm!r}")

    plt.figure()
    plt.title(title)
    if norm == 'linear':
        plt.imshow(heatmap, vmin=0, vmax=1)
    else:
        # The limits go on the Normalize instance: imshow does not accept
        # vmin/vmax together with a norm object.
        plt.imshow(heatmap, norm=SymLogNorm(1, vmin=0, vmax=1, base=10))
    plt.xlabel('x(det) [pixels]')
    plt.ylabel('y(det) [pixels]')
    plt.colorbar()
    if save and save_name is not None:
        plt.savefig(save_name)
    if show:
        plt.show()
    plt.close()
    return

In [None]:
# map parameters
trange = [0, 50]                  # [start, stop] window applied to the TIME column
exposure = trange[1] - trange[0]
smoothing = 3                     # sigma passed to the gaussian filter
pixelsize = 0.02
nbins = int(5/pixelsize)          # bins per axis of the 2D histogram
# execution options
plot_data = True
show_plot = False
save_plot = True
normalise_data = True
save_map = False
save_norm = False
# model param
class_sample = 50000
class_train_sample = 80
class_test_sample = class_sample - class_train_sample
total_train_size = class_train_sample * 2
total_test_size = class_test_sample * 2

# image datasets: the PNG targets are the very directories created here, so
# savefig never points at a folder that does not exist
pngpath = {'SRC': join(src_dataset_path, f'img_{exposure}s'),
           'SRC_NORMED': join(src_dataset_path, f'img_{exposure}s_normed'),
           'BKG': join(bkg_dataset_path, f'img_{exposure}s'),
           'BKG_NORMED': join(bkg_dataset_path, f'img_{exposure}s_normed')}
for png_dir in pngpath.values():
    makedirs(png_dir, exist_ok=True)


# gather data
# NOTE(review): every map is kept in memory; with nbins=250 and class_sample
# maps per class this list grows very large - confirm this is intended.
datasets = {'SRC': [], 'BKG': []}
for k in classes:
    print(f"\nLoad {k} data...")
    for f in tqdm(datafiles[k][:class_sample]):
        # load
        table = Table.read(f, hdu=1).to_pandas()
        # integrate exposure
        heatmap = extract_heatmap(table, trange, smoothing, nbins, save=save_map, save_name=f.replace('.fits', f'_{exposure}s.npy'))
        # plot map
        # NOTE(review): plot_heatmap fixes the colour scale to [0, 1], so the
        # un-normalised map is drawn clipped to that interval.
        if plot_data:
            plot_heatmap(heatmap, title='original', show=show_plot, save=save_plot, save_name=join(pngpath[k], basename(f.replace('.fits', f'_{exposure}s.png'))))
        # normalise map
        if normalise_data:
            heatmap = normalise_heatmap(heatmap, save=save_norm, save_name=f.replace('.fits', f'_{exposure}s_normed.npy'))
            # plot normalised map (only when a normalised map actually exists)
            if plot_data:
                plot_heatmap(heatmap, title='normalised', show=show_plot, save=save_plot, save_name=join(pngpath[f'{k}_NORMED'], basename(f.replace('.fits', f'_{exposure}s_normed.png'))))
        # add to dataset
        datasets[k].append(heatmap)

print(f"DATASET {datasets.keys()}")
print(f"Sample SRC maps: {len(datasets['SRC'])}")
print(f"Sample BKG maps: {len(datasets['BKG'])}")


Load SRC data...


  0%|          | 0/50000 [00:00<?, ?it/s]