# CNN model to detect and localise sources

## Collect datasets

In [26]:
import pandas as pd
import numpy as np
from os import listdir
from os.path import join, isfile

# info tables, one per class (only the paths are defined here)
src_info = '/data01/homes/dipiano/E4/crab/sim/merged_data.dat'
bkg_info = '/data01/homes/dipiano/E4/background/sim/merged_data.dat'

# directory holding the files of each class
src_dataset_path = '/data01/homes/dipiano/E4/crab/sim'
bkg_dataset_path = '/data01/homes/dipiano/E4/background/sim'
datapath = {'SRC': src_dataset_path, 'BKG': bkg_dataset_path}

# per-class list of file paths; the dictionary keys double as class labels
dataset = {'SRC': [], 'BKG': []}
classes = dataset.keys()
print(f"Classes: {classes}\n")
for k in classes:
    folder = datapath[k]
    print(f"Class {k} data from: {folder}")
    # keep regular files whose name contains '.fits', in sorted path order
    candidates = (join(folder, name) for name in listdir(folder) if '.fits' in name)
    dataset[k] = sorted(path for path in candidates if isfile(path))

print(f"\nSRC dataset size: {len(dataset['SRC'])}")
print(f"BKG dataset size: {len(dataset['BKG'])}")

Classes: dict_keys(['SRC', 'BKG'])

Class SRC data from: /data01/homes/dipiano/E4/crab/sim
Class BKG data from: /data01/homes/dipiano/E4/background/sim

SRC dataset size: 50000
BKG dataset size: 50000


## Load datasets

In [None]:
from astropy.table import Table
from scipy.ndimage import gaussian_filter

# extract data utility
def extract_heatmap(data, trange, smoothing, nbins, extent=None):
    """Bin the events of a time window into a Gaussian-smoothed 2D map.

    Parameters
    ----------
    data : pandas.DataFrame
        Event list providing the 'TIME', 'RA' and 'DEC' columns.
    trange : sequence of two numbers
        Start and stop of the time selection; both ends are inclusive.
    smoothing : scalar or sequence of scalars
        Standard deviation of the Gaussian kernel, forwarded as ``sigma``
        to ``scipy.ndimage.gaussian_filter`` (i.e. expressed in pixels).
    nbins : int or sequence
        Binning specification forwarded as ``bins`` to ``numpy.histogram2d``.
    extent : array_like of shape (2, 2), optional
        ``[[ra_min, ra_max], [dec_min, dec_max]]`` outer edges of the map,
        forwarded as ``range`` to ``numpy.histogram2d``; events outside are
        not counted. With the default ``None`` the edges follow the min/max
        of the selected events, so the grid (and therefore the pixel size)
        changes from one event list to the next. Pass a fixed extent when
        maps from different event lists must share the same grid.

    Returns
    -------
    numpy.ndarray
        Smoothed counts map, transposed so that the first axis runs along
        DEC and the second along RA.
    """
    tmin, tmax = trange[0], trange[1]
    in_window = (data['TIME'] >= tmin) & (data['TIME'] <= tmax)
    selected = data[in_window]

    # histogram2d puts its first argument (RA) on axis 0; the bin edges it
    # also returns are not needed here.
    counts, _, _ = np.histogram2d(
        selected['RA'].to_numpy(),
        selected['DEC'].to_numpy(),
        bins=nbins,
        range=extent,
    )
    return gaussian_filter(counts, sigma=smoothing).T

# map parameters, all handed to extract_heatmap below
trange = [0, 50]    # time selection
smoothing = 3       # Gaussian smoothing
pixelsize = 0.02
# bins per axis; the 5 is presumably the map side in the same units as
# pixelsize -- TODO confirm
nbins = int(5 / pixelsize)

# replace, in place, every file path stored in `dataset` with its heatmap
for k in classes:
    entries = dataset[k]
    for idx, f in enumerate(entries):
        events = Table.read(f, hdu=1).to_pandas()
        heatmap = extract_heatmap(events, trange, smoothing, nbins)
        entries[idx] = heatmap