# CNN-CLEANER z20

In [None]:
import pickle
import numpy as np
import pandas as pd
from os import listdir
from os.path import join, isfile, expandvars
from astroai.tools.utils import split_noisy_dataset

# data
# Location of the z20 dataset; the side tables share the dataset's base name.
zenith = 'z20'
table = 'cleaner_5sgm.pickle'
path = f'{expandvars("$HOME")}/E4/irf_{zenith}/crab/'
dataset = join(path, table)

# models
cnnname = 'cleaner_z20'

# dataset
# Two on-disk formats are supported. In both cases the info table is
# `<base>.dat` and the gammapy table is `<base>_gammapy.txt`.
if '.pickle' in table:
    # NOTE(review): pickle.load can execute arbitrary code; only load
    # datasets that come from a trusted pipeline.
    with open(dataset, 'rb') as f:
        ds = pickle.load(f)
    infotable = join(path, table.replace('.pickle', '.dat'))
    gammatable = join(path, table.replace('.pickle', '_gammapy.txt'))
elif '.npy' in table:
    # The .npy file wraps a single pickled object; .flat[0] unwraps it.
    ds = np.load(dataset, allow_pickle=True, encoding='latin1', fix_imports=True).flat[0]
    infotable = join(path, table.replace('.npy', '.dat'))
    gammatable = join(path, table.replace('.npy', '_gammapy.txt'))
else:
    # Fail here rather than with a NameError on `ds` further down.
    raise ValueError(f'unsupported dataset format: {table!r} (expected .pickle or .npy)')

# split=80 is presumably the training percentage -- confirm against
# astroai.tools.utils.split_noisy_dataset.
train_noisy, train_clean, test_noisy, test_clean = split_noisy_dataset(ds, split=80, reshape=True, binning=200)

In [None]:
# Number of samples in the noisy test split as returned by split_noisy_dataset.
len(test_noisy)

In [None]:
# Keep only the first 1000 noisy test samples for inference.
# test_clean is not truncated here: the later zip(test_noisy, test_clean, ...)
# loops stop at the shortest sequence, so the pairing stays aligned.
test_noisy = test_noisy[:1000]
len(test_noisy)

In [None]:
import tensorflow as tf

# Load the trained cleaner from the astroAI checkout and run it on the
# (truncated) noisy test samples.
root = f'{expandvars("$HOME")}/astroAI/astroai/'
model_file = join(root, 'models/crta_models', f'{cnnname}.keras')
model = tf.keras.models.load_model(model_file)
predictions = model.predict(test_noisy)

In [None]:
# Space-separated per-sample info table, ordered by the 'seed' column.
infodata = pd.read_csv(infotable, sep=' ', header=0)
infodata = infodata.sort_values(by=['seed'])
infodata.head()

## Residuals z20

In [None]:
# Per-sample residual maps: noisy input minus each cleaned version.
# All three sequences are zipped together so both lists stop at the shortest.
sample_triples = list(zip(test_noisy, test_clean, predictions))
residuals = {
    'STD': [noisy - clean for noisy, clean, _ in sample_triples],
    'CNN': [noisy - pred for noisy, _, pred in sample_triples],
}

len(residuals['STD']), len(residuals['CNN'])

## Cumulative FOV counts z20

In [None]:
# Total residual counts per sample, for each cleaning method.
residual_pairs = list(zip(residuals['STD'], residuals['CNN']))
sum_residual = {
    'STD': [np.sum(std) for std, _ in residual_pairs],
    'CNN': [np.sum(cnn) for _, cnn in residual_pairs],
}

# Total counts in the full map: noisy input, both cleaned maps, and the
# difference between the two cleaned totals.
fov_triples = list(zip(test_noisy, test_clean, predictions))
sum_fov = {
    'NOISY': [np.sum(orig) for orig, _, _ in fov_triples],
    'STD': [np.sum(std) for _, std, _ in fov_triples],
    'CNN': [np.sum(cnn) for _, _, cnn in fov_triples],
    'DIFF': [np.sum(std) - np.sum(cnn) for _, std, cnn in fov_triples],
}

# Same quantities built from the residual maps: here 'STD'/'CNN' are the
# totals of (noisy - residual) and 'DIFF' is the residual-total difference.
residual_triples = list(zip(test_noisy, residuals['STD'], residuals['CNN']))
sum_original_and_diff = {
    'NOISY': [np.sum(orig) for orig, _, _ in residual_triples],
    'DIFF': [np.sum(std) - np.sum(cnn) for _, std, cnn in residual_triples],
    'STD': [np.sum(orig - std) for orig, std, _ in residual_triples],
    'CNN': [np.sum(orig - cnn) for orig, _, cnn in residual_triples],
}

## ON excess counts z20

In [None]:
from matplotlib.patches import Circle
from astroai.tools.utils import set_wcs
from astropy.coordinates import SkyCoord

# Map geometry used to turn sky coordinates into pixel coordinates.
# Pixels per map side; same value that is passed to split_noisy_dataset.
binning = 200
# 5 / 200 = 0.025 per pixel -- presumably degrees for a 2 x 2.5 deg field; confirm.
pixelsize = (2 * 2.5) / binning
# Reference pixel handed to set_wcs.
# NOTE(review): this adds half the pixel *size* to a pixel index -- confirm intended.
point_ref = (binning / 2) + (pixelsize / 2)
# ON-region radius in pixels: 0.2 divided by the 0.025 pixel size.
radius_pix = 0.2/0.025

def create_circular_mask(h, w, center=None, radius=None):
    """Return a boolean ``(h, w)`` array that is True inside a circle.

    Parameters
    ----------
    h, w : int
        Number of rows and columns of the mask.
    center : sequence of two numbers, optional
        Circle centre as ``(column, row)``: ``center[0]`` is compared with
        the column index and ``center[1]`` with the row index. Defaults to
        the middle of the image, ``(int(w/2), int(h/2))``.
    radius : number, optional
        Circle radius in pixels. Defaults to the distance from the centre
        to the nearest image edge.

    Returns
    -------
    numpy.ndarray
        Boolean mask; True where the pixel lies at most ``radius`` from
        the centre (boundary included).
    """
    if center is None:
        # Fall back to the middle of the image.
        center = (int(w / 2), int(h / 2))
    cx, cy = center[0], center[1]

    if radius is None:
        # Largest circle around the centre that still fits in the image.
        radius = min(cx, cy, w - cx, h - cy)

    # Open grids: `rows` has shape (h, 1), `cols` has shape (1, w).
    rows, cols = np.ogrid[:h, :w]
    return np.sqrt((cols - cx) ** 2 + (rows - cy) ** 2) <= radius

In [None]:
# Per-sample counts inside the circular ON region around the source, for the
# standard-cleaned map, the CNN-cleaned map, and their difference.
# NOTE(review): 'AP_EXCESS' is never appended to in this cell and stays empty.
sum_on_region = {'STD': [], 'CNN': [], 'AP_EXCESS': [], 'DIFF': []}

# Seed counter: starts at the training-set size and is incremented before each
# lookup -- presumably test seeds continue after the training seeds; confirm.
s = len(train_noisy)
for std, cnn in zip(test_clean, predictions):
    s += 1 
    # Metadata row of the current sample (raises IndexError below if the seed is missing).
    row = infodata[infodata['seed']==s]
    # sky coordinates (not used further down in this loop)
    source_deg = {'ra': row['source_ra'].values[0], 'dec': row['source_dec'].values[0]}
    point_deg = {'ra': row['point_ra'].values[0], 'dec': row['point_dec'].values[0]}
    # pixel coordinates of the source, via the object returned by set_wcs for this pointing
    w = set_wcs(point_ra=row['point_ra'].values[0], point_dec=row['point_dec'].values[0], 
            point_ref=point_ref, pixelsize=pixelsize)
    x, y = w.world_to_pixel(SkyCoord(row['source_ra'].values[0], row['source_dec'].values[0], 
                                                   unit='deg', frame='icrs'))
    # ON counts with STD cleaning
    # Note: `w` is rebound here from the set_wcs result to the image width.
    h, w = std.shape[:2]
    # NOTE(review): create_circular_mask compares center[0] with the column
    # index; passing (y, x) is only right if the maps are stored transposed -- confirm.
    mask = create_circular_mask(h, w, center=(y, x), radius=radius_pix)
    masked_std = std.copy()
    # Zero everything outside the ON region.
    masked_std[~mask] = 0

    # ON counts with CNN cleaning
    h, w = cnn.shape[:2]
    mask = create_circular_mask(h, w, center=(y, x), radius=radius_pix)
    masked_cnn = cnn.copy()
    masked_cnn[~mask] = 0
    
    sum_on_region['STD'].append(np.sum(masked_std))
    sum_on_region['CNN'].append(np.sum(masked_cnn))
    sum_on_region['DIFF'].append(np.sum(masked_std - masked_cnn))

sum_on_region.keys()

## Rename z20 vars

In [None]:
# Keep the z20 results under their own names so the zALL section can reuse
# the generic variable names without losing them.
residuals_z20 = residuals
sum_residual_z20 = sum_residual
sum_fov_z20 = sum_fov
sum_original_and_diff_z20 = sum_original_and_diff
sum_on_region_z20 = sum_on_region

# Persist every result dictionary to its own pickle file.
for out_path, payload in (
    ('data/cleaner_z20_residuals.pickle', residuals_z20),
    ('data/cleaner_z20_sum_residuals.pickle', sum_residual_z20),
    ('data/cleaner_z20_sum_fov.pickle', sum_fov_z20),
    ('data/cleaner_z20_sum_original_and_diff.pickle', sum_original_and_diff_z20),
    ('data/cleaner_z20_sum_on_region.pickle', sum_on_region_z20),
):
    with open(out_path, 'wb') as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)

# CNN-CLEANER zALL

In [None]:
# data
# Location of the random-zenith dataset; the side tables share its base name.
zenith = 'random'
table = 'cleaner_5sgm_expALL.npy'
path = f'{expandvars("$HOME")}/E4/irf_{zenith}/crab/'
dataset = join(path, table)

# dataset
# Two on-disk formats are supported. In both cases the info table is
# `<base>.dat` and the gammapy table is `<base>_gammapy.txt`.
if '.pickle' in table:
    # NOTE(review): pickle.load can execute arbitrary code; only load
    # datasets that come from a trusted pipeline.
    with open(dataset, 'rb') as f:
        ds = pickle.load(f)
    infotable = join(path, table.replace('.pickle', '.dat'))
    gammatable = join(path, table.replace('.pickle', '_gammapy.txt'))
elif '.npy' in table:
    # The .npy file wraps a single pickled object; .flat[0] unwraps it.
    ds = np.load(dataset, allow_pickle=True, encoding='latin1', fix_imports=True).flat[0]
    infotable = join(path, table.replace('.npy', '.dat'))
    gammatable = join(path, table.replace('.npy', '_gammapy.txt'))
else:
    # Fail here rather than silently reusing `ds` from the previous section.
    raise ValueError(f'unsupported dataset format: {table!r} (expected .pickle or .npy)')

# split=80 is presumably the training percentage -- confirm against
# astroai.tools.utils.split_noisy_dataset.
train_noisy, train_clean, test_noisy, test_clean = split_noisy_dataset(ds, split=80, reshape=True, binning=200)

In [None]:
# Number of samples in the noisy test split as returned by split_noisy_dataset.
len(test_noisy)

In [None]:
# Keep only the first 1000 noisy test samples for inference.
# test_clean is not truncated here: the later zip(test_noisy, test_clean, ...)
# loops stop at the shortest sequence, so the pairing stays aligned.
test_noisy = test_noisy[:1000]
len(test_noisy)

In [None]:
# Load the all-zenith cleaner from the astroAI checkout and run it on the
# (truncated) noisy test samples. Relies on `tf` imported in an earlier cell.
root = f'{expandvars("$HOME")}/astroAI/astroai/'
model_file = join(root, 'models/crta_models', 'cleaner_zALL.keras')
model = tf.keras.models.load_model(model_file)
predictions = model.predict(test_noisy)

In [None]:
# Space-separated per-sample info table, ordered by the 'seed' column.
infodata = pd.read_csv(infotable, sep=' ', header=0)
infodata = infodata.sort_values(by=['seed'])
infodata.head()

## Residuals zALL

In [None]:
# Per-sample residual maps: noisy input minus each cleaned version.
# All three sequences are zipped together so both lists stop at the shortest.
sample_triples = list(zip(test_noisy, test_clean, predictions))
residuals = {
    'STD': [noisy - clean for noisy, clean, _ in sample_triples],
    'CNN': [noisy - pred for noisy, _, pred in sample_triples],
}

len(residuals['STD']), len(residuals['CNN'])

## Cumulative FOV counts zALL

In [None]:
# Total residual counts per sample, for each cleaning method.
residual_pairs = list(zip(residuals['STD'], residuals['CNN']))
sum_residual = {
    'STD': [np.sum(std) for std, _ in residual_pairs],
    'CNN': [np.sum(cnn) for _, cnn in residual_pairs],
}

# Total counts in the full map: noisy input, both cleaned maps, and the
# difference between the two cleaned totals.
fov_triples = list(zip(test_noisy, test_clean, predictions))
sum_fov = {
    'NOISY': [np.sum(orig) for orig, _, _ in fov_triples],
    'STD': [np.sum(std) for _, std, _ in fov_triples],
    'CNN': [np.sum(cnn) for _, _, cnn in fov_triples],
    'DIFF': [np.sum(std) - np.sum(cnn) for _, std, cnn in fov_triples],
}

# Same quantities built from the residual maps: here 'STD'/'CNN' are the
# totals of (noisy - residual) and 'DIFF' is the residual-total difference.
residual_triples = list(zip(test_noisy, residuals['STD'], residuals['CNN']))
sum_original_and_diff = {
    'NOISY': [np.sum(orig) for orig, _, _ in residual_triples],
    'DIFF': [np.sum(std) - np.sum(cnn) for _, std, cnn in residual_triples],
    'STD': [np.sum(orig - std) for orig, std, _ in residual_triples],
    'CNN': [np.sum(orig - cnn) for orig, _, cnn in residual_triples],
}

## ON excess counts zALL

In [None]:
# Per-sample counts inside the circular ON region around the source, for the
# standard-cleaned map, the CNN-cleaned map, and their difference.
sum_on_region = {'STD': [], 'CNN': [], 'DIFF': []}

# Seed counter: starts at the training-set size and is incremented before each
# lookup -- presumably test seeds continue after the training seeds; confirm.
s = len(train_noisy)
for std, cnn in zip(test_clean, predictions):
    s += 1 
    # Metadata row of the current sample (raises IndexError below if the seed is missing).
    row = infodata[infodata['seed']==s]
    # sky coordinates (not used further down in this loop)
    source_deg = {'ra': row['source_ra'].values[0], 'dec': row['source_dec'].values[0]}
    point_deg = {'ra': row['point_ra'].values[0], 'dec': row['point_dec'].values[0]}
    # pixel coordinates of the source, via the object returned by set_wcs for this pointing
    w = set_wcs(point_ra=row['point_ra'].values[0], point_dec=row['point_dec'].values[0], 
            point_ref=point_ref, pixelsize=pixelsize)
    x, y = w.world_to_pixel(SkyCoord(row['source_ra'].values[0], row['source_dec'].values[0], 
                                                   unit='deg', frame='icrs'))
    # ON counts with STD cleaning
    # Note: `w` is rebound here from the set_wcs result to the image width.
    h, w = std.shape[:2]
    # NOTE(review): create_circular_mask compares center[0] with the column
    # index; passing (y, x) is only right if the maps are stored transposed -- confirm.
    mask = create_circular_mask(h, w, center=(y, x), radius=radius_pix)
    masked_std = std.copy()
    # Zero everything outside the ON region.
    masked_std[~mask] = 0

    # ON counts with CNN cleaning
    h, w = cnn.shape[:2]
    mask = create_circular_mask(h, w, center=(y, x), radius=radius_pix)
    masked_cnn = cnn.copy()
    masked_cnn[~mask] = 0
    
    sum_on_region['STD'].append(np.sum(masked_std))
    sum_on_region['CNN'].append(np.sum(masked_cnn))
    sum_on_region['DIFF'].append(np.sum(masked_std - masked_cnn))

sum_on_region.keys()

## Rename zALL vars

In [None]:
# Keep the zALL results under their own names next to the z20 ones.
residuals_zALL = residuals
sum_residual_zALL = sum_residual
sum_fov_zALL = sum_fov
sum_original_and_diff_zALL = sum_original_and_diff
sum_on_region_zALL = sum_on_region

# Persist every result dictionary to its own pickle file.
for out_path, payload in (
    ('data/cleaner_zALL_residuals.pickle', residuals_zALL),
    ('data/cleaner_zALL_sum_residuals.pickle', sum_residual_zALL),
    ('data/cleaner_zALL_sum_fov.pickle', sum_fov_zALL),
    ('data/cleaner_zALL_sum_original_and_diff.pickle', sum_original_and_diff_zALL),
    ('data/cleaner_zALL_sum_on_region.pickle', sum_on_region_zALL),
):
    with open(out_path, 'wb') as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)

# PLOTS z20 vs zALL

In [None]:
import astropy.units as u
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# Shared plotting constants.
# ON-region radius -- presumably degrees, going by the name; confirm.
radius_deg = 0.2
# Same radius in pixels, using the 0.025 pixel size hard-coded here.
radius_pix = radius_deg/0.025
# Figure sizes: `histsize` is the one used by the histogram cells below.
figsize = (10, 10)
histsize = (8, 8)
# Base font size for axis labels and legends.
fs = 16

## RESIDUALS

In [None]:
import matplotlib.patches as mpatches

# Styling shared by both histograms of this cell (first entry: z20, second: zALL).
colors = ['navy', 'orange']
labels = ['20° zenith', 'random zenith']
hatches = ['\\', '//']
legends = [mpatches.Patch(facecolor='none', edgecolor=edge, hatch=fill)
           for edge, fill in zip(colors, hatches)]

# (data pair, output file) for each figure: absolute CNN residual totals,
# then the STD-minus-CNN residual difference.
figures = (
    ((np.abs(sum_residual_z20['CNN']), np.abs(sum_residual_zALL['CNN'])),
     'img/paper_cleaner_hist_sum_residuals_z20_vs_zALL.png'),
    ((sum_original_and_diff_z20['DIFF'], sum_original_and_diff_zALL['DIFF']),
     'img/paper_cleaner_hist_bkg_residuals_z20_vs_zALL.png'),
)

for data, outfile in figures:
    fig = plt.figure(figsize=histsize)
    ax = fig.add_subplot(111)

    n, bins, patches = ax.hist(data, 20, density=False, histtype='step', color=colors)
    ax.set_ylabel('samples in dataset', fontsize=fs)
    ax.set_xlabel('counts', fontsize=fs)
    ax.legend(handles=legends, labels=labels, fontsize=fs)
    ax.grid()
    # One hatch pattern per dataset so the curves stay distinguishable in print.
    for patch, hatch in zip(patches, hatches):
        plt.setp(patch, hatch=hatch)
    plt.show()
    fig.savefig(outfile)

## CUMULATIVE FOV

In [None]:
# Styling shared by both histograms of this cell (first entry: z20, second: zALL).
colors = ['orange', 'navy']
labels = ['20° zenith', 'random zenith']
hatches = ['\\', '//']
legends = [mpatches.Patch(facecolor='none', edgecolor=colors[0], hatch=hatches[0]), 
           mpatches.Patch(facecolor='none', edgecolor=colors[1], hatch=hatches[1])]

# (data pair, output file) for each figure: total counts of the noisy maps,
# then total counts of the CNN-cleaned maps.
figures = (
    ((sum_original_and_diff_z20['NOISY'], sum_original_and_diff_zALL['NOISY']),
     'img/paper_cleaner_hist_sum_fov_original_z20_vs_zALL.png'),
    ((sum_fov_z20['CNN'], sum_fov_zALL['CNN']),
     'img/paper_cleaner_hist_sum_fov_clean_CNN_z20_vs_zALL.png'),
)

for data, outfile in figures:
    fig = plt.figure(figsize=histsize)
    ax = fig.add_subplot(111)

    n, bins, patches = ax.hist(data, 20, density=False, histtype='step', color=colors, label=labels)
    # Same y-label on both figures (the first one used to read 'datatset').
    ax.set_ylabel('samples in dataset', fontsize=fs)
    ax.set_xlabel('counts', fontsize=fs)
    ax.legend(handles=legends, labels=labels, fontsize=fs)
    ax.grid()
    # One hatch pattern per dataset so the curves stay distinguishable in print.
    for patch, hatch in zip(patches, hatches):
        plt.setp(patch, hatch=hatch)
    plt.show()
    fig.savefig(outfile)

In [None]:
# Styling shared by both histograms of this cell (first entry: noisy input,
# second: CNN-cleaned output).
colors = ['green', 'maroon']
labels = ['original', 'CNN clean']
hatches = ['\\', '//']
legends = [mpatches.Patch(facecolor='none', edgecolor=colors[0], hatch=hatches[0]), 
           mpatches.Patch(facecolor='none', edgecolor=colors[1], hatch=hatches[1])]

# (data pair, output file) for each figure: z20 first, then zALL.
figures = (
    ((sum_original_and_diff_z20['NOISY'], sum_fov_z20['CNN']),
     'img/paper_cleaner_hist_sum_fov_original_and_clean_CNN_z20.png'),
    ((sum_original_and_diff_zALL['NOISY'], sum_fov_zALL['CNN']),
     'img/paper_cleaner_hist_sum_fov_original_and_clean_CNN_zALL.png'),
)

for data, outfile in figures:
    fig = plt.figure(figsize=histsize)
    ax = fig.add_subplot(111)

    n, bins, patches = ax.hist(data, 20, density=False, histtype='step', color=colors, label=labels)
    # Same y-label on both figures (the first one used to read 'datatset').
    ax.set_ylabel('samples in dataset', fontsize=fs)
    ax.set_xlabel('counts', fontsize=fs)
    ax.legend(handles=legends, labels=labels, fontsize=fs)
    ax.grid()
    # One hatch pattern per dataset so the curves stay distinguishable in print.
    for patch, hatch in zip(patches, hatches):
        plt.setp(patch, hatch=hatch)
    plt.show()
    fig.savefig(outfile)

## EXCESS COUNTS

In [None]:
# Styling shared by both histograms of this cell (first entry: z20, second: zALL).
colors = ['orange', 'navy']
labels = ['20° zenith', 'random zenith']
hatches = ['\\', '//']
legends = [mpatches.Patch(facecolor='none', edgecolor=edge, hatch=fill)
           for edge, fill in zip(colors, hatches)]

# (data pair, output file) for each figure: background residual difference,
# then the ON-region STD-minus-CNN difference.
# NOTE(review): the first output path is the same one the RESIDUALS cell
# writes for the same data, so that PNG is overwritten here with the
# colours swapped -- confirm which version is wanted.
figures = (
    ((sum_original_and_diff_z20['DIFF'], sum_original_and_diff_zALL['DIFF']),
     'img/paper_cleaner_hist_bkg_residuals_z20_vs_zALL.png'),
    ((sum_on_region_z20['DIFF'], sum_on_region_zALL['DIFF']),
     'img/paper_cleaner_hist_excess_z20_vs_zALL.png'),
)

for data, outfile in figures:
    fig = plt.figure(figsize=histsize)
    ax = fig.add_subplot(111)

    n, bins, patches = ax.hist(data, 20, density=False, histtype='step', color=colors, label=labels)
    ax.set_ylabel('samples in dataset', fontsize=fs)
    ax.set_xlabel('counts', fontsize=fs)
    ax.legend(handles=legends, labels=labels, fontsize=fs)
    ax.grid()
    # One hatch pattern per dataset so the curves stay distinguishable in print.
    for patch, hatch in zip(patches, hatches):
        plt.setp(patch, hatch=hatch)
    plt.show()
    fig.savefig(outfile)

# Statistics

In [None]:
# Mean and standard deviation of the background residual difference
# (STD residual total minus CNN residual total). These used to be written to
# mean_z20/std_z20/... and were overwritten by the ON-region values below
# before anything could read them.
mean_bkg_z20 = np.mean(sum_original_and_diff_z20['DIFF'])
std_bkg_z20 = np.std(sum_original_and_diff_z20['DIFF'])
mean_bkg_zALL = np.mean(sum_original_and_diff_zALL['DIFF'])
std_bkg_zALL = np.std(sum_original_and_diff_zALL['DIFF'])

# Mean and standard deviation of the ON-region difference (STD minus CNN).
# Names and final values are unchanged from the original cell.
mean_z20 = np.mean(sum_on_region_z20['DIFF'])
std_z20 = np.std(sum_on_region_z20['DIFF'])
mean_zALL = np.mean(sum_on_region_zALL['DIFF'])
std_zALL = np.std(sum_on_region_zALL['DIFF'])

# PLOTS CNN vs STD

In [None]:
# Styling shared by both histograms of this cell (first entry: standard
# cleaning, second: CNN cleaning), both on the zALL dataset.
colors = ['red', 'navy']
labels = ['standard', 'cnn model']
# Defined here so the cell no longer depends on `hatches` left over from an
# earlier plotting cell; the value is the same one those cells use.
hatches = ['\\', '//']

# Data pair for each figure: ON-region counts, then total residual counts.
figures = (
    (sum_on_region_zALL['STD'], sum_on_region_zALL['CNN']),
    (sum_residual_zALL['STD'], sum_residual_zALL['CNN']),
)

for data in figures:
    fig = plt.figure(figsize=histsize)
    ax = fig.add_subplot(111)

    n, bins, patches = ax.hist(data, 20, density=False, histtype='step', color=colors, label=labels)
    ax.set_ylabel('samples in dataset', fontsize=fs)
    ax.set_xlabel('counts', fontsize=fs)
    ax.legend(fontsize=fs)
    ax.grid()
    # One hatch pattern per dataset so the curves stay distinguishable in print.
    for patch, hatch in zip(patches, hatches):
        plt.setp(patch, hatch=hatch)
    plt.show()