In [1]:
import os
import numpy as np
from pathlib import Path
import pandas as pd
import consistency
import h5py
import glob

# Calculate KLD (Kullback-Leibler divergence) consistency scores

In [2]:
import importlib

# Re-import the local `consistency` module so that edits made to it on disk
# are picked up without restarting the kernel.
importlib.reload(consistency)

# Hyperparameters for the ACME functions called in the cells below.
class_index = 1             # column of Y_test to rank by; also the "_ci<N>" suffix of saliency files
box_length = 0.1            # handed to consistency.initialize_integration_2
radius_count_cutoff = 0.10  # handed to consistency.spherical_coordinates_process_2_trad

# Output cache: parallel lists that the later cells append to
# (one name and one score per evaluated dataset).
dataset_name, dataset_score = [], []

In [7]:
# Ground truth: load the reference attribution maps and derive the indices of
# the test sequences they are evaluated against.
# The maps are stored under the default 'arr_0' key of the .npz archive.
gt_attribution = np.load('./attr_analysis/saliency_score/STAR_gt_ci1.npz')['arr_0']
# Swap the last two axes so the array unpacks as (N, L, A) below
# (presumably samples x sequence length x alphabet size -- TODO confirm).
gt_attribution = np.swapaxes(gt_attribution, 1, 2)
N, L, A = gt_attribution.shape

datafile = './dataset/DeepSTARR_data.h5'
# Read the test split inside a context manager so the HDF5 handle is closed as
# soon as the arrays are in memory, instead of staying open for the session.
with h5py.File(datafile, 'r') as h5_file:
    x_test = np.array(h5_file['X_test']).astype(np.float32)
    y_test = np.array(h5_file['Y_test']).astype(np.float32)

# argsort is ascending, so the last N entries are the N test sequences with the
# largest target value in column `class_index`.
act_idx = np.argsort(y_test[:, class_index])[-N:]

# Check for padding at the end: if the saved maps are longer than the test
# sequences (length taken from the last axis of x_test), trim them to match.
seq_L = x_test.shape[-1]
if seq_L != L:
    gt_attribution = gt_attribution[:, :seq_L, :]

# Ground-truth consistency.
# x_test is axis-swapped the same way as the attribution maps so both line up.
gt_X = np.swapaxes(x_test[act_idx], 1, 2)
attribution_map = consistency.process_attribution_map(gt_attribution)
# Sum of ones over the last axis divided by 4: equals 1.0 at every position
# when the last axis has length 4.
unit_mask = np.sum(np.ones(gt_X.shape), axis=-1) / 4

phi_1_s, phi_2_s, r_s = consistency.spherical_coordinates_process_2_trad(
    [attribution_map], gt_X, unit_mask, radius_count_cutoff)
# NOTE(review): `box_length` is rebound to the value returned here, so every
# later call receives the returned value rather than the configured one --
# confirm initialize_integration_2 is stable under repeated application.
LIM, box_length, box_volume, n_bins, n_bins_half = consistency.initialize_integration_2(box_length)
entropic_information = consistency.calculate_entropy_2(
    phi_1_s, phi_2_s, r_s, n_bins, box_length, box_volume, prior_range=3)
print(entropic_information[0])

# NOTE: re-running this cell appends a second 'STAR_gt_ci1' entry; re-run the
# cell that initialises dataset_name / dataset_score first to reset them.
dataset_name.append('STAR_gt_ci1')
dataset_score.append(entropic_information[0])

0.52


In [9]:
# Single-dataset consistency: score each model's attribution maps on their own.
# glob returns paths in arbitrary, filesystem-dependent order; sorting makes the
# printed report and the order of the appended results reproducible.
for dataset_path in sorted(glob.glob('./dataset/*.npz')):
    # Fields 3 and 4 of the underscore-separated file name, plus the class
    # suffix, give the name of the matching saliency file.
    name_fields = os.path.basename(dataset_path).split('_')
    saliency_name = name_fields[3] + '_' + name_fields[4] + ('_ci%d.npz' % class_index)
    run_name = saliency_name[:-4]  # strip the '.npz' extension for reporting

    attribution_map = np.load('./attr_analysis/saliency_score/' + saliency_name)['arr_0']
    # Same axis swap as applied to the ground-truth maps.
    attribution_map = np.swapaxes(attribution_map, 1, 2)
    # Keep only the sequences selected by act_idx.
    X = np.load(dataset_path)['arr_0'][act_idx]

    # Trim the maps along axis 1 when their length differs from the sequences'.
    if attribution_map.shape[1] != X.shape[1]:
        attribution_map = attribution_map[:, :X.shape[1], :]
    attribution_map = consistency.process_attribution_map(attribution_map)

    # Sum of ones over the last axis divided by 4: equals 1.0 at every position
    # when the last axis has length 4.
    unit_mask = np.sum(np.ones(X.shape), axis=-1) / 4
    phi_1_s, phi_2_s, r_s = consistency.spherical_coordinates_process_2_trad(
        [attribution_map], X, unit_mask, radius_count_cutoff)
    # NOTE(review): `box_length` is rebound on every iteration to the value
    # returned by the previous call -- confirm initialize_integration_2 is
    # stable under repeated application.
    LIM, box_length, box_volume, n_bins, n_bins_half = consistency.initialize_integration_2(box_length)
    entropic_information = consistency.calculate_entropy_2(
        phi_1_s, phi_2_s, r_s, n_bins, box_length, box_volume, prior_range=3)

    dataset_name.append(run_name)
    dataset_score.append(entropic_information[0])

    print(run_name)
    print(entropic_information[0])

dinuc_shuffle_ci1
0.084
sm_tran_ci1
0.525
fm_conv_ci1
0.485
fm_tran_ci1
0.438
sm_conv_ci1
0.504


In [10]:
# Mixed-dataset consistency: score each model's attribution maps pooled
# together with the ground-truth maps (gt_X / gt_attribution).
# glob returns paths in arbitrary, filesystem-dependent order; sorting makes the
# printed report and the order of the appended results reproducible.
for dataset_path in sorted(glob.glob('./dataset/*.npz')):
    # Fields 3 and 4 of the underscore-separated file name, plus the class
    # suffix, give the name of the matching saliency file.
    name_fields = os.path.basename(dataset_path).split('_')
    saliency_name = name_fields[3] + '_' + name_fields[4] + ('_ci%d.npz' % class_index)
    # Report label: saliency file name without '.npz', tagged with the ground truth.
    run_name = saliency_name[:-4] + ' + STAR_gt_ci1'

    attribution_map = np.load('./attr_analysis/saliency_score/' + saliency_name)['arr_0']
    # Same axis swap as applied to the ground-truth maps.
    attribution_map = np.swapaxes(attribution_map, 1, 2)
    # Model sequences selected by act_idx, followed by the ground-truth sequences.
    X = np.load(dataset_path)['arr_0'][act_idx]
    X = np.concatenate((X, gt_X))

    # Trim the model maps along axis 1 when their length differs from the
    # sequences', then stack the ground-truth maps after them so the maps
    # follow the same order as X.
    if attribution_map.shape[1] != X.shape[1]:
        attribution_map = attribution_map[:, :X.shape[1], :]
    attribution_map = np.concatenate((attribution_map, gt_attribution))
    attribution_map = consistency.process_attribution_map(attribution_map)

    # Sum of ones over the last axis divided by 4: equals 1.0 at every position
    # when the last axis has length 4.
    unit_mask = np.sum(np.ones(X.shape), axis=-1) / 4
    phi_1_s, phi_2_s, r_s = consistency.spherical_coordinates_process_2_trad(
        [attribution_map], X, unit_mask, radius_count_cutoff)
    # NOTE(review): `box_length` is rebound on every iteration to the value
    # returned by the previous call -- confirm initialize_integration_2 is
    # stable under repeated application.
    LIM, box_length, box_volume, n_bins, n_bins_half = consistency.initialize_integration_2(box_length)
    entropic_information = consistency.calculate_entropy_2(
        phi_1_s, phi_2_s, r_s, n_bins, box_length, box_volume, prior_range=3)

    dataset_name.append(run_name)
    dataset_score.append(entropic_information[0])

    print(run_name)
    print(entropic_information[0])

dinuc_shuffle_ci1 + STAR_gt_ci1
0.376
sm_tran_ci1 + STAR_gt_ci1
0.514
fm_conv_ci1 + STAR_gt_ci1
0.459
fm_tran_ci1 + STAR_gt_ci1
0.45
sm_conv_ci1 + STAR_gt_ci1
0.503


In [11]:
# Collect the cached names and scores into one table, ordered alphabetically
# by model name, and write it to disk (the row index is written as well).
df = (
    pd.DataFrame({'Model': dataset_name, 'Entropy': dataset_score})
    .sort_values('Model')
)
df.to_csv('./attr_analysis/consistency_score.csv')