In [1]:
from __future__ import division
import json
import numpy as np
from decimal import Decimal
from numpy import expand_dims, array, exp, max
import scipy
from scipy.special import rel_entr
import csv
import statistics

In [2]:
# Tell NumPy to ignore "divide by zero" and "invalid value" floating-point
# conditions for subsequent NumPy operations instead of warning about them.
np.seterr(divide='ignore', invalid='ignore')
# Empty list; it is not referenced by any other cell visible in this notebook.
new_final_source_array = []

In [3]:
# Open the source-domain GLCM feature files (energy, correlation, homogeneity).
# The handles are not wrapped in `with`; they are read by json.load and
# closed explicitly in later cells.
energy_source_data = open('../../../paper3/files/glcm/source_domain_data_new_energy_glcm.json')
correlation_source_data = open('../../../paper3/files/glcm/source_domain_data_new_correlation_glcm.json')
homogeneity_source_data = open('../../../paper3/files/glcm/source_domain_data_new_homogeneity_glcm.json')

# Open the matching target-domain GLCM feature files
# (MNIST features under the DenseNet169 directory, judging by the paths).
energy_target_data = open('DenseNet169/glcm/mnist_domain_data_new_energy_glcm.json')
correlation_target_data = open('DenseNet169/glcm/mnist_domain_data_new_correlation_glcm.json')
homogeneity_target_data = open('DenseNet169/glcm/mnist_domain_data_new_homogeneity_glcm.json')

In [4]:
# Parse every GLCM feature file into a Python object and release the file
# handles immediately afterwards. Closing inside `finally` guarantees that a
# parse error in one file cannot leave the remaining handles open; calling
# .close() again on an already-closed file later on is a harmless no-op.
try:
    energy_source_data_d = json.load(energy_source_data)
    correlation_source_data_d = json.load(correlation_source_data)
    homogeneity_source_data_d = json.load(homogeneity_source_data)

    energy_target_data_d = json.load(energy_target_data)
    correlation_target_data_d = json.load(correlation_target_data)
    homogeneity_target_data_d = json.load(homogeneity_target_data)
finally:
    for _handle in (
        energy_source_data,
        correlation_source_data,
        homogeneity_source_data,
        energy_target_data,
        correlation_target_data,
        homogeneity_target_data,
    ):
        _handle.close()

In [5]:
# Empty dict, presumably meant to hold KL-divergence results (name-based guess);
# no other cell visible in this notebook reads or writes it.
kdl_dictionary = {}

In [6]:
def KL(p, q):
    """Return sum(p_i * log(p_i / q_i)) over positions where both values are non-zero.

    Positions where either ``p_i`` or ``q_i`` equals zero contribute 0 to the
    result (they are masked out rather than producing ``inf``/``nan``).

    Parameters
    ----------
    p, q : iterable of numbers
        Sequences of equal length; each element must be convertible to float.

    Returns
    -------
    numpy.float64
        The masked sum described above.
    """
    a = np.asarray(list(p), dtype=np.float64)
    b = np.asarray(list(q), dtype=np.float64)

    both_nonzero = (a != 0) & (b != 0)

    # np.where evaluates the expression for *every* element, including the
    # masked-out ones, so divisions by zero and log(0) still occur internally.
    # Silence them locally instead of relying on a global np.seterr call.
    with np.errstate(divide='ignore', invalid='ignore'):
        terms = np.where(both_nonzero, a * np.log(a / b), 0)

    return np.sum(terms)

In [7]:
def softMax(temperature, features_list):
    """Temperature-scaled softmax of ``features_list``.

    Every feature is divided by ``temperature``, the maximum is subtracted
    before exponentiating, and the exponentials are normalised by their sum
    along axis 0.
    """
    scaled = np.asarray([value / temperature for value in features_list])
    exponentials = np.exp(scaled - np.max(scaled))
    normaliser = exponentials.sum(axis=0)
    return exponentials / normaliser

In [8]:
# Flatten the energy target entries into a list of (key, ndarray) tuples,
# one tuple per key/value pair of every dict under 'target_images'.
energy_target_data_array = [
    (image_key, np.array(values))
    for entry in energy_target_data_d['target_images']
    for image_key, values in entry.items()
]
energy_target_data.close()

In [9]:
# Flatten the correlation target entries into a list of (key, ndarray) tuples,
# one tuple per key/value pair of every dict under 'target_images'.
correlation_target_data_array = [
    (image_key, np.array(values))
    for entry in correlation_target_data_d['target_images']
    for image_key, values in entry.items()
]
correlation_target_data.close()

In [10]:
# Flatten the homogeneity target entries into a list of (key, ndarray) tuples,
# one tuple per key/value pair of every dict under 'target_images'.
homogeneity_target_data_array = [
    (image_key, np.array(values))
    for entry in homogeneity_target_data_d['target_images']
    for image_key, values in entry.items()
]
homogeneity_target_data.close()

In [11]:
# Sanity check: number of (key, array) tuples collected for the homogeneity target images.
print(len(homogeneity_target_data_array))

5000


In [12]:
### energy comparison code
# For each of the first 4500 target images, compare its energy GLCM values
# against every source image and store, per (target, source) pair, the largest
# element-wise relative-entropy term.
energy_All_Images = []
energy_average_dkl = []  # not filled in this cell

for x in energy_target_data_array[:4500]:
    image_name_class = x[0].split('_/')  # name -> [0]; class -> [1]
    # The sorted target values depend only on the target image, so sort once
    # per target instead of once per source image.
    items = np.sort(x[1])
    compare_min_max = []

    for source_entry in energy_source_data_d['source_images']:
        for k, v in source_entry.items():
            new_v = np.array(v).flatten()
            # Keep the last len(new_v) sorted target values so both vectors
            # have the same length (the largest values for a 1-D array).
            new_x = items[-len(new_v):].flatten()

            # NOTE(review): rel_entr runs on the raw feature values. Earlier
            # code also computed softMax(0.5, ...) of both vectors but never
            # used the results; confirm whether the normalised distributions
            # were meant to be compared instead.
            kdl = rel_entr(new_x, new_v)

            compare_min_max.append((image_name_class[0], image_name_class[1], k, np.max(kdl)))

    energy_All_Images.append(compare_min_max)
energy_source_data.close()

In [13]:
### correlation comparison code
# For each of the first 4500 target images, compare its correlation GLCM
# values against every source image and store, per (target, source) pair, the
# largest element-wise relative-entropy term.
correlation_All_Images = []
correlation_average_dkl = []  # not filled in this cell

for x in correlation_target_data_array[:4500]:
    image_name_class = x[0].split('_/')  # name -> [0]; class -> [1]
    # The sorted target values depend only on the target image, so sort once
    # per target instead of once per source image.
    items = np.sort(x[1])
    compare_min_max = []

    for source_entry in correlation_source_data_d['source_images']:
        for k, v in source_entry.items():
            new_v = np.array(v).flatten()
            # Keep the last len(new_v) sorted target values so both vectors
            # have the same length (the largest values for a 1-D array).
            new_x = items[-len(new_v):].flatten()

            # NOTE(review): rel_entr runs on the raw feature values. Earlier
            # code also computed softMax(0.5, ...) of both vectors but never
            # used the results; confirm whether the normalised distributions
            # were meant to be compared instead.
            kdl = rel_entr(new_x, new_v)

            compare_min_max.append((image_name_class[0], image_name_class[1], k, np.max(kdl)))

    correlation_All_Images.append(compare_min_max)
correlation_source_data.close()

In [14]:
### homogeneity comparison code
# For each of the first 4500 target images, compare its homogeneity GLCM
# values against every source image and store, per (target, source) pair, the
# largest element-wise relative-entropy term.
homogeneity_All_Images = []
homogeneity_average_dkl = []  # not filled in this cell

for x in homogeneity_target_data_array[:4500]:
    image_name_class = x[0].split('_/')  # name -> [0]; class -> [1]
    # The sorted target values depend only on the target image, so sort once
    # per target instead of once per source image.
    items = np.sort(x[1])
    compare_min_max = []

    for source_entry in homogeneity_source_data_d['source_images']:
        for k, v in source_entry.items():
            new_v = np.array(v).flatten()
            # Keep the last len(new_v) sorted target values so both vectors
            # have the same length (the largest values for a 1-D array).
            new_x = items[-len(new_v):].flatten()

            # NOTE(review): rel_entr runs on the raw feature values. Earlier
            # code also computed softMax(0.5, ...) of both vectors but never
            # used the results; confirm whether the normalised distributions
            # were meant to be compared instead.
            kdl = rel_entr(new_x, new_v)

            compare_min_max.append((image_name_class[0], image_name_class[1], k, np.max(kdl)))

    homogeneity_All_Images.append(compare_min_max)
homogeneity_source_data.close()

In [15]:
# Score used per target image: field 3 of entry 1 in its comparison list.
energy_all_nums = [per_image[1][3] for per_image in energy_All_Images]

# Accumulate in list order, starting from 0.
energy_sum_dkls = 0
for score in energy_all_nums:
    energy_sum_dkls += score

energy_average_dkl_v = energy_sum_dkls / len(energy_All_Images)
energy_median_v = statistics.median(energy_all_nums)

# Partition entry 1 of every comparison list by its score relative to the mean:
# strictly below the mean vs. at or above it.
energy_below_avrg = [
    per_image[1] for per_image in energy_All_Images
    if per_image[1][3] < energy_average_dkl_v
]
energy_above_avrg = [
    per_image[1] for per_image in energy_All_Images
    if per_image[1][3] >= energy_average_dkl_v
]

In [16]:
# Number of energy entries whose score is at or above the mean.
print(len(energy_above_avrg))

2044


In [17]:
# Score used per target image: field 3 of entry 1 in its comparison list.
correlation_all_nums = [per_image[1][3] for per_image in correlation_All_Images]

# Accumulate in list order, starting from 0.
correlation_sum_dkls = 0
for score in correlation_all_nums:
    correlation_sum_dkls += score

correlation_average_dkl_v = correlation_sum_dkls / len(correlation_All_Images)
correlation_median_v = statistics.median(correlation_all_nums)

# Partition entry 1 of every comparison list by its score relative to the mean:
# strictly below the mean vs. at or above it.
correlation_below_avrg = [
    per_image[1] for per_image in correlation_All_Images
    if per_image[1][3] < correlation_average_dkl_v
]
correlation_above_avrg = [
    per_image[1] for per_image in correlation_All_Images
    if per_image[1][3] >= correlation_average_dkl_v
]

In [18]:
# Score used per target image: field 3 of entry 1 in its comparison list.
homogeneity_all_nums = [per_image[1][3] for per_image in homogeneity_All_Images]

# Accumulate in list order, starting from 0.
homogeneity_sum_dkls = 0
for score in homogeneity_all_nums:
    homogeneity_sum_dkls += score

homogeneity_average_dkl_v = homogeneity_sum_dkls / len(homogeneity_All_Images)
homogeneity_median_v = statistics.median(homogeneity_all_nums)

# Partition entry 1 of every comparison list by its score relative to the mean:
# strictly below the mean vs. at or above it.
homogeneity_below_avrg = [
    per_image[1] for per_image in homogeneity_All_Images
    if per_image[1][3] < homogeneity_average_dkl_v
]
homogeneity_above_avrg = [
    per_image[1] for per_image in homogeneity_All_Images
    if per_image[1][3] >= homogeneity_average_dkl_v
]

In [19]:
  
# Convert every above-average energy record into a ("<class>/<name>", "<class>") pair.
energy_above_other = [
    (record[1] + "/" + record[0], record[1])
    for record in energy_above_avrg
]


In [20]:
# Convert every above-average correlation record into a ("<class>/<name>", "<class>") pair.
correlation_above_other = [
    (record[1] + "/" + record[0], record[1])
    for record in correlation_above_avrg
]




In [21]:
# Number of correlation entries whose score is at or above the mean.
print(len(correlation_above_avrg))

2146


In [22]:
# Convert every above-average homogeneity record into a ("<class>/<name>", "<class>") pair.
homogeneity_above_other = [
    (record[1] + "/" + record[0], record[1])
    for record in homogeneity_above_avrg
]

In [23]:
# Dump the above-average energy pairs to CSV: header row, then one row per pair.
with open('measures/DKL/DenseNet169/mnist_test_energy_above_glcm_above.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["file", "label"])  # header
    writer.writerows(energy_above_other)

In [24]:
# Dump the above-average correlation pairs to CSV: header row, then one row per pair.
with open('measures/DKL/DenseNet169/mnist_test_correlation_above_glcm_above.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["file", "label"])  # header
    writer.writerows(correlation_above_other)
        


In [25]:

# Dump the above-average homogeneity pairs to CSV: header row, then one row per pair.
with open('measures/DKL/DenseNet169/mnist_test_homogeneity_above_glcm_above.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["file", "label"])  # header
    writer.writerows(homogeneity_above_other)

In [26]:

    
# Convert every below-average energy record into a ("<class>/<name>", "<class>") pair.
energy_below_other = [
    (record[1] + "/" + record[0], record[1])
    for record in energy_below_avrg
]
    



In [27]:
# Convert every below-average correlation record into a ("<class>/<name>", "<class>") pair.
correlation_below_other = [
    (record[1] + "/" + record[0], record[1])
    for record in correlation_below_avrg
]
    


In [28]:
    
# Convert every below-average homogeneity record into a ("<class>/<name>", "<class>") pair.
homogeneity_below_other = [
    (record[1] + "/" + record[0], record[1])
    for record in homogeneity_below_avrg
]

In [29]:

        
# Dump the below-average energy pairs to CSV: header row, then one row per pair.
with open('measures/DKL/DenseNet169/mnist_test_energy_above_glcm_below.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["file", "label"])  # header
    writer.writerows(energy_below_other)
        



In [30]:
# Dump the below-average correlation pairs to CSV: header row, then one row per pair.
with open('measures/DKL/DenseNet169/mnist_test_correlation_above_glcm_below.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["file", "label"])  # header
    writer.writerows(correlation_below_other)
 

In [31]:
       

# Dump the below-average homogeneity pairs to CSV: header row, then one row per pair.
with open('measures/DKL/DenseNet169/mnist_test_homogeneity_above_glcm_below.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["file", "label"])  # header
    writer.writerows(homogeneity_below_other)