In [7]:
from os import walk
from PIL import Image
import numpy as np
from tqdm import tqdm

EPSILON = 10**(-20)

In [8]:
img1 = np.asarray(Image.open('dataset/support_96/Acadian_Flycatcher_0016_887710060.jpg'))
img2 = np.asarray(Image.open('dataset/query_1/Acadian_Flycatcher_0016_887710060.jpg'))
img3 = np.asarray(Image.open('dataset/query_1/American_Crow_0024_2618947526.jpg'))


In [9]:
def get_histogram_by_grids(img, interval=1, grid_count=1):
    x, y = img.shape[0], img.shape[1]
    grid_size = img.shape[0]//grid_count
    histogram_list = np.zeros(0)
    for i in range(grid_count):
        for j in range(grid_count):
            img_slice = img[i*grid_size:(i+1)*grid_size,j*grid_size:(j+1)*grid_size]
            slice_hist = per_channel_histogram(img_slice, interval)
            normalized_hist = {'red':normalize_histogram(slice_hist['red']), 'green':normalize_histogram(slice_hist['green']), 'blue':normalize_histogram(slice_hist['blue'])}
            histogram_list = np.append(histogram_list, normalized_hist)
    return histogram_list


In [10]:
def per_channel_histogram(img, interval=1):
    x , y = img.shape[0], img.shape[1]
    
    histograms = {
        'red':np.zeros(256, np.int16), 
        'green':np.zeros(256, np.int16), 
        'blue':np.zeros(256, np.int16)
        }

    for i in range(x):
        for j in range(y):
            histograms['red'][img[i][j][0]] += 1
            histograms['green'][img[i][j][1]] += 1
            histograms['blue'][img[i][j][2]] += 1

    if interval == 1:
        return histograms
    
    bin_num = 256 // interval
    
    new_histograms = {
        'red':np.zeros(bin_num, np.int16), 
        'green':np.zeros(bin_num, np.int16), 
        'blue':np.zeros(bin_num, np.int16)
        }

    for i in range(bin_num):
        for j in range(interval):
            new_histograms['red'][i] += histograms['red'][i * interval + j]
            new_histograms['green'][i] += histograms['green'][i * interval + j]
            new_histograms['blue'][i] += histograms['blue'][i * interval + j]

    return new_histograms
    

In [11]:
def normalize_histogram(h):
    norm = np.sum(h)
    normalized_hist = np.divide(h,np.sum(h))
    normalized_hist[normalized_hist==0] = EPSILON / norm
    return normalized_hist

def kl_divergence(query_hist, support_hist):
    division = np.divide(query_hist, support_hist)
    log_div = np.log2(division)
    h_mult = np.multiply(query_hist, log_div)
    divergence = np.sum(h_mult)
    return divergence

In [12]:
def kl_divergence_by_grids(query_hist_list, support_hist_list):
    divergence_array = np.zeros(0)
    hist_count = query_hist_list.shape[0]
    for qh,sh in zip(query_hist_list, support_hist_list):
        divergence = kl_divergence(qh['red'], sh['red']) + kl_divergence(qh['green'], sh['green']) + kl_divergence(qh['blue'], sh['blue'])
        divergence_array = np.append(divergence_array, divergence)
    return np.average(divergence_array)

In [13]:
test = kl_divergence_by_grids(get_histogram_by_grids(img1, grid_count=12), get_histogram_by_grids(img2, grid_count=12))
print(test)

95.6115735352295


In [9]:
hist1 = per_channel_histogram(img1)
hist2 = per_channel_histogram(img2)
red = kl_divergence(normalize_histogram(hist1['red']), normalize_histogram(hist2['red']))
print(red)

0.3801293466459518


In [14]:
# Read Images
support_filenames = next(walk('dataset/support_96'), (None, None, []))[2]
query_1_filenames = next(walk('dataset/query_1'), (None, None, []))[2]
query_2_filenames = next(walk('dataset/query_2'), (None, None, []))[2]
query_3_filenames = next(walk('dataset/query_3'), (None, None, []))[2]

support_images = []
for filename in support_filenames:
    with Image.open('dataset/support_96/{}'.format(filename)) as image:
        support_images.append((filename, np.asarray(image)))

query_1_images = []
for filename in query_1_filenames:
    with Image.open('dataset/query_1/{}'.format(filename)) as image:
        query_1_images.append((filename, np.asarray(image)))
query_2_images = []
for filename in query_2_filenames:
    with Image.open('dataset/query_2/{}'.format(filename)) as image:
        query_2_images.append((filename, np.asarray(image)))

query_3_images = []
for filename in query_3_filenames:
    with Image.open('dataset/query_3/{}'.format(filename)) as image:
        query_3_images.append((filename, np.asarray(image)))

In [15]:
grid_counts = [12, 16, 24, 48]
histogram_list_by_grids = {}
for grid_count in grid_counts:
    support_query_1_hist_lists = {'support_histograms':[], 'query_histograms':[]}
    support_query_2_hist_lists = {'support_histograms':[], 'query_histograms':[]}
    support_query_3_hist_lists = {'support_histograms':[], 'query_histograms':[]}
    # get histograms of query 1
    interval = 8 # best for query 1
    # for name, img in tqdm(support_images):
    #     hist_list = get_histogram_by_grids(img, interval=interval, grid_count=grid_count)
    #     support_query_1_hist_lists['support_histograms'].append((name, hist_list))
    
    # for name, img in tqdm(query_1_images):
    #     hist_list = get_histogram_by_grids(img, interval=interval, grid_count=grid_count)
    #     support_query_1_hist_lists['query_histograms'].append((name, hist_list))
    
    # # get histograms of query 2
    # interval = 16 # best for query 2
    # for name, img in tqdm(support_images):
    #     hist_list = get_histogram_by_grids(img, interval=interval, grid_count=grid_count)
    #     support_query_2_hist_lists['support_histograms'].append((name, hist_list))
    
    # for name, img in tqdm(query_2_images):
    #     hist_list = get_histogram_by_grids(img, interval=interval, grid_count=grid_count)
    #     support_query_2_hist_lists['query_histograms'].append((name, hist_list))
    
    # get histograms of query 3
    interval = 32 # best for query 3
    for name, img in tqdm(support_images):
        hist_list = get_histogram_by_grids(img, interval=interval, grid_count=grid_count)
        support_query_3_hist_lists['support_histograms'].append((name, hist_list))
    
    for name, img in tqdm(query_3_images):
        hist_list = get_histogram_by_grids(img, interval=interval, grid_count=grid_count)
        support_query_3_hist_lists['query_histograms'].append((name, hist_list))

    histogram_list_by_grids[grid_count] = {'q1':support_query_1_hist_lists, 'q2':support_query_2_hist_lists, 'q3':support_query_3_hist_lists}


100%|██████████| 200/200 [00:27<00:00,  7.38it/s]
100%|██████████| 200/200 [00:24<00:00,  8.27it/s]
100%|██████████| 200/200 [00:32<00:00,  6.10it/s]
100%|██████████| 200/200 [00:28<00:00,  7.10it/s]
100%|██████████| 200/200 [00:46<00:00,  4.28it/s]
100%|██████████| 200/200 [00:48<00:00,  4.14it/s]
100%|██████████| 200/200 [03:18<00:00,  1.01it/s]
100%|██████████| 200/200 [02:59<00:00,  1.12it/s]


In [36]:
# query 1 spatial grids
query_1_results_by_grids = {}
for grid_count in grid_counts:
    correct_guesses = 0
    support_hist_list = histogram_list_by_grids[grid_count]['q1']['support_histograms']
    query_hist_list = histogram_list_by_grids[grid_count]['q1']['query_histograms']
    for name, support_hist in tqdm(support_hist_list):
        min_divergence = 999999
        for q_name, query_hist in query_hist_list:
            divergence = kl_divergence_by_grids(query_hist, support_hist)
            if divergence < min_divergence:
                min_divergence = divergence
                result = {'support': name, 'query': q_name, 'divergence': divergence}
        if result['support'] == result['query']:
            correct_guesses += 1
    query_1_results_by_grids[f"Grid_count: {grid_count}"] = correct_guesses / 200
                
    

100%|██████████| 200/200 [02:43<00:00,  1.22it/s]
100%|██████████| 200/200 [04:43<00:00,  1.42s/it]
100%|██████████| 200/200 [09:46<00:00,  2.93s/it]
100%|██████████| 200/200 [49:10<00:00, 14.75s/it]


In [None]:
print(query_1_results_by_grids)

In [38]:
# query 2 spatial grids
query_2_results_by_grids = {}
for grid_count in grid_counts:
    correct_guesses = 0
    support_hist_list = histogram_list_by_grids[grid_count]['q2']['support_histograms']
    query_hist_list = histogram_list_by_grids[grid_count]['q2']['query_histograms']
    for name, support_hist in tqdm(support_hist_list):
        min_divergence = 999999
        for q_name, query_hist in query_hist_list:
            divergence = kl_divergence_by_grids(query_hist, support_hist)
            if divergence < min_divergence:
                min_divergence = divergence
                result = {'support': name, 'query': q_name, 'divergence': divergence}
        if result['support'] == result['query']:
            correct_guesses += 1
    query_2_results_by_grids[f"Grid_count: {grid_count}"] = correct_guesses / 200

print(query_2_results_by_grids)

100%|██████████| 200/200 [01:49<00:00,  1.83it/s]
100%|██████████| 200/200 [03:13<00:00,  1.04it/s]
100%|██████████| 200/200 [08:15<00:00,  2.48s/it]
100%|██████████| 200/200 [39:57<00:00, 11.99s/it]

{'Grid_count: 12': 0.215, 'Grid_count: 16': 0.22, 'Grid_count: 24': 0.25, 'Grid_count: 48': 0.26}





In [16]:
# query 3 spatial grids
query_3_results_by_grids = {}
print("wuhuu")
for grid_count in grid_counts:
    correct_guesses = 0
    support_hist_list = histogram_list_by_grids[grid_count]['q3']['support_histograms']
    query_hist_list = histogram_list_by_grids[grid_count]['q3']['query_histograms']
    for name, support_hist in tqdm(support_hist_list):
        min_divergence = 999999
        for q_name, query_hist in query_hist_list:
            divergence = kl_divergence_by_grids(query_hist, support_hist)
            if divergence < min_divergence:
                min_divergence = divergence
                result = {'support': name, 'query': q_name, 'divergence': divergence}
        if result['support'] == result['query']:
            correct_guesses += 1
    query_3_results_by_grids[f"Grid_count: {grid_count}"] = correct_guesses / 200

print(query_3_results_by_grids)

wuhuu


100%|██████████| 200/200 [02:35<00:00,  1.29it/s]
100%|██████████| 200/200 [04:40<00:00,  1.40s/it]
100%|██████████| 200/200 [10:20<00:00,  3.10s/it]
100%|██████████| 200/200 [36:28<00:00, 10.94s/it]

{'Grid_count: 12': 0.405, 'Grid_count: 16': 0.445, 'Grid_count: 24': 0.495, 'Grid_count: 48': 0.56}





In [72]:
# Get Histograms into dict
intervals = [4, 8, 16, 32, 64]
interval_histogram_dict = {}
for q in tqdm(intervals):      
    support_histograms = []
    query_1_histograms = []
    query_2_histograms = []
    query_3_histograms = []
    for name, img in support_images:
        hist = per_channel_histogram(img, q)
        support_histograms.append((name, (normalize_histogram(hist['red']),normalize_histogram(hist['green']), normalize_histogram(hist['blue']))))
    
    for name, img in query_1_images:
        hist = per_channel_histogram(img, q)
        query_1_histograms.append((name, (normalize_histogram(hist['red']),normalize_histogram(hist['green']), normalize_histogram(hist['blue']))))
    
    for name, img in query_2_images:
        hist = per_channel_histogram(img, q)
        query_2_histograms.append((name, (normalize_histogram(hist['red']),normalize_histogram(hist['green']), normalize_histogram(hist['blue']))))
    
    for name, img in query_3_images:
        hist = per_channel_histogram(img, q)
        query_3_histograms.append((name, (normalize_histogram(hist['red']),normalize_histogram(hist['green']), normalize_histogram(hist['blue']))))
    
    interval_histogram_dict[q] = {'sup': support_histograms, 'query_1':query_1_histograms, 'query_2':query_2_histograms, 'query_3':query_3_histograms}

100%|██████████| 5/5 [04:27<00:00, 53.55s/it]


In [73]:
#q3 results
q3_min_results = []
intervals = [4, 8, 16, 32, 64]
q3_accuracies = []
for q in interval_histogram_dict.keys():
    correct_guesses = 0
    for name, hist in tqdm(interval_histogram_dict[q]['sup']):
        min_divergence = 99
        for q_name, q_hist in interval_histogram_dict[q]['query_3']:
            red_divergence = kl_divergence(q_hist[0], hist[0])
            green_divergence = kl_divergence(q_hist[1], hist[1])
            blue_divergence = kl_divergence(q_hist[2], hist[2])
            divergence = (red_divergence + blue_divergence + blue_divergence) / 3
            if divergence < min_divergence:
                min_divergence = divergence
                result = {'support': name, 'query': q_name, 'divergence': divergence}
        q3_min_results.append(result)
        if result['support'] == result['query']:
            correct_guesses += 1
    q3_accuracies.append({'interval': q, 'acc': correct_guesses / 200})
    

100%|██████████| 200/200 [00:01<00:00, 118.36it/s]
100%|██████████| 200/200 [00:00<00:00, 243.31it/s]
100%|██████████| 200/200 [00:00<00:00, 258.59it/s]
100%|██████████| 200/200 [00:00<00:00, 250.59it/s]
100%|██████████| 200/200 [00:00<00:00, 261.16it/s]


In [74]:
q3_accuracies

[{'interval': 4, 'acc': 0.2},
 {'interval': 8, 'acc': 0.2},
 {'interval': 16, 'acc': 0.21},
 {'interval': 32, 'acc': 0.21},
 {'interval': 64, 'acc': 0.19}]

In [75]:
#q2 results
q2_min_results = []
intervals = [4, 8, 16, 32, 64]
q2_accuracies = []
for q in interval_histogram_dict.keys():
    correct_guesses = 0
    for name, hist in tqdm(interval_histogram_dict[q]['sup']):
        min_divergence = 99
        for q_name, q_hist in interval_histogram_dict[q]['query_2']:
            red_divergence = kl_divergence(q_hist[0], hist[0])
            green_divergence = kl_divergence(q_hist[1], hist[1])
            blue_divergence = kl_divergence(q_hist[2], hist[2])
            divergence = (red_divergence + blue_divergence + blue_divergence) / 3
            if divergence < min_divergence:
                min_divergence = divergence
                result = {'support': name, 'query': q_name, 'divergence': divergence}
        q2_min_results.append(result)
        if result['support'] == result['query']:
            correct_guesses += 1
    q2_accuracies.append({'interval': q, 'acc': correct_guesses / 200})
    

100%|██████████| 200/200 [00:00<00:00, 221.70it/s]
100%|██████████| 200/200 [00:00<00:00, 246.35it/s]
100%|██████████| 200/200 [00:00<00:00, 257.68it/s]
100%|██████████| 200/200 [00:00<00:00, 262.80it/s]
100%|██████████| 200/200 [00:00<00:00, 240.06it/s]


In [76]:
q2_accuracies

[{'interval': 4, 'acc': 1.0},
 {'interval': 8, 'acc': 1.0},
 {'interval': 16, 'acc': 1.0},
 {'interval': 32, 'acc': 1.0},
 {'interval': 64, 'acc': 1.0}]

In [77]:
#q1 results
q1_min_results = []
intervals = [4, 8, 16, 32, 64]
q1_accuracies = []
for q in interval_histogram_dict.keys():
    correct_guesses = 0
    for name, hist in tqdm(interval_histogram_dict[q]['sup']):
        min_divergence = 99
        for q_name, q_hist in interval_histogram_dict[q]['query_1']:
            red_divergence = kl_divergence(q_hist[0], hist[0])
            green_divergence = kl_divergence(q_hist[1], hist[1])
            blue_divergence = kl_divergence(q_hist[2], hist[2])
            divergence = (red_divergence + blue_divergence + blue_divergence) / 3
            if divergence < min_divergence:
                min_divergence = divergence
                result = {'support': name, 'query': q_name, 'divergence': divergence}
        q1_min_results.append(result)
        if result['support'] == result['query']:
            correct_guesses += 1
    q1_accuracies.append({'interval': q, 'acc': correct_guesses / 200})
    

100%|██████████| 200/200 [00:00<00:00, 223.70it/s]
100%|██████████| 200/200 [00:00<00:00, 234.45it/s]
100%|██████████| 200/200 [00:00<00:00, 248.62it/s]
100%|██████████| 200/200 [00:00<00:00, 248.41it/s]
100%|██████████| 200/200 [00:01<00:00, 143.00it/s]


In [78]:
q1_accuracies

[{'interval': 4, 'acc': 0.98},
 {'interval': 8, 'acc': 0.98},
 {'interval': 16, 'acc': 0.97},
 {'interval': 32, 'acc': 0.96},
 {'interval': 64, 'acc': 0.9}]