Отберём из тестового датасета 100 картинок, удовлетворяющих следующему условию:

- на картинке больше 1 объекта

Рассматривать будем только объекты классов wood, water и peak (остальные классы игнорируем — считаем, что их нет).

In [32]:
import pickle
import os
import numpy as np

In [23]:
import sys
sys.path.append('..')

from osm_object import Image_OSM_object

from circle_diagram import calculate_sectors_count, create_circle_diagram
from circle_diagram import Image_circle_diagrams

In [2]:
def read_pickle(filepath):
    """Load and return the object pickled in the file at ``filepath``.

    NOTE: unpickling can execute arbitrary code, so this must only be used
    on trusted files.
    """
    with open(filepath, 'rb') as source:
        loaded = pickle.load(source)
    return loaded
    
def dump_pickle(filepath, obj):
    """Pickle ``obj`` into the file at ``filepath``, overwriting any existing file."""
    with open(filepath, 'wb') as sink:
        pickle.Pickler(sink).dump(obj)

In [6]:
# Load the train/val/test split and keep the list stored under its 'test' key.
train_val_test = read_pickle("../dataset/train_val_test_split.pickle")
test_ids = train_val_test['test']

# Folder with the pickled OSM objects; files are looked up as <img_id>.pickle.
osm_objects_path = '../dataset/osm_objects/'

In [10]:
img_osm_object = read_pickle(os.path.join(osm_objects_path, test_ids[0] + '.pickle'))

In [38]:
def get_all_tags(img_osm_object):
    """Return the tags of the image's OSM objects, in order, skipping 'ridge' and 'valley'.

    One tag is returned per remaining object, so duplicates are kept.
    """
    ignored_tags = ('ridge', 'valley')
    return [
        osm_object.tag
        for osm_object in img_osm_object.osm_objects
        if osm_object.tag not in ignored_tags
    ]

In [39]:
print(get_all_tags(img_osm_object))

['water', 'water', 'wood', 'wood']


In [63]:
# Collect the first 100 test images that have at least two objects of the
# considered classes; every rejected image is printed and counted.
result_ids = []

i = 0

# Rejected images among the first 100 scanned / among the ones scanned later.
bad_count_before_100 = 0
bad_count_after_100 = 0

# The bound on i stops the scan cleanly if the test split runs out before
# 100 suitable images are found (an unbounded scan raises IndexError there).
while len(result_ids) < 100 and i < len(test_ids):
    img_id = test_ids[i]

    img_osm_object = read_pickle(os.path.join(osm_objects_path, img_id + '.pickle'))
    tags = get_all_tags(img_osm_object)

    if len(tags) < 2:
        print(f"{i}:, image: {img_id}, tags: {tags}")
        if i < 100:
            bad_count_before_100 += 1
        else:
            bad_count_after_100 += 1
    else:
        result_ids.append(img_id)

    i += 1

if len(result_ids) < 100:
    print(f"Only {len(result_ids)} suitable images found among {len(test_ids)} test images")

2:, image: 3138, tags: []
5:, image: 10798, tags: []
8:, image: 15300, tags: []
10:, image: 11410, tags: []
12:, image: 2041, tags: []
14:, image: 14119, tags: ['peak']
15:, image: 1006, tags: ['wood']
26:, image: 17506, tags: ['peak']
29:, image: 9463, tags: ['water']
31:, image: 3995, tags: ['wood']
33:, image: 18004, tags: ['water']
34:, image: 4221, tags: []
43:, image: 21799, tags: ['water']
50:, image: 18478, tags: ['peak']
51:, image: 6413, tags: []
52:, image: 3595, tags: ['wood']
55:, image: 12939, tags: ['peak']
58:, image: 14014, tags: ['peak']
64:, image: 18344, tags: []
65:, image: 8128, tags: []
67:, image: 5465, tags: []
68:, image: 12265, tags: ['water']
70:, image: 9860, tags: ['peak']
71:, image: 16906, tags: []
72:, image: 1492, tags: []
75:, image: 2342, tags: ['peak']
77:, image: 11515, tags: []
78:, image: 7496, tags: []
80:, image: 17730, tags: []
81:, image: 7126, tags: []
83:, image: 9234, tags: []
85:, image: 20005, tags: []
89:, image: 14428, tags: ['peak']
9

In [64]:
len(result_ids)

100

In [65]:
print(bad_count_before_100)
print(bad_count_after_100)

35
19


**Окей, теперь повторим схему с оценкой точности для этих более качественных картинок**

In [43]:
# Agent positions to evaluate; iterated one by one in is_in_ks_for_one_image.
points = read_pickle('../dataset/100_random_points.pickle')
# Lookup tables indexed by agent point. Presumably each maps a point to the
# center of its grid cell (48 cells, 6x8 / 12 cells, 3x4), judging by the
# file names — TODO confirm.
points_to_centers_6_8 = read_pickle('../dataset/100_points_to_cell_centers_48cells_6x8.pickle')
points_to_centers_3_4 = read_pickle('../dataset/100_points_to_cell_centers_12cells_3x4.pickle')

In [44]:
# Locations of the pickled OSM objects: predicted ones live in a sub-folder
# of the predictions folder, original ones in the dataset folder.
predictions_folder = '../dataset/predictions/'
predicts_osm_folder = 'predicts_osm'
orig_osm_objects_path = '../dataset/osm_objects/'

predicts_osm_objects_path = os.path.join(predictions_folder, predicts_osm_folder)

In [45]:
# Grid resolution passed to Image_circle_diagrams. Presumably 8 blocks along
# the bigger image side gives the 6x8 = 48-cell grid — TODO confirm.
bigger_side_blocks_count = 8
# Number of circle-diagram sectors derived from the grid resolution.
sectors_count = calculate_sectors_count(bigger_side_blocks_count)

In [46]:
def calculate_cells_probs(img_cds, agent_cd):
    """Convert diagram penalties into a normalized score per grid cell.

    Only the fast diagrams are used. For every index of
    ``img_cds.center_points`` the penalty is
    ``img_cds.fast_diagrams[i].dist_to(agent_cd.fast_diagram)``. A lower
    penalty yields a higher score, and the scores are scaled to sum to 1.

    Returns:
        A numpy array with one value per center point.
    """
    cell_count = len(img_cds.center_points)
    distances = np.asarray([
        img_cds.fast_diagrams[cell].dist_to(agent_cd.fast_diagram)
        for cell in range(cell_count)
    ])

    # The 1e-8 offset keeps the worst cell from getting exactly zero.
    scores = distances.max() - distances + 1e-8
    return scores / scores.sum()

In [47]:
def find_center_point_id(center_points, agent_point):
    """Return the index of the first entry of ``center_points`` equal to ``agent_point``.

    Returns ``None`` when no entry matches.
    """
    matches = (
        idx
        for idx, center in enumerate(center_points)
        if center == agent_point
    )
    return next(matches, None)

In [48]:
def is_in_one_of_max_k(cell_probs, center_point_id, max_k):
    """Report, for k = 1..max_k, whether the true cell is among the k best cells.

    Cells are ranked by descending ``cell_probs``. Element ``j`` of the
    result is True when ``center_point_id`` is among the ``j + 1``
    highest-ranked cells, so the flags switch from False to True at the
    rank of the true cell and stay True afterwards.

    Args:
        cell_probs: per-cell scores; a higher score means a better rank.
        center_point_id: index of the true cell. A value that matches no
            cell (including ``None``) yields all-False flags.
        max_k: number of flags to produce.

    Returns:
        A list of ``max_k`` booleans (empty when ``max_k <= 0``).
    """
    ranking = np.argsort(cell_probs)[::-1]

    # Scan only the ranks that exist: max_k may exceed the number of cells,
    # and indexing past the end of the ranking would raise IndexError.
    for rank, cell_id in enumerate(ranking[:max(max_k, 0)]):
        if cell_id == center_point_id:
            return [False] * rank + [True] * (max_k - rank)

    return [False] * max_k

In [49]:
def is_in_ks_for_one_image(img_id, max_k=48):
    """Evaluate cell localization on one image for every agent point in ``points``.

    The per-cell diagrams are built from the predicted OSM objects of the
    image. For each agent point a diagram is built from the original OSM
    objects and compared against all cells, and the rank of the true cell
    is turned into top-k flags by ``is_in_one_of_max_k``.

    Args:
        img_id: image id; the pickles are looked up as ``<img_id>.pickle``.
        max_k: number of top-k flags per agent point.

    Returns:
        A list with one list of ``max_k`` booleans per agent point.
    """
    pickle_name = img_id + '.pickle'
    predicted_path = os.path.join(predicts_osm_objects_path, pickle_name)
    original_path = os.path.join(orig_osm_objects_path, pickle_name)

    # Force a 3-channel shape on the predicted object before building the diagrams.
    img_osm = read_pickle(predicted_path)
    img_osm.img_cutted_shape = (img_osm.img_cutted_shape[0], img_osm.img_cutted_shape[1], 3)
    img_cds = Image_circle_diagrams(img_osm, bigger_side_blocks_count, sectors_count, with_fast=True)

    hits_per_point = []

    for agent_point in points:
        # NOTE(review): the original pickle is re-read for every agent point.
        # Presumably this gives create_circle_diagram a fresh object; confirm
        # whether it mutates its input before hoisting the read.
        agent_osm = read_pickle(original_path)
        # The agent's object takes the shape of the predicted image.
        agent_osm.img_cutted_shape = (img_osm.img_cutted_shape[0], img_osm.img_cutted_shape[1], 3)
        agent_cd = create_circle_diagram(sectors_count, agent_point, agent_osm)
        agent_cd.make_fast()

        cell_probs = calculate_cells_probs(img_cds, agent_cd)

        # NOTE(review): the 6x8 lookup table is hard-coded, while the grid comes
        # from bigger_side_blocks_count. The [::-1] presumably swaps the
        # coordinate order to match img_cds.center_points — confirm both.
        true_cell_id = find_center_point_id(img_cds.center_points, points_to_centers_6_8[agent_point][::-1])

        hits_per_point.append(is_in_one_of_max_k(cell_probs, true_cell_id, max_k))

    return hits_per_point

In [50]:
%%time
is_in_k_one_image = is_in_ks_for_one_image(result_ids[0])

CPU times: user 26.4 s, sys: 20 ms, total: 26.4 s
Wall time: 26.4 s


In [51]:
# Top-1 accuracy on one image: the share of agent points whose true cell is
# ranked first (column 0). The same expression works for many images.
np.array(is_in_k_one_image)[:, 0].sum() / len(is_in_k_one_image)

0.01

In [None]:
%%time
is_in_k = []

# Accumulate the per-point top-k flags of the first 10 selected images.
for img_id in result_ids[:10]:
    is_in_k.extend(is_in_ks_for_one_image(img_id))

    # Progress report whenever the running total is a multiple of 1000.
    if not len(is_in_k) % 1000:
        print(len(is_in_k))
        

In [69]:
# Top-1 accuracy over all agent points accumulated in is_in_k: the share of
# points whose true cell is ranked first (column 0).
np.array(is_in_k)[:, 0].sum() / len(is_in_k)

0.026

In [70]:
1 / 48

0.020833333333333332

Ничего не поменялось...

In [56]:
len(set(['a', 'a', 'a']))

1

**Окей, тогда выкинем ещё и все случаи, когда на картинке все объекты одного типа**

In [71]:
# Collect the first 100 test images that contain objects of at least two
# different classes; every rejected image is printed and counted.
result_ids = []

i = 0

# Rejected images among the first 100 scanned / among the ones scanned later.
bad_count_before_100 = 0
bad_count_after_100 = 0

# The bound on i stops the scan cleanly if the test split runs out before
# 100 suitable images are found (an unbounded scan raises IndexError there).
while len(result_ids) < 100 and i < len(test_ids):
    img_id = test_ids[i]

    img_osm_object = read_pickle(os.path.join(osm_objects_path, img_id + '.pickle'))
    tags = get_all_tags(img_osm_object)

    if len(set(tags)) < 2:
        print(f"{i}:, image: {img_id}, tags: {tags}")
        if i < 100:
            bad_count_before_100 += 1
        else:
            bad_count_after_100 += 1
    else:
        result_ids.append(img_id)

    i += 1

if len(result_ids) < 100:
    print(f"Only {len(result_ids)} suitable images found among {len(test_ids)} test images")

0:, image: 20327, tags: ['peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak']
2:, image: 3138, tags: []
3:, image: 7130, tags: ['wood', 'wood', 'wood']
4:, image: 10196, tags: ['peak', 'peak', 'peak', 'peak']
5:, image: 10798, tags: []
6:, image: 11093, tags: ['peak', 'peak']
8:, image: 15300, tags: []
10:, image: 11410, tags: []
11:, image: 16382, tags: ['peak', 'peak', 'peak', 'peak']
12:, image: 2041, tags: []
14:, image: 14119, tags: ['peak']
15:, image: 1006, tags: ['wood']
16:, image: 3800, tags: ['wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood']
18:, image: 16832, tags: ['peak', 'peak', 'peak', 'peak']
19:, image: 14651, tags: ['peak', 'peak', 'peak', 'pea

In [72]:
print(bad_count_before_100)
print(bad_count_after_100)

77
193


In [73]:
%%time
is_in_k_one_image = is_in_ks_for_one_image(result_ids[0])

CPU times: user 23 s, sys: 16 ms, total: 23 s
Wall time: 23 s


In [74]:
# Top-1 accuracy on one image: the share of agent points whose true cell is
# ranked first (column 0). The same expression works for many images.
np.array(is_in_k_one_image)[:, 0].sum() / len(is_in_k_one_image)

0.03

In [75]:
%%time
is_in_k = []

# Accumulate the per-point top-k flags of the first 10 selected images.
for img_id in result_ids[:10]:
    is_in_k.extend(is_in_ks_for_one_image(img_id))

    # Progress report whenever the running total is a multiple of 1000.
    if not len(is_in_k) % 1000:
        print(len(is_in_k))
        

1000
CPU times: user 4min 35s, sys: 88 ms, total: 4min 35s
Wall time: 4min 35s


In [76]:
# Top-1 accuracy over all agent points accumulated in is_in_k: the share of
# points whose true cell is ranked first (column 0).
np.array(is_in_k)[:, 0].sum() / len(is_in_k)

0.025

In [77]:
1 / 48

0.020833333333333332

По сравнению с прошлой ситуацией тоже ничего особенно не поменялось.

**Хорошо, рассмотрим теперь случаи, когда на картинке не более N объектов, например, 5**

In [81]:
# Collect the first 100 test images that contain objects of at least two
# different classes and at most 5 objects in total; every rejected image is
# printed and counted.
result_ids = []

i = 0

# Rejected images among the first 100 scanned / among the ones scanned later.
bad_count_before_100 = 0
bad_count_after_100 = 0

# The bound on i stops the scan cleanly if the test split runs out before
# 100 suitable images are found (an unbounded scan raises IndexError there).
while len(result_ids) < 100 and i < len(test_ids):
    img_id = test_ids[i]

    img_osm_object = read_pickle(os.path.join(osm_objects_path, img_id + '.pickle'))
    tags = get_all_tags(img_osm_object)

    if len(set(tags)) < 2 or len(tags) > 5:
        print(f"{i}:, image: {img_id}, tags: {tags}")
        if i < 100:
            bad_count_before_100 += 1
        else:
            bad_count_after_100 += 1
    else:
        result_ids.append(img_id)

    i += 1

if len(result_ids) < 100:
    print(f"Only {len(result_ids)} suitable images found among {len(test_ids)} test images")

0:, image: 20327, tags: ['peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak']
1:, image: 9501, tags: ['water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'wood']
2:, image: 3138, tags: []
3:, image: 7130, tags: ['wood', 'wood', 'wood']
4:, image: 10196, tags: ['peak', 'peak', 'peak', 'peak']
5:, image: 10798, tags: []
6:, image: 11093, tags: ['peak', 'peak']
7:, image: 11980, tags: ['water', 'water', 'water', 'peak', 'peak', 'peak', 'peak', 'peak']
8:, image: 15300, tags: []
9:, image: 14346, tags: ['water', 'water', 'water', 'water', 'water', 'water', 'water', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', '

664:, image: 12912, tags: ['water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood']
665:, image: 16543, tags: ['water', 'water', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak']
666:, image: 14038, tags: ['water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'water', 'wood', 'wood']
668:, image: 8316, tags: ['water', 'water', 'water', 'water', 'water']
669:, image: 20428, tags: ['peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak']
670:, image: 743, tags: ['wood', 'wood', 'wood']
671:, image: 20589, tags: ['water', 'water', 'water']
672:, 

1219:, image: 4456, tags: ['water']
1220:, image: 6289, tags: ['water', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak']
1221:, image: 11632, tags: ['water', 'water', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood']
1222:, image: 8979, tags: ['peak', 'peak', 'peak', 'peak', 'peak', 'peak']
1223:, image: 18158, tags: ['peak', 'peak', 'peak', 'peak']
1224:, image: 11122, tags: ['water', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak']
1225:, image: 20046, tags: ['water', 'water', 'peak', 'peak', 'peak', 'peak']
1226:, image: 13167, tags: ['peak', 'peak']
1227:, image: 13690, tags: ['water']
1228:, image: 6670, tags: ['wood', 'wood']
1229:, image: 11044, tags: ['peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 

In [83]:
print(bad_count_before_100)
print(bad_count_after_100)

93
1060


In [84]:
%%time
is_in_k_one_image = is_in_ks_for_one_image(result_ids[0])

CPU times: user 28.1 s, sys: 44 ms, total: 28.2 s
Wall time: 28.2 s


In [85]:
# Top-1 accuracy on one image: the share of agent points whose true cell is
# ranked first (column 0). The same expression works for many images.
np.array(is_in_k_one_image)[:, 0].sum() / len(is_in_k_one_image)

0.03

In [88]:
%%time
is_in_k = []

# Accumulate the per-point top-k flags of the first 10 selected images.
for img_id in result_ids[:10]:
    # Best effort: an image whose evaluation fails (e.g. its prediction
    # pickle is missing) is reported and skipped.
    try:
        is_in_k.extend(is_in_ks_for_one_image(img_id))
    except Exception as e:
        print(e)
    else:
        # Progress report whenever the running total is a multiple of 1000.
        if not len(is_in_k) % 1000:
            print(len(is_in_k))
        

[Errno 2] No such file or directory: '../dataset/predictions/predicts_osm/21483.pickle'
[Errno 2] No such file or directory: '../dataset/predictions/predicts_osm/6157.pickle'
[Errno 2] No such file or directory: '../dataset/predictions/predicts_osm/5324.pickle'
CPU times: user 2min 44s, sys: 152 ms, total: 2min 45s
Wall time: 2min 45s


In [89]:
# Top-1 accuracy over all agent points accumulated in is_in_k: the share of
# points whose true cell is ranked first (column 0).
np.array(is_in_k)[:, 0].sum() / len(is_in_k)

0.025714285714285714

In [90]:
# Collect the first 100 test images that contain objects of at least two
# different classes and at most 10 objects in total; every rejected image is
# printed and counted.
result_ids = []

i = 0

# Rejected images among the first 100 scanned / among the ones scanned later.
bad_count_before_100 = 0
bad_count_after_100 = 0

# The bound on i stops the scan cleanly if the test split runs out before
# 100 suitable images are found (an unbounded scan raises IndexError there).
while len(result_ids) < 100 and i < len(test_ids):
    img_id = test_ids[i]

    img_osm_object = read_pickle(os.path.join(osm_objects_path, img_id + '.pickle'))
    tags = get_all_tags(img_osm_object)

    if len(set(tags)) < 2 or len(tags) > 10:
        print(f"{i}:, image: {img_id}, tags: {tags}")
        if i < 100:
            bad_count_before_100 += 1
        else:
            bad_count_after_100 += 1
    else:
        result_ids.append(img_id)

    i += 1

if len(result_ids) < 100:
    print(f"Only {len(result_ids)} suitable images found among {len(test_ids)} test images")

0:, image: 20327, tags: ['peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak', 'peak']
2:, image: 3138, tags: []
3:, image: 7130, tags: ['wood', 'wood', 'wood']
4:, image: 10196, tags: ['peak', 'peak', 'peak', 'peak']
5:, image: 10798, tags: []
6:, image: 11093, tags: ['peak', 'peak']
8:, image: 15300, tags: []
9:, image: 14346, tags: ['water', 'water', 'water', 'water', 'water', 'water', 'water', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood', 'wood

In [91]:
print(bad_count_before_100)
print(bad_count_after_100)

86
438


In [92]:
%%time
is_in_k_one_image = is_in_ks_for_one_image(result_ids[0])

CPU times: user 23 s, sys: 28 ms, total: 23 s
Wall time: 23 s


In [93]:
# Top-1 accuracy on one image: the share of agent points whose true cell is
# ranked first (column 0). The same expression works for many images.
np.array(is_in_k_one_image)[:, 0].sum() / len(is_in_k_one_image)

0.03

In [94]:
%%time
is_in_k = []

# Accumulate the per-point top-k flags of the first 10 selected images.
for img_id in result_ids[:10]:
    # Best effort: an image whose evaluation fails (e.g. its prediction
    # pickle is missing) is reported and skipped.
    try:
        is_in_k.extend(is_in_ks_for_one_image(img_id))
    except Exception as e:
        print(e)
    else:
        # Progress report whenever the running total is a multiple of 1000.
        if not len(is_in_k) % 1000:
            print(len(is_in_k))
        

1000
CPU times: user 4min 4s, sys: 164 ms, total: 4min 4s
Wall time: 4min 4s


In [95]:
# Top-1 accuracy over all agent points accumulated in is_in_k: the share of
# points whose true cell is ranked first (column 0).
np.array(is_in_k)[:, 0].sum() / len(is_in_k)

0.027

**Можно прогнаться на всём, но обратить внимание на картинки, на которых точность получается > 0.1. Посмотреть, что это за картинки**

In [97]:
%%time
is_in_k = []

# Ids of images whose own top-1 precision exceeds 0.1.
good_imgs = []

for img_id in result_ids[:10]:
    # Best effort: an image whose evaluation fails is reported and skipped.
    try:
        cur_is_in_k = is_in_ks_for_one_image(img_id)

        # Share of this image's agent points whose true cell is ranked first.
        precision = np.array(cur_is_in_k)[:, 0].sum() / len(cur_is_in_k)
        if precision > 0.1:
            good_imgs.append(img_id)
            print(f"Precision {precision}, img_id: {img_id}")

        is_in_k.extend(cur_is_in_k)
    except Exception as e:
        print(e)
    else:
        # Progress report whenever the running total is a multiple of 1000.
        if not len(is_in_k) % 1000:
            print(len(is_in_k))
        

1000
CPU times: user 4min 4s, sys: 236 ms, total: 4min 4s
Wall time: 4min 4s


In [None]:
%%time
is_in_k = []

# Ids of images whose own top-1 precision is at least 0.03.
good_imgs = []

# Run over the whole test split, not only the filtered subset.
for img_id in test_ids:
    # Best effort: an image whose evaluation fails is reported and skipped.
    try:
        cur_is_in_k = is_in_ks_for_one_image(img_id)

        # Share of this image's agent points whose true cell is ranked first.
        precision = np.array(cur_is_in_k)[:, 0].sum() / len(cur_is_in_k)
        if precision >= 0.03:
            good_imgs.append(img_id)
            print(f"Precision {precision}, img_id: {img_id}")

        is_in_k.extend(cur_is_in_k)
    except Exception as e:
        print(e)
    else:
        # Progress report whenever the running total is a multiple of 1000.
        if not len(is_in_k) % 1000:
            print(len(is_in_k))
        

Precision 0.03, img_id: 9501
Precision 0.03, img_id: 3138
Precision 0.03, img_id: 10196
Precision 0.04, img_id: 10798
Precision 0.03, img_id: 11093
Precision 0.03, img_id: 11980
Precision 0.04, img_id: 14346
1000
Precision 0.03, img_id: 11410
Precision 0.03, img_id: 16382
Precision 0.03, img_id: 9437
Precision 0.03, img_id: 14119
Precision 0.04, img_id: 1006
Precision 0.03, img_id: 3800
Precision 0.03, img_id: 224
Precision 0.03, img_id: 16832
2000
Precision 0.03, img_id: 18649
Precision 0.03, img_id: 16577
Precision 0.03, img_id: 14357
Precision 0.03, img_id: 14705
Precision 0.03, img_id: 17506
Precision 0.03, img_id: 14406
Precision 0.06, img_id: 6482
Precision 0.03, img_id: 9463
3000
Precision 0.03, img_id: 14706
Precision 0.03, img_id: 3995
Precision 0.03, img_id: 18004
Precision 0.03, img_id: 4221
Precision 0.03, img_id: 21615
Precision 0.03, img_id: 2293
Precision 0.03, img_id: 11947
Precision 0.03, img_id: 16286
4000
Precision 0.03, img_id: 21799
Precision 0.03, img_id: 13603
Pr

**Взять хорошую картинку, разобраться, почему не работает (предлагаю 224.png)**