In [37]:
import os
import sys

import numpy as np
import pandas as pd

import torch.nn.functional as F

In [22]:
def do_kaggle_metric(predict, truth, threshold=0.5):

    N = len(predict)
    predict = predict.reshape(N, -1)
    truth   = truth.reshape(N, -1)

    predict = predict > threshold
    truth   = truth > 0.5
    intersection = truth & predict
    union        = truth | predict
    iou = intersection.sum(1) / (union.sum(1) + 1e-8)

    #-------------------------------------------
    result = []
    precision = []
    is_empty_truth   = (truth.sum(1) == 0)
    is_empty_predict = (predict.sum(1) == 0)

    threshold = np.array([0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95])
    for t in threshold:
        p = iou >= t

        tp  = (~is_empty_truth)  & (~is_empty_predict) & (iou > t)
        fp  = (~is_empty_truth)  & (~is_empty_predict) & (iou <= t)
        fn  = (~is_empty_truth)  & ( is_empty_predict)
        fp_empty = ( is_empty_truth)  & (~is_empty_predict)
        tn_empty = ( is_empty_truth)  & ( is_empty_predict)

        p = (tp + tn_empty) / (tp + tn_empty + fp + fp_empty + fn)

        result.append( np.column_stack((tp,fp,fn,tn_empty,fp_empty)) )
        precision.append(p)

    result = np.array(result).transpose(1, 2, 0)
    precision = np.column_stack(precision)
    precision = precision.mean(1)

    return precision, result, threshold


In [18]:
predicts = []
truths = []
for i in range(4):
    predicts.append(np.ones((256, 1600, 4)))
    truths.append(np.zeros((256, 1600, 4)))

In [19]:
predicts[0].shape

(256, 1600, 4)

In [20]:
predicts = np.concatenate(predicts).squeeze()
truths = np.concatenate(truths).squeeze()

In [21]:
predicts.shape

(1024, 1600, 4)

In [23]:
precision, _, _ = do_kaggle_metric(predicts, truths, 0.5)
precision = precision.mean()

In [24]:
precision

0.0

In [3]:
train = pd.read_csv('../input/train.csv')

In [4]:
train.head()

Unnamed: 0,ImageId_ClassId,EncodedPixels
0,0002cc93b.jpg_1,29102 12 29346 24 29602 24 29858 24 30114 24 3...
1,0002cc93b.jpg_2,
2,0002cc93b.jpg_3,
3,0002cc93b.jpg_4,
4,00031f466.jpg_1,


In [38]:
def make_split_label(x):
    if x['class_count'] == 0:
        return 0
    if x['class_count'] <= 2:
        return 5
    if str(x['1']) != 'nan':
        return 1
    if str(x['2']) != 'nan':
        return 2
    if str(x['3']) != 'nan':
        return 3
    if str(x['4']) != 'nan':
        return 4

steel_df = pd.read_csv(os.path.join('..', 'input', 'train.csv'))
steel_df['ImageId'], steel_df['ClassId'] = zip(*steel_df['ImageId_ClassId'].apply(lambda x: x.split('_')))
steel_df = pd.pivot_table(steel_df, index='ImageId', columns='ClassId', values='EncodedPixels', aggfunc=lambda x: x, dropna=False)
steel_df = steel_df.reset_index()
steel_df.columns = [str(i) for i in steel_df.columns.values]
steel_df['class_count'] = steel_df[['1', '2', '3', '4']].count(axis=1)
steel_df['split_label'] = steel_df[['1', '2', '3', '4', 'class_count']].apply(lambda x: make_split_label(x), axis=1)

In [12]:
steel_df.head()

Unnamed: 0,ImageId,1,2,3,4,class_count,split_label
0,0002cc93b.jpg,29102 12 29346 24 29602 24 29858 24 30114 24 3...,,,,1,5
1,00031f466.jpg,,,,,0,0
2,000418bfc.jpg,,,,,0,0
3,000789191.jpg,,,,,0,0
4,0007a71bf.jpg,,,18661 28 18863 82 19091 110 19347 110 19603 11...,,1,5


In [18]:
steel_df['class_count'].value_counts()

1    6239
0    5902
2     425
3       2
Name: class_count, dtype: int64

In [20]:
def run_length_decode(rle, height=256, width=1600, fill_value=1):
    mask = np.zeros((height, width), np.float32)
    if rle != '':
        mask=mask.reshape(-1)
        r = [int(r) for r in rle.split(' ')]
        r = np.array(r).reshape(-1, 2)
        for start,length in r:
            start = start-1  #???? 0 or 1 index ???
            mask[start:(start + length)] = fill_value
        mask=mask.reshape(width, height).T
    return mask

In [22]:
mask1 = run_length_decode('29102 12 29346 24 29602 24 29858 24 30114 24 30370 24 30626 24 30882 24 31139 23 31395 23 31651 23 31907 23 32163 23 32419 23 32675 23 77918 27 78174 55 78429 60 78685 64 78941 68 79197 72 79452 77 79708 81 79964 85 80220 89 80475 94 80731 98 80987 102 81242 105 81498 105 81754 104 82010 104 82265 105 82521 31 82556 69 82779 27 82818 63 83038 22 83080 57 83297 17 83342 50 83555 13 83604 44 83814 8 83866 37 84073 3 84128 31 84390 25 84652 18 84918 8 85239 10 85476 29 85714 47 85960 57 86216 57 86471 58 86727 58 86983 58 87238 59 87494 59 87750 59 88005 60 88261 60 88517 60 88772 61 89028 53 89283 40 89539 32 89667 10 89795 30 89923 28 90050 29 90179 37 90306 27 90434 38 90562 14 90690 38 90817 9 90946 38 91073 3 91202 38 91458 38 91714 38 91969 39 92225 39 92481 39 92737 39 92993 39 93248 40 93504 40 93760 40 94026 30 94302 10 189792 7 190034 21 190283 28 190539 28 190795 28 191051 28 191307 28 191563 28 191819 28 192075 28 192331 28 192587 28 192843 23 193099 14 193355 5')

In [25]:
mask2 = run_length_decode('')

In [26]:
mask3 = run_length_decode('')

In [27]:
mask4 = run_length_decode('')

In [29]:
mask = np.zeros((256, 1600, 4))

In [30]:
mask[:, :, 0] = mask1
mask[:, :, 1] = mask2
mask[:, :, 2] = mask3
mask[:, :, 3] = mask4

In [35]:
label = (mask.reshape(4, -1).sum(1) > 8).astype(np.int32)

In [36]:
label

array([0, 1, 1, 1], dtype=int32)

In [39]:
steel_df.head()

Unnamed: 0,ImageId,1,2,3,4,class_count,split_label
0,0002cc93b.jpg,29102 12 29346 24 29602 24 29858 24 30114 24 3...,,,,1,5
1,00031f466.jpg,,,,,0,0
2,000418bfc.jpg,,,,,0,0
3,000789191.jpg,,,,,0,0
4,0007a71bf.jpg,,,18661 28 18863 82 19091 110 19347 110 19603 11...,,1,5


In [44]:
def make_label(x):
    if x == 0:
        return np.array([0, 0, 0, 0, 0])
    elif x == 1:
        return np.array([1, 0, 0, 0, 0])
    elif x == 2:
        return np.array([0, 1, 0, 0, 0])
    elif x == 3:
        return np.array([0, 0, 1, 0, 0])
    elif x == 4:
        return np.array([0, 0, 0, 1, 0])
    elif x == 5:
        return np.array([0, 0, 0, 0, 1])

In [45]:
steel_df['label'] = steel_df['split_label'].apply(lambda x: make_label(x))

In [46]:
steel_df.head()

Unnamed: 0,ImageId,1,2,3,4,class_count,split_label,label
0,0002cc93b.jpg,29102 12 29346 24 29602 24 29858 24 30114 24 3...,,,,1,5,"[0, 0, 0, 0, 1]"
1,00031f466.jpg,,,,,0,0,"[0, 0, 0, 0, 0]"
2,000418bfc.jpg,,,,,0,0,"[0, 0, 0, 0, 0]"
3,000789191.jpg,,,,,0,0,"[0, 0, 0, 0, 0]"
4,0007a71bf.jpg,,,18661 28 18863 82 19091 110 19347 110 19603 11...,,1,5,"[0, 0, 0, 0, 1]"


KeyError: 'Traceback (most recent call last):\n  File "/root/Severstal/py2/datasets/stage1_datasets.py", line 51, in __getitem__
label = np.array(self.label[index])\n  File "/opt/conda/lib/python3.6/site-packages/pandas/core/series.py", line 767, in __getitem__
result = self.index.get_value(self, key)\n  File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3118, in get_value
tz=getattr(series.dtype, \'tz\', None))\n  File "pandas/_libs/index.pyx", line 106, in pandas._libs.index.IndexEngine.get_value\n  File "pandas/_libs/index.pyx", line 114, in pandas._libs.index.IndexEngine.get_value\n  File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc\n  File "pandas/_libs/hashtable_class_helper.pxi", line 958, in pandas._libs.hashtable.Int64HashTable.get_item\n  File "pandas/_libs/hashtable_class_helper.pxi", line 964, in pandas._libs.hashtable.Int64HashTable.get_item\nKeyError: 1\n\nDuring handling of the above exception, another exception occurred:
Traceback (most recent call last):  
File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in _worker_loop
samples = collate_fn([dataset[i] for i in batch_indices])\n  File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in <listcomp>\n    samples = collate_fn([dataset[i] for i in batch_indices])
File "/root/Severstal/py2/datasets/stage1_datasets.py", line 74, in __getitem__
label = np.array(self.label[index])\n  File "/opt/conda/lib/python3.6/site-packages/pandas/core/series.py", line 767, in __getitem__
result = self.index.get_value(self, key)\n  File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3118, in get_value\n    tz=getattr(series.dtype, \'tz\', None))
File "pandas/_libs/index.pyx", line 106, in pandas._libs.index.IndexEngine.get_value\n  File "pandas/_libs/index.pyx", line 114, in pandas._libs.index.IndexEngine.get_value
File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 958, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 964, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 1