In [None]:
import numpy as np
import pandas as pd
from imutils.object_detection import non_max_suppression
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

In [None]:
# Load the three per-model submission files that are merged further down.
sub0, sub1, sub2 = [
    pd.read_csv(csv_name)
    for csv_name in (
        'cnn_lr0005_beta96_submission.csv',
        'cnn_lr0005_beta96_no_val_submission.csv',
        'cnn_lr00035_submission.csv',
    )
]

In [None]:
def split_string(x):
    """Split a whitespace-separated prediction string into its tokens.

    Parameters
    ----------
    x : str or missing value
        Raw prediction string, or NaN/None when there is no prediction.

    Returns
    -------
    list of str or None
        The tokens of ``x``; ``None`` when ``x`` is missing.
    """
    if pd.isna(x):
        return None
    # split() without an argument collapses runs of whitespace, so a doubled
    # space (or an empty string) never produces '' tokens that would later
    # fail float()/int() conversion.
    return x.split()

In [None]:
def parse_scores(x):
    """Pull the confidence scores out of a tokenised prediction.

    Every fifth token, starting with the first one, is read as a score.

    Parameters
    ----------
    x : list of str or anything else
        Token list; any value that is not a plain ``list`` is treated as
        "no prediction".

    Returns
    -------
    numpy.ndarray or None
        Float array of scores, or ``None`` when ``x`` is not a list.
    """
    if type(x) is not list:
        return None
    return np.asarray([float(token) for token in x[::5]])

In [None]:
def parse_bbox(x):
    """Extract the boxes of a tokenised prediction as corner coordinates.

    Every token whose position is not a multiple of five is read as an
    integer coordinate; the tokens at positions 0, 5, 10, ... are skipped.
    Each group of four is taken as ``x, y, w, h`` and converted to
    ``x, y, x + w, y + h``.

    Parameters
    ----------
    x : sequence of str, float or None
        Token list, or a missing value (``None`` / float NaN).

    Returns
    -------
    numpy.ndarray or None
        Array of shape ``(n_boxes, 4)``; ``None`` when ``x`` is missing.

    Raises
    ------
    ValueError
        If a coordinate token is not an integer literal or the coordinates
        do not form complete groups of four.
    """
    # Missing predictions arrive as float NaN or as None; neither has a
    # length, so both must be rejected before indexing.
    if x is None or isinstance(x, float):
        return None

    coords = [int(token) for pos, token in enumerate(x) if pos % 5 != 0]
    boxes = np.asarray(coords).reshape(-1, 4)
    # Add x and y to w and h to fit nms format
    boxes[:, 2:] += boxes[:, :2]
    return boxes

In [None]:
def parse_predictions(df):
    """Return a parsed copy of a submission frame.

    The input frame is left untouched, so the cell calling this function
    can be re-run safely.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``PredictionString`` column holding prediction strings
        or missing values.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` in which ``PredictionString`` holds the token list
        (NaN when there was no prediction) and two columns are added:
        ``bbox_scores`` (result of ``parse_scores``) and ``bbox_preds``
        (result of ``parse_bbox``). Rows without a prediction get an empty
        array in both new columns.
    """
    def _tokenize(value):
        # Anything that is not a string (e.g. NaN) counts as "no prediction".
        if isinstance(value, str):
            return split_string(value.strip())
        return np.nan

    def _or_empty(parser):
        # Only real token lists reach the parser; missing rows become an
        # empty array so downstream len() checks work uniformly.
        def _apply(tokens):
            if isinstance(tokens, list):
                return parser(tokens)
            return np.asarray([])
        return _apply

    parsed = df.copy()
    tokens = parsed['PredictionString'].map(_tokenize)

    parsed['PredictionString'] = tokens
    parsed['bbox_scores'] = tokens.map(_or_empty(parse_scores))
    parsed['bbox_preds'] = tokens.map(_or_empty(parse_bbox))

    return parsed

In [None]:
# Peek at the first 30 rows of the raw sub0 submission before parsing.
sub0.head(30)

In [None]:
# Peek at the first 30 rows of the raw sub1 submission before parsing.
sub1.head(30)

In [None]:
# Peek at the first 30 rows of the raw sub2 submission before parsing.
sub2.head(30)

In [None]:
# Parse each submission: adds the bbox_scores and bbox_preds columns.
sub0, sub1, sub2 = [parse_predictions(raw_sub) for raw_sub in (sub0, sub1, sub2)]

In [None]:
# Inspect sub0 after parsing (bbox_scores / bbox_preds columns added).
sub0.head(20)

In [None]:
# Inspect sub1 after parsing (bbox_scores / bbox_preds columns added).
sub1.head(20)

In [None]:
# Inspect sub2 after parsing (bbox_scores / bbox_preds columns added).
sub2.head(20)

In [None]:
def check_scores(x, threshold=0.6):
    """Flag scores that fall below the confidence threshold.

    Parameters
    ----------
    x : numpy.ndarray or scalar
        Score(s) to test.
    threshold : float, optional
        Cut-off; values strictly below it are flagged. Defaults to 0.6.

    Returns
    -------
    Same shape as ``x``
        ``True`` where the score is below ``threshold``.
    """
    return x < threshold

In [None]:
def remove_under_threshold(df, threshold=0.6):
    """Drop every box whose score is below ``threshold``.

    The frame is walked once, by position, so it works for any index. The
    filtered arrays are written back as whole columns instead of cell by
    cell.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``bbox_scores`` (1-D score array per row) and
        ``bbox_preds`` (one box per score along axis 0) columns.
    threshold : float, optional
        Scores strictly below this value are removed. Defaults to 0.6.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with both columns updated in place.
    """
    kept_scores = np.empty(len(df), dtype=object)
    kept_preds = np.empty(len(df), dtype=object)

    for pos, (scores, preds) in enumerate(zip(df['bbox_scores'], df['bbox_preds'])):
        scores = np.asarray(scores)
        preds = np.asarray(preds)
        # Rows without predictions are passed through untouched.
        if len(scores) != 0:
            # Negating "< threshold" (rather than using ">=") keeps exactly
            # the entries the comparison does not flag, NaN included.
            keep = ~(scores < threshold)
            scores = scores[keep]
            preds = preds[keep]
        kept_scores[pos] = scores
        kept_preds[pos] = preds

    df['bbox_scores'] = kept_scores
    df['bbox_preds'] = kept_preds

    return df

In [None]:
# Discard low-confidence boxes in each of the three submissions.
sub0, sub1, sub2 = [remove_under_threshold(parsed_sub) for parsed_sub in (sub0, sub1, sub2)]

In [None]:
# Inspect sub0 after the low-score boxes were removed.
sub0.head(20)

In [None]:
# Inspect sub1 after the low-score boxes were removed.
sub1.head(20)

In [None]:
# Inspect sub2 after the low-score boxes were removed.
sub2.head(20)

In [None]:
def nms_3_fold(row, df1, df2, df3, t=0.3):
    """Merge the boxes of one row across three frames with NMS.

    Parameters
    ----------
    row : index label
        Label looked up with ``.loc`` in all three frames.
        NOTE(review): this assumes the frames share the same index and row
        order (same image per label) — confirm for the input files.
    df1, df2, df3 : pandas.DataFrame
        Frames with ``bbox_preds`` and ``bbox_scores`` columns.
    t : float, optional
        Passed to ``non_max_suppression`` as ``overlapThresh``.
        Defaults to 0.3.

    Returns
    -------
    numpy.ndarray
        Result of ``non_max_suppression`` on the pooled boxes, or an empty
        float64 array when none of the frames has a box for ``row``.
    """
    frames = (df1, df2, df3)

    box_sets = [frame.loc[row, 'bbox_preds'] for frame in frames]
    non_empty = [boxes for boxes in box_sets if len(boxes) != 0]

    # Explicit "nothing to merge" check instead of a bare except, so that
    # genuine errors (bad shapes, missing labels, ...) are not swallowed.
    if not non_empty:
        return np.asarray([], dtype=np.float64)

    all_boxes = np.concatenate(non_empty, axis=0)
    all_scores = np.concatenate(
        [frame.loc[row, 'bbox_scores'] for frame in frames], axis=0
    )

    return non_max_suppression(all_boxes, all_scores, overlapThresh=t)

In [None]:
# Merge the three models' boxes row by row; labels 0..len(sub0)-1 are looked up
# in all three frames. The overlap threshold is 0.1 here, not the 0.3 default.
nms = [nms_3_fold(n,sub0,sub1,sub2,t=.1) for n in range(len(sub0))]

In [None]:
# Show the merged boxes for every row, as returned by nms_3_fold.
nms

In [None]:
def parse_nms(preds):
    """Convert boxes from corner form back to width/height form, in place.

    For every box, each entry from position 2 onwards has the entry two
    positions before it subtracted, turning ``x1, y1, x2, y2`` into
    ``x1, y1, x2 - x1, y2 - y1``.

    Parameters
    ----------
    preds : iterable of (array or list) of boxes
        Empty entries are skipped.

    Returns
    -------
    None
        ``preds`` is modified in place; calling this twice on the same data
        subtracts twice.
    """
    for boxes in preds:
        if len(boxes) == 0:
            continue
        for box in boxes:
            for col in range(2, len(box)):
                box[col] -= box[col - 2]

In [None]:
# Convert the merged boxes back to width/height form, in place.
# NOTE: parse_nms mutates nms; re-running this cell alone subtracts a second time.
parse_nms(nms)

In [None]:
# Show the merged boxes again after the in-place conversion by parse_nms.
nms

Convert the NMS box arrays to strings and rebuild the submission format (`confidence x y width height` per box), now that the boxes are back in x, y, width, height form.

In [None]:
def nms_to_string(nms, confidence='0.99'):
    """Turn per-row box arrays into space-separated prediction strings.

    Each group of four values is prefixed with ``confidence``, giving
    ``"<confidence> v1 v2 v3 v4 <confidence> ..."``. The values are formatted
    directly rather than through ``np.array2string``, so large arrays are
    never abbreviated with ``...``.

    Parameters
    ----------
    nms : iterable of array-like
        One array of boxes per row; an empty array means "no boxes".
    confidence : str or float, optional
        Value written in front of every box. Defaults to ``'0.99'``; the
        same value is used for every box.

    Returns
    -------
    list
        One entry per input array: the prediction string, or ``np.nan``
        for an empty array.
    """
    def _format(value, as_int):
        # Integer boxes are written verbatim; anything else gets 2 decimals.
        return str(int(value)) if as_int else format(float(value), '.2f')

    conf_token = str(confidence)
    nms_strings = []
    for boxes in nms:
        flat = np.asarray(boxes).ravel()
        if flat.size == 0:
            nms_strings.append(np.nan)
            continue

        as_int = np.issubdtype(flat.dtype, np.integer)
        tokens = []
        for start in range(0, flat.size, 4):
            tokens.append(conf_token)
            tokens.extend(_format(value, as_int) for value in flat[start:start + 4])
        nms_strings.append(' '.join(tokens))

    return nms_strings

In [None]:
# Build one submission string (or NaN when there are no boxes) per row.
nms_strings = nms_to_string(nms)

In [None]:
# Start the output frame from an independent copy of sub0's two columns.
df = sub0[['patientId','PredictionString']].copy()

In [None]:
# Replace the copied predictions with the NMS-merged strings (assigned by position).
df['PredictionString'] = nms_strings

In [None]:
# Check the first 30 rows of the merged submission before writing it out.
df.head(30)

In [None]:
# Write the merged submission without the index column.
# NOTE(review): "t0.6" in the file name presumably refers to the 0.6 score
# cut-off in check_scores, not the NMS overlap threshold (t=.1) — confirm.
df.to_csv('nms_t0.6_submission.csv', index = False)