In [1]:
import pandas as pd   



In [2]:
# Functions
def GeneralEnsemble(dets, iou_thresh = 0.5, weights=None):
    assert(type(iou_thresh) == float)
    
    ndets = len(dets)
    
    if weights is None:
        w = 1/float(ndets)
        weights = [w]*ndets
    else:
        assert(len(weights) == ndets)
        
        s = sum(weights)
        for i in range(0, len(weights)):
            weights[i] /= s

    out = list()
    used = list()
    
    for idet in range(0,ndets):
        det = dets[idet]
        for box in det:
            if box in used:
                continue
                
            used.append(box)
            # Search the other detectors for overlapping box of same class
            found = []
            for iodet in range(0, ndets):
                odet = dets[iodet]
                
                if odet == det:
                    continue
                
                bestbox = None
                bestiou = iou_thresh
                for obox in odet:
                    if not obox in used:
                        # Not already used
                        if box[4] == obox[4]:
                            # Same class
                            iou = computeIOU(box, obox)
                            if iou > bestiou:
                                bestiou = iou
                                bestbox = obox
                                
                if not bestbox is None:
                    w = weights[iodet]
                    found.append((bestbox,w))
                    used.append(bestbox)
                            
            # Now we've gone through all other detectors
            if len(found) == 0:
                new_box = list(box)
                new_box[5] /= ndets
                out.append(new_box)
            else:
                allboxes = [(box, weights[idet])]
                allboxes.extend(found)
                
                xc = 0.0
                yc = 0.0
                bw = 0.0
                bh = 0.0
                conf = 0.0
                
                wsum = 0.0
                for bb in allboxes:
                    w = bb[1]
                    wsum += w

                    b = bb[0]
                    xc += w*b[0]
                    yc += w*b[1]
                    bw += w*b[2]
                    bh += w*b[3]
                    conf += w*b[5]
                    #print(f"conf = {conf}")
                
                xc /= wsum
                yc /= wsum
                bw /= wsum
                bh /= wsum    

                new_box = [xc, yc, bw, bh, box[4], conf]
                out.append(new_box)
    return out
    
def getCoords(box):
    x1 = float(box[0]) - float(box[2])/2
    x2 = float(box[0]) + float(box[2])/2
    y1 = float(box[1]) - float(box[3])/2
    y2 = float(box[1]) + float(box[3])/2
    return x1, x2, y1, y2
    
def computeIOU(box1, box2):
    x11, x12, y11, y12 = getCoords(box1)
    x21, x22, y21, y22 = getCoords(box2)
    
    x_left   = max(x11, x21)
    y_top    = max(y11, y21)
    x_right  = min(x12, x22)
    y_bottom = min(y12, y22)

    if x_right < x_left or y_bottom < y_top:
        return 0.0    
        
    intersect_area = (x_right - x_left) * (y_bottom - y_top)
    box1_area = (x12 - x11) * (y12 - y11)
    box2_area = (x22 - x21) * (y22 - y21)        
    
    iou = intersect_area / (box1_area + box2_area - intersect_area)
    return iou

# Function to transform the YOLOV5 output to the format the Ensemble function expects. 

def transform_object(df,tmp,flag):
    list_of_floats=[]
    for item in tmp:
        list_of_floats.append(float(item))
        
    tm=int(len(list_of_floats)/6)
    list1=[]
    list2=[]
    checkobj=[]
    for i in range(tm):
          
        j=i*6
        k=j
        l=k+6
            
        checkobj.append(k)
        xmin=list_of_floats[k+2]
        ymin=list_of_floats[k+3]
        xmax=list_of_floats[k+4]
        ymax=list_of_floats[k+5]
            
        box_w=xmax-xmin
        box_h=ymax-ymin
        box_x=xmin+(box_w/2)
        box_y=ymin+box_h/2
            
        list1=[box_x,box_y,box_w,box_h,int(list_of_floats[k]),list_of_floats[k+1]]
        
        list2.append(list1)        
        list1=[]
    if flag==0:
        return checkobj
    else:
        return list2
    
#https://www.kaggle.com/prashantkikani/vinbigdata-ensemble-post-processing?scriptVersionId=56245340

def divide(l, n):
    '''
    divide submission string into group of 6
    '''
    for i in range(0, len(l), n):  
        yield l[i:i + n]

In [3]:
#Read the output of the different object detection models 

df0 = pd.read_csv("pred/k-fold0-submission.csv")
    
df1 = pd.read_csv("pred/k-fold1-submission.csv")
    
df2 = pd.read_csv("pred/k-fold2-submission.csv")
    
df3 = pd.read_csv("pred/k-fold3-submission.csv")
    
df4 = pd.read_csv("pred/k-fold4-submission.csv")

#Lists for storing each frame of the files

final_list=[]
input_file_data0=[]
input_file_data1=[]
input_file_data2=[]
input_file_data3=[]
input_file_data4=[]

In [4]:
# Process the lists from each of the csvs frame by frame.

for i in range(len(df0)):    
    #File 0
    a= df0.iloc[i,:]
    tmp0=a[1]
    tmp0=tmp0.split()
    
    # File 1
    a1= df1.iloc[i,:]
    tmp1=a1[1]
    tmp1=tmp1.split()
            
    # File 2
    a2= df2.iloc[i,:]
    tmp2=a2[1]
    tmp2=tmp2.split()
        
    # File 3
    a3= df3.iloc[i,:]
    tmp3=a3[1]
    tmp3=tmp3.split()
        
    # File 4
    a4= df4.iloc[i,:]
    tmp4=a4[1]
    tmp4=tmp4.split()
        
    #Convert the frames into specific format of Ensemble function
        
    for j in range(5): 
        globals()['input_file_data%s' % j].append(transform_object(eval('df'+str(j)),eval('tmp'+str(j)),1))
              
    
    

In [5]:
#Apply the Ensemble function

from tqdm.notebook import tqdm

df = df0.copy()

for i in tqdm(range(len(df0))):
    final_list=[input_file_data0[i],input_file_data1[i],input_file_data2[i],input_file_data3[i],input_file_data4[i]]
    ens = GeneralEnsemble(final_list,iou_thresh = 0.4)
    lst = []
    for j in ens:
        lst.append(j[4])
        lst.append(j[5])
        lst.append(j[0] - j[2]/2)
        lst.append(j[1] - j[3]/2)
        lst.append(j[0] + j[2]/2)
        lst.append(j[1] + j[3]/2)
    df.iloc[i,1] = lst
 

HBox(children=(FloatProgress(value=0.0, max=3000.0), HTML(value='')))




In [6]:
df.head()

Unnamed: 0,file,PredictionString
0,8b44752d98acbc233b093260c0f2061d,"[14, 0.2, 0.0, 0.0, 1.0, 1.0, 11, 0.06, 1707.0..."
1,3ca907c03b1409d8a703591f6ca7882e,"[11, 0.12000000000000002, 619.5, 357.5, 923.5,..."
2,b0634373edac7074e2ab28a088ee0612,"[14, 0.2, 0.0, 0.0, 1.0, 1.0]"
3,af814787d788b29059ae3419a3b38dbd,"[14, 0.2, 0.0, 0.0, 1.0, 1.0, 9, 0.02, 153.0, ..."
4,fe5c2e4756820873cd6e8332ad4c2e12,"[14, 0.2, 0.0, 0.0, 1.0, 1.0, 11, 0.02, 999.0,..."


In [9]:
for index, row in df.iterrows():
    if row["PredictionString"] == [14, 0.2, 0.0, 0.0, 1.0, 1.0]:
        row["PredictionString"] = [14, 1, 0, 0, 1, 1]

In [10]:
for index, row in df.iterrows():
    row["PredictionString"] = ' '.join(str(e) for e in row["PredictionString"])

df

Unnamed: 0,file,PredictionString
0,8b44752d98acbc233b093260c0f2061d,1 4 0 . 2 0 . 0 0 . 0 1 . 0 1 . 0 ...
1,3ca907c03b1409d8a703591f6ca7882e,1 1 0 . 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 ...
2,b0634373edac7074e2ab28a088ee0612,1 4 0 0 0 1 1
3,af814787d788b29059ae3419a3b38dbd,1 4 0 . 2 0 . 0 0 . 0 1 . 0 1 . 0 ...
4,fe5c2e4756820873cd6e8332ad4c2e12,1 4 0 . 2 0 . 0 0 . 0 1 . 0 1 . 0 ...
...,...,...
2995,e33d9cf93e5e673950f7448dc79de500,1 4 0 . 2 0 . 0 0 . 0 1 . 0 1 . 0 ...
2996,6facd092dfec22384fd9446fd84c1ce3,3 0 . 7 4 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 ...
2997,b7c7ddcf253b397213116453bb085158,8 0 . 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 ...
2998,f592ac6b878d2b7929e3723347f2cfd4,1 4 0 . 2 0 . 0 0 . 0 1 . 0 1 . 0 ...


In [9]:


preds = df['PredictionString'].tolist()
grouped_preds = [list(divide(pred.split(), 6)) for pred in preds]
grouped_preds[:5]

[[['14', '0.2', '0.0', '0.0', '1.0', '1.0'],
  ['11', '0.06', '1707.0', '2034.0', '1773.0', '2122.0'],
  ['10', '0.06', '1707.0', '2031.0', '1773.0', '2122.0']],
 [['11', '0.12000000000000002', '619.5', '357.5', '923.5', '466.5'],
  ['11', '0.08', '651.5', '359.99999999999994', '944.0', '425.99999999999994'],
  ['11', '0.02', '1532.0', '359.0', '1783.0', '472.0'],
  ['3', '0.13999999999999999', '917.0', '1495.0', '1897.0', '1953.0'],
  ['14', '0.2', '0.0', '0.0', '1.0', '1.0'],
  ['0', '0.04', '1195.0', '772.0', '1441.0', '1081.0']],
 [['14', '1', '0', '0', '1', '1']],
 [['14', '0.2', '0.0', '0.0', '1.0', '1.0'],
  ['9', '0.02', '153.0', '205.0', '790.0', '1138.0'],
  ['3', '0.14', '626.0', '772.0', '1363.5', '1133.0']],
 [['14', '0.2', '0.0', '0.0', '1.0', '1.0'],
  ['11', '0.02', '999.0', '322.0', '1318.0', '395.0']]]

In [10]:
new_preds = []

for pred in grouped_preds:
    temp = ''
    # each box is a tuple of 6 i.e. (class, confidence, xmin, ymin, xmax, ymax)
    for box in pred:
        # if we found some bounding-box i.e. `len(pred) > 1` & class is "No finding".
        if len(pred) > 1 and box[0] == '14':
            # Make the probability 0.
            box[1] = '0'
        temp += ' '.join(box) + ' '
    new_preds.append(temp.strip())
    
new_preds[:5]

['14 0 0.0 0.0 1.0 1.0 11 0.06 1707.0 2034.0 1773.0 2122.0 10 0.06 1707.0 2031.0 1773.0 2122.0',
 '11 0.12000000000000002 619.5 357.5 923.5 466.5 11 0.08 651.5 359.99999999999994 944.0 425.99999999999994 11 0.02 1532.0 359.0 1783.0 472.0 3 0.13999999999999999 917.0 1495.0 1897.0 1953.0 14 0 0.0 0.0 1.0 1.0 0 0.04 1195.0 772.0 1441.0 1081.0',
 '14 1 0 0 1 1',
 '14 0 0.0 0.0 1.0 1.0 9 0.02 153.0 205.0 790.0 1138.0 3 0.14 626.0 772.0 1363.5 1133.0',
 '14 0 0.0 0.0 1.0 1.0 11 0.02 999.0 322.0 1318.0 395.0']

In [11]:
df['PredictionString'] = new_preds
df.head()

Unnamed: 0,file,PredictionString
0,8b44752d98acbc233b093260c0f2061d,14 0 0.0 0.0 1.0 1.0 11 0.06 1707.0 2034.0 177...
1,3ca907c03b1409d8a703591f6ca7882e,11 0.12000000000000002 619.5 357.5 923.5 466.5...
2,b0634373edac7074e2ab28a088ee0612,14 1 0 0 1 1
3,af814787d788b29059ae3419a3b38dbd,14 0 0.0 0.0 1.0 1.0 9 0.02 153.0 205.0 790.0 ...
4,fe5c2e4756820873cd6e8332ad4c2e12,14 0 0.0 0.0 1.0 1.0 11 0.02 999.0 322.0 1318....


In [12]:
df.to_csv('submission.csv', index=False)