In [1]:
!pip install -q tensorflow-io
!pip install ensemble-boxes

Collecting ensemble-boxes
  Downloading ensemble_boxes-1.0.4-py3-none-any.whl (14 kB)
Installing collected packages: ensemble-boxes
Successfully installed ensemble-boxes-1.0.4


In [2]:
import pandas as pd
import numpy as np
import os
import cv2
from tqdm import tqdm

import tensorflow as tf
import tensorflow_io as tfio
from ensemble_boxes import *

import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
df=pd.read_csv(r'../input/vinbigdata-chest-xray-abnormalities-detection/train.csv')
df=df[df['class_id']!=14]
df["class_id"] = df["class_id"] + 1 # Incrementing by 1

images=df.image_id.unique()

print(f'Total records: {len(df)}')
print(f'Number of images: {len(images)}')

Total records: 36096
Number of images: 4394


In [4]:
path=r'../input/vinbigdata-chest-xray-abnormalities-detection/train/'

for i in tqdm(range(len(images))):

    image_bytes = tf.io.read_file(path+images[i]+'.dicom')
    img = tfio.image.decode_dicom_image(image_bytes, dtype = tf.uint16)
    
    img = tf.squeeze(img, axis = 0)

    h,w,_ = img.shape

    tmp=df[df['image_id']==images[i]]

    tmp[["x_min", "x_max"]] = tmp[["x_min", "x_max"]]/w
    tmp[["y_min", "y_max"]] = tmp[["y_min", "y_max"]]/h

    boxes_list = tmp[["x_min", "y_min", "x_max", "y_max"]].values.tolist()
    scores_list = [1]*len(boxes_list)
    labels_list = list(tmp["class_id"])

    # Applying WBF
    boxes, _, labels = weighted_boxes_fusion(boxes_list = [boxes_list],
                                             scores_list = [scores_list],
                                             labels_list = [labels_list],
                                             weights = None, 
                                             iou_thr = 0.3, 
                                             skip_box_thr = 0.0001)
    
    tmp_reduced = pd.DataFrame(boxes, columns = ["x_min", "y_min", "x_max", "y_max"])
    tmp_reduced[["x_min", "x_max"]] = tmp_reduced[["x_min", "x_max"]]*w
    tmp_reduced[["y_min", "y_max"]] = tmp_reduced[["y_min", "y_max"]]*h
    tmp_reduced['image_id']=images[i]
    tmp_reduced['height']=h
    tmp_reduced['width']=w
    tmp_label = pd.DataFrame(labels, columns = ["class_id"])
    
    tmp_reduced = pd.concat([tmp_reduced,tmp_label],axis=1)

    if "df_reduce" in globals():
        df_reduce=pd.concat([df_reduce,tmp_reduced],axis=0)
    else:
        df_reduce=tmp_reduced.copy(deep=True)

df_reduce = df_reduce.reset_index(drop=True)

df_reduce = df_reduce.astype({"x_min": int, 
                              "y_min": int, 
                              "x_max": int, 
                              "y_max": int,
                              "height": int,
                              "width": int,
                              "class_id": str})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
100%|██████████| 4394/4394 [2:04:20<00:00,  1.70s/it]  


In [6]:
print(df_reduce.shape)

(21836, 8)


In [7]:
df_reduce.to_csv("train.csv", index=False)

In [12]:
# Creating LabelMap for tensorflow
LabelMap = df[['class_id','class_name']]
LabelMap = LabelMap.drop_duplicates().reset_index(drop = True)
LabelMap.sort_values(by='class_id',inplace=True)

#Generate label map
label_map_path=r'/kaggle/working/'
def label_map(data):
    msg = ''
    
    for i in range(len(data)):
        msg = msg + "item {\n"
        msg = msg + " id: " + str(data.iloc[i,0]) + "\n"
        msg = msg + " name: '" + data.iloc[i,1] + "'\n}\n\n"
    return msg[:-1]

Label_Map = label_map(LabelMap)

with open(label_map_path + "label_map.pbtxt", "w") as f:
    f.write(Label_Map)
    f.close()