In [1]:
#|default_exp process

In [2]:
import os
# Expose only GPU index 1 to CUDA. This is set here so that it is already in place
# when fastai is imported in the next cell.
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

# Install and load all libraries

In [3]:
#|export
from fastai.vision.all import *
from skimage.measure import label,regionprops,find_contours
from evalutils import DetectionAlgorithm
from evalutils.validators import UniquePathIndicesValidator,DataFrameValidator
from evalutils.exceptions import ValidationError
import json, random, SimpleITK, gc, cv2
from typing import Tuple, Dict
from pandas import DataFrame
import os

In [4]:
from nbdev.export import nb_export

In [5]:
#|export
def splitter(df):
    "Split `df` by its boolean `valid` column and return `(train_indices, valid_indices)` as lists."
    is_valid = df['valid']
    valid_idxs = df.index[is_valid].to_list()
    train_idxs = df.index[~is_valid].to_list()
    return train_idxs, valid_idxs

In [6]:
#|export
# the same 16-label vocabulary is used for each of the four arms (one fresh list per arm)
vocab=L([['bipolar_dissector', 'bipolar_forceps', 'blank', 'cadiere_forceps', 'clip_applier', 'force_bipolar', 'grasping_retractor', 'monopolar_curved_scissor', 'nan', 'needle_driver', 'permanent_cautery_hook_spatula', 'prograsp_forceps', 'stapler', 'suction_irrigator', 'tip_up_fenestrated_grasper', 'vessel_sealer'] for _ in range(4)])

In [7]:
#|export
# number of classes in each arm's vocabulary
c=vocab.map(len)

In [8]:
#|export
def cfg(i):
    "Total number of classes in the first `i` arms, i.e. the prediction column at which arm `i` (0-based) starts."
    leading_counts = c[:i]
    return leading_counts.sum()

In [9]:
#|export
# per-arm error rates: each metric scores one arm's slice of the concatenated predictions
def usm1_err(preds,usm1_targs,usm2_targs,usm3_targs,usm4_targs):
    "Error rate of arm 1, read from the first `cfg(1)` prediction columns."
    arm_preds = preds[:, :cfg(1)]
    return error_rate(arm_preds, usm1_targs)

def usm2_err(preds,usm1_targs,usm2_targs,usm3_targs,usm4_targs):
    "Error rate of arm 2, read from prediction columns `cfg(1):cfg(2)`."
    arm_preds = preds[:, cfg(1):cfg(2)]
    return error_rate(arm_preds, usm2_targs)

def usm3_err(preds,usm1_targs,usm2_targs,usm3_targs,usm4_targs):
    "Error rate of arm 3, read from prediction columns `cfg(2):cfg(3)`."
    arm_preds = preds[:, cfg(2):cfg(3)]
    return error_rate(arm_preds, usm3_targs)

def usm4_err(preds,usm1_targs,usm2_targs,usm3_targs,usm4_targs):
    "Error rate of arm 4, read from prediction columns `cfg(3):cfg(4)`."
    arm_preds = preds[:, cfg(3):cfg(4)]
    return error_rate(arm_preds, usm4_targs)

In [10]:
#|export
# combined error rate: plain sum of the four per-arm error rates
def combo_err(preds,usm1_targs,usm2_targs,usm3_targs,usm4_targs):
    "Sum of `usm1_err` .. `usm4_err` for the same predictions and targets."
    all_targs = (usm1_targs,usm2_targs,usm3_targs,usm4_targs)
    total = usm1_err(preds, *all_targs)
    for arm_err in (usm2_err, usm3_err, usm4_err):
        total = total + arm_err(preds, *all_targs)
    return total

In [11]:
#|export
# per-arm error rates computed directly from the learner's raw predictions
#
# `error_rate` takes the argmax over the class axis itself, so each function hands it the
# per-class scores of one arm. Passing already-argmaxed indices (as was done before) makes
# `error_rate` argmax a second time over the batch axis, which breaks the comparison with `targs`.
def usm1_err_raw(preds,targs):
    "Error rate of arm 1 from raw learner output (first `cfg(1)` columns of `preds`)."
    return error_rate(preds[:,:cfg(1)].softmax(dim=1), targs)

def usm2_err_raw(preds,targs):
    "Error rate of arm 2 from raw learner output (columns `cfg(1):cfg(2)` of `preds`)."
    return error_rate(preds[:,cfg(1):cfg(2)].softmax(dim=1), targs)

def usm3_err_raw(preds,targs):
    "Error rate of arm 3 from raw learner output (columns `cfg(2):cfg(3)` of `preds`)."
    return error_rate(preds[:,cfg(2):cfg(3)].softmax(dim=1), targs)

def usm4_err_raw(preds,targs):
    "Error rate of arm 4 from raw learner output (columns `cfg(3):cfg(4)` of `preds`)."
    return error_rate(preds[:,cfg(3):cfg(4)].softmax(dim=1), targs)

In [12]:
#|export
# per-arm losses: mean cross-entropy over one arm's slice of the concatenated predictions
def usm1_loss(preds,usm1_targs,usm2_targs,usm3_targs,usm4_targs,**kwargs):
    "Mean cross-entropy of arm 1 over the first `cfg(1)` prediction columns."
    loss_fn = CrossEntropyLossFlat(reduction='mean')
    return loss_fn(preds[:,:cfg(1)], usm1_targs, **kwargs)

def usm2_loss(preds,usm1_targs,usm2_targs,usm3_targs,usm4_targs,**kwargs):
    "Mean cross-entropy of arm 2 over prediction columns `cfg(1):cfg(2)`."
    loss_fn = CrossEntropyLossFlat(reduction='mean')
    return loss_fn(preds[:,cfg(1):cfg(2)], usm2_targs, **kwargs)

def usm3_loss(preds,usm1_targs,usm2_targs,usm3_targs,usm4_targs,**kwargs):
    "Mean cross-entropy of arm 3 over prediction columns `cfg(2):cfg(3)`."
    loss_fn = CrossEntropyLossFlat(reduction='mean')
    return loss_fn(preds[:,cfg(2):cfg(3)], usm3_targs, **kwargs)

def usm4_loss(preds,usm1_targs,usm2_targs,usm3_targs,usm4_targs,**kwargs):
    "Mean cross-entropy of arm 4 over prediction columns `cfg(3):cfg(4)`."
    loss_fn = CrossEntropyLossFlat(reduction='mean')
    return loss_fn(preds[:,cfg(3):cfg(4)], usm4_targs, **kwargs)

In [13]:
#|export
# combined loss: plain sum of the four per-arm losses
def combo_loss(preds,usm1_targs,usm2_targs,usm3_targs,usm4_targs,**kwargs):
    "Sum of `usm1_loss` .. `usm4_loss`; `kwargs` are forwarded unchanged to each of them."
    all_targs = (usm1_targs,usm2_targs,usm3_targs,usm4_targs)
    total = usm1_loss(preds, *all_targs, **kwargs)
    for arm_loss in (usm2_loss, usm3_loss, usm4_loss):
        total = total + arm_loss(preds, *all_targs, **kwargs)
    return total

In [14]:
#|export
# metrics for the learner: per-arm losses, per-arm error rates, then the combined error rate
metrics_cfg = [
    usm1_loss, usm2_loss, usm3_loss, usm4_loss,
    usm1_err, usm2_err, usm3_err, usm4_err,
    combo_err,
]

In [15]:
#|export
# error-rate helpers for inference and validation on raw learner output
def usm_err_raw(preds,targs):
    "Error rate of one arm's scores `preds` against `targs` (delegates to `error_rate`)."
    return error_rate(preds, targs)

def combo_err_raw(preds, targs):
    "Sum of the four per-arm error rates; `targs[k]` holds the targets of arm `k+1`."
    arm1_scores = preds[:, :cfg(1)].softmax(dim=1)
    arm2_scores = preds[:, cfg(1):cfg(2)].softmax(dim=1)
    arm3_scores = preds[:, cfg(2):cfg(3)].softmax(dim=1)
    arm4_scores = preds[:, cfg(3):cfg(4)].softmax(dim=1)
    total = usm_err_raw(arm1_scores, targs[0])
    total = total + usm_err_raw(arm2_scores, targs[1])
    total = total + usm_err_raw(arm3_scores, targs[2])
    total = total + usm_err_raw(arm4_scores, targs[3])
    return total

In [16]:
#|export 
def get_image_mask(fn):
    "Load the mask for image `fn`: the same path with 'images' replaced by 'masks' and 'jpg' in the suffix replaced by 'png'."
    img_path = Path(str(fn))
    # The extension swap is applied to the file suffix only, so a 'jpg' substring in a
    # directory name or in the file stem is no longer rewritten by accident.
    mask_suffix = img_path.suffix.replace('jpg', 'png')
    # As before, every 'images' occurrence in the rest of the path is mapped to 'masks'.
    mask_base = str(img_path.with_suffix('')).replace('images', 'masks')
    return PILMask.create(Path(mask_base + mask_suffix))
    

In [17]:
#|export
def custom_accuracy(inp, targ):
    "Fraction of positions where the argmax of `inp` over dim 1 equals `targ` with its dim 1 squeezed out."
    predicted = inp.argmax(dim=1)
    expected = targ.squeeze(1)
    hits = predicted == expected
    return hits.float().mean()

In [18]:
#|export
# Switch between container paths (/input, /output, /opt/algorithm) and the local ./test/... paths
# used by Surgtoolloc_det.__init__; False selects the local paths.
execute_in_docker = False

In [19]:
#|export
class VideoLoader:
    "File loader that only checks a video path exists; the video itself is not read here."

    def load(self, *, fname):
        "Return `[{'path': fname}]` if `fname` is an existing file, otherwise raise `IOError`."
        candidate = Path(fname)
        print(candidate)
        if candidate.is_file():
            return [{"path": fname}]
        raise IOError(f"Could not load {fname} using {self.__class__.__qualname__}.")

    # only path valid
    def hash_video(self, input_video):
        "Placeholder: no hash is computed, `None` is returned."
        return None

In [20]:
#|export
class UniqueVideoValidator(DataFrameValidator):
    """
    Checks that no entry of the `video` column occurs more than once
    """

    def validate(self, *, df: DataFrame):
        "Raise `ValidationError` if the `video` column is missing or contains repeated values."
        try:
            videos = df["video"]
        except KeyError:
            raise ValidationError("Column `video` not found in DataFrame.")

        n_distinct = len(set(videos))
        n_total = len(videos)
        if n_distinct != n_total:
            raise ValidationError("The videos are not unique, please submit a unique video for each case.")

In [21]:
# Smoke-test the loader on the sample clip; `load` raises IOError if ./test/input/vid_1_short.mp4 is missing.
v=VideoLoader()
v.load(fname='./test/input/vid_1_short.mp4')

test/input/vid_1_short.mp4


[{'path': './test/input/vid_1_short.mp4'}]

In [22]:
#|export
class Surgtoolloc_det(DetectionAlgorithm):
    """
    Surgical tool presence detection for a video.

    Frames are extracted from the video, cropped with a segmentation learner and then
    classified per arm by an ensemble of learners; the result is one dict of
    tool-name -> bool per frame.
    """

    def __init__(self):
        "Configure input/output paths (container or local, see `execute_in_docker`) and load all learners."
        super().__init__(
            index_key='input_video',
            file_loaders={'input_video': VideoLoader()},
            input_path=Path("/input/") if execute_in_docker else Path("./test/input/"),
            output_file=Path("/output/surgical-tool-presence.json") if execute_in_docker else Path(
                "./test/output/surgical-tool-presence.json"),
            validators=dict(input_video=(UniquePathIndicesValidator(),)),
        )

        # loading ensemble learner: every .pkl in the cls directory, plus one segmentation learner
        ensem_path=Path('/opt/algorithm/cls') if execute_in_docker else Path("test/algorithm/cls")
        segmen_path=Path('/opt/algorithm/seg') if execute_in_docker else Path("./test/algorithm/seg")
        self.ensem_learner=[load_learner(m, cpu=False) for m in ensem_path.ls() if m.suffix=='.pkl']
        self.crop_learner=load_learner(segmen_path/'seg_v1.pkl', cpu=False)
        # segmentation class names (not referenced elsewhere in this class)
        self.codes = ["Background", "Foreground"]

        # tool names that make up the keys of every per-frame output dict
        self.tool_list = ["needle_driver",
                          "monopolar_curved_scissor",
                          "force_bipolar",
                          "clip_applier",
                          "tip_up_fenestrated_grasper",
                          "cadiere_forceps",
                          "bipolar_forceps",
                          "vessel_sealer",
                          "suction_irrigator",
                          "bipolar_dissector",
                          "prograsp_forceps",
                          "stapler",
                          "permanent_cautery_hook_spatula",
                          "grasping_retractor"]

    def crop_images(self, src):
        """
        Crop every image found under `src` to the bounding box of the first labelled
        region of its predicted mask, writing the crop to `src/<file name>`.

        Images whose predicted mask contains no labelled region are left untouched.
        """
        src = Path(src)
        fs = get_image_files(src)
        if len(fs) == 0: return

        # keep our own reference to the test dataloader instead of reading it back from the learner
        test_dl = self.crop_learner.dls.test_dl(fs)
        preds, _ = self.crop_learner.get_preds(dl=test_dl)
        for p, f in zip(preds, test_dl.items):
            im = PILImage.create(f)
            (h, w) = im.shape
            mask = PILMask.create((np.array(p.argmax(0)) * 255).astype(np.uint8))
            mask = Resize((h, w), ResizeMethod.Squish)(mask)

            props = regionprops(label(np.array(mask)))
            # nothing was segmented as foreground: there is no box to crop to
            if not props: continue
            # NOTE(review): the first labelled region is used, not necessarily the largest — confirm.
            # regionprops bbox order is (min_row, min_col, max_row, max_col)
            min_row, min_col, max_row, max_col = props[0].bbox

            im_c = PILImage.create(np.array(im)[min_row:max_row, min_col:max_col])
            im_c.save(src/f.name)

    def extract_images(self, video_file):
        """
        Write every frame of `video_file` to the input path as `<frame index>.jpg`.

        All image files that `get_image_files` finds under the input path are deleted first.
        `video_file` is looked up relative to the input path; if no such file exists it is
        used as given. Raises `IOError` if the video cannot be opened.
        """
        src = Path(self._input_path)

        for stale in get_image_files(src): os.remove(stale)

        # A path that already includes the input directory must not be prefixed a second time.
        video_path = src/video_file
        if not video_path.is_file() and Path(video_file).is_file():
            video_path = Path(video_file)

        cap = cv2.VideoCapture(str(video_path))
        if not cap.isOpened():
            cap.release()
            raise IOError(f"Could not open video file {video_path}.")

        count = 0
        try:
            while True:
                is_read, frame = cap.read()
                if not is_read:
                    # break out of the loop if there are no frames to read
                    break
                cv2.imwrite(str(src/f'{count}.jpg'), frame)
                count += 1
        finally:
            cap.release()

    def tool_detection_model_output(self):
        "Return two tool names drawn at random from `tool_list` (not used by `predict`)."
        random_tool_predictions = [random.randint(0, len(self.tool_list) - 1), random.randint(0, len(self.tool_list) - 1)]

        return [self.tool_list[random_tool_predictions[0]], self.tool_list[random_tool_predictions[1]]]

    def tool_detect_json_sample(self):
        "Return a fresh per-frame output dict: `slice_nr` set to 1 and every tool set to False."
        slice_dict = {"slice_nr": 1}
        tool_boolean_dict = {i: False for i in self.tool_list}

        return {**slice_dict, **tool_boolean_dict}

    def process_case(self, *, idx, case):
        "Run `predict` on the video at `case.path` and return its per-frame predictions."
        return self.predict(case.path)

    def save(self):
        "Write the results of the first processed case to the output file as JSON."
        with open(str(self._output_file), "w") as f:
            json.dump(self._case_results[0], f)

    def predict(self, fname) -> list:
        """
        Inputs:
        fname -> video file path

        Output:
        tools -> list of prediction dictionaries (one per frame, sorted by `slice_nr`);
                 empty if the video yields no frames
        """

        print('Loading, extracting and cropping video file: ' + str(fname))
        self.extract_images(fname)
        self.crop_images(self._input_path)

        fs = get_image_files(self._input_path)
        num_frames = len(fs)
        print(num_frames)
        if num_frames == 0: return []

        all_frames_predicted_outputs = []
        all_undefined_tools = []

        # test-time-augmented predictions of every learner on the same frames
        tta_res = []
        prs_items = []
        for learn in self.ensem_learner:
            test_dl = learn.dls.test_dl(fs)
            tta_res.append(learn.tta(dl=test_dl))
            if len(prs_items) < 1:
                prs_items = test_dl.items

        tta_prs = first(zip(*tta_res))
        # NOTE(review): the first learner's predictions are appended a second time, so it carries
        # double weight in the mean below — confirm this weighting is intentional.
        tta_prs += tta_prs[:1]
        tta_prs = torch.stack(tta_prs)

        # per arm: average over learners, take the best class per frame and map it to its label
        lbls = []
        for i in range(len(c)):
            arm_preds = tta_prs[:, :, cfg(i):cfg(i+1)].mean(0)
            arm_idxs = arm_preds.argmax(dim=1)
            lbls.append([vocab[i][j] for j in arm_idxs.tolist()])

        for *arm_tools, f in zip(*lbls, prs_items):
            frame_dict = self.tool_detect_json_sample()
            frame_dict['slice_nr'] = int(f.stem)
            for tool in arm_tools:
                if tool in frame_dict:
                    frame_dict[tool] = True
                else:
                    # labels outside `tool_list` (e.g. 'nan', 'blank') are only recorded for the
                    # summary below; they never become keys of the output dict
                    all_undefined_tools.append(tool)
            all_frames_predicted_outputs.append(frame_dict)

        print(f'List of undefined tools: {set(all_undefined_tools)}.')
        tools = sorted(all_frames_predicted_outputs, key=lambda d: d['slice_nr'])

        return tools

In [23]:
# End-to-end check: build the algorithm (loads all learners) and time a full prediction on the sample clip.
# Note that `predict` deletes and rewrites the image files under the input path.
%time pred_json=Surgtoolloc_det().predict('vid_1_short.mp4')

Loading, extracting and cropping video file: vid_1_short.mp4


60


List of undefined tools: {'nan'}.
CPU times: user 2min 9s, sys: 47.7 s, total: 2min 56s
Wall time: 1min 45s


In [24]:
pred_json

[{'slice_nr': 0,
  'needle_driver': False,
  'monopolar_curved_scissor': True,
  'force_bipolar': False,
  'clip_applier': False,
  'tip_up_fenestrated_grasper': False,
  'cadiere_forceps': True,
  'bipolar_forceps': True,
  'vessel_sealer': False,
  'suction_irrigator': False,
  'bipolar_dissector': False,
  'prograsp_forceps': False,
  'stapler': False,
  'permanent_cautery_hook_spatula': False,
  'grasping_retractor': False},
 {'slice_nr': 1,
  'needle_driver': False,
  'monopolar_curved_scissor': True,
  'force_bipolar': False,
  'clip_applier': False,
  'tip_up_fenestrated_grasper': False,
  'cadiere_forceps': True,
  'bipolar_forceps': True,
  'vessel_sealer': False,
  'suction_irrigator': False,
  'bipolar_dissector': False,
  'prograsp_forceps': False,
  'stapler': False,
  'permanent_cautery_hook_spatula': False,
  'grasping_retractor': False},
 {'slice_nr': 2,
  'needle_driver': False,
  'monopolar_curved_scissor': True,
  'force_bipolar': False,
  'clip_applier': False,
  't

In [None]:
#|export
# Script entry point of the exported module: build the algorithm and run `process()`,
# which is not defined in this class, so it comes from a base class.
if __name__ == "__main__":
    Surgtoolloc_det().process()

In [25]:
# Export this notebook's `#|export` cells with nbdev, using '.' as the target directory.
nb_export('09_inference.ipynb', '.')