# <center> Tool Usage Evaluation on Test Set Videos </center>

In [1]:
import cv2
from tqdm.notebook import tqdm
import sys
import os
sys.path.append('./yolov7')
import yolov7
from load_model import load_model
from smooth_results import SmoothedVideo
import pandas as pd
import json
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Build the lookup from tool code (e.g. 'T1') to a capitalized tool name.
# tools.txt holds a JSON object of {tool name: tool code}, optionally written
# as a Python-style assignment ("tool_usage = {...}").
with open('.//HW1_dataset//tool_usage//tools.txt', 'r') as file:
    contents = file.read().strip()

# Drop the leading "tool_usage =" if present. str.lstrip() must not be used for
# this: it strips any run of the given *characters*, not the literal prefix, so
# it would also eat payload characters that happen to be in that set.
if contents.startswith('tool_usage'):
    contents = contents.partition('=')[2]

tool_name_to_code = json.loads(contents)

# Invert to {code: Name}; str.capitalize() upper-cases only the first letter.
tool_usage_dict = {code: name.capitalize() for name, code in tool_name_to_code.items()}
# Replace whatever name the file gives code T0 with the short label 'Empty',
# which is the suffix used by the 'Right_Empty' / 'Left_Empty' class names.
tool_usage_dict['T0'] = 'Empty'

In [3]:
# Show the tool code -> tool name lookup built in the previous cell.
tool_usage_dict

{'T0': 'Empty', 'T1': 'Needle_driver', 'T2': 'Forceps', 'T3': 'Scissors'}

In [4]:
# Load the ground-truth tool-usage intervals for every video and both hands.
# Each annotation file has one interval per line: "<min frame> <max frame> <tool code>".
label_rows = []
# Sort the listing so the row order does not depend on the filesystem.
for video in sorted(os.listdir('./videos')):
    # splitext keeps dots inside the stem; split('.')[0] would truncate at the first one.
    video_stem = os.path.splitext(video)[0]
    for hand in ['Right', 'Left']:
        with open(f'.//HW1_dataset//tool_usage//tools_{hand.lower()}//{video_stem}.txt', 'r') as file:
            for row in file:
                # split() with no argument tolerates repeated spaces, tabs and '\r\n'.
                fields = row.split()
                if not fields:
                    continue  # skip blank lines (e.g. a trailing newline at EOF)
                minimum, maximum, label = fields
                label_rows.append({'Video Name': video,
                                   'Min Frame': minimum,
                                   'Max Frame': maximum,
                                   'Hand': hand,
                                   'Class': label})

real_labels = pd.DataFrame(label_rows, columns=['Video Name', 'Min Frame', 'Max Frame', 'Hand', 'Class'])
# Frame bounds are read as text; make them integers so they can be compared with frame indices.
real_labels['Min Frame'] = real_labels['Min Frame'].astype(int)
real_labels['Max Frame'] = real_labels['Max Frame'].astype(int)
real_labels

Unnamed: 0,Video Name,Min Frame,Max Frame,Hand,Class
0,P022_balloon1.wmv,0,251,Right,T0
1,P022_balloon1.wmv,252,2893,Right,T1
2,P022_balloon1.wmv,2894,2923,Right,T0
3,P022_balloon1.wmv,2924,3088,Right,T3
4,P022_balloon1.wmv,3089,3253,Right,T0
...,...,...,...,...,...
98,P026_tissue1.wmv,10289,11607,Left,T0
99,P026_tissue1.wmv,11608,13090,Left,T2
100,P026_tissue1.wmv,13091,13345,Left,T0
101,P026_tissue1.wmv,13346,13786,Left,T2


In [5]:
# Class name -> integer class id.
# Ids are laid out per tool, with the right hand on the even id and the left
# hand on the following odd id.
classes_names = {
    'Right_Scissors': 0,
    'Left_Scissors': 1,
    'Right_Needle_driver': 2,
    'Left_Needle_driver': 3,
    'Right_Forceps': 4,
    'Left_Forceps': 5,
    'Right_Empty': 6,
    'Left_Empty': 7,
}

In [6]:
def get_video_tool_usage_predictions(model, video_path, desc, smooth_thres):
    """Run the smoothed tool-usage predictor over every frame of a video.

    Parameters
    ----------
    model
        Detection model; passed unchanged to ``SmoothedVideo``.
    video_path : str
        Path of the video file to open with OpenCV.
    desc : str
        Label shown on the tqdm progress bar.
    smooth_thres
        Forwarded to ``SmoothedVideo`` as its ``smooth_thres`` argument.

    Returns
    -------
    list
        One entry per frame that was read, in frame order: the second element
        returned by ``SmoothedVideo.make_smooth`` for that frame.
        NOTE(review): presumably an iterable of predicted class ids (the caller
        iterates over it and applies ``% 2``) -- confirm against SmoothedVideo.
        Empty if the video cannot be opened.
    """
    tools_usages = []
    sv = SmoothedVideo(model, smooth_thres=smooth_thres)

    cap = cv2.VideoCapture(video_path)
    try:
        # Check the capture before querying it; an unopened capture reports 0 frames.
        if not cap.isOpened():
            print('Error opening video file')
            return tools_usages

        # Frame count as reported by the container; used to bound the loop
        # and to size the progress bar.
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        for _frame_idx in tqdm(range(length), desc=desc):
            ret, frame = cap.read()

            # Stop early if the stream ends before the reported frame count.
            if not ret:
                break

            # OpenCV decodes frames as BGR; convert to RGB before prediction.
            im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            _smoothed, tool_usage = sv.make_smooth(im_rgb)
            tools_usages.append(tool_usage)
    finally:
        # Always free the capture, even if prediction raises mid-video.
        cap.release()
        cv2.destroyAllWindows()

    return tools_usages

In [7]:
# Per-experiment settings, kept as three parallel lists (index i = experiment i + 1).

# All three experiments share the same model config file.
cfgs = ['./yolov7/cfg/training/yolov7-tiny-exp1.yaml'] * 3

# Best checkpoint of each training run.
weights = [
    './yolov7/runs/train/exp/weights/best.pt',
    './yolov7/runs/train/exp2/weights/best.pt',
    './yolov7/runs/train/exp3/weights/best.pt',
]

# Passed as `smooth_thres` when generating predictions for each experiment.
conf = [0.95, 0.87, 0.87]

In [8]:
# For each trained model: predict tool usage on every test video, then attach
# the ground-truth class id to every prediction. `experiments[i]` ends up as a
# DataFrame with columns Video Name, Frame, Hand, Prediction, Real Class.
experiments = []

# cfgs / weights / conf are parallel lists, one entry per experiment.
for cfg_path, weights_path, smooth_thres in zip(cfgs, weights, conf):
    model = load_model(cfg_path, weights_path)

    # One record per predicted class id per frame.
    records = []
    for video in sorted(os.listdir('./videos')):
        labels = get_video_tool_usage_predictions(model, './videos/' + video, video, smooth_thres=smooth_thres)
        for frame_idx, label in enumerate(labels):
            for val in label:
                # Class ids are laid out so that even ids are right-hand classes
                # and odd ids are left-hand classes (see classes_names).
                # NOTE(review): the hand used to look up the ground truth is
                # therefore derived from the *predicted* id; a prediction with
                # the wrong hand is scored against that wrong hand's ground
                # truth. Confirm this is the intended evaluation protocol.
                records.append({'Video Name': video,
                                'Frame': frame_idx,
                                'Hand': 'Right' if val % 2 == 0 else 'Left',
                                'Prediction': int(val)})
    tool_usage = pd.DataFrame(records, columns=['Video Name', 'Frame', 'Hand', 'Prediction'])

    # Ground-truth lookup, vectorized: pair every prediction with all labelled
    # intervals of the same video and hand, then keep the interval that
    # contains the frame (bounds inclusive).
    indexed = tool_usage.rename_axis('_row').reset_index()
    candidates = indexed.merge(real_labels, on=['Video Name', 'Hand'], how='inner')
    in_interval = candidates['Frame'].between(candidates['Min Frame'], candidates['Max Frame'])
    matched = candidates.loc[in_interval, ['_row', 'Class']].sort_values('_row')

    # Every prediction must fall in exactly one labelled interval; fail loudly
    # rather than silently dropping or duplicating rows.
    if len(matched) != len(tool_usage) or not matched['_row'].is_unique:
        raise ValueError('Each predicted frame must match exactly one ground-truth interval')

    # `matched` is sorted by the original row position, so a positional
    # assignment lines up with tool_usage.
    tool_usage['Real Class'] = matched['Class'].to_numpy()

    # Tool code ('T1') -> tool name -> '<Hand>_<Tool>' -> integer class id.
    # Plain dict indexing is used so an unknown code/name raises KeyError.
    tool_usage['Real Class'] = tool_usage['Real Class'].apply(lambda code: tool_usage_dict[code])
    tool_usage['Real Class'] = tool_usage['Hand'] + '_' + tool_usage['Real Class']
    tool_usage['Real Class'] = tool_usage['Real Class'].apply(lambda name: classes_names[name])
    experiments.append(tool_usage)

P022_balloon1.wmv:   0%|          | 0/7589 [00:00<?, ?it/s]

P023_tissue2.wmv:   0%|          | 0/11727 [00:00<?, ?it/s]

P024_balloon1.wmv:   0%|          | 0/6761 [00:00<?, ?it/s]

P025_tissue2.wmv:   0%|          | 0/4904 [00:00<?, ?it/s]

P026_tissue1.wmv:   0%|          | 0/14893 [00:00<?, ?it/s]

P022_balloon1.wmv:   0%|          | 0/7589 [00:00<?, ?it/s]

P023_tissue2.wmv:   0%|          | 0/11727 [00:00<?, ?it/s]

P024_balloon1.wmv:   0%|          | 0/6761 [00:00<?, ?it/s]

P025_tissue2.wmv:   0%|          | 0/4904 [00:00<?, ?it/s]

P026_tissue1.wmv:   0%|          | 0/14893 [00:00<?, ?it/s]

P022_balloon1.wmv:   0%|          | 0/7589 [00:00<?, ?it/s]

P023_tissue2.wmv:   0%|          | 0/11727 [00:00<?, ?it/s]

P024_balloon1.wmv:   0%|          | 0/6761 [00:00<?, ?it/s]

P025_tissue2.wmv:   0%|          | 0/4904 [00:00<?, ?it/s]

P026_tissue1.wmv:   0%|          | 0/14893 [00:00<?, ?it/s]

## Experiment 1

In [9]:
# Per-class precision / recall / F1 for the first experiment, as a nested dict.
first_experiment = experiments[0]
report = classification_report(
    first_experiment['Real Class'],
    first_experiment['Prediction'],
    output_dict=True,
)

In [10]:
# Render the report with one row per class / average and one column per metric.
pd.DataFrame(report).transpose()

Unnamed: 0,precision,recall,f1-score,support
0,0.821858,0.809473,0.815618,3716.0
2,0.975368,0.991177,0.983209,39671.0
3,0.0,0.0,0.0,0.0
5,0.959742,0.898953,0.928353,15567.0
6,0.696613,0.54589,0.61211,2713.0
7,0.95195,0.980021,0.965782,30081.0
accuracy,0.951345,0.951345,0.951345,0.951345
macro avg,0.734255,0.704252,0.717512,91748.0
weighted avg,0.950579,0.951345,0.950427,91748.0


In [None]:
## Experiment 2

In [11]:
# Experiment 2: per-class precision / recall / F1, one row per class / average.
# classification_report is already imported in the notebook's first cell, so it
# is not re-imported here.
report = classification_report(experiments[1]['Real Class'], experiments[1]['Prediction'], output_dict=True)
pd.DataFrame(report).T

Unnamed: 0,precision,recall,f1-score,support
0,0.815816,0.675245,0.738904,3575.0
1,0.0,0.0,0.0,0.0
2,0.96917,0.977139,0.973138,39281.0
3,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0
5,0.578776,0.84075,0.68559,15887.0
6,0.546969,0.59255,0.568848,2604.0
7,0.899203,0.6758,0.771658,30401.0
accuracy,0.830994,0.830994,0.830994,0.830994
macro avg,0.476242,0.470186,0.467267,91748.0


In [12]:
# Experiment 3: per-class precision / recall / F1, one row per class / average.
# classification_report is already imported in the notebook's first cell, so it
# is not re-imported here.
report = classification_report(experiments[2]['Real Class'], experiments[2]['Prediction'], output_dict=True)
pd.DataFrame(report).T

Unnamed: 0,precision,recall,f1-score,support
0,0.802932,0.765591,0.783817,3720.0
1,0.0,0.0,0.0,0.0
2,0.970194,0.964644,0.967411,39682.0
3,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0
5,0.603076,0.946949,0.736868,15570.0
6,0.565894,0.643786,0.602332,2768.0
7,0.960993,0.675686,0.793473,30008.0
accuracy,0.849381,0.849381,0.849381,0.849381
macro avg,0.487886,0.499582,0.485488,91748.0
