In [1]:
import os
import cv2
import shutil
import torch
import torchvision.transforms as transforms
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from statistics import mean
from datasets.UCEIS_dataset import UCEIS
from models import resnet
import matplotlib.pyplot as plt
import itertools
import numpy as np
from PIL import Image
from utils.provider import get_frame_annotation

**Prepare video**

In [2]:
# test videos:UC31-0.mp4, UC20-1.mp4, UC29-5.mp4, UC25-4.mp4

video_name = "UC12-5.mp4"
frame_root =  "../IBD/data_only_video_scores"
target_folder = "inference_results_on_videos"
target_path = os.path.join(target_folder, video_name)
target_video_path = os.path.join(target_path, "video/video.mp4")
video_path = os.path.join(frame_root, video_name, "video/video.mp4")
true_score=int(video_name.split('-')[1].split('.')[0])

if os.path.isdir(target_path):
    shutil.rmtree(target_path)
os.mkdir(target_path)
os.mkdir(os.path.join(target_path, "video"))
os.mkdir(os.path.join(target_path, "frames"))

video = cv2.VideoCapture(video_path)

frame_width = int(video.get(3)) 
frame_height = int(video.get(4))
size = (frame_width, frame_height)
fps = int(video.get(5))

print("size: "+str(size))
print("fps: "+str(fps))
print("target video path: "+ target_video_path)
# cv2.VideoWriter_fourcc(*'X264')
video_file = cv2.VideoWriter(target_video_path, cv2.VideoWriter_fourcc(*'h264'), fps, size)

size: (1920, 1080)
fps: 25
target video path: inference_results_on_videos/UC12-5.mp4/video/video.mp4


**Prepare model**

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device: ", device)

normalize = transforms.Normalize(mean=[0.348, 0.219, 0.184],
                                 std=[0.336, 0.219, 0.186])

test_transform = transforms.Compose([transforms.Resize((480,853)),
                                     transforms.ToTensor(),
                                     normalize])

state_dict_name = "weights/best_ResNet34.pth.tar"
model = resnet.resnet34(num_classes=[3, 4, 4])
model.load_state_dict(torch.load(state_dict_name, map_location=device))
model.to(device)
model.eval()

device:  cuda:0


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

**Inference on video**

In [4]:
frame_count = 0
score_vascular = []
score_bleeding = []
score_erosion = []
score_UCEIS = []

while(video.isOpened()):
    ret, frame = video.read()
    if(not ret):
        break        
    
    frame_RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_RGB = Image.fromarray(np.uint8(frame_RGB))
    
    frame_transformed = test_transform(frame_RGB)
    frame_transformed = frame_transformed.to(device)   
    frame_transformed.unsqueeze_(0)
    output = model(frame_transformed)
    
    prediction_0 = output[0].argmax(dim=1, keepdim=True)[0][0].item()
    prediction_1 = output[1].argmax(dim=1, keepdim=True)[0][0].item()
    prediction_2 = output[2].argmax(dim=1, keepdim=True)[0][0].item()
    UCEIS = prediction_0+prediction_1+prediction_2
    
    score_vascular.append(prediction_0)
    score_bleeding.append(prediction_1)
    score_erosion.append(prediction_2)
    score_UCEIS.append(UCEIS)
    
#     true_vascular, true_bleeding, true_erosion = get_frame_annotation(video_name, frame_count, "../IBD/annotations_only_video_scores")
    true_vascular, true_bleeding, true_erosion = -1, -1, -1
    
    frame = cv2.putText(frame, "Vascular: "+str(prediction_0)+ " | "+str(true_vascular), (10,100), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 255), 2, cv2.LINE_AA)
    frame = cv2.putText(frame, "Bleeding: "+str(prediction_1)+ " | "+str(true_bleeding), (10,200), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 255), 2, cv2.LINE_AA)
    frame = cv2.putText(frame, "Erosion : "+str(prediction_2)+ " | "+str(true_erosion), (10,300), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 255), 2, cv2.LINE_AA)
    frame = cv2.putText(frame, "UCEIS  : "+str(UCEIS)+" | "+str(true_score), (10,400), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 255), 2, cv2.LINE_AA)
    
    frame = frame.astype("uint8")
    video_file.write(frame)
    
    cv2.imwrite(os.path.join(target_path, "frames", str(frame_count)+".jpg"), frame)
    frame_count += 1
    
video.release()
video_file.release()
cv2.destroyAllWindows()

In [5]:
print("--- video statistics ---")
print("Vascular Score: "+str(mean(score_vascular)))
print("Bleeding Score: "+str(mean(score_bleeding)))
print("Erosion Score: "+str(mean(score_erosion)))
print("UCEIS: "+str(mean(score_UCEIS)))

--- video statistics ---
Vascular Score: 2
Bleeding Score: 0.9720930232558139
Erosion Score: 1.5767441860465117
UCEIS: 4.5488372093023255


**Play Video**

In [6]:
target_video_path

'inference_results_on_videos/UC12-5.mp4/video/video.mp4'

In [7]:
from IPython.display import Video
Video(os.path.join(target_video_path), width=960, height=540)

ValueError: To embed videos, you must pass embed=True (this may make your notebook files huge)
Consider passing Video(url='...')