<div align="center", class="alert alert-block alert-success">
    <h1>Deep Learning Project</h1>
    <h2>Vanilla U-Net</h2>
    <h3>Performing Left ventricle Segmentation on Videos</h3>
    <h4><i>By Ariba Khan (17270) and Dr. Sawera Hanif (29413)</i></h4>
</div>

### Introduction

In this notebook, we will import the trained **vanilla U-Net model** from the previous notebook which had learned on an initial dataset of **1,014** images and their corresponding masks.

The objective is to evaluate the capability of the vanilla U-Net to accurately segment the left ventricle in echocardiogram videos, despite being trained on a constrained dataset with limited publicly available annotated data.

#### Importing All Necessary Libraries

In [3]:
pip install GPUtil

Collecting GPUtil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: GPUtil
  Building wheel for GPUtil (setup.py) ... [?25l[?25hdone
  Created wheel for GPUtil: filename=GPUtil-1.4.0-py3-none-any.whl size=7392 sha256=e57a5b782820d9eb44812e704e8fb655feaeaeb1b3119df5a4c95fa5c3bd8651
  Stored in directory: /root/.cache/pip/wheels/a9/8a/bd/81082387151853ab8b6b3ef33426e98f5cbfebc3c397a9d4d0
Successfully built GPUtil
Installing collected packages: GPUtil
Successfully installed GPUtil-1.4.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import random_split, DataLoader, Dataset
from torchvision import transforms
import os
import matplotlib.pyplot as plt
import torchvision.transforms.functional as TF
import torch.nn.functional as F
import time
import psutil
import GPUtil
from threading import Thread
from sklearn.metrics import accuracy_score
import zipfile
import cv2
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

#### Defining the Vanilla U-Net Model

In [12]:
class VanillaUNet(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(VanillaUNet, self).__init__()

        # encoder blocks
        self.enc1 = self.conv_block(in_channels, 64)
        self.enc2 = self.conv_block(64, 128)
        self.enc3 = self.conv_block(128, 256)
        self.enc4 = self.conv_block(256, 512)

        # pooling layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # decoder blocks
        self.upconv3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.dec3 = self.conv_block(512, 256)

        self.upconv2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.dec2 = self.conv_block(256, 128)

        self.upconv1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec1 = self.conv_block(128, 64)

        # final output layer
        self.out = nn.Conv2d(64, out_channels, kernel_size=1)

    def conv_block(self, in_channels, out_channels):
        
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        # encoder
        x1 = self.enc1(x)
        x2 = self.enc2(self.pool(x1))
        x3 = self.enc3(self.pool(x2))
        x4 = self.enc4(self.pool(x3))

        # decoder
        x = self.upconv3(x4)
        x = torch.cat([x, x3], dim=1)
        x = self.dec3(x)

        x = self.upconv2(x)
        x = torch.cat([x, x2], dim=1)
        x = self.dec2(x)

        x = self.upconv1(x)
        x = torch.cat([x, x1], dim=1)
        x = self.dec1(x)

        return self.out(x)

#### Monitoring Resources During Inference

In [6]:
def monitor_resources_continuously(log):
    process = psutil.Process()
    while log["running"]:
        try:
            # get memory usage
            memory_info = process.memory_info()
            memory_mb = memory_info.rss / (1024 * 1024) # convert to MB
            
            # get CPU usage
            cpu_percent = psutil.cpu_percent(interval=0.1) # short interval for real-time updates
            
            # get GPU usage
            gpus = GPUtil.getGPUs()
            gpu_percent = gpus[0].memoryUtil * 100 if gpus else 0 # GPU memory usage
            
            # logging the usage
            log["memory"].append(memory_mb)
            log["cpu"].append(cpu_percent)
            log["gpu"].append(gpu_percent)
        except Exception as e:
            print(f"Resource monitoring error: {e}")
        time.sleep(0.5)  # log every 0.5 seconds

#### Video Segmentation Functionality

In [26]:
# Functions for Video Segmentation

# 1. Function to apply mask to frame
def apply_mask_to_frame(frame, mask):
    """Apply the mask to the frame."""
    # ensuring the frame and mask are of the same shape
    mask_resized = cv2.resize(mask, (frame.shape[1], frame.shape[0]))
    
    # converting the mask to a 3-channel image (for visualization)
    mask_colored = np.stack([mask_resized] * 3, axis=-1)
    
    # zeroing out the areas outside the left ventricle
    masked_frame = frame * mask_colored
    
    # converting to uint8 format
    masked_frame = np.clip(masked_frame, 0, 255).astype(np.uint8)
    return masked_frame

# 2. Function to segment left ventricle in each frame
def segment_left_ventricle_in_video(input_video_path, output_dir, model, device, 
                                    original_video_name="Original.avi", segmented_video_name="VanillaUNet.avi"):
    # defining output paths
    original_output_path = os.path.join(output_dir, original_video_name)
    segmented_output_path = os.path.join(output_dir, segmented_video_name)
    
    # opening the input video
    cap = cv2.VideoCapture(input_video_path)
    
    # getting video information
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    
    # defining the codec and creating VideoWriter objects
    fourcc = cv2.VideoWriter_fourcc(*'XVID') # for .avi files
    out_original = cv2.VideoWriter(original_output_path, fourcc, fps, (frame_width, frame_height))
    out_segmented = cv2.VideoWriter(segmented_output_path, fourcc, fps, (frame_width, frame_height))
    
    # preparing the transformation for the model input
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    
    # moving the model to the GPU before inference
    model.to(device)
    model.eval()
    
    with torch.no_grad():
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            
            # writing the original frame to the "Original" video
            out_original.write(frame)
            
            # preprocessing the frame
            input_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            input_frame = transform(input_frame).unsqueeze(0).to(device)  # Add batch dimension
            
            # running the model on the frame
            output_mask = model(input_frame)
            output_mask = torch.sigmoid(output_mask).cpu().numpy().squeeze()  # Convert to numpy
            
            # applying the mask to the frame
            segmented_frame = apply_mask_to_frame(frame, output_mask)
            
            # writing the processed frame to the "VanillaUNet" video
            out_segmented.write(segmented_frame)
        
        cap.release()
        out_original.release()
        out_segmented.release()
        print(f"Original video saved to {original_output_path}")
        print(f"Segmented video saved to {segmented_output_path}")

#### Loading Pre-Trained Model Weights

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VanillaUNet(in_channels=3, out_channels=1)
model.load_state_dict(torch.load("/kaggle/input/vanilla-u-net-trained/pytorch/default/1/vanilla_unet.pth"))

<All keys matched successfully>

<div align="center", class="alert alert-block alert-success">
    <h4>Inference Phase</h4>
</div>

#### Video 1:

In [27]:
input_video_path = "/kaggle/input/echonet-videos/EchoNet-Dynamic/Videos/0X1002E8FBACD08477.avi"
output_dir = "/kaggle/working"
original_video_name = "Video1_Original.avi"  # custom name for the original video
segmented_video_name = "Video1_VanillaUNet.avi"  # custom name for the segmented video

In [28]:
# logging setup for resource monitoring
log = {"memory": [], "cpu": [], "gpu": [], "running": True}

start_time = time.time()

# starting resource monitoring in a separate thread
monitor_thread = Thread(target=monitor_resources_continuously, args=(log,))
monitor_thread.start()

segment_left_ventricle_in_video(input_video_path, output_dir, model, device, 
                                original_video_name=original_video_name, 
                                segmented_video_name=segmented_video_name)

# stopping resource monitoring after training is complete
log["running"] = False
monitor_thread.join()

# calculating training time
end_time = time.time()
inference_time = end_time - start_time

Original video saved to /kaggle/working/Video1_Original.avi
Segmented video saved to /kaggle/working/Video1_VanillaUNet.avi


In [29]:
# recording resources used during inference

if len(log["memory"]) > 0:
    avg_memory = sum(log["memory"]) / len(log["memory"])
    peak_memory = max(log["memory"])
else:
    avg_memory = peak_memory = 0

if len(log["cpu"]) > 0:
    avg_cpu = sum(log["cpu"]) / len(log["cpu"])
    peak_cpu = max(log["cpu"])
else:
    avg_cpu = peak_cpu = 0

if len(log["gpu"]) > 0:
    avg_gpu = sum(log["gpu"]) / len(log["gpu"])
    peak_gpu = max(log["gpu"])
else:
    avg_gpu = peak_gpu = 0

print(f"Inference Time: {inference_time:.2f} seconds")
print(f"Average Memory Usage: {avg_memory:.2f} MB")
print(f"Peak Memory Usage: {peak_memory:.2f} MB")
print(f"Average CPU Usage: {avg_cpu:.2f}%")
print(f"Peak CPU Usage: {peak_cpu:.2f}%")
print(f"Average GPU Usage: {avg_gpu:.2f}%")
print(f"Peak GPU Usage: {peak_gpu:.2f}%")

Inference Time: 1.28 seconds
Average Memory Usage: 897.28 MB
Peak Memory Usage: 897.28 MB
Average CPU Usage: 58.25%
Peak CPU Usage: 75.00%
Average GPU Usage: 1.37%
Peak GPU Usage: 1.37%


#### Video 2

In [30]:
input_video_path = "/kaggle/input/echonet-videos/EchoNet-Dynamic/Videos/0X1005D03EED19C65B.avi"
output_dir = "/kaggle/working"
original_video_name = "Video2_Original.avi"  # custom name for the original video
segmented_video_name = "Video2_VanillaUNet.avi"  # custom name for the segmented video

In [31]:
# logging setup for resource monitoring
log = {"memory": [], "cpu": [], "gpu": [], "running": True}

start_time = time.time()

# starting resource monitoring in a separate thread
monitor_thread = Thread(target=monitor_resources_continuously, args=(log,))
monitor_thread.start()

segment_left_ventricle_in_video(input_video_path, output_dir, model, device, 
                                original_video_name=original_video_name, 
                                segmented_video_name=segmented_video_name)

# stopping resource monitoring after training is complete
log["running"] = False
monitor_thread.join()

# calculating training time
end_time = time.time()
inference_time = end_time - start_time

Original video saved to /kaggle/working/Video2_Original.avi
Segmented video saved to /kaggle/working/Video2_VanillaUNet.avi


In [32]:
# recording resources used during inference

if len(log["memory"]) > 0:
    avg_memory = sum(log["memory"]) / len(log["memory"])
    peak_memory = max(log["memory"])
else:
    avg_memory = peak_memory = 0

if len(log["cpu"]) > 0:
    avg_cpu = sum(log["cpu"]) / len(log["cpu"])
    peak_cpu = max(log["cpu"])
else:
    avg_cpu = peak_cpu = 0

if len(log["gpu"]) > 0:
    avg_gpu = sum(log["gpu"]) / len(log["gpu"])
    peak_gpu = max(log["gpu"])
else:
    avg_gpu = peak_gpu = 0

print(f"Inference Time: {inference_time:.2f} seconds")
print(f"Average Memory Usage: {avg_memory:.2f} MB")
print(f"Peak Memory Usage: {peak_memory:.2f} MB")
print(f"Average CPU Usage: {avg_cpu:.2f}%")
print(f"Peak CPU Usage: {peak_cpu:.2f}%")
print(f"Average GPU Usage: {avg_gpu:.2f}%")
print(f"Peak GPU Usage: {peak_gpu:.2f}%")

Inference Time: 0.64 seconds
Average Memory Usage: 897.28 MB
Peak Memory Usage: 897.28 MB
Average CPU Usage: 46.30%
Peak CPU Usage: 46.30%
Average GPU Usage: 1.37%
Peak GPU Usage: 1.37%


#### Video 3

In [33]:
input_video_path = "/kaggle/input/echonet-videos/EchoNet-Dynamic/Videos/0X100CF05D141FF143.avi"
output_dir = "/kaggle/working"
original_video_name = "Video3_Original.avi"  # custom name for the original video
segmented_video_name = "Video3_VanillaUNet.avi"  # custom name for the segmented video

In [34]:
# logging setup for resource monitoring
log = {"memory": [], "cpu": [], "gpu": [], "running": True}

start_time = time.time()

# starting resource monitoring in a separate thread
monitor_thread = Thread(target=monitor_resources_continuously, args=(log,))
monitor_thread.start()

segment_left_ventricle_in_video(input_video_path, output_dir, model, device, 
                                original_video_name=original_video_name, 
                                segmented_video_name=segmented_video_name)

# stopping resource monitoring after training is complete
log["running"] = False
monitor_thread.join()

# calculating training time
end_time = time.time()
inference_time = end_time - start_time

Original video saved to /kaggle/working/Video3_Original.avi
Segmented video saved to /kaggle/working/Video3_VanillaUNet.avi


In [35]:
# recording resources used during inference

if len(log["memory"]) > 0:
    avg_memory = sum(log["memory"]) / len(log["memory"])
    peak_memory = max(log["memory"])
else:
    avg_memory = peak_memory = 0

if len(log["cpu"]) > 0:
    avg_cpu = sum(log["cpu"]) / len(log["cpu"])
    peak_cpu = max(log["cpu"])
else:
    avg_cpu = peak_cpu = 0

if len(log["gpu"]) > 0:
    avg_gpu = sum(log["gpu"]) / len(log["gpu"])
    peak_gpu = max(log["gpu"])
else:
    avg_gpu = peak_gpu = 0

print(f"Inference Time: {inference_time:.2f} seconds")
print(f"Average Memory Usage: {avg_memory:.2f} MB")
print(f"Peak Memory Usage: {peak_memory:.2f} MB")
print(f"Average CPU Usage: {avg_cpu:.2f}%")
print(f"Peak CPU Usage: {peak_cpu:.2f}%")
print(f"Average GPU Usage: {avg_gpu:.2f}%")
print(f"Peak GPU Usage: {peak_gpu:.2f}%")

Inference Time: 1.27 seconds
Average Memory Usage: 897.34 MB
Peak Memory Usage: 897.40 MB
Average CPU Usage: 49.95%
Peak CPU Usage: 51.20%
Average GPU Usage: 1.37%
Peak GPU Usage: 1.37%


#### Video 4

In [36]:
input_video_path = "/kaggle/input/echonet-videos/EchoNet-Dynamic/Videos/0X10267ADF2E644E0.avi"
output_dir = "/kaggle/working"
original_video_name = "Video4_Original.avi"  # custom name for the original video
segmented_video_name = "Video4_VanillaUNet.avi"  # custom name for the segmented video

In [37]:
# logging setup for resource monitoring
log = {"memory": [], "cpu": [], "gpu": [], "running": True}

start_time = time.time()

# starting resource monitoring in a separate thread
monitor_thread = Thread(target=monitor_resources_continuously, args=(log,))
monitor_thread.start()

segment_left_ventricle_in_video(input_video_path, output_dir, model, device, 
                                original_video_name=original_video_name, 
                                segmented_video_name=segmented_video_name)

# stopping resource monitoring after training is complete
log["running"] = False
monitor_thread.join()

# calculating training time
end_time = time.time()
inference_time = end_time - start_time

Original video saved to /kaggle/working/Video4_Original.avi
Segmented video saved to /kaggle/working/Video4_VanillaUNet.avi


In [38]:
# recording resources used during inference

if len(log["memory"]) > 0:
    avg_memory = sum(log["memory"]) / len(log["memory"])
    peak_memory = max(log["memory"])
else:
    avg_memory = peak_memory = 0

if len(log["cpu"]) > 0:
    avg_cpu = sum(log["cpu"]) / len(log["cpu"])
    peak_cpu = max(log["cpu"])
else:
    avg_cpu = peak_cpu = 0

if len(log["gpu"]) > 0:
    avg_gpu = sum(log["gpu"]) / len(log["gpu"])
    peak_gpu = max(log["gpu"])
else:
    avg_gpu = peak_gpu = 0

print(f"Inference Time: {inference_time:.2f} seconds")
print(f"Average Memory Usage: {avg_memory:.2f} MB")
print(f"Peak Memory Usage: {peak_memory:.2f} MB")
print(f"Average CPU Usage: {avg_cpu:.2f}%")
print(f"Peak CPU Usage: {peak_cpu:.2f}%")
print(f"Average GPU Usage: {avg_gpu:.2f}%")
print(f"Peak GPU Usage: {peak_gpu:.2f}%")

Inference Time: 1.27 seconds
Average Memory Usage: 897.40 MB
Peak Memory Usage: 897.40 MB
Average CPU Usage: 50.00%
Peak CPU Usage: 53.70%
Average GPU Usage: 1.37%
Peak GPU Usage: 1.37%


#### Video 5

In [39]:
input_video_path = "/kaggle/input/echonet-videos/EchoNet-Dynamic/Videos/0X103BE163257D663A.avi"
output_dir = "/kaggle/working"
original_video_name = "Video5_Original.avi"  # custom name for the original video
segmented_video_name = "Video5_VanillaUNet.avi"  # custom name for the segmented video

In [40]:
# logging setup for resource monitoring
log = {"memory": [], "cpu": [], "gpu": [], "running": True}

start_time = time.time()

# starting resource monitoring in a separate thread
monitor_thread = Thread(target=monitor_resources_continuously, args=(log,))
monitor_thread.start()

segment_left_ventricle_in_video(input_video_path, output_dir, model, device, 
                                original_video_name=original_video_name, 
                                segmented_video_name=segmented_video_name)

# stopping resource monitoring after training is complete
log["running"] = False
monitor_thread.join()

# calculating training time
end_time = time.time()
inference_time = end_time - start_time

Original video saved to /kaggle/working/Video5_Original.avi
Segmented video saved to /kaggle/working/Video5_VanillaUNet.avi


In [41]:
# recording resources used during inference

if len(log["memory"]) > 0:
    avg_memory = sum(log["memory"]) / len(log["memory"])
    peak_memory = max(log["memory"])
else:
    avg_memory = peak_memory = 0

if len(log["cpu"]) > 0:
    avg_cpu = sum(log["cpu"]) / len(log["cpu"])
    peak_cpu = max(log["cpu"])
else:
    avg_cpu = peak_cpu = 0

if len(log["gpu"]) > 0:
    avg_gpu = sum(log["gpu"]) / len(log["gpu"])
    peak_gpu = max(log["gpu"])
else:
    avg_gpu = peak_gpu = 0

print(f"Inference Time: {inference_time:.2f} seconds")
print(f"Average Memory Usage: {avg_memory:.2f} MB")
print(f"Peak Memory Usage: {peak_memory:.2f} MB")
print(f"Average CPU Usage: {avg_cpu:.2f}%")
print(f"Peak CPU Usage: {peak_cpu:.2f}%")
print(f"Average GPU Usage: {avg_gpu:.2f}%")
print(f"Peak GPU Usage: {peak_gpu:.2f}%")

Inference Time: 1.27 seconds
Average Memory Usage: 897.40 MB
Peak Memory Usage: 897.40 MB
Average CPU Usage: 49.35%
Peak CPU Usage: 51.20%
Average GPU Usage: 1.37%
Peak GPU Usage: 1.37%


#### Summarizing Resources Consumed and Inference Time

In [42]:
data = {
    "Video": ["Video 1", "Video 2", "Video 3", "Video 4", "Video 5"],
    "Inference Time (s)": [1.28, 0.64, 1.27, 1.27, 1.27],
    "Average Memory Usage (MB)": [897.28, 897.28, 897.34, 897.40, 897.40],
    "Peak Memory Usage (MB)": [897.28, 897.28, 897.40, 897.40, 897.40],
    "Average CPU Usage (%)": [58.25, 46.30, 49.95, 50.00, 49.35],
    "Peak CPU Usage (%)": [75.00, 46.30, 51.20, 53.70, 51.20],
    "Average GPU Usage (%)": [1.37, 1.37, 1.37, 1.37, 1.37],
    "Peak GPU Usage (%)": [1.37, 1.37, 1.37, 1.37, 1.37]
}

df = pd.DataFrame(data)

In [43]:
df.head()

Unnamed: 0,Video,Inference Time (s),Average Memory Usage (MB),Peak Memory Usage (MB),Average CPU Usage (%),Peak CPU Usage (%),Average GPU Usage (%),Peak GPU Usage (%)
0,Video 1,1.28,897.28,897.28,58.25,75.0,1.37,1.37
1,Video 2,0.64,897.28,897.28,46.3,46.3,1.37,1.37
2,Video 3,1.27,897.34,897.4,49.95,51.2,1.37,1.37
3,Video 4,1.27,897.4,897.4,50.0,53.7,1.37,1.37
4,Video 5,1.27,897.4,897.4,49.35,51.2,1.37,1.37


In [44]:
mean_averages = df.iloc[:, 1:].mean()

mean_df = pd.DataFrame(mean_averages, columns=["Mean Average"]).reset_index()
mean_df.rename(columns={"index": "Metric"}, inplace=True)

In [45]:
mean_df.head()

Unnamed: 0,Metric,Mean Average
0,Inference Time (s),1.146
1,Average Memory Usage (MB),897.34
2,Peak Memory Usage (MB),897.352
3,Average CPU Usage (%),50.77
4,Peak CPU Usage (%),55.48


In [46]:
mean_df.to_csv("VanillaUNet Video Segmentation Performance Metrics.csv", index=False)

print("Mean averages saved as 'VanillaUNet Video Segmentation Performance Metrics.csv'")

Mean averages saved as 'VanillaUNet Video Segmentation Performance Metrics.csv'
