## YOLOv8

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO  # YOLO model
import torch

# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

# Load the YOLOv8 weights and move the model to the selected device.
# A larger checkpoint (e.g., 'yolov8m.pt') can be substituted for better accuracy.
model = YOLO('yolov8n.pt')
model.to(device)

def detect_person(frame):
    """
    Run YOLO on a frame and build a mask covering every detected person.

    Each detection whose class id is 0 (the class this script treats as
    "person") has its bounding box painted as a filled rectangle.
    Returns a single-channel uint8 array with the frame's height and width:
    255 inside person boxes, 0 everywhere else.
    """
    # One row per detection: x1, y1, x2, y2, confidence, class id.
    detections = model(frame)[0].boxes.data.cpu().numpy()
    person_mask = np.zeros(frame.shape[:2], dtype=np.uint8)

    for row in detections:
        left, top, right, bottom, _score, class_id = row
        if int(class_id) != 0:
            continue  # not a person
        top_left = (int(left), int(top))
        bottom_right = (int(right), int(bottom))
        # thickness=-1 fills the rectangle instead of drawing its outline.
        cv2.rectangle(person_mask, top_left, bottom_right, color=255, thickness=-1)

    return person_mask

def inpaint_frame(frame, mask):
    """
    Fill the masked area of the frame from its surroundings.

    Non-zero mask pixels mark the region to reconstruct. Uses OpenCV's
    Telea inpainting with a radius of 3 and returns the repaired frame.
    """
    inpaint_radius = 3
    restored = cv2.inpaint(frame, mask, inpaint_radius, cv2.INPAINT_TELEA)
    return restored

# Load video
video_path = 'vid.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly instead of silently writing an empty output file.
    raise OSError(f"Could not open input video: {video_path}")

# Keep the frame rate as a float: truncating a fractional rate such as 29.97
# to 29 would make the output play at a different speed than the source.
fps = cap.get(cv2.CAP_PROP_FPS)
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output_video.mp4', fourcc, fps, frame_size)
if not out.isOpened():
    cap.release()
    raise OSError("Could not open 'output_video.mp4' for writing")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read failure): stop processing.
            break

        # Mask every detected person, then paint over the masked region.
        mask = detect_person(frame)
        result = inpaint_frame(frame, mask)

        out.write(result)
finally:
    # Release both handles even if detection or inpainting raises, so the
    # output container is finalized and the input file is not left open.
    cap.release()
    out.release()

cv2.destroyAllWindows()


## DeepLabV3

In [10]:
import cv2
import numpy as np
import torch
from PIL import Image
from torchvision import models, transforms

# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Pretrained DeepLabV3 (ResNet-101 backbone), switched to inference mode and
# moved to the selected device.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favor of
# `weights=` - confirm against the installed version before changing.
model = models.segmentation.deeplabv3_resnet101(pretrained=True)
model.eval()
model.to(device)

# Preprocessing applied to each frame before it is fed to the network.
transform = transforms.Compose([
    transforms.ToPILImage(),        # numpy array -> PIL image
    transforms.Resize((256, 256)),  # fixed working resolution
    transforms.ToTensor(),          # PIL image -> float tensor
    transforms.Normalize(           # per-channel mean/std normalization
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

def segment_person(frame):
    """
    Segment the frame with DeepLabV3 and return a binary person mask.

    The frame is expected in OpenCV's BGR channel order, as produced by
    ``cv2.VideoCapture.read``. It is converted to RGB before preprocessing
    because the normalization statistics in ``transform`` are given in RGB
    order; feeding BGR data would swap the red and blue channels.

    Returns a uint8 array at the resolution set by the resize step in
    ``transform`` (not the frame's resolution), holding 1 where the
    predicted class index is 15 (the class this script treats as "person")
    and 0 elsewhere. Callers need to resize it to the frame size.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    input_tensor = transform(rgb_frame).unsqueeze(0).to(device)
    with torch.no_grad():  # inference only; no gradients needed
        output = model(input_tensor)['out'][0]
    # Per-pixel class index with the highest score.
    mask = output.argmax(0).byte().cpu().numpy()
    return (mask == 15).astype(np.uint8)
def inpaint_frame(frame, mask):
    """
    Fill the masked area of the frame from its surroundings.

    The mask is multiplied by 255 before being handed to OpenCV's Telea
    inpainting (radius 3), so a 0/1 mask becomes a 0/255 mask.
    Returns the repaired frame.
    """
    scaled_mask = mask * 255
    return cv2.inpaint(frame, scaled_mask, 3, cv2.INPAINT_TELEA)

# Load video
video_path = 'vid.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly instead of silently writing an empty output file.
    raise OSError(f"Could not open input video: {video_path}")

# Keep the frame rate as a float: truncating a fractional rate such as 29.97
# to 29 would make the output play at a different speed than the source.
fps = cap.get(cv2.CAP_PROP_FPS)
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output_video.mp4', fourcc, fps, frame_size)
if not out.isOpened():
    cap.release()
    raise OSError("Could not open 'output_video.mp4' for writing")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read failure): stop processing.
            break

        mask = segment_person(frame)

        # The segmentation mask comes back at the network's working
        # resolution; scale it to the frame size. Nearest-neighbour keeps
        # the mask strictly binary.
        mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]), interpolation=cv2.INTER_NEAREST)

        result = inpaint_frame(frame, mask)

        out.write(result)
finally:
    # Release both handles even if segmentation or inpainting raises, so the
    # output container is finalized and the input file is not left open.
    cap.release()
    out.release()

cv2.destroyAllWindows()
