### Face Save for PS1_fixed

In [None]:
import os

import cv2
import numpy as np
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F

def get_frame_timestamps(video_path):
    """Map every frame index of a video to its nominal timestamp in seconds.

    The timestamp of frame ``i`` is ``i / fps``, computed from the frame
    count and frame rate that OpenCV reports for the file. No frames are
    decoded.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        dict: ``{frame_index: seconds}`` for indices ``0 .. frame_count - 1``.

    Raises:
        ValueError: If the reported frame rate is not positive. Without this
            check the division below would fail with an opaque
            ``ZeroDivisionError``.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        # Always give the handle back, even if a property query fails.
        cap.release()

    if not fps or fps <= 0:
        raise ValueError(f"Could not determine a valid frame rate for {video_path!r}")

    return {index: index / fps for index in range(frame_count)}

def save_tracked_image(frame, tracked_bbox, output_dir, timestamp):
    """Crop the tracked region out of a frame and save it as a JPEG.

    The crop is written to ``output_dir`` as ``tracked_<timestamp>.jpg`` with
    the timestamp formatted to two decimals. Saving is best-effort: any
    failure is printed and swallowed so one bad frame does not abort a run.

    Args:
        frame: Image array indexed as ``frame[row, col]``.
        tracked_bbox: ``(x, y, w, h)`` box in pixels, or ``None`` to do nothing.
        output_dir: Directory that receives the crop.
        timestamp: Frame time in seconds; used only to build the file name.
    """
    if tracked_bbox is None:
        return

    try:
        x, y, w, h = [int(v) for v in tracked_bbox]
        frame_h, frame_w = frame.shape[:2]

        # Clip the box to the frame. A negative start index would otherwise
        # make NumPy slice from the opposite edge and save the wrong pixels.
        left, top = max(x, 0), max(y, 0)
        right, bottom = min(x + w, frame_w), min(y + h, frame_h)
        if right <= left or bottom <= top:
            return  # nothing of the box is visible

        roi_frame = frame[top:bottom, left:right]
        filename = f"tracked_{timestamp:.2f}.jpg"
        filepath = os.path.join(output_dir, filename)
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(filepath, roi_frame):
            print("Exception occurred while saving image: cv2.imwrite returned False")
    except Exception as e:
        print(f"Exception occurred while saving image: {e}")

def calculate_roi(point, frame, roi_size=(510, 300), screen_width=1366, screen_height=768):
    """Return a frame-clamped rectangle centred on a screen-space point.

    ``point`` is rescaled from screen coordinates to frame coordinates, then a
    box of ``roi_size`` is centred on it and clamped to the frame borders.

    Args:
        point: ``(x, y)`` position in screen coordinates.
        frame: Array whose ``shape`` starts with ``(height, width)``.
        roi_size: ``(width, height)`` of the requested rectangle.
        screen_width: Width of the coordinate space ``point`` was picked in.
        screen_height: Height of that coordinate space.

    Returns:
        tuple: ``(x1, y1, x2, y2)`` corners in frame pixels.
    """
    frame_height, frame_width = frame.shape[0], frame.shape[1]
    half_width = roi_size[0] // 2
    half_height = roi_size[1] // 2

    # Screen position -> frame pixel (truncated towards zero).
    center_x = int(point[0] * frame_width / screen_width)
    center_y = int(point[1] * frame_height / screen_height)

    left = max(center_x - half_width, 0)
    top = max(center_y - half_height, 0)
    right = min(center_x + half_width, frame_width)
    bottom = min(center_y + half_height, frame_height)
    return (left, top, right, bottom)

def detect_and_track(video_path, model, roi, screen_width, screen_height, output_dir):
    """Detect a target with ``model``, track it, and save a crop per tracked frame.

    Detection runs on every 10th frame and on any frame where no tracker is
    active. The first predicted box that lies fully inside the frame and has
    a positive size (re)initialises a KCF tracker. On every frame with an
    active tracker, a successful tracker update is saved through
    ``save_tracked_image``.

    Args:
        video_path: Path of the video to process.
        model: Detector called on a batched tensor built with
            ``F.to_tensor``; ``model(batch)[0]['boxes']`` must yield
            ``(x1, y1, x2, y2)`` rows.
        roi: Accepted for interface compatibility; not used here.
        screen_width: Accepted for interface compatibility; not used here.
        screen_height: Accepted for interface compatibility; not used here.
        output_dir: Directory that receives the saved crops.
    """
    print("Starting detection and tracking...")
    timestamps = get_frame_timestamps(video_path)
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    tracker = None
    init_tracking = False
    frame_count = 0
    model.eval()  # inference mode; set once instead of on every detection

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            timestamp = timestamps.get(frame_count)
            if timestamp is None:
                # The reported frame count is not guaranteed to match the
                # number of frames that actually decode; extrapolate with the
                # same i / fps rule instead of failing with a KeyError.
                if not fps or fps <= 0:
                    print(f"No timestamp available for frame {frame_count}; stopping early.")
                    break
                timestamp = frame_count / fps

            if frame_count % 10 == 0 or not init_tracking:
                # NOTE(review): OpenCV frames are BGR and F.to_tensor does not
                # reorder channels; confirm the model was trained on BGR input.
                tensor_frame = F.to_tensor(frame).unsqueeze(0)
                with torch.no_grad():
                    prediction = model(tensor_frame)

                frame_h, frame_w = frame.shape[:2]
                init_tracking = False  # stays False when no usable box is found
                for element in prediction[0]['boxes']:
                    x1, y1, x2, y2 = element.detach().cpu().numpy()
                    x, y, w, h = int(x1), int(y1), int(x2 - x1), int(y2 - y1)
                    inside = x >= 0 and y >= 0 and x + w <= frame_w and y + h <= frame_h
                    if inside and w > 0 and h > 0:
                        tracker = cv2.TrackerKCF_create()
                        tracker.init(frame, (x, y, w, h))
                        init_tracking = True
                        break

            if init_tracking and tracker is not None:
                success, tracked_bbox = tracker.update(frame)
                if success:
                    save_tracked_image(frame, tracked_bbox, output_dir, timestamp)

            frame_count += 1
    finally:
        # Release the capture even if detection or tracking raises.
        cap.release()
        cv2.destroyAllWindows()
    print("Finished processing video.")

def main():
    """Run detection and tracking on one video using hard-coded paths.

    Loads a 2-class Faster R-CNN checkpoint on the CPU, reads one
    ``<label>:<x>,<y>`` point from the first line of the points file, derives
    an ROI from the first video frame and hands everything to
    ``detect_and_track``.

    Raises:
        ValueError: If the first line of the points file is not of the form
            ``<label>:<x>,<y>`` with integer coordinates.
    """
    video_path = "/path/to/your/video.mp4"
    model_path = "/path/to/your/model.pth"
    output_dir = "/path/to/your/output/directory"
    points_file = "/path/to/your/points_file.txt"

    # Load model
    torch.set_num_threads(16)
    num_classes = 2
    model = fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes)
    # NOTE: torch.load unpickles the checkpoint; only load files you trust.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()

    # Read point from text file
    with open(points_file, 'r') as f:
        line = f.readline().strip()
    try:
        # Split on the last colon so a label that contains ':' still parses.
        _, coords = line.rsplit(':', 1)
        x, y = map(int, coords.split(','))
    except ValueError as err:
        raise ValueError(
            f"Expected '<label>:<x>,<y>' on the first line of {points_file}, got {line!r}"
        ) from err

    # Grab the first frame; release the capture on every path.
    cap = cv2.VideoCapture(video_path)
    try:
        ret, first_frame = cap.read()
    finally:
        cap.release()
    if not ret:
        print("Failed to read first frame of the video")
        return

    roi = calculate_roi((x, y), first_frame)
    os.makedirs(output_dir, exist_ok=True)
    detect_and_track(video_path, model, roi, 1366, 768, output_dir)

# Run the single-video pipeline only when this code is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()


### Face Save for PS2_fixed

In [None]:
import cv2
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
import os
import numpy as np

def get_frame_timestamps(video_path):
    """Map every frame index of a video to its nominal timestamp in seconds.

    The timestamp of frame ``i`` is ``i / fps``, computed from the frame
    count and frame rate that OpenCV reports for the file. No frames are
    decoded.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        dict: ``{frame_index: seconds}`` for indices ``0 .. frame_count - 1``.

    Raises:
        ValueError: If the reported frame rate is not positive. Without this
            check the division below would fail with an opaque
            ``ZeroDivisionError``.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        # Always give the handle back, even if a property query fails.
        cap.release()

    if not fps or fps <= 0:
        raise ValueError(f"Could not determine a valid frame rate for {video_path!r}")

    return {index: index / fps for index in range(frame_count)}

def save_tracked_image(frame, tracked_bbox, output_dir, timestamp):
    """Crop the tracked region out of a frame and save it as a JPEG.

    The crop is written to ``output_dir`` as ``tracked_<timestamp>.jpg`` with
    the timestamp formatted to two decimals. Saving is best-effort: any
    failure is printed and swallowed so one bad frame does not abort a run.

    Args:
        frame: Image array indexed as ``frame[row, col]``.
        tracked_bbox: ``(x, y, w, h)`` box in pixels, or ``None`` to do nothing.
        output_dir: Directory that receives the crop.
        timestamp: Frame time in seconds; used only to build the file name.
    """
    if tracked_bbox is None:
        return

    try:
        x, y, w, h = [int(v) for v in tracked_bbox]
        frame_h, frame_w = frame.shape[:2]

        # Clip the box to the frame. A negative start index would otherwise
        # make NumPy slice from the opposite edge and save the wrong pixels.
        left, top = max(x, 0), max(y, 0)
        right, bottom = min(x + w, frame_w), min(y + h, frame_h)
        if right <= left or bottom <= top:
            return  # nothing of the box is visible

        roi_frame = frame[top:bottom, left:right]
        filename = f"tracked_{timestamp:.2f}.jpg"
        filepath = os.path.join(output_dir, filename)
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(filepath, roi_frame):
            print("Exception occurred while saving image: cv2.imwrite returned False")
    except Exception as e:
        print(f"Exception occurred while saving image: {e}")

def calculate_roi(point, frame, roi_size=(510, 300), screen_width=1366, screen_height=768):
    """Return a frame-clamped rectangle centred on a screen-space point.

    ``point`` is rescaled from screen coordinates to frame coordinates, then a
    box of ``roi_size`` is centred on it and clamped to the frame borders.

    Args:
        point: ``(x, y)`` position in screen coordinates.
        frame: Array whose ``shape`` starts with ``(height, width)``.
        roi_size: ``(width, height)`` of the requested rectangle.
        screen_width: Width of the coordinate space ``point`` was picked in.
        screen_height: Height of that coordinate space.

    Returns:
        tuple: ``(x1, y1, x2, y2)`` corners in frame pixels.
    """
    frame_height, frame_width = frame.shape[0], frame.shape[1]
    half_width = roi_size[0] // 2
    half_height = roi_size[1] // 2

    # Screen position -> frame pixel (truncated towards zero).
    center_x = int(point[0] * frame_width / screen_width)
    center_y = int(point[1] * frame_height / screen_height)

    left = max(center_x - half_width, 0)
    top = max(center_y - half_height, 0)
    right = min(center_x + half_width, frame_width)
    bottom = min(center_y + half_height, frame_height)
    return (left, top, right, bottom)

def detect_and_track(video_path, model, roi, screen_width, screen_height, output_dir, timestamps):
    """Detect a target with ``model``, track it, and save a crop per tracked frame.

    Detection runs on every 10th frame and on any frame where no tracker is
    active. The first predicted box that lies fully inside the frame and has
    a positive size (re)initialises a KCF tracker. On every frame with an
    active tracker, a successful tracker update is saved through
    ``save_tracked_image``.

    Args:
        video_path: Path of the video to process.
        model: Detector called on a batched tensor built with
            ``F.to_tensor``; ``model(batch)[0]['boxes']`` must yield
            ``(x1, y1, x2, y2)`` rows.
        roi: Accepted for interface compatibility; not used here.
        screen_width: Accepted for interface compatibility; not used here.
        screen_height: Accepted for interface compatibility; not used here.
        output_dir: Directory that receives the saved crops.
        timestamps: Mapping of frame index to timestamp in seconds.
    """
    print("Starting detection and tracking...")
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    tracker = None
    init_tracking = False
    frame_count = 0
    model.eval()  # inference mode; set once instead of on every detection

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            timestamp = timestamps.get(frame_count)
            if timestamp is None:
                # The mapping may cover fewer frames than actually decode;
                # extrapolate as frame_index / fps instead of failing with a
                # KeyError.
                if not fps or fps <= 0:
                    print(f"No timestamp available for frame {frame_count}; stopping early.")
                    break
                timestamp = frame_count / fps

            if frame_count % 10 == 0 or not init_tracking:
                # NOTE(review): OpenCV frames are BGR and F.to_tensor does not
                # reorder channels; confirm the model was trained on BGR input.
                tensor_frame = F.to_tensor(frame).unsqueeze(0)
                with torch.no_grad():
                    prediction = model(tensor_frame)

                frame_h, frame_w = frame.shape[:2]
                init_tracking = False  # stays False when no usable box is found
                for element in prediction[0]['boxes']:
                    x1, y1, x2, y2 = element.detach().cpu().numpy()
                    x, y, w, h = int(x1), int(y1), int(x2 - x1), int(y2 - y1)
                    inside = x >= 0 and y >= 0 and x + w <= frame_w and y + h <= frame_h
                    if inside and w > 0 and h > 0:
                        tracker = cv2.TrackerKCF_create()
                        tracker.init(frame, (x, y, w, h))
                        init_tracking = True
                        break

            if init_tracking and tracker is not None:
                success, tracked_bbox = tracker.update(frame)
                if success:
                    save_tracked_image(frame, tracked_bbox, output_dir, timestamp)

            frame_count += 1
    finally:
        # Release the capture even if detection or tracking raises.
        cap.release()
        cv2.destroyAllWindows()
    print("Finished processing video.")

def process_videos(directory_path, model_path, save_base_dir, points_file):
    """Run detection and tracking over every annotated video in a directory.

    A 2-class Faster R-CNN (ResNet-50 FPN) is loaded on the CPU from
    ``model_path``. ``points_file`` is read as lines of
    ``<video_id>:<x>,<y>``. Each file in ``directory_path`` whose name without
    extension matches a ``video_id`` and whose extension is ``.mp4``,
    ``.avi`` or ``.mov`` is processed with ``detect_and_track``; crops go to
    ``<save_base_dir>/<video_id>/``.

    Malformed point lines and videos that cannot be opened or read are
    reported and skipped so one bad entry does not abort the batch.

    Args:
        directory_path: Directory containing the input videos.
        model_path: Path of the model ``state_dict`` checkpoint.
        save_base_dir: Root directory for per-video output folders.
        points_file: Text file with one ``<video_id>:<x>,<y>`` entry per line.
    """
    torch.set_num_threads(16)  # Adjust this based on your CPU cores

    num_classes = 2
    model = fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes)
    # NOTE: torch.load unpickles the checkpoint; only load files you trust.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()

    points_map = {}
    with open(points_file, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            line = line.strip()
            if not line or ':' not in line:
                continue
            # Split on the last colon so an id that contains ':' still parses.
            video_id, coords = line.rsplit(':', 1)
            try:
                x, y = map(int, coords.split(','))
            except ValueError:
                print(f"Skipping malformed entry on line {line_number} of {points_file}: {line!r}")
                continue
            points_map[video_id.strip()] = (x, y)

    video_extensions = ('.mp4', '.avi', '.mov')
    # Sorted so the processing order is reproducible between runs.
    for filename in sorted(os.listdir(directory_path)):
        base_filename = os.path.splitext(filename)[0]
        if base_filename not in points_map or not filename.lower().endswith(video_extensions):
            continue

        video_path = os.path.join(directory_path, filename)
        print(f"Processing video: {filename}")

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Failed to open video {filename}")
            cap.release()
            continue
        try:
            ret, first_frame = cap.read()
        finally:
            cap.release()
        if not ret:
            print(f"Failed to read first frame of {filename}")
            continue

        try:
            timestamps = get_frame_timestamps(video_path)
        except (ZeroDivisionError, ValueError) as err:
            # Raised when no usable frame rate is reported for the file.
            print(f"Could not compute timestamps for {filename}: {err}")
            continue
        roi = calculate_roi(points_map[base_filename], first_frame)

        video_save_dir = os.path.join(save_base_dir, base_filename)
        os.makedirs(video_save_dir, exist_ok=True)

        detect_and_track(video_path, model, roi, 1366, 768, video_save_dir, timestamps)

        print(f"Finished processing {filename}.")

# Run the PortionSize2 batch: input videos, model checkpoint, output root and
# points file are passed by keyword so each path is self-describing.
process_videos(
    directory_path="/storage/group/klk37/default/homebytes/video/fbs/PS_vids_original/PortionSize2",
    model_path="/storage/group/klk37/default/homebytes/code/scripts/models/best_model_for_faceROI.pth",
    save_base_dir="/storage/group/klk37/default/homebytes/video/fbs/ROI_face/PS2_ROI_face",
    points_file="/storage/group/klk37/default/homebytes/video/fbs/selectpoint_textfiles/PS2_points.txt",
)

### Face Save for PS3_fixed

In [None]:
import cv2
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
import os
import numpy as np

def get_frame_timestamps(video_path):
    """Map every frame index of a video to its nominal timestamp in seconds.

    The timestamp of frame ``i`` is ``i / fps``, computed from the frame
    count and frame rate that OpenCV reports for the file. No frames are
    decoded.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        dict: ``{frame_index: seconds}`` for indices ``0 .. frame_count - 1``.

    Raises:
        ValueError: If the reported frame rate is not positive. Without this
            check the division below would fail with an opaque
            ``ZeroDivisionError``.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        # Always give the handle back, even if a property query fails.
        cap.release()

    if not fps or fps <= 0:
        raise ValueError(f"Could not determine a valid frame rate for {video_path!r}")

    return {index: index / fps for index in range(frame_count)}

def save_tracked_image(frame, tracked_bbox, output_dir, timestamp):
    """Crop the tracked region out of a frame and save it as a JPEG.

    The crop is written to ``output_dir`` as ``tracked_<timestamp>.jpg`` with
    the timestamp formatted to two decimals. Saving is best-effort: any
    failure is printed and swallowed so one bad frame does not abort a run.

    Args:
        frame: Image array indexed as ``frame[row, col]``.
        tracked_bbox: ``(x, y, w, h)`` box in pixels, or ``None`` to do nothing.
        output_dir: Directory that receives the crop.
        timestamp: Frame time in seconds; used only to build the file name.
    """
    if tracked_bbox is None:
        return

    try:
        x, y, w, h = [int(v) for v in tracked_bbox]
        frame_h, frame_w = frame.shape[:2]

        # Clip the box to the frame. A negative start index would otherwise
        # make NumPy slice from the opposite edge and save the wrong pixels.
        left, top = max(x, 0), max(y, 0)
        right, bottom = min(x + w, frame_w), min(y + h, frame_h)
        if right <= left or bottom <= top:
            return  # nothing of the box is visible

        roi_frame = frame[top:bottom, left:right]
        filename = f"tracked_{timestamp:.2f}.jpg"
        filepath = os.path.join(output_dir, filename)
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(filepath, roi_frame):
            print("Exception occurred while saving image: cv2.imwrite returned False")
    except Exception as e:
        print(f"Exception occurred while saving image: {e}")

def calculate_roi(point, frame, roi_size=(510, 300), screen_width=1366, screen_height=768):
    """Return a frame-clamped rectangle centred on a screen-space point.

    ``point`` is rescaled from screen coordinates to frame coordinates, then a
    box of ``roi_size`` is centred on it and clamped to the frame borders.

    Args:
        point: ``(x, y)`` position in screen coordinates.
        frame: Array whose ``shape`` starts with ``(height, width)``.
        roi_size: ``(width, height)`` of the requested rectangle.
        screen_width: Width of the coordinate space ``point`` was picked in.
        screen_height: Height of that coordinate space.

    Returns:
        tuple: ``(x1, y1, x2, y2)`` corners in frame pixels.
    """
    frame_height, frame_width = frame.shape[0], frame.shape[1]
    half_width = roi_size[0] // 2
    half_height = roi_size[1] // 2

    # Screen position -> frame pixel (truncated towards zero).
    center_x = int(point[0] * frame_width / screen_width)
    center_y = int(point[1] * frame_height / screen_height)

    left = max(center_x - half_width, 0)
    top = max(center_y - half_height, 0)
    right = min(center_x + half_width, frame_width)
    bottom = min(center_y + half_height, frame_height)
    return (left, top, right, bottom)

def detect_and_track(video_path, model, roi, screen_width, screen_height, output_dir, timestamps):
    """Detect a target with ``model``, track it, and save a crop per tracked frame.

    Detection runs on every 10th frame and on any frame where no tracker is
    active. The first predicted box that lies fully inside the frame and has
    a positive size (re)initialises a KCF tracker. On every frame with an
    active tracker, a successful tracker update is saved through
    ``save_tracked_image``.

    Args:
        video_path: Path of the video to process.
        model: Detector called on a batched tensor built with
            ``F.to_tensor``; ``model(batch)[0]['boxes']`` must yield
            ``(x1, y1, x2, y2)`` rows.
        roi: Accepted for interface compatibility; not used here.
        screen_width: Accepted for interface compatibility; not used here.
        screen_height: Accepted for interface compatibility; not used here.
        output_dir: Directory that receives the saved crops.
        timestamps: Mapping of frame index to timestamp in seconds.
    """
    print("Starting detection and tracking...")
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    tracker = None
    init_tracking = False
    frame_count = 0
    model.eval()  # inference mode; set once instead of on every detection

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            timestamp = timestamps.get(frame_count)
            if timestamp is None:
                # The mapping may cover fewer frames than actually decode;
                # extrapolate as frame_index / fps instead of failing with a
                # KeyError.
                if not fps or fps <= 0:
                    print(f"No timestamp available for frame {frame_count}; stopping early.")
                    break
                timestamp = frame_count / fps

            if frame_count % 10 == 0 or not init_tracking:
                # NOTE(review): OpenCV frames are BGR and F.to_tensor does not
                # reorder channels; confirm the model was trained on BGR input.
                tensor_frame = F.to_tensor(frame).unsqueeze(0)
                with torch.no_grad():
                    prediction = model(tensor_frame)

                frame_h, frame_w = frame.shape[:2]
                init_tracking = False  # stays False when no usable box is found
                for element in prediction[0]['boxes']:
                    x1, y1, x2, y2 = element.detach().cpu().numpy()
                    x, y, w, h = int(x1), int(y1), int(x2 - x1), int(y2 - y1)
                    inside = x >= 0 and y >= 0 and x + w <= frame_w and y + h <= frame_h
                    if inside and w > 0 and h > 0:
                        tracker = cv2.TrackerKCF_create()
                        tracker.init(frame, (x, y, w, h))
                        init_tracking = True
                        break

            if init_tracking and tracker is not None:
                success, tracked_bbox = tracker.update(frame)
                if success:
                    save_tracked_image(frame, tracked_bbox, output_dir, timestamp)

            frame_count += 1
    finally:
        # Release the capture even if detection or tracking raises.
        cap.release()
        cv2.destroyAllWindows()
    print("Finished processing video.")

def process_videos(directory_path, model_path, save_base_dir, points_file):
    """Run detection and tracking over every annotated video in a directory.

    A 2-class Faster R-CNN (ResNet-50 FPN) is loaded on the CPU from
    ``model_path``. ``points_file`` is read as lines of
    ``<video_id>:<x>,<y>``. Each file in ``directory_path`` whose name without
    extension matches a ``video_id`` and whose extension is ``.mp4``,
    ``.avi`` or ``.mov`` is processed with ``detect_and_track``; crops go to
    ``<save_base_dir>/<video_id>/``.

    Malformed point lines and videos that cannot be opened or read are
    reported and skipped so one bad entry does not abort the batch.

    Args:
        directory_path: Directory containing the input videos.
        model_path: Path of the model ``state_dict`` checkpoint.
        save_base_dir: Root directory for per-video output folders.
        points_file: Text file with one ``<video_id>:<x>,<y>`` entry per line.
    """
    torch.set_num_threads(16)  # Adjust this based on your CPU cores

    num_classes = 2
    model = fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes)
    # NOTE: torch.load unpickles the checkpoint; only load files you trust.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()

    points_map = {}
    with open(points_file, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            line = line.strip()
            if not line or ':' not in line:
                continue
            # Split on the last colon so an id that contains ':' still parses.
            video_id, coords = line.rsplit(':', 1)
            try:
                x, y = map(int, coords.split(','))
            except ValueError:
                print(f"Skipping malformed entry on line {line_number} of {points_file}: {line!r}")
                continue
            points_map[video_id.strip()] = (x, y)

    video_extensions = ('.mp4', '.avi', '.mov')
    # Sorted so the processing order is reproducible between runs.
    for filename in sorted(os.listdir(directory_path)):
        base_filename = os.path.splitext(filename)[0]
        if base_filename not in points_map or not filename.lower().endswith(video_extensions):
            continue

        video_path = os.path.join(directory_path, filename)
        print(f"Processing video: {filename}")

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Failed to open video {filename}")
            cap.release()
            continue
        try:
            ret, first_frame = cap.read()
        finally:
            cap.release()
        if not ret:
            print(f"Failed to read first frame of {filename}")
            continue

        try:
            timestamps = get_frame_timestamps(video_path)
        except (ZeroDivisionError, ValueError) as err:
            # Raised when no usable frame rate is reported for the file.
            print(f"Could not compute timestamps for {filename}: {err}")
            continue
        roi = calculate_roi(points_map[base_filename], first_frame)

        video_save_dir = os.path.join(save_base_dir, base_filename)
        os.makedirs(video_save_dir, exist_ok=True)

        detect_and_track(video_path, model, roi, 1366, 768, video_save_dir, timestamps)

        print(f"Finished processing {filename}.")

# Run the PortionSize3 batch: input videos, model checkpoint, output root and
# points file are passed by keyword so each path is self-describing.
process_videos(
    directory_path="/storage/group/klk37/default/homebytes/video/fbs/PS_vids_original/PortionSize3",
    model_path="/storage/group/klk37/default/homebytes/code/scripts/models/best_model_for_faceROI.pth",
    save_base_dir="/storage/group/klk37/default/homebytes/video/fbs/ROI_face/PS3_ROI_face",
    points_file="/storage/group/klk37/default/homebytes/video/fbs/selectpoint_textfiles/PS3_points.txt",
)

### Face Save for PS4_fixed

In [None]:
import cv2
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
import os
import numpy as np

def get_frame_timestamps(video_path):
    """Map every frame index of a video to its nominal timestamp in seconds.

    The timestamp of frame ``i`` is ``i / fps``, computed from the frame
    count and frame rate that OpenCV reports for the file. No frames are
    decoded.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        dict: ``{frame_index: seconds}`` for indices ``0 .. frame_count - 1``.

    Raises:
        ValueError: If the reported frame rate is not positive. Without this
            check the division below would fail with an opaque
            ``ZeroDivisionError``.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        # Always give the handle back, even if a property query fails.
        cap.release()

    if not fps or fps <= 0:
        raise ValueError(f"Could not determine a valid frame rate for {video_path!r}")

    return {index: index / fps for index in range(frame_count)}

def save_tracked_image(frame, tracked_bbox, output_dir, timestamp):
    """Crop the tracked region out of a frame and save it as a JPEG.

    The crop is written to ``output_dir`` as ``tracked_<timestamp>.jpg`` with
    the timestamp formatted to two decimals. Saving is best-effort: any
    failure is printed and swallowed so one bad frame does not abort a run.

    Args:
        frame: Image array indexed as ``frame[row, col]``.
        tracked_bbox: ``(x, y, w, h)`` box in pixels, or ``None`` to do nothing.
        output_dir: Directory that receives the crop.
        timestamp: Frame time in seconds; used only to build the file name.
    """
    if tracked_bbox is None:
        return

    try:
        x, y, w, h = [int(v) for v in tracked_bbox]
        frame_h, frame_w = frame.shape[:2]

        # Clip the box to the frame. A negative start index would otherwise
        # make NumPy slice from the opposite edge and save the wrong pixels.
        left, top = max(x, 0), max(y, 0)
        right, bottom = min(x + w, frame_w), min(y + h, frame_h)
        if right <= left or bottom <= top:
            return  # nothing of the box is visible

        roi_frame = frame[top:bottom, left:right]
        filename = f"tracked_{timestamp:.2f}.jpg"
        filepath = os.path.join(output_dir, filename)
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(filepath, roi_frame):
            print("Exception occurred while saving image: cv2.imwrite returned False")
    except Exception as e:
        print(f"Exception occurred while saving image: {e}")

def calculate_roi(point, frame, roi_size=(510, 300), screen_width=1366, screen_height=768):
    """Return a frame-clamped rectangle centred on a screen-space point.

    ``point`` is rescaled from screen coordinates to frame coordinates, then a
    box of ``roi_size`` is centred on it and clamped to the frame borders.

    Args:
        point: ``(x, y)`` position in screen coordinates.
        frame: Array whose ``shape`` starts with ``(height, width)``.
        roi_size: ``(width, height)`` of the requested rectangle.
        screen_width: Width of the coordinate space ``point`` was picked in.
        screen_height: Height of that coordinate space.

    Returns:
        tuple: ``(x1, y1, x2, y2)`` corners in frame pixels.
    """
    frame_height, frame_width = frame.shape[0], frame.shape[1]
    half_width = roi_size[0] // 2
    half_height = roi_size[1] // 2

    # Screen position -> frame pixel (truncated towards zero).
    center_x = int(point[0] * frame_width / screen_width)
    center_y = int(point[1] * frame_height / screen_height)

    left = max(center_x - half_width, 0)
    top = max(center_y - half_height, 0)
    right = min(center_x + half_width, frame_width)
    bottom = min(center_y + half_height, frame_height)
    return (left, top, right, bottom)

def detect_and_track(video_path, model, roi, screen_width, screen_height, output_dir, timestamps):
    """Detect a target with ``model``, track it, and save a crop per tracked frame.

    Detection runs on every 10th frame and on any frame where no tracker is
    active. The first predicted box that lies fully inside the frame and has
    a positive size (re)initialises a KCF tracker. On every frame with an
    active tracker, a successful tracker update is saved through
    ``save_tracked_image``.

    Args:
        video_path: Path of the video to process.
        model: Detector called on a batched tensor built with
            ``F.to_tensor``; ``model(batch)[0]['boxes']`` must yield
            ``(x1, y1, x2, y2)`` rows.
        roi: Accepted for interface compatibility; not used here.
        screen_width: Accepted for interface compatibility; not used here.
        screen_height: Accepted for interface compatibility; not used here.
        output_dir: Directory that receives the saved crops.
        timestamps: Mapping of frame index to timestamp in seconds.
    """
    print("Starting detection and tracking...")
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    tracker = None
    init_tracking = False
    frame_count = 0
    model.eval()  # inference mode; set once instead of on every detection

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            timestamp = timestamps.get(frame_count)
            if timestamp is None:
                # The mapping may cover fewer frames than actually decode;
                # extrapolate as frame_index / fps instead of failing with a
                # KeyError.
                if not fps or fps <= 0:
                    print(f"No timestamp available for frame {frame_count}; stopping early.")
                    break
                timestamp = frame_count / fps

            if frame_count % 10 == 0 or not init_tracking:
                # NOTE(review): OpenCV frames are BGR and F.to_tensor does not
                # reorder channels; confirm the model was trained on BGR input.
                tensor_frame = F.to_tensor(frame).unsqueeze(0)
                with torch.no_grad():
                    prediction = model(tensor_frame)

                frame_h, frame_w = frame.shape[:2]
                init_tracking = False  # stays False when no usable box is found
                for element in prediction[0]['boxes']:
                    x1, y1, x2, y2 = element.detach().cpu().numpy()
                    x, y, w, h = int(x1), int(y1), int(x2 - x1), int(y2 - y1)
                    inside = x >= 0 and y >= 0 and x + w <= frame_w and y + h <= frame_h
                    if inside and w > 0 and h > 0:
                        tracker = cv2.TrackerKCF_create()
                        tracker.init(frame, (x, y, w, h))
                        init_tracking = True
                        break

            if init_tracking and tracker is not None:
                success, tracked_bbox = tracker.update(frame)
                if success:
                    save_tracked_image(frame, tracked_bbox, output_dir, timestamp)

            frame_count += 1
    finally:
        # Release the capture even if detection or tracking raises.
        cap.release()
        cv2.destroyAllWindows()
    print("Finished processing video.")

def process_videos(directory_path, model_path, save_base_dir, points_file):
    """Run detection and tracking over every annotated video in a directory.

    A 2-class Faster R-CNN (ResNet-50 FPN) is loaded on the CPU from
    ``model_path``. ``points_file`` is read as lines of
    ``<video_id>:<x>,<y>``. Each file in ``directory_path`` whose name without
    extension matches a ``video_id`` and whose extension is ``.mp4``,
    ``.avi`` or ``.mov`` is processed with ``detect_and_track``; crops go to
    ``<save_base_dir>/<video_id>/``.

    Malformed point lines and videos that cannot be opened or read are
    reported and skipped so one bad entry does not abort the batch.

    Args:
        directory_path: Directory containing the input videos.
        model_path: Path of the model ``state_dict`` checkpoint.
        save_base_dir: Root directory for per-video output folders.
        points_file: Text file with one ``<video_id>:<x>,<y>`` entry per line.
    """
    torch.set_num_threads(16)  # Adjust this based on your CPU cores

    num_classes = 2
    model = fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes)
    # NOTE: torch.load unpickles the checkpoint; only load files you trust.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()

    points_map = {}
    with open(points_file, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            line = line.strip()
            if not line or ':' not in line:
                continue
            # Split on the last colon so an id that contains ':' still parses.
            video_id, coords = line.rsplit(':', 1)
            try:
                x, y = map(int, coords.split(','))
            except ValueError:
                print(f"Skipping malformed entry on line {line_number} of {points_file}: {line!r}")
                continue
            points_map[video_id.strip()] = (x, y)

    video_extensions = ('.mp4', '.avi', '.mov')
    # Sorted so the processing order is reproducible between runs.
    for filename in sorted(os.listdir(directory_path)):
        base_filename = os.path.splitext(filename)[0]
        if base_filename not in points_map or not filename.lower().endswith(video_extensions):
            continue

        video_path = os.path.join(directory_path, filename)
        print(f"Processing video: {filename}")

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Failed to open video {filename}")
            cap.release()
            continue
        try:
            ret, first_frame = cap.read()
        finally:
            cap.release()
        if not ret:
            print(f"Failed to read first frame of {filename}")
            continue

        try:
            timestamps = get_frame_timestamps(video_path)
        except (ZeroDivisionError, ValueError) as err:
            # Raised when no usable frame rate is reported for the file.
            print(f"Could not compute timestamps for {filename}: {err}")
            continue
        roi = calculate_roi(points_map[base_filename], first_frame)

        video_save_dir = os.path.join(save_base_dir, base_filename)
        os.makedirs(video_save_dir, exist_ok=True)

        detect_and_track(video_path, model, roi, 1366, 768, video_save_dir, timestamps)

        print(f"Finished processing {filename}.")

# Run the PortionSize4 batch: input videos, model checkpoint, output root and
# points file are passed by keyword so each path is self-describing.
process_videos(
    directory_path="/storage/group/klk37/default/homebytes/video/fbs/PS_vids_original/PortionSize4",
    model_path="/storage/group/klk37/default/homebytes/code/scripts/models/best_model_for_faceROI.pth",
    save_base_dir="/storage/group/klk37/default/homebytes/video/fbs/ROI_face/PS4_ROI_face",
    points_file="/storage/group/klk37/default/homebytes/video/fbs/selectpoint_textfiles/PS4_points.txt",
)