In [7]:
from ultralytics import YOLO
import cv2 as cv
import os
from typing import Union
from pathlib import Path
import sys 
import shutil

In [8]:
os.getcwd()

'/home/ayhem18/DEV/My_Kaggle_Repo/Hack_visual_system'

In [9]:
HOME = os.getcwd()
DATA_FOLDER = os.path.join(HOME, 'data')
current = HOME
while 'src' not in os.listdir(current):
    current = Path(current).parent

PROJECT_DIR = str(current) 
sys.path.append(PROJECT_DIR)
sys.path.append(os.path.join(str(current), 'src'))

In [10]:
# os.path.realpath(os.path.dirname(__file__))

In [19]:
import requests
import numpy as np
import torch

from typing import Union, List, Dict
from _collections_abc import Sequence
from collections import defaultdict
from ultralytics.engine.results import Results

from face.custom_tracker import CustomByteTracker
from face.utilities import FR_SingletonInitializer


In [20]:
def images_to_numpy(images: Sequence[Union[str, Path]]) -> List[np.ndarray]:
    if isinstance(images, (str, Path)):
        images = [images]
    return [np.asarray(cv.imread(img)) for img in images]

In [18]:

class YoloFaceRecognizer():
    _tracker_URL = 'https://raw.githubusercontent.com/voyager-108/ml/main/apps/nn/yolov8/trackers/tracker.yaml'
    @classmethod
    def _file_dir(cls):
        try:
            return os.path.realpath(os.path.dirname(__file__))
        except NameError:
            return os.getcwd()
    
    @classmethod
    def _tracker_file(cls):
        # download the tracker file as needed
        req = requests.get(cls._tracker_URL, allow_redirects=True)
        with open(os.path.join(cls._file_dir(), 'tracker.yaml'), 'wb') as f:
            f.write(req.content)

    def __init__(self, 
                top_person_detection: int = 5,   
                top_face_prediction: int = 2,
                yolo_path: Union[str, Path] = 'yolov8n.pt',
                ) -> None:
        self.person_detected = top_person_detection
        self.face_detected = top_face_prediction, 

        # download the 'tracker.yaml' file if needed
        if not os.path.isfile(os.path.join(self._file_dir(), 'tracker.yaml')):
            # download the file in this case
            self._tracker_file()

        # the yolo components
        self.yolo = YOLO(yolo_path)
        self.tracker = CustomByteTracker(os.path.join(self._file_dir(), 'tracker.yaml'))

        singleton = FR_SingletonInitializer()
        self.face_detector = singleton.get_face_detector() 
        self.face_encoder = singleton.get_encoder()
        self.device = singleton.get_device()
    
    def _track(self, frames: Sequence[Union[Path, str, np.ndarray, torch.Tensor]]) -> List[Results]:
        """This function tracks the different people detected across the given 

        Args:
            frames (Sequence[Union[Path, str, np.ndarray, torch.Tensor]]): a sequence of frames. The assumption is
            that frames are consecutive in time.

        Returns:
            List[Results]: a list of Yolo Results objects
        """
        # this is the first step is the face detection pipeline: Detecting people in the image + tracking them
        tracking_results = self.yolo.track(source=frames, 
                                           persist=True, 
                                           classes=0, # only detect people in the image
                                           device=self.device,
                                           show=False)   
        
        # remove the extra ids by calling the custom tracker's method
        self.tracker.track(tracking_results)
        return tracking_results
    
    def _identify(self, tracking_results: List[Results]):
        # create a dictionary to save the information about each id detected in the results
        # the dictionary will be of the form {id: [(frame_index, boxes, probs)]}
        ids_dict = defaultdict(lambda: [])
        
        # iterate through the results to extract the ids
        for frame_index, results in enumerate(tracking_results):
            
            boxes = results.boxes
            
            if boxes is None:
                continue

            probs = results.probs 
            ids = boxes.id.int().cpu().tolist()

            assert len(ids) == len(boxes) == len(probs), "Check the lengths of ids, probabilities and boxes"

            for i, bb, p in zip(ids, boxes.xywh.cpu().tolist(), probs):
                ids_dict[i].append((frame_index, bb, p))
            
        # the final step is to filter the results
        # keep only the top self.person_detected boxes for each id
        for person_id, info in enumerate(ids_dict):
            ids_dict[person_id] = sorted(info, key=lambda x: x[-1], reverse=True)[:self.person_detected]
        
        return ids_dict

    def _detect(self, frames, 
                person_dict: Dict[int, List], 
                crop_person: bool = True):
        
        if crop_person: 
            frames = images_to_numpy(frames)        
        
        self.face_detector.keep_all = False
        


    def detect_faces():
        # the first step is to pass the sequence to the


SyntaxError: expected ':' (3730525695.py, line 88)

In [15]:
yrf = YoloFaceRecognizer()

'/home/ayhem18/DEV/My_Kaggle_Repo/Hack_visual_system'

In [3]:
from src.pytorch_modular.directories_and_files import process_save_path

def video_to_images(video_path: Union[str, Path], 
                    output_dir: Union[Path, str], 
                    frame_stride: int = 32) -> None:
    if os.path.isdir(output_dir):
        # remove the directory if it already exists
        shutil.rmtree(output_dir)

    output_dir = process_save_path(output_dir, file_ok=False, dir_ok=True)    
    count = 0
    # create the cv2 capture video object
    video_iterator = cv.VideoCapture(video_path)
    
    # count the total number of frames in the video
    s = True
    total_count = 0
    
    while s:
        s, _ = video_iterator.read()
        total_count += 1
    video_iterator = cv.VideoCapture(video_path)
    
    while True:
        frame_exists, image = video_iterator.read()

        if not frame_exists: 
            # the video is over
            break

        count += 1
        if count % frame_stride == frame_stride - 1:
            frame_num_str = f"{(len(str(total_count)) - len(str(count))) * '0'}{count}"
            cv.imwrite(os.path.join(output_dir, f'frame_{frame_num_str}.jpg'), image)


In [4]:
video_path = os.path.join(DATA_FOLDER, 'v1.mp4')
frames_save_path = os.path.join(DATA_FOLDER, 'frames')

In [5]:
video_to_images(video_path=video_path,
                output_dir=frames_save_path)

In [17]:
from PIL import Image
frames = sorted(os.listdir(frames_save_path))
frames = [os.path.join(frames_save_path, f) for f in frames]
test_frames = frames[:15] 

for image in test_frames:
    print(image)
    image = os.path.join(frames_save_path, image)
    img = cv.imread(image)
    cv.imshow('image',img)
    cv.waitKey(0)
cv.destroyAllWindows()

/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0031.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0063.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0095.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0127.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0159.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0191.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0223.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0255.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0287.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0319.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0351.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0383.jpg
/home/ayhem18/DEV/My_Kaggle_Repo/FaceRecognition/data/frames/frame_0415.jpg
/home/ayhem1

In [18]:
import numpy as np
from _collections_abc import Sequence
import torch

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DEF_YOLO = YOLO('yolov8n.pt')
def track(frames: Sequence[Union[Path, str], np.array], 
          yolo_model: YOLO = None, 
          device: str = DEVICE):
    
    if yolo_model is None:
        yolo_model = DEF_YOLO

    tracking_results = yolo_model.track(source=frames, 
                                        persist=True, 
                                        classes=0, # only detect people in the image
                                        device=device,
                                        show=False)   
    return tracking_results       

In [19]:
res = track(test_frames)
# pass the result through the tracker
from bytetrack import embed_tracking_into_results
new_res = embed_tracking_into_results(res)


0: 384x640 1 person, 1: 384x640 1 person, 2: 384x640 1 person, 3: 384x640 1 person, 4: 384x640 1 person, 5: 384x640 2 persons, 6: 384x640 2 persons, 7: 384x640 2 persons, 8: 384x640 1 person, 9: 384x640 1 person, 10: 384x640 1 person, 11: 384x640 1 person, 12: 384x640 1 person, 13: 384x640 1 person, 14: 384x640 1 person, 22.1ms
Speed: 0.9ms preprocess, 1.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


In [20]:
for r in res:
    # Get the boxes and track IDs
    track_ids = r.boxes.id

    if track_ids is None:
        continue

    track_ids = track_ids.int().cpu().tolist()
    # Visualize the results on the frame
    annotated_frame = r.plot()
    cv.imshow('frame', annotated_frame)
    cv.waitKey(0)
cv.destroyAllWindows()

In [24]:
boxes = [r.boxes for r in res]
print([b.id.int().cpu().tolist() for b in boxes])

[[1], [1], [1], [1], [5], [2, 1], [1], [1, 2], [1], [1], [1], [1], [1], [17], [2]]
