In [4]:
# Imports
import matplotlib.pyplot as plt
import cv2
import torch
import pathlib
import os
from tqdm import tqdm
import numpy as np
import pandas as pd
import logging
import shutil
import json

os.environ['DEEPFACE_LOG_LEVEL'] = str(logging.ERROR)
from deepface import DeepFace

# Constants
# All paths are derived from the working directory of the running kernel.
CWD = pathlib.Path(os.path.abspath(""))
# NOTE(review): assumes the notebook is started from a directory two levels
# below the repository root -- confirm before running from elsewhere.
GIT_ROOT = CWD.parent.parent
DATA_DIR = GIT_ROOT / "data" / 'ICMI2024'
# Reference face database handed to DeepFace.find as `db_path` in reid_process.
REID_DB = DATA_DIR / 'reid' / 'db'
# Location of the per-session CSV/JSON tables read and written further below.
OUTPUT_DIR = DATA_DIR / 'reid' / 'tables'

# Side effect at definition time: make sure the output folder exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

2024-02-12 09:45:04.164690: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-12 09:45:04.166985: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-12 09:45:04.192339: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-12 09:45:04.192360: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-12 09:45:04.193237: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [11]:
# From video, create the cropped face images
def generate_cropped_faces(video_file, tracking_file, output_dir):
    """Write one PNG face crop per tracked detection of a video.

    The tracking CSV is expected to provide the columns ``Frame``, ``X``, ``Y``,
    ``Width``, ``Height`` and ``Student_ID``. Every row whose ``Frame`` matches a
    decoded frame index is cropped out of that frame and written to
    ``output_dir / f"frame_{frame}_id_{student_id}.png"``. Rows that share the
    same frame and ID map to the same file name, so the later row overwrites
    the earlier one.

    Args:
        video_file: ``pathlib.Path`` of the video to read.
        tracking_file: ``pathlib.Path`` of the tracking CSV.
        output_dir: ``pathlib.Path`` of the folder receiving the crops
            (created if missing).

    Raises:
        AssertionError: if the video or the tracking file does not exist.
    """
    assert video_file.exists(), f"Video not found: {video_file}"
    assert tracking_file.exists(), f"Tracking file not found: {tracking_file}"
    os.makedirs(output_dir, exist_ok=True)

    # Load data
    df = pd.read_csv(tracking_file)

    # Index the detections by frame once instead of filtering the whole table
    # for every single frame of the video.
    rows_by_frame = {frame: rows for frame, rows in df.groupby('Frame')}

    cap = cv2.VideoCapture(str(video_file))
    try:
        LENGTH = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        for i in tqdm(range(LENGTH), total=LENGTH):

            # Frames must be decoded sequentially, even when nothing was detected
            ret, frame = cap.read()
            if not ret:
                break

            detected_faces = rows_by_frame.get(i)
            if detected_faces is None:
                continue

            frame_h, frame_w = frame.shape[:2]

            for (_, row) in detected_faces.iterrows():
                # Clamp the box to the frame: a negative start would otherwise be
                # read by numpy as an index counted from the end and yield an
                # empty or wrong crop.
                y0 = max(int(row['Y']), 0)
                y1 = min(int(row['Y'] + row['Height']), frame_h)
                x0 = max(int(row['X']), 0)
                x1 = min(int(row['X'] + row['Width']), frame_w)

                # A box entirely outside the frame has nothing to write, and
                # cv2.imwrite fails on empty images.
                if y1 <= y0 or x1 <= x0:
                    continue

                crop = frame[y0:y1, x0:x1]
                cv2.imwrite(str(output_dir / f'frame_{i}_id_{int(row["Student_ID"])}.png'), crop)
    finally:
        # Always give the decoder/file handle back, also on errors or interrupts
        cap.release()

# The calls below are intentionally left commented out; uncomment a call to
# (re)generate the crops for that video / tracking-file pair.
# generate_cropped_faces(
#     DATA_DIR / 'videos' / 'day1' / 'block-a-blue-day1-first-group-cam2.mp4',
#     DATA_DIR / 'trackings' / 'Day1Group1Camera2_with_student_IDs.csv',
#     DATA_DIR / 'reid' / 'cropped_faces' / 'd1g1'
# )
# generate_cropped_faces(
#     DATA_DIR / 'videos' / 'day1' / 'block-a-blue-day1-second-group-cam2.mp4',
#     DATA_DIR / 'trackings' / 'Day1Group2Camera2_with_student_IDs.csv',
#     DATA_DIR / 'reid' / 'cropped_faces' / 'd1g2'
# )
# generate_cropped_faces(
#     DATA_DIR / 'videos' / 'day2' / 'block-a-blue-day2-first-group-cam2.mp4',
#     DATA_DIR / 'trackings' / 'Day2Group1Camera2_with_student_IDs.csv',
#     DATA_DIR / 'reid' / 'cropped_faces' / 'd2g1'
# )
# generate_cropped_faces(
#     DATA_DIR / 'videos' / 'day2' / 'block-a-blue-day2-second-group-cam2.mp4',
#     DATA_DIR / 'trackings' / 'Day2Group2Camera2_with_student_IDs.csv',
#     DATA_DIR / 'reid' / 'cropped_faces' / 'd2g2'
# )

# Closes any OpenCV GUI windows; only relevant when the commented-out
# cv2.imshow debugging lines inside generate_cropped_faces are re-enabled.
cv2.destroyAllWindows()

In [12]:
# Sanity check
def sanity_check(video_file: pathlib.Path, tracking_file: pathlib.Path, cropped_face_dir: pathlib.Path):
    """Report how many tracking rows have a matching crop file on disk.

    For every frame index below the video's reported frame count, each tracking
    row of that frame is looked up as
    ``cropped_face_dir / f"frame_{frame}_id_{student_id}.png"``. The number of
    rows with an existing file is printed together with the table length and
    the frame count. Rows sharing frame and ID point to the same file and are
    each counted.

    Args:
        video_file: Video the tracking file belongs to (only its frame count is used).
        tracking_file: CSV with at least the columns ``Frame`` and ``Student_ID``.
        cropped_face_dir: Folder containing the generated crops.

    Raises:
        AssertionError: if any of the three paths does not exist.
    """
    assert video_file.exists(), f"Video not found: {video_file}"
    assert tracking_file.exists(), f"Tracking file not found: {tracking_file}"
    assert cropped_face_dir.exists(), f"Crop folder not found: {cropped_face_dir}"

    # Load the file
    df = pd.read_csv(tracking_file)

    # The video is only needed for its frame count, so release it right away
    cap = cv2.VideoCapture(str(video_file))
    try:
        LENGTH = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()

    # Index the detections by frame once instead of filtering the table per frame
    rows_by_frame = {frame: rows for frame, rows in df.groupby('Frame')}

    exists = 0
    for i in tqdm(range(LENGTH), total=LENGTH):

        detected_faces = rows_by_frame.get(i)
        if detected_faces is None:
            continue

        for student_id in detected_faces['Student_ID']:
            face_crop = cropped_face_dir / f"frame_{i}_id_{int(student_id)}.png"
            if face_crop.exists():
                exists += 1

    print(f"Df length: {len(df)}, Video length: {LENGTH}")
    if len(df) > 0:
        print(f"Exists: {exists}/{len(df)} = {exists/len(df):.2f}")
    else:
        # Avoid a ZeroDivisionError on an empty tracking table
        print("Exists: 0/0 (tracking file has no rows)")

# Verify the crops of the day 1 / first group / camera 2 recording.
sanity_check(
    DATA_DIR / 'videos' / 'day1' / 'block-a-blue-day1-first-group-cam2.mp4',
    DATA_DIR / 'trackings' / 'Day1Group1Camera2_with_student_IDs.csv',
    DATA_DIR / 'reid' / 'cropped_faces' / 'd1g1'
)

100%|██████████| 13464/13464 [00:12<00:00, 1121.83it/s]

Df length: 78894, Video length: 13464
Exists: 78894/78894 = 1.00





In [13]:
# The file system reports 52,141 items in this folder. Crop file names are
# built from frame index and student ID only, so tracking rows that share both
# end up in the same file.
# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# kept here because other cells may rely on it.
dir = DATA_DIR.joinpath('reid', 'cropped_faces', 'd1g1')
print(f"Total cropped images: {len(list(dir.iterdir()))}")

Total cropped images: 52141


In [19]:
# Minimum crop height and width (in pixels); smaller crops are skipped by reid_process.
SIZE_REQ = 30
# Passed to DeepFace.find as its `threshold` argument in reid_process.
INDIVIDUAL_THRESHOLD = 1
# NOTE(review): not referenced anywhere in the code visible here -- confirm whether it is still needed.
DISTANCE_THRESHOLD = 0.5

def reid_process(video_file: pathlib.Path, tracking_file: pathlib.Path, cropped_face_dir: pathlib.Path, output_file: pathlib.Path, reid_tags=('s1', 's2', 's3', 's4', 's5', 's6', 's7', 'r1', 'r2', 'teacher')):
    """Assign a re-identification (REID) tag to every tracked ID of a session.

    Frames are visited in order. For each tracking row whose ``Student_ID`` has
    no tag yet, the matching crop (``frame_{frame}_id_{id}.png`` inside
    ``cropped_face_dir``) is compared against the face database ``REID_DB``
    with ``DeepFace.find``. The identity is taken from the name of the folder
    that contains the matched database image. Within one frame each identity is
    assigned to at most one crop. Once a tracked ID has a tag it is never
    looked up again.

    Outputs:
        * ``<cropped_face_dir>_reid/<tag>/<crop>.png``: a copy of every crop
          that produced an assignment. The folder is deleted and rebuilt on
          every call.
        * ``output_file``: CSV with columns ``cropped_file``, ``reid``,
          ``distance`` and ``comment``.
        * ``output_file`` with a ``.json`` extension: mapping of tracked ID to tag.

    A ``KeyboardInterrupt`` stops the loop early; everything collected up to
    that point is still written.

    Args:
        video_file: Video of the session (only its frame count is used).
        tracking_file: CSV with at least the columns ``Frame`` and ``Student_ID``.
        cropped_face_dir: Folder with the crops of the session.
        output_file: Destination of the CSV table.
        reid_tags: Tags for which a review sub-folder is created up front.

    Raises:
        AssertionError: if an input path or an expected crop file is missing.
    """
    assert video_file.exists(), f"Video not found: {video_file}"
    assert tracking_file.exists(), f"Tracking file not found: {tracking_file}"
    assert cropped_face_dir.exists(), f"Crop folder not found: {cropped_face_dir}"

    # Load the file
    df = pd.read_csv(tracking_file)

    # The video is only needed for its frame count, so release it right away
    cap = cv2.VideoCapture(str(video_file))
    try:
        LENGTH = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()

    # Index the detections by frame once instead of filtering the table per frame
    rows_by_frame = {frame: rows for frame, rows in df.groupby('Frame')}

    # Create output folder to verify users REID (rebuilt from scratch on each run)
    reid_folder = cropped_face_dir.parent / f"{cropped_face_dir.name}_reid"
    if reid_folder.exists():
        shutil.rmtree(reid_folder)
    os.makedirs(reid_folder, exist_ok=True)
    for tag in reid_tags:
        os.makedirs(reid_folder / tag, exist_ok=True)

    # Create reid container
    reid_container = {'cropped_file': [], 'reid': [], 'distance': [], 'comment': []}

    tracked_id_to_reid_mapping = {}

    for i in tqdm(range(LENGTH), total=LENGTH):

        try:

            detected_faces = rows_by_frame.get(i)
            if detected_faces is None:
                continue

            # Best candidate per identity, to ensure frame consistency
            best_per_reid = {}
            # Crops already looked up in this frame (duplicate tracking rows
            # point to the same file and would give the same result)
            seen_files = set()

            for (_, row) in detected_faces.iterrows():

                tracked_id = int(row['Student_ID'])

                # Already identified in an earlier frame
                if tracked_id in tracked_id_to_reid_mapping:
                    continue

                filename = f"frame_{i}_id_{tracked_id}.png"
                if filename in seen_files:
                    continue
                seen_files.add(filename)

                face_crop = cropped_face_dir / filename
                assert face_crop.exists(), f"Missing crop: {face_crop}"
                crop = cv2.imread(str(face_crop))

                # cv2.imread returns None for unreadable/corrupt images
                if crop is None:
                    continue

                # If the image is too small, not worth the trouble
                h, w = crop.shape[:2]
                if (h < SIZE_REQ or w < SIZE_REQ):
                    continue

                match_df = DeepFace.find(
                    img_path=crop,
                    db_path=str(REID_DB),
                    model_name="Facenet512",
                    distance_metric="euclidean_l2",
                    enforce_detection=False,
                    silent=True,
                    threshold=INDIVIDUAL_THRESHOLD
                )[0]

                # Failed REID: no match
                if len(match_df) == 0:
                    continue

                # The identity is the folder holding the matched database image;
                # pathlib keeps this independent of the path separator.
                identities = match_df['identity'].map(lambda p: pathlib.Path(p).parent.name)

                # Mode of the distances per identity, then the identity with the
                # lowest such value (ties resolve to the first identity in sorted order)
                mode_per_identity = match_df['distance'].groupby(identities).agg(lambda x: x.mode()[0])
                reid = str(mode_per_identity.idxmin())
                distance = float(mode_per_identity.loc[reid])

                # Frame consistency: one identity can only belong to one crop per
                # frame, so keep the closest candidate.
                # NOTE(review): the previous implementation kept the candidate
                # with the HIGHEST distance, i.e. the worst match, which is
                # inconsistent with the lowest-distance selection above.
                current = best_per_reid.get(reid)
                if current is None or distance < current['distance']:
                    best_per_reid[reid] = {
                        "tracked_id": tracked_id,
                        "filepath": face_crop.name,
                        "image": crop,
                        "distance": distance,
                    }

            # Save the entries (sorted by tag, matching the former groupby order)
            for reid in sorted(best_per_reid):
                entry = best_per_reid[reid]

                # Enter the data
                reid_container['cropped_file'].append(entry['filepath'])
                reid_container['reid'].append(reid)
                reid_container['distance'].append(entry['distance'])
                reid_container['comment'].append("")

                # Save the image as well; the database may contain identities
                # that are not listed in reid_tags
                target_dir = reid_folder / reid
                os.makedirs(target_dir, exist_ok=True)
                cv2.imwrite(str(target_dir / entry['filepath']), entry['image'])

                # Save in the mapping
                tracked_id_to_reid_mapping[entry['tracked_id']] = reid

        except KeyboardInterrupt:
            print("KeyboardInterrupt detected, saving data")
            break

    # Save the container
    reid_df = pd.DataFrame(reid_container)
    reid_df.to_csv(output_file, index=False)

    # Save the mapping from tracked id to REID tag
    with open(output_file.parent / f"{output_file.stem}.json", "w") as f:
        json.dump(tracked_id_to_reid_mapping, f, indent=4)

# The calls below are intentionally left commented out; uncomment a call to
# (re)run the REID for that session. Each run deletes and rebuilds the
# corresponding "<session>_reid" review folder.
# reid_process(
#     DATA_DIR / 'videos' / 'day1' / 'block-a-blue-day1-first-group-cam2.mp4',
#     DATA_DIR / 'trackings' / 'Day1Group1Camera2_with_student_IDs.csv',
#     DATA_DIR / 'reid' / 'cropped_faces' / 'd1g1',
#     OUTPUT_DIR / 'd1g1-cam2.csv'
# )
# reid_process(
#     DATA_DIR / 'videos' / 'day1' / 'block-a-blue-day1-second-group-cam2.mp4',
#     DATA_DIR / 'trackings' / 'Day1Group2Camera2_with_student_IDs.csv',
#     DATA_DIR / 'reid' / 'cropped_faces' / 'd1g2',
#     OUTPUT_DIR / 'd1g2-cam2.csv'
# )
# reid_process(
#     DATA_DIR / 'videos' / 'day2' / 'block-a-blue-day2-first-group-cam2.mp4',
#     DATA_DIR / 'trackings' / 'Day2Group1Camera2_with_student_IDs.csv',
#     DATA_DIR / 'reid' / 'cropped_faces' / 'd2g1',
#     OUTPUT_DIR / 'd2g1-cam2.csv'
# )
# reid_process(
#     DATA_DIR / 'videos' / 'day2' / 'block-a-blue-day2-second-group-cam2.mp4',
#     DATA_DIR / 'trackings' / 'Day2Group2Camera2_with_student_IDs.csv',
#     DATA_DIR / 'reid' / 'cropped_faces' / 'd2g2',
#     OUTPUT_DIR / 'd2g2-cam2.csv'
# )

# Closes any OpenCV GUI windows; only relevant when the commented-out
# cv2.imshow debugging lines inside reid_process are re-enabled.
cv2.destroyAllWindows()

100%|██████████| 12029/12029 [02:15<00:00, 89.03it/s] 
100%|██████████| 18834/18834 [04:03<00:00, 77.34it/s] 


In [8]:
# Process the manually cleaned REID, report accuracy, and update JSON

def clean_reid(reid_dir: pathlib.Path, reid_json: pathlib.Path, reid_tags=('s1', 's2', 's3', 's4', 's5', 's6', 's7', 'r1', 'r2', 'teacher')):
    """Compare a manually cleaned REID folder with the automatic mapping.

    ``reid_dir`` holds one sub-folder per REID tag. Each crop inside is named
    ``..._<tracked_id>.<ext>``, and the folder it sits in is taken as the
    corrected tag of that tracked ID. ``reid_json`` is the automatic mapping of
    tracked ID to tag. Every entry of that mapping is classified as:

    * correct:   the crop is still in the folder of the automatic tag,
    * incorrect: the crop sits in a different folder,
    * false:     no crop of that tracked ID is left in any folder.

    Three files are written next to ``reid_json`` (``<stem>`` is its name
    without extension):

    * ``<stem>_cleaned.json``: corrected mapping with removed entries dropped,
    * ``<stem>_report.csv``: row 1 absolute counts, row 2 fractions of the total,
    * ``<stem>_ratios.csv``: per automatic tag the correct and incorrect
      counts and ``correct / (correct + incorrect)``.

    Entries of ``reid_dir`` that are not folders, files whose name does not end
    in an integer ID (e.g. ``.DS_Store``) and crops whose ID is not part of the
    mapping are skipped instead of aborting the run.

    Args:
        reid_dir: Root of the manually cleaned REID folders.
        reid_json: JSON mapping written by the automatic REID step.
        reid_tags: Tags that always get a row in the ratios report.

    Raises:
        AssertionError: if ``reid_dir`` or ``reid_json`` does not exist.
    """
    assert reid_dir.exists(), f"REID folder not found: {reid_dir}"
    assert reid_json.exists(), f"REID mapping not found: {reid_json}"

    # Load the JSON (its keys are tracked IDs stored as strings)
    with open(reid_json, "r") as f:
        tracked_id_to_reid_mapping = json.load(f)

    # Work on a copy so that the automatic result stays available for comparison
    tracked_id_to_reid_mapping_copy = tracked_id_to_reid_mapping.copy()

    # Single pass over the cleaned folders: apply relabels and remember which
    # tracked IDs still have a crop somewhere.
    remaining_ids = set()
    unknown_files = 0
    for folder in reid_dir.iterdir():
        if not folder.is_dir():
            continue  # stray file next to the tag folders
        for file in folder.iterdir():
            # Get the tracking ID from the filename
            try:
                tracked_id = int(file.stem.split("_")[-1])
            except ValueError:
                continue  # not a crop file
            remaining_ids.add(tracked_id)

            key = str(tracked_id)
            if key not in tracked_id_to_reid_mapping:
                unknown_files += 1
                continue

            # Update the mapping
            if tracked_id_to_reid_mapping[key] != folder.name:
                tracked_id_to_reid_mapping_copy[key] = folder.name

    if unknown_files:
        print(f"Skipped {unknown_files} file(s) whose tracked ID is not in {reid_json.name}")

    # Check for deleted entries
    to_delete = 0
    for k in tracked_id_to_reid_mapping:
        if int(k) not in remaining_ids:
            del tracked_id_to_reid_mapping_copy[k]
            to_delete += 1

    # Report
    print(f"Deleted {to_delete} entries")

    # Compare the automatic mapping with the cleaned one
    correct = 0
    incorrect = 0
    false = 0
    total = len(tracked_id_to_reid_mapping)

    # Track also which REID tags are the most challenging
    # (both lists hold the tag that was assigned automatically)
    correct_reid = []
    incorrect_reid = []

    for k in tracked_id_to_reid_mapping:
        if k not in tracked_id_to_reid_mapping_copy:
            false += 1
        elif tracked_id_to_reid_mapping[k] == tracked_id_to_reid_mapping_copy[k]:
            correct += 1
            correct_reid.append(tracked_id_to_reid_mapping[k])
        else:
            incorrect += 1
            incorrect_reid.append(tracked_id_to_reid_mapping[k])

    # Save the new JSON
    new_reid_json_fp = reid_json.parent / f"{reid_json.stem}_cleaned.json"
    with open(new_reid_json_fp, "w") as f:
        json.dump(tracked_id_to_reid_mapping_copy, f, indent=4)

    def fraction(count):
        # An empty mapping has no meaningful fraction
        return count / total if total else float("nan")

    # Save the report
    report_fp = reid_json.parent / f"{reid_json.stem}_report.csv"
    report_df = pd.DataFrame({
        "correct": [correct, fraction(correct)],
        "incorrect": [incorrect, fraction(incorrect)],
        "false": [false, fraction(false)],
    })

    report_df.to_csv(report_fp, index=False, float_format="%.3f")

    # Create a report that includes individual REID tags
    correct_counts = pd.Series(correct_reid, dtype="object").value_counts()
    incorrect_counts = pd.Series(incorrect_reid, dtype="object").value_counts()

    # Make sure that all REID tags are included, even with zero occurrences
    for tag in reid_tags:
        if tag not in correct_counts:
            correct_counts[tag] = 0
        if tag not in incorrect_counts:
            incorrect_counts[tag] = 0

    # Compute correct/incorrect ratio (NaN for tags that never occurred)
    ratio = correct_counts / (correct_counts + incorrect_counts)

    # Add back the counts and incorrect counts
    ratio = pd.concat([correct_counts, incorrect_counts, ratio], axis=1)
    ratio.columns = ["correct", "incorrect", "ratio"]

    ratios_fp = reid_json.parent / f"{reid_json.stem}_ratios.csv"
    ratio.to_csv(ratios_fp, index_label="reid")
                

# Process every recording session: the manually cleaned REID folder is compared
# against the automatic mapping of the same session (camera 2).
for _session in ('d1g1', 'd1g2', 'd2g1', 'd2g2'):
    clean_reid(
        DATA_DIR / 'reid' / 'cropped_faces' / f'{_session}_reid_cleaned',
        OUTPUT_DIR / f'{_session}-cam2.json'
    )

Deleted 69 entries
Deleted 58 entries
Deleted 88 entries
Deleted 132 entries
