# YOLO Finetuning

In [None]:
!pip -q install ultralytics

In [None]:
import os

dataset_path = "/kaggle/input/surveillance-for-retail-stores/tracking"
output_labels = "/kaggle/working/labels"
os.makedirs(output_labels, exist_ok=True)

sequences = ["02", "03", "05"]  # Training sequences

for seq in sequences:
    gt_file = os.path.join(dataset_path, "train", seq, "gt/gt.txt")
    img_dir = os.path.join(dataset_path, "train", seq, "img1")
    label_dir = os.path.join(output_labels, seq)
    os.makedirs(label_dir, exist_ok=True)

    with open(gt_file, "r") as f:
        for line in f:
            parts = line.strip().split(",")
            frame, obj_id, x, y, w, h, conf, cls, visibility = map(float, parts)

            img_width, img_height = 1920, 1080  # Adjust from seqinfo.ini
            x_center = (x + w / 2) / img_width
            y_center = (y + h / 2) / img_height
            w /= img_width
            h /= img_height

            label_file = os.path.join(label_dir, f"{int(frame):06d}.txt")
            with open(label_file, "a") as lf:
                lf.write(f"0 {x_center} {y_center} {w} {h}\n")

In [None]:
import os
import shutil
from tqdm import tqdm

# Define source image directories (read-only)
image_dirs = [
    "/kaggle/input/surveillance-for-retail-stores/tracking/train/02/img1",
    "/kaggle/input/surveillance-for-retail-stores/tracking/train/03/img1",
    "/kaggle/input/surveillance-for-retail-stores/tracking/train/05/img1"
]
label_dirs = [
    "/kaggle/working/labels/02",
    "/kaggle/working/labels/03",
    "/kaggle/working/labels/05"
]

# Define destination paths in /kaggle/working/
working_dir = "/kaggle/working/dataset"
os.makedirs(working_dir, exist_ok=True)

for idx, img_dir in enumerate(image_dirs):
    new_img_dir = os.path.join(working_dir, f"train/{idx}/images")
    new_label_dir = os.path.join(working_dir, f"train/{idx}/labels")

    os.makedirs(new_img_dir, exist_ok=True)
    os.makedirs(new_label_dir, exist_ok=True)

    # Copy images
    for img_file in tqdm(os.listdir(img_dir), desc=f"Copying images from {img_dir}"):
        src_path = os.path.join(img_dir, img_file)
        dst_path = os.path.join(new_img_dir, img_file)
        if os.path.isfile(src_path):
            shutil.copy2(src_path, dst_path)

    # Move corresponding labels
    for lbl_file in tqdm(os.listdir(label_dirs[idx]), desc=f"Moving labels for {img_dir}"):
        src_path = os.path.join(label_dirs[idx], lbl_file)
        dst_path = os.path.join(new_label_dir, lbl_file)
        if os.path.isfile(src_path):
            shutil.move(src_path, dst_path)

print("Dataset restructuring complete!")

In [None]:
yaml_content = """train: 
- /kaggle/working/dataset/train/0
- /kaggle/working/dataset/train/1
val: /kaggle/working/dataset/train/2

nc: 1
names: ["person"]
"""

# Save to a file
yaml_path = "/kaggle/working/dataset.yaml"
with open(yaml_path, "w") as f:
    f.write(yaml_content)

print(f"dataset.yaml created at {yaml_path}")

In [None]:
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # Use a small, non-MOT model
model.train(data="/kaggle/working/dataset.yaml", epochs=5, imgsz=640, batch=16, device="cuda")

In [None]:
#model = YOLO("/kaggle/working/runs/detect/train/weights/best.pt")
#results = model.predict(source="/kaggle/input/surveillance-for-retail-stores/tracking/test/01/img1", save=True, conf=0.4)

# ReID

In [None]:
!pip -q install torchreid

In [None]:
import torchreid
torchreid.models.show_avai_models()  # Check available models

In [None]:
datamanager = torchreid.data.ImageDataManager(
    root="/kaggle/input",
    sources="market1501",
    targets="market1501",
    height=256, width=128,
    batch_size_train=32,
    batch_size_test=32,
    transforms=["random_flip", "random_crop"],
    use_gpu=True
)

In [None]:
model = torchreid.models.build_model(
    name="osnet_x1_0",  
    num_classes=datamanager.num_train_pids,  
    pretrained=True
)
model = model.cuda()

In [None]:
optimizer = torchreid.optim.build_optimizer(model, optim="adam", lr=0.0005)
scheduler = torchreid.optim.build_lr_scheduler(optimizer)

engine = torchreid.engine.ImageSoftmaxEngine(
    datamanager, model, optimizer, scheduler
)

engine.run(max_epoch=20, save_dir="log/osnet", print_freq=1)

# Merging

In [None]:
from ultralytics import YOLO
import torch
import cv2
import numpy as np

detector = YOLO("/kaggle/working/runs/detect/train/weights/best.pt")  

def detect_pedestrians(frame):
    results = detector(frame)
    detections = []
    
    for r in results:
        for box in r.boxes.xywh.cpu().numpy():
            x, y, w, h = box
            detections.append([x, y, w, h, 1.0])  # Confidence = 1.0 for tracking
    
    return np.array(detections) if len(detections) > 0 else np.empty((0, 5))

In [None]:
import torchreid
print(torchreid.__file__)

In [None]:
import pkgutil
import torchreid

modules = [module.name for module in pkgutil.iter_modules(torchreid.__path__)]
print(modules)

In [None]:
from torchreid.reid.utils import FeatureExtractor

In [None]:

# Load OSNet for embedding extraction
extractor = FeatureExtractor(
    model_name='osnet_x1_0',
    model_path="/kaggle/working/log/osnet/model.pth.tar-20",
    device='cuda'
)

# Extract embeddings
def get_embedding(frame, bbox):
    x, y, w, h = bbox
    cropped = frame[int(y):int(y+h), int(x):int(x+w)]
    cropped = cv2.resize(cropped, (128, 256))  # Resize for OSNet
    embedding = extractor(cropped).cpu().detach().numpy()
    return embedding

In [None]:
!git clone https://github.com/noahcao/OC_SORT.git

In [None]:
%cd OC_SORT
!pip -q install -r requirements.txt

In [None]:
!pip -q install filterpy

In [None]:
import sys
sys.path.append("/kaggle/working/OC_SORT")

In [None]:
'''from trackers.ocsort_tracker.ocsort import OCSort

tracker = OCSort(det_thresh=0.3, max_age=30, min_hits=3, iou_threshold=0.2)

def track_objects(frame, detections):
    img_size = frame.shape[:2]  # (height, width)
    img_info = (img_size[0], img_size[1], frame_id)  # Add frame_id as additional info if needed

    tracked_objects = tracker.update(detections, img_info, img_size)
    results = []

    for obj in tracked_objects:
        obj_id = int(obj[4])  # Assuming obj[4] contains the ID
        bbox = obj[:4]  # Bounding box (x1, y1, x2, y2)
        results.append({"id": obj_id, "bbox": bbox})

    return results
'''
from trackers.ocsort_tracker.ocsort import OCSort

# Initialize the tracker
tracker = OCSort(det_thresh=0.25, max_age=100, min_hits=0, iou_threshold=0.2)

def track_objects(frame, detections):
    img_size = frame.shape[:2]  # (height, width)
    img_info = (img_size[0], img_size[1], frame_id)  # Add frame_id as additional info if needed

    # Update tracker with new detections
    tracked_objects = tracker.update(detections, img_info, img_size)
    results = []

    for obj in tracked_objects:
        obj_id = int(obj[4])  # Assuming obj[4] contains the ID
        bbox = obj[:4]  # Bounding box (x1, y1, x2, y2)
        results.append({"id": obj_id, "bbox": bbox})

    return results

# Inference

In [None]:
import csv
import os
import cv2

output_csv = "/kaggle/working/submission_file_tracking.csv"

with open(output_csv, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["ID", "frame", "objects", "objective"])

    frame_id = 1  # Start frame numbering from 1
    for seq in ["01"]:
        img_dir = f"/kaggle/input/surveillance-for-retail-stores/tracking/test/{seq}/img1" 
        img_files = sorted(os.listdir(img_dir))
        
        for img_file in img_files:
            img_path = os.path.join(img_dir, img_file)
            frame = cv2.imread(img_path)

            if frame is None:
                print(f"Warning: Failed to read {img_path}")
                continue  # Skip corrupted images

            # Detect pedestrians in the current frame
            detections = detect_pedestrians(frame)

            # Track objects across frames
            tracked_objects = track_objects(frame, detections)

            # Format tracked objects for CSV
            formatted_objects = []
            for obj in tracked_objects:
                obj_id = obj["id"]  # Unique ID for each object
                x1, y1, x2, y2 = obj["bbox"]
                w, h = x2 - x1, y2 - y1
                confidence = 1.0  # Default confidence

                formatted_objects.append({
                    "tracked_id": obj_id,
                    "x": int(x1),
                    "y": int(y1),
                    "w": int(w),
                    "h": int(h),
                    "confidence": round(float(confidence), 6)
                })

            # Write to CSV
            writer.writerow([frame_id - 1, float(frame_id), str(formatted_objects), "tracking"])
            print(f"Frame {frame_id}: Num Tracked objects: {len(formatted_objects)}")
            frame_id += 1

print("Submission file done")

# Face ReID

In [None]:
!pip -q install facenet-pytorch torchvision scipy pandas

In [10]:
import os
import json
import numpy as np
import pandas as pd
import torch
from PIL import Image
import torchvision.transforms as transforms
from scipy.spatial.distance import cosine

In [2]:
ROOT_DIR = "/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification"

# Load train.csv
df_train = pd.read_csv(os.path.join(ROOT_DIR, "trainset.csv"))

# Dictionary to store all image paths per person
train_images = {}

for _, row in df_train.iterrows():
    person = row["gt"]  # Person ID
    image_path = os.path.join(ROOT_DIR, row["image_path"])  # Full image path

    if os.path.exists(image_path):  # Check if file exists
        if person not in train_images:
            train_images[person] = []
        train_images[person].append(image_path)  # Append image path to person's list
    else:
        print(f"Image file does not exist: {image_path}")  # Debugging

print(f"Updated image paths for {len(train_images)} persons.")

# Check the first few entries
for person, img_paths in list(train_images.items())[:5]:
    print(f"{person}: {img_paths[:3]}")  # Show only the first 3 images per person


Updated image paths for 125 persons.
person_0: ['/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/train/person_0/0.jpg', '/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/train/person_0/1.jpg', '/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/train/person_0/10.jpg']
person_1: ['/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/train/person_1/23.jpg', '/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/train/person_1/24.jpg', '/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/train/person_1/25.jpg']
person_10: ['/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/train/person_10/67.jpg', '/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/train/person_10/68.jpg', '/kaggle/input/surveillance-for-retail-stores/fac

In [4]:
pip install insightface onnxruntime-gpu numpy opencv-python

Collecting insightface
  Downloading insightface-0.7.3.tar.gz (439 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m439.5/439.5 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting onnxruntime-gpu
  Downloading onnxruntime_gpu-1.21.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting coloredlogs (from onnxruntime-gpu)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime-gpu)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime_gpu-1.21.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (280.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.8/280.8 MB[0m [31m6.0 MB/s[0m eta [36m0

In [5]:
import torch
import cv2
import numpy as np
from insightface.app import FaceAnalysis

# Check if CUDA is available
use_gpu = torch.cuda.is_available()
providers = ['CUDAExecutionProvider'] if use_gpu else ['CPUExecutionProvider']

print(f"Using {'GPU' if use_gpu else 'CPU'}")

# Initialize ArcFace model
app = FaceAnalysis(providers=providers)
app.prepare(ctx_id=0 if use_gpu else -1, det_size=(160, 160))  # Ensure correct context ID

def get_embedding(image_path):
    try:
        # Load image
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError("Image not found or unreadable")
        
        # Detect faces and extract embeddings
        faces = app.get(img)
        if len(faces) > 0:
            return faces[0].embedding  # Return first detected face embedding
        else:
            print(f"No face detected in {image_path}")
            return None
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return None

  check_for_updates()


Using GPU
download_path: /root/.insightface/models/buffalo_l
Downloading /root/.insightface/models/buffalo_l.zip from https://github.com/deepinsight/insightface/releases/download/v0.7/buffalo_l.zip...


100%|██████████| 281857/281857 [00:03<00:00, 83791.81KB/s]


Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'sdpa_kernel': '0', 'use_tf32': '1', 'fuse_conv_bias': '0', 'prefer_nhwc': '0', 'tunable_op_max_tuning_duration_ms': '0', 'enable_skip_layer_norm_strict_mode': '0', 'tunable_op_tuning_enable': '0', 'tunable_op_enable': '0', 'use_ep_level_unified_stream': '0', 'device_id': '0', 'has_user_compute_stream': '0', 'gpu_external_empty_cache': '0', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'cudnn_conv1d_pad_to_nc1d': '0', 'gpu_mem_limit': '18446744073709551615', 'gpu_external_alloc': '0', 'gpu_external_free': '0', 'arena_extend_strategy': 'kNextPowerOfTwo', 'do_copy_in_default_stream': '1', 'enable_cuda_graph': '0', 'user_compute_stream': '0', 'cudnn_conv_use_max_workspace': '1'}}
find model: /root/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with o

AttributeError: 'FaceAnalysis' object has no attribute 'providers'

In [6]:
train_embeddings = {}

total_persons = len(train_images)
print(f"Starting embedding extraction for {total_persons} persons.")

for i, (person, img_paths) in enumerate(train_images.items(), start=1):
    print(f"\nProcessing person {i}/{total_persons}: {person} ({len(img_paths)} images)")
    
    embeddings = [get_embedding(img) for img in img_paths if get_embedding(img) is not None]

    if embeddings:
        train_embeddings[person] = np.mean(embeddings, axis=0)  # Average embedding
        #print(f"Computed embedding for {person} (from {len(embeddings)} images)")
    else:
        print(f" No valid embeddings for {person}")

print(f"\nComputed embeddings for {len(train_embeddings)} unique persons.")

Starting embedding extraction for 125 persons.

Processing person 1/125: person_0 (14 images)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



Processing person 2/125: person_1 (31 images)

Processing person 3/125: person_10 (5 images)

Processing person 4/125: person_100 (77 images)

Processing person 5/125: person_101 (53 images)

Processing person 6/125: person_102 (36 images)

Processing person 7/125: person_103 (50 images)

Processing person 8/125: person_104 (21 images)

Processing person 9/125: person_105 (49 images)

Processing person 10/125: person_106 (33 images)

Processing person 11/125: person_107 (191 images)

Processing person 12/125: person_108 (22 images)

Processing person 13/125: person_109 (35 images)

Processing person 14/125: person_11 (26 images)

Processing person 15/125: person_110 (79 images)

Processing person 16/125: person_111 (18 images)

Processing person 17/125: person_112 (59 images)

Processing person 18/125: person_113 (52 images)

Processing person 19/125: person_114 (110 images)

Processing person 20/125: person_115 (23 images)

Processing person 21/125: person_116 (75 images)

Processing

In [7]:
test_folder = "/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/test"
test_images = [os.path.join(test_folder, img) for img in os.listdir(test_folder) if img.endswith(('.jpg', '.png', '.jpeg'))]
print(f"Found {len(test_images)} test images.")

Found 4734 test images.


In [17]:
def find_best_match(test_embedding, threshold=0.5):
    """Find the closest match from the training embeddings."""
    best_match = None
    best_score = float('inf')  # Lower is better for cosine distance
    
    for person, emb in train_embeddings.items():
        score = cosine(test_embedding, emb)
        if score < best_score:
            best_score = score
            best_match = person

    return best_match if best_score < threshold else "doesn\'t_exist"


In [18]:
import csv
import os

# Define file paths
submission_file = "/kaggle/input/surveillance-for-retail-stores/submission_file.csv"
output_csv_path = "/kaggle/working/predictions2.csv"

# Extract image numbers from the submission file
submission_order = []
with open(submission_file, mode="r", encoding="utf-8") as file:
    reader = csv.reader(file)
    next(reader)  # Skip header
    for row in reader:
        try:
            parsed_obj = eval(row[2])  # Safely evaluate dictionary string
            if isinstance(parsed_obj, dict) and "image" in parsed_obj:
                image_filename = os.path.basename(parsed_obj["image"])  # Extract only filename
                submission_order.append(image_filename)  # Store ordered image filenames
            else:
                print(f"Unexpected format in submission file row: {row}")
        except Exception as e:
            print(f"Error parsing row {row}: {e}")

print(f"Loaded {len(submission_order)} image filenames from submission file.")

predictions_dict = {}

for test_img in submission_order:  # Ensure inference follows submission order
    test_emb = get_embedding(os.path.join("/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/test", test_img))  # Compute embedding
    if test_emb is not None:
        match = find_best_match(test_emb)  # Find best match
    else:
        match = "doesn\'t_exist"
    
    predictions_dict[test_img] = match  

print(f"Generated {len(predictions_dict)} predictions.")

with open(output_csv_path, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file,quoting=csv.QUOTE_ALL)
    writer.writerow(["ID", "frame", "objects", "objective"])  # Write header

    for index, image_filename in enumerate(submission_order, start=429):  
        if image_filename in predictions_dict:
            gt_label = predictions_dict[image_filename]  # Get label from dictionary
            short_image_path = os.path.join("test_set", image_filename)  # Reconstruct path

            if gt_label == "doesn\'t_exist":
                obj_str = f'{{"gt": "{gt_label}", "image": "{short_image_path}"}}'  # Use double quotes for "doesn't exist"
            else:
                obj_str = f"{{'gt': '{gt_label}', 'image': '{short_image_path}'}}"  # Use single quotes for other cases

            # Write row in correct format
            writer.writerow([index, -1.0, obj_str, "face_reid"])
        else:
            print(f"Missing prediction for {image_filename}")

print(f"Predictions saved to {output_csv_path}")

Unexpected format in submission file row: ['0', '1.0', '[]', 'tracking']
Unexpected format in submission file row: ['1', '2.0', '[]', 'tracking']
Unexpected format in submission file row: ['2', '3.0', '[]', 'tracking']
Unexpected format in submission file row: ['3', '4.0', '[]', 'tracking']
Unexpected format in submission file row: ['4', '5.0', '[]', 'tracking']
Unexpected format in submission file row: ['5', '6.0', '[]', 'tracking']
Unexpected format in submission file row: ['6', '7.0', '[]', 'tracking']
Unexpected format in submission file row: ['7', '8.0', '[]', 'tracking']
Unexpected format in submission file row: ['8', '9.0', '[]', 'tracking']
Unexpected format in submission file row: ['9', '10.0', '[]', 'tracking']
Unexpected format in submission file row: ['10', '11.0', '[]', 'tracking']
Unexpected format in submission file row: ['11', '12.0', '[]', 'tracking']
Unexpected format in submission file row: ['12', '13.0', '[]', 'tracking']
Unexpected format in submission file row: ['

  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


No face detected in /kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/test/4233.jpg
No face detected in /kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/test/4564.jpg
No face detected in /kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/test/5205.jpg
No face detected in /kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/test/6597.jpg
No face detected in /kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/test/10670.jpg
No face detected in /kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/test/9743.jpg
Generated 4734 predictions.
Predictions saved to /kaggle/working/predictions2.csv


# Tracker

In [19]:
import pandas as pd

# Load both CSV files
df1 = pd.read_csv("/kaggle/input/tracking/submission.csv")
df2 = pd.read_csv("/kaggle/working/predictions2.csv")

# Concatenate them
df_combined = pd.concat([df1, df2], ignore_index=False)

# Save the merged CSV
df_combined.to_csv("/kaggle/working/bestsubmission.csv", index=False)

print(f"Merged CSV saved as submission.csv")


Merged CSV saved as submission.csv
