## 1. Library Imports and Setup
Imports essential libraries for the project including computer vision (cv2), deep learning (torch, YOLO), data processing (numpy), and visualization tools. Sets up logging configuration for tracking the training process.

In [2]:
# Import required libraries
import cv2
import torch
from ultralytics import YOLO
import os
import pickle
import logging
import numpy as np
from datetime import datetime
import colorsys
import shutil
import glob
from tqdm import tqdm
import matplotlib.pyplot as plt

# Configure logging
logging.basicConfig(
    filename=f'train_detection_log_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

## 2. COCO Classes and Color Generation
Defines the COCO dataset classes and implements a color generation system that creates distinct colors for visualizing different object classes. Uses HSV color space for better color separation.

In [49]:
# COCO class names (80 classes)
COCO_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog",
    "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
    "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
    "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog",
    "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "TV",
    "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair dryer", "toothbrush"
]

def generate_distinct_colors(num_classes):
    """
    Generate distinct colors for classes using HSV color space.
    Ensures good color separation by varying hue, saturation, and value.
    """
    colors = []
    for i in range(num_classes):
        # Use golden ratio to spread out hues
        hue = (i * 0.618033988749895) % 1.0
        # Full saturation and value for vibrant colors
        saturation = 0.8
        value = 0.9
        
        # Convert HSV to RGB
        rgb = colorsys.hsv_to_rgb(hue, saturation, value)
        
        # Convert to BGR and scale to 0-255
        bgr_color = (int(rgb[2] * 255), int(rgb[1] * 255), int(rgb[0] * 255))
        colors.append(bgr_color)
    
    return colors

# Generate color map with a color for each class
COLOR_MAP = generate_distinct_colors(len(COCO_CLASSES))

## 3. MOT17 Dataset Preparation
Converts MOT17 dataset into YOLO format. Handles:
- Train/val split (80/20)
- Ground truth conversion
- Image organization
- Label generation in YOLO format
- Dataset configuration (yaml) creation

In [50]:
def prepare_mot17_for_yolo(mot17_root, output_dir):
    """
    Prepare MOT17 dataset for YOLO training by creating a dataset in YOLO format.
    This function converts MOT17 ground truth to YOLO format.
    """
    # Create the necessary directories
    os.makedirs(os.path.join(output_dir, 'images', 'train'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'images', 'val'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'labels', 'train'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'labels', 'val'), exist_ok=True)
    
    # Define the train-val split (80-20)
    sequence_dirs = [d for d in os.listdir(mot17_root) if os.path.isdir(os.path.join(mot17_root, d)) and d.startswith('MOT17-')]
    train_sequences = sequence_dirs[:int(len(sequence_dirs) * 0.8)]
    val_sequences = sequence_dirs[int(len(sequence_dirs) * 0.8):]
    
    logging.info(f"Training sequences: {train_sequences}")
    logging.info(f"Validation sequences: {val_sequences}")
    
    # Process each sequence
    for sequence_dir in sequence_dirs:
        is_train = sequence_dir in train_sequences
        split = 'train' if is_train else 'val'
        
        full_sequence_path = os.path.join(mot17_root, sequence_dir)
        gt_file = os.path.join(full_sequence_path, 'gt', 'gt.txt')
        img_dir = os.path.join(full_sequence_path, 'img1')
        
        # Read ground truth
        if not os.path.exists(gt_file):
            logging.warning(f"Ground truth file not found for {sequence_dir}")
            continue
            
        frame_detections = {}
        with open(gt_file, 'r') as f:
            for line in f:
                data = line.strip().split(',')
                frame_id = int(data[0])
                object_id = int(data[1])
                x, y, w, h = map(float, data[2:6])
                confidence = float(data[6])
                class_id = int(data[7])
                
                # Only consider pedestrians (class 1) with high confidence
                if class_id == 1 and confidence == 1:
                    if frame_id not in frame_detections:
                        frame_detections[frame_id] = []
                        
                    # Store as (x, y, w, h) - will convert to YOLO format later
                    frame_detections[frame_id].append((x, y, w, h))
        
        # Process each frame
        img_files = sorted(glob.glob(os.path.join(img_dir, '*.jpg')))
        for img_file in tqdm(img_files, desc=f"Processing {sequence_dir}"):
            img_path = img_file
            frame_id = int(os.path.basename(img_file).split('.')[0])
            
            # Copy image to dataset directory
            dest_img_path = os.path.join(output_dir, 'images', split, f"{sequence_dir}_{frame_id:06d}.jpg")
            shutil.copy(img_path, dest_img_path)
            
            # Create corresponding label file
            label_path = os.path.join(output_dir, 'labels', split, f"{sequence_dir}_{frame_id:06d}.txt")
            
            # Get image dimensions for normalization
            img = cv2.imread(img_path)
            img_height, img_width = img.shape[:2]
            
            # Convert detections to YOLO format and write to label file
            with open(label_path, 'w') as f:
                if frame_id in frame_detections:
                    for x, y, w, h in frame_detections[frame_id]:
                        # Convert to YOLO format: class x_center y_center width height
                        # All values normalized to [0, 1]
                        x_center = (x + w/2) / img_width
                        y_center = (y + h/2) / img_height
                        width = w / img_width
                        height = h / img_height
                        
                        # In MOT17, pedestrians are class 1, but YOLO expects 0-indexed classes
                        # We'll use "person" which is class 0 in COCO
                        f.write(f"0 {x_center} {y_center} {width} {height}\n")
    
    # Create dataset.yaml file
    with open(os.path.join(output_dir, 'dataset.yaml'), 'w') as f:
        f.write(f"""
path: {output_dir}
train: images/train
val: images/val
nc: 1
names: ['person']
        """)
    
    logging.info(f"MOT17 dataset prepared for YOLO training in {output_dir}")
    return os.path.join(output_dir, 'dataset.yaml')




## 4. Training Callbacks
Implements callbacks for the training process:
- Saves model checkpoints after each epoch
- Records training losses
- Generates loss curve visualizations
- Provides real-time training monitoring

In [51]:
# Define a simple callback function approach
def create_save_epoch_callback(output_dir):
    """Create a callback function for saving epoch checkpoints"""
    losses = []
    epochs_dir = os.path.join(output_dir, "epochs")
    os.makedirs(epochs_dir, exist_ok=True)
    
    def save_epoch_callback(trainer):
        """Method called at the end of each training epoch"""
        # Get current epoch
        epoch = trainer.epoch
        
        # Save the model for this epoch
        epoch_model_path = os.path.join(epochs_dir, f"epoch_{epoch}.pt")
        if hasattr(trainer, 'model'):
            trainer.model.save(epoch_model_path)
        elif hasattr(trainer, 'save'):
            trainer.save(epoch_model_path)
        else:
            # Try a direct save of the trainer object
            torch.save(trainer.model.state_dict(), epoch_model_path)
        
        logging.info(f"Saved model checkpoint for epoch {epoch} to {epoch_model_path}")
        
        # Record loss
        if hasattr(trainer, 'loss') and trainer.loss is not None:
            loss_val = trainer.loss.item() if isinstance(trainer.loss, torch.Tensor) else trainer.loss
            losses.append(loss_val)
    
    def plot_loss_curve():
        if not losses:
            logging.warning("No losses recorded to plot")
            return
                
        plt.figure(figsize=(10, 6))
        plt.plot(range(1, len(losses) + 1), losses, marker='o')
        plt.title('Training Loss Curve')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.grid(True)
        
        # Save loss curve image
        loss_curve_path = os.path.join(output_dir, "loss_curve.png")
        plt.savefig(loss_curve_path)
        logging.info(f"Saved loss curve to {loss_curve_path}")
        
        # Show the plot if in interactive environment
        plt.show()
        
        return losses
    
    # Return the callback function and the plotting function
    return save_epoch_callback, plot_loss_curve



## 5. YOLO Training Pipeline
Main training function that:
- Prepares MOT17 dataset
- Loads base YOLOv8 model
- Configures training parameters
- Handles model training
- Saves best weights
- Returns training metrics

In [52]:
def train_yolo_on_mot17(base_model_path, mot17_root, output_dir, epochs=10, batch_size=8, imgsz=640):
    """
    Train a YOLOv8 model on the MOT17 dataset.
    
    Args:
        base_model_path: Path to the base YOLOv8 model to fine-tune
        mot17_root: Path to the MOT17 dataset root directory
        output_dir: Path to save the trained model and dataset
        epochs: Number of training epochs
        batch_size: Batch size for training
        imgsz: Image size for training
    
    Returns:
        Path to the trained model weights and callback object with loss data
    """
    # Prepare the dataset
    yolo_dataset_dir = os.path.join(output_dir, 'yolo_dataset')
    os.makedirs(yolo_dataset_dir, exist_ok=True)
    dataset_yaml = prepare_mot17_for_yolo(mot17_root, yolo_dataset_dir)
    
    logging.info("Starting YOLO training...")
    
    # Load the base model
    model = YOLO(base_model_path)
    
    # Create callback function for saving epoch checkpoints
    save_epoch_callback, plot_loss_curve = create_save_epoch_callback(output_dir)
    
    # Train the model - without specifying callbacks parameter
    try:
        results = model.train(
            data=dataset_yaml,
            epochs=epochs,
            batch=batch_size,
            imgsz=imgsz,
            project=output_dir,
            name="mot17_model",
            exist_ok=True
        )
        
        # Get the path to the best model
        best_model_path = os.path.join(output_dir, "mot17_model", "weights", "best.pt")
        logging.info(f"Training completed. Best model saved to {best_model_path}")
        
        # Plot the loss curve
        losses = plot_loss_curve()
        
        return best_model_path, losses
    
    except Exception as e:
        logging.error(f"Training failed: {e}")
        raise


## 6. Model Loading and Utility Functions
Helper functions for:
- Loading trained YOLO models
- Managing class colors
- Processing individual frames
- Applying confidence thresholds

In [53]:
def load_yolo_model(model_path):
    """Load the YOLOv8 model from the specified path."""
    try:
        model = YOLO(model_path)
        logging.info(f"Successfully loaded YOLOv8 model from {model_path}")
        return model
    except Exception as e:
        logging.error(f"Failed to load YOLO model: {e}")
        raise

def get_class_color(class_id):
    """
    Returns a consistent color for a given class ID.
    Uses modulo to ensure color mapping for any class ID.
    """
    return COLOR_MAP[class_id % len(COLOR_MAP)]

def process_frame(frame, model, frame_id, confidence_threshold=0.5):
    """
    Process a single frame to detect objects and draw labeled bounding boxes with consistent colors.
    Added confidence threshold to filter low-confidence detections.
    """
    detections = []
    try:
        results = model(frame, conf=confidence_threshold)
        logging.info(f"Processed frame {frame_id} with detection results")

        for result in results:
            boxes = result.boxes.xyxy.cpu().numpy()  # Bounding boxes [x1, y1, x2, y2]
            scores = result.boxes.conf.cpu().numpy()  # Confidence scores
            class_ids = result.boxes.cls.cpu().numpy().astype(int)  # Class IDs as integers

            for box, score, cls in zip(boxes, scores, class_ids):
                # Filter detections based on confidence threshold
                if score >= confidence_threshold:
                    class_name = COCO_CLASSES[cls] if cls < len(COCO_CLASSES) else f"Unknown_{cls}"
                    color = get_class_color(cls)

                    detections.append({
                        "frame": frame_id,
                        "bbox": box.tolist(),
                        "score": float(score),
                        "class_id": int(cls),
                        "class_name": class_name
                    })

                    # Draw bounding box and label
                    x1, y1, x2, y2 = map(int, box)
                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                    label = f"{class_name}: {score:.2f}"
                    cv2.putText(frame, label, (x1, y1 - 10), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    except Exception as e:
        logging.error(f"Error processing frame {frame_id}: {e}")
    
    return detections, frame

In [54]:
def detect_objects_in_sequence(model_path, mot17_root, sequence, output_dir, confidence_threshold=0.5):
    """
    Detect objects in an MOT17 sequence, draw labeled bounding boxes, and save results.
    Added optional confidence threshold parameter.
    """
    model = load_yolo_model(model_path)
    video_path = os.path.join(mot17_root, sequence, "img1")

    if not os.path.exists(video_path):
        logging.error(f"Video path {video_path} does not exist")
        raise FileNotFoundError(f"Sequence directory {video_path} not found")

    os.makedirs(output_dir, exist_ok=True)
    output_video_path = os.path.join(output_dir, f"{sequence}_detections.avi")

    all_detections = []
    frame_id = 1
    frame_size = None
    fourcc = cv2.VideoWriter_fourcc(*"XVID")
    video_writer = None

    for img_file in sorted(os.listdir(video_path)):
        if img_file.endswith(".jpg"):
            frame_path = os.path.join(video_path, img_file)
            frame = cv2.imread(frame_path)
            if frame is None:
                logging.warning(f"Failed to load frame {frame_path}")
                continue

            if frame_size is None:
                frame_size = (frame.shape[1], frame.shape[0])
                video_writer = cv2.VideoWriter(output_video_path, fourcc, 20.0, frame_size)

            frame_detections, processed_frame = process_frame(
                frame, model, frame_id, confidence_threshold
            )
            all_detections.extend(frame_detections)
            video_writer.write(processed_frame)
            frame_id += 1

    video_writer.release()
    output_file = os.path.join(output_dir, "detections.pkl")
    
    try:
        with open(output_file, "wb") as f:
            pickle.dump(all_detections, f)
        logging.info(f"Saved {len(all_detections)} detections to {output_file}")
    except Exception as e:
        logging.error(f"Failed to save detections: {e}")
        raise

    print(f"Detections completed for {frame_id-1} frames. Results saved to {output_file} and {output_video_path}")

In [55]:
# Set your paths here
base_model_path = r"/csehome/b22ai025/Project/Scratch/yolov8n.pt"  # Base YOLOv8 model
mot17_root = r"/scratch/b22ai025/MOT17/train"      # MOT17 dataset
output_dir = r"/scratch/b22ai025/Results"          # Output directory
# Training parameters
train_epochs = 20
batch_size = 16
image_size = 640

# Detection parameters
sequence_to_evaluate = "MOT17-02-DPM"
confidence_threshold = 0.5

# Step 1: Train the model on MOT17
print("Starting YOLOv8 training on MOT17 dataset...")
trained_model_path, losses = train_yolo_on_mot17(
    base_model_path, 
    mot17_root, 
    output_dir, 
    epochs=train_epochs, 
    batch_size=batch_size, 
    imgsz=image_size
)
print(f"Training completed. Model saved to {trained_model_path}")

# Plot the loss curve
print("Loss data collected during training:", losses)
print("Loss curve was already generated during training")

# Step 2: Run detection on a sequence using the trained model
print(f"Running detection on sequence {sequence_to_evaluate} with trained model...")
detection_output_dir = os.path.join(output_dir, "Detections")
detect_objects_in_sequence(
    trained_model_path, 
    mot17_root, 
    sequence_to_evaluate, 
    detection_output_dir, 
    confidence_threshold
)

print("Training and detection pipeline completed!")

Starting YOLOv8 training on MOT17 dataset...


Processing MOT17-02-DPM: 100%|██████████| 600/600 [00:06<00:00, 93.25it/s] 
Processing MOT17-11-FRCNN: 100%|██████████| 900/900 [00:09<00:00, 95.34it/s] 
Processing MOT17-05-SDP: 100%|██████████| 837/837 [00:02<00:00, 410.04it/s]
Processing MOT17-10-DPM: 100%|██████████| 654/654 [00:06<00:00, 100.00it/s]
Processing MOT17-13-FRCNN: 100%|██████████| 750/750 [00:09<00:00, 81.73it/s]
Processing MOT17-05-DPM: 100%|██████████| 837/837 [00:02<00:00, 384.61it/s]
Processing MOT17-04-FRCNN: 100%|██████████| 1050/1050 [00:12<00:00, 81.76it/s]
Processing MOT17-09-SDP: 100%|██████████| 525/525 [00:06<00:00, 85.27it/s]
Processing MOT17-04-DPM: 100%|██████████| 1050/1050 [00:13<00:00, 79.21it/s]
Processing MOT17-13-DPM: 100%|██████████| 750/750 [00:09<00:00, 83.23it/s]
Processing MOT17-10-FRCNN: 100%|██████████| 654/654 [00:06<00:00, 103.70it/s]
Processing MOT17-11-SDP: 100%|██████████| 900/900 [00:08<00:00, 101.72it/s]
Processing MOT17-02-SDP: 100%|██████████| 600/600 [00:06<00:00, 95.24it/s] 
Proce

Ultralytics 8.3.98 🚀 Python-3.12.9 torch-2.6.0+cpu CPU (Intel Xeon Gold 6326 2.90GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/csehome/b22ai025/Project/Scratch/yolov8n.pt, data=/scratch/b22ai025/Results/yolo_dataset/dataset.yaml, epochs=20, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=/scratch/b22ai025/Results, name=mot17_model, exist_ok=True, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames

100%|██████████| 755k/755k [00:00<00:00, 6.42MB/s]

Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics




 18                  -1  1    123648  ultralytics.nn.modules.block.C2f             [192, 128, 1]                 
 19                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
 20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 21                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
 22        [15, 18, 21]  1    751507  ultralytics.nn.modules.head.Detect           [1, [64, 128, 256]]           
Model summary: 129 layers, 3,011,043 parameters, 3,011,027 gradients, 8.2 GFLOPs

Transferred 319/355 items from pretrained weights
Freezing layer 'model.22.dfl.conv.weight'


[34m[1mtrain: [0mScanning /scratch/b22ai025/Results/yolo_dataset/labels/train... 12561 images, 0 backgrounds, 5325 corrupt: 100%|██████████| 12561/12561 [00:16<00:00, 739.96it/s]

 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.0337963 ]
 -0.0337963 ]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.0337963 ]
 -0.0337963 ]





[34m[1mtrain: [0mNew cache created: /scratch/b22ai025/Results/yolo_dataset/labels/train.cache


[34m[1mval: [0mScanning /scratch/b22ai025/Results/yolo_dataset/labels/val... 3912 images, 0 backgrounds, 2295 corrupt: 100%|██████████| 3912/3912 [00:04<00:00, 792.68it/s]


 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.03425926]
 -0.0337963 ]
 -0.0337963 ]
[34m[1mval: [0mNew cache created: /scratch/b22ai025/Results/yolo_dataset/labels/val.cache
Plotting labels to /scratch/b22ai025/Results/mot17_model/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m/scratch/b22ai025/Results/mot17_model[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20         0G      1.507      1.128      1.063        152        640: 100%|██████████| 453/453 [11:39<00:00,  1.54s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:41<00:00,  1.23it/s]


                   all       1617      28327      0.826       0.57      0.683      0.422

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20         0G        1.4     0.8562      1.024        157        640: 100%|██████████| 453/453 [12:14<00:00,  1.62s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:39<00:00,  1.29it/s]


                   all       1617      28327      0.851      0.594      0.738      0.456

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20         0G      1.344      0.791      1.008         95        640: 100%|██████████| 453/453 [11:33<00:00,  1.53s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:39<00:00,  1.30it/s]

                   all       1617      28327      0.832      0.637      0.779      0.509






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20         0G       1.29     0.7474     0.9906        103        640: 100%|██████████| 453/453 [11:33<00:00,  1.53s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:38<00:00,  1.31it/s]

                   all       1617      28327      0.839      0.664      0.803      0.542






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20         0G      1.253     0.7168     0.9796        194        640: 100%|██████████| 453/453 [11:43<00:00,  1.55s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:39<00:00,  1.31it/s]

                   all       1617      28327      0.869      0.677      0.819      0.559






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20         0G       1.22     0.6909     0.9678        122        640: 100%|██████████| 453/453 [11:43<00:00,  1.55s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:38<00:00,  1.31it/s]

                   all       1617      28327      0.879      0.679      0.821      0.564






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20         0G      1.183     0.6654     0.9566        183        640: 100%|██████████| 453/453 [11:43<00:00,  1.55s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:39<00:00,  1.30it/s]

                   all       1617      28327      0.864      0.689      0.829      0.574






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20         0G      1.155     0.6482     0.9483        129        640: 100%|██████████| 453/453 [11:41<00:00,  1.55s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:38<00:00,  1.31it/s]

                   all       1617      28327      0.882      0.712      0.838      0.593






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20         0G      1.133     0.6318     0.9432        132        640: 100%|██████████| 453/453 [11:44<00:00,  1.55s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:38<00:00,  1.32it/s]

                   all       1617      28327      0.871      0.736      0.855      0.612






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20         0G      1.108     0.6159     0.9347        143        640: 100%|██████████| 453/453 [11:42<00:00,  1.55s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:39<00:00,  1.30it/s]

                   all       1617      28327      0.864      0.735      0.857      0.615





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20         0G      1.079     0.6021     0.9232         46        640: 100%|██████████| 453/453 [11:00<00:00,  1.46s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:38<00:00,  1.31it/s]

                   all       1617      28327      0.872      0.746      0.867      0.624






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20         0G      1.048     0.5841      0.913         80        640: 100%|██████████| 453/453 [10:58<00:00,  1.45s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:39<00:00,  1.30it/s]

                   all       1617      28327      0.892      0.752      0.868      0.635






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20         0G      1.027     0.5717      0.907         93        640: 100%|██████████| 453/453 [11:04<00:00,  1.47s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:41<00:00,  1.24it/s]

                   all       1617      28327      0.901      0.768      0.879      0.647






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20         0G      1.004     0.5588     0.9005         65        640: 100%|██████████| 453/453 [11:01<00:00,  1.46s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:39<00:00,  1.31it/s]

                   all       1617      28327      0.887      0.767      0.885      0.659






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20         0G      0.983     0.5467      0.895         67        640: 100%|██████████| 453/453 [10:56<00:00,  1.45s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:40<00:00,  1.27it/s]

                   all       1617      28327      0.899      0.775      0.886      0.664






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20         0G     0.9603     0.5328      0.889         35        640: 100%|██████████| 453/453 [10:59<00:00,  1.46s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:38<00:00,  1.31it/s]

                   all       1617      28327      0.891      0.781      0.892      0.678






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20         0G     0.9393     0.5211     0.8832         92        640: 100%|██████████| 453/453 [11:05<00:00,  1.47s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:40<00:00,  1.27it/s]

                   all       1617      28327      0.894      0.788      0.893      0.681






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20         0G     0.9192     0.5106     0.8779         48        640: 100%|██████████| 453/453 [10:59<00:00,  1.46s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:39<00:00,  1.29it/s]

                   all       1617      28327      0.905      0.787      0.898      0.694






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20         0G     0.8989     0.4996     0.8724         67        640: 100%|██████████| 453/453 [11:01<00:00,  1.46s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:39<00:00,  1.28it/s]

                   all       1617      28327      0.908      0.792      0.901      0.701






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20         0G     0.8816     0.4907      0.869         49        640: 100%|██████████| 453/453 [10:59<00:00,  1.46s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:38<00:00,  1.31it/s]

                   all       1617      28327      0.902      0.799      0.904      0.708

20 epochs completed in 4.012 hours.





Optimizer stripped from /scratch/b22ai025/Results/mot17_model/weights/last.pt, 6.2MB
Optimizer stripped from /scratch/b22ai025/Results/mot17_model/weights/best.pt, 6.2MB

Validating /scratch/b22ai025/Results/mot17_model/weights/best.pt...
Ultralytics 8.3.98 🚀 Python-3.12.9 torch-2.6.0+cpu CPU (Intel Xeon Gold 6326 2.90GHz)
Model summary (fused): 72 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:36<00:00,  1.41it/s]


                   all       1617      28327      0.902      0.798      0.904      0.708
Speed: 0.3ms preprocess, 11.6ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1m/scratch/b22ai025/Results/mot17_model[0m
Training completed. Model saved to /scratch/b22ai025/Results/mot17_model/weights/best.pt
Loss data collected during training: None
Loss curve was already generated during training
Running detection on sequence MOT17-02-DPM with trained model...

0: 384x640 18 persons, 42.3ms
Speed: 2.2ms preprocess, 42.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 27.0ms
Speed: 1.6ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 28.3ms
Speed: 1.6ms preprocess, 28.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 25.9ms
Speed: 1.6ms preprocess, 25.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x

## 7. Detection Pipeline
Implements detection on sequences:
- Processes video frames
- Applies object detection
- Draws bounding boxes
- Saves detection results
- Generates visualization videos

In [None]:
def extract_frames_from_video(video_path, temp_dir):
    """Extract frames from video file and save to temporary directory"""
    os.makedirs(temp_dir, exist_ok=True)
    
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    
    if not cap.isOpened():
        raise ValueError(f"Failed to open video file: {video_path}")
    
    frame_paths = []
    print("\nExtracting frames from video...")
    with tqdm(total=total_frames, desc="Frames", bar_format='{l_bar}{bar:30}{r_bar}') as pbar:
        frame_idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
                
            frame_path = os.path.join(temp_dir, f"frame_{frame_idx:06d}.jpg")
            cv2.imwrite(frame_path, frame)
            frame_paths.append(frame_path)
            frame_idx += 1
            pbar.update(1)
    
    cap.release()
    return frame_paths, fps

def generate_detections(model_path, input_path, output_dir, conf_thresh=0.5):
    """Generate detections with controlled output"""
    # Load YOLO model and configure
    model = YOLO(model_path)
    model.conf = conf_thresh
    model.verbose = False
    os.makedirs(output_dir, exist_ok=True)
    
    # Determine input type and get frames
    is_video = input_path.endswith(('.mp4', '.avi', '.mov'))
    if is_video:
        temp_dir = os.path.join(output_dir, 'temp_frames')
        img_files, fps = extract_frames_from_video(input_path, temp_dir)
        total_frames = len(img_files)
        frame_dir = temp_dir
    else:
        frame_dir = os.path.join(input_path, 'img1')
        if not os.path.exists(frame_dir):
            raise ValueError(f"Invalid sequence directory: {input_path}")
        img_files = sorted(os.listdir(frame_dir))
        total_frames = len(img_files)
        fps = 30  # Default for MOT17
    
    # Initialize video writer
    first_frame = cv2.imread(os.path.join(frame_dir, img_files[0]) if not is_video else img_files[0])
    frame_size = (first_frame.shape[1], first_frame.shape[0])
    video_path = os.path.join(output_dir, 'detections.avi')
    out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frame_size)
    
    # Process frames
    all_detections = []
    total_dets = 0
    
    print("\nStarting detection pipeline...")
    print("Sample detections (first 5 frames):")
    with tqdm(total=total_frames, desc="Processing", bar_format='{l_bar}{bar:30}{r_bar}') as pbar:
        for frame_id, img_file in enumerate(img_files):
            frame_path = os.path.join(frame_dir, img_file) if not is_video else img_file
            frame = cv2.imread(frame_path)
            results = model(frame)[0]
            frame_dets = 0
            
            # Process detections
            for det in results.boxes.data:
                x1, y1, x2, y2, conf, cls = det
                if int(cls) == 0 and conf > conf_thresh:  # Person class
                    detection = {
                        'frame': frame_id + 1,
                        'bbox': [float(x1), float(y1), float(x2), float(y2)],
                        'score': float(conf)
                    }
                    all_detections.append(detection)
                    frame_dets += 1
                    
                    # Visualize detection
                    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
                    cv2.putText(frame, f'person: {conf:.2f}', 
                              (int(x1), int(y1)-10),
                              cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            
            out.write(frame)
            total_dets += frame_dets
            
            # Show only first 5 frames details
            if frame_id < 5:
                print(f"Frame {frame_id+1}: {frame_dets} detections")
            elif frame_id == 5:
                print("\n... Processing remaining frames ...\n")
                
            pbar.set_postfix({'dets': total_dets}, refresh=True)
            pbar.update(1)
    
    # Cleanup
    out.release()
    if is_video and os.path.exists(temp_dir):
        import shutil
        shutil.rmtree(temp_dir)
    
    # Save detections
    detections_path = os.path.join(output_dir, 'detections.pkl')
    with open(detections_path, 'wb') as f:
        pickle.dump(all_detections, f)
    
    # Print summary
    print("\n" + "="*50)
    print("Detection Pipeline Summary")
    print("="*50)
    print(f"Input type: {'Video' if is_video else 'MOT17 Sequence'}")
    print(f"Total frames: {total_frames}")
    print(f"Total detections: {total_dets}")
    print(f"Average detections per frame: {total_dets/total_frames:.1f}")
    print(f"\nOutputs saved to:")
    print(f"- Detections: {detections_path}")
    print(f"- Video: {video_path}")
    print("="*50)
    
    return all_detections



In [29]:
def main():
    """Main function with clean output"""
    print("\n" + "="*50)
    print("Object Detection Pipeline")
    print("="*50)
    
    # Get model path
    model_path = "/csehome/b22ai025/Project/Scratch/best.pt"
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model not found at {model_path}")
    
    # Select input type
    print("\nSelect input type:")
    print("1. MOT17 Sequence")
    print("2. Video file")
    
    while True:
        choice = input("\nEnter choice (1 or 2): ").strip()
        if choice in ['1', '2']:
            break
        print("Invalid choice. Please enter 1 or 2.")
    
    # Handle input based on choice
    if choice == '1':
        sequence_path = input("\nEnter MOT17 sequence path (or press Enter for default): ").strip()
        if not sequence_path:
            sequence_path = "/scratch/b22ai025/MOT17/train/MOT17-02-DPM"
        
        if not os.path.exists(os.path.join(sequence_path, 'img1')):
            raise ValueError(f"Invalid sequence path: {sequence_path}")
            
        output_dir = os.path.join("/csehome/b22ai025/Project/Scratch/Detections", "sequence_output")
        input_path = sequence_path
        
    else:
        video_path = input("\nEnter video file path (.mp4/.avi): ").strip()
        if not video_path.endswith(('.mp4', '.avi', '.mov')):
            raise ValueError("Invalid video format. Supported formats: .mp4, .avi, .mov")
        if not os.path.exists(video_path):
            raise FileNotFoundError(f"Video file not found: {video_path}")
            
        output_dir = os.path.join("/csehome/b22ai025/Project/Scratch/Detections", "video_output")
        input_path = video_path
    
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)
    print(f"\nOutput will be saved to: {output_dir}")
    
    # Run detection
    try:
        generate_detections(model_path, input_path, output_dir)
        print("\nPipeline completed successfully!")
    except Exception as e:
        print(f"\nError during detection: {str(e)}")
        raise

if __name__ == "__main__":
    main()


Object Detection Pipeline

Select input type:
1. MOT17 Sequence
2. Video file

Output will be saved to: /csehome/b22ai025/Project/Scratch/Detections/sequence_output

Starting detection pipeline...
Sample detections (first 5 frames):


Processing:   0%|                              | 0/600 [00:00<?, ?it/s]


0: 384x640 21 persons, 40.1ms
Speed: 2.5ms preprocess, 40.1ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)


Processing:   0%|                              | 1/600 [00:00<01:24,  7.09it/s, dets=20]

Frame 1: 20 detections

0: 384x640 21 persons, 38.8ms
Speed: 2.5ms preprocess, 38.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   0%|                              | 1/600 [00:00<01:24,  7.09it/s, dets=40]

Frame 2: 20 detections

0: 384x640 21 persons, 38.4ms
Speed: 2.2ms preprocess, 38.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   0%|▏                             | 3/600 [00:00<00:56, 10.57it/s, dets=60]

Frame 3: 20 detections

0: 384x640 22 persons, 38.2ms
Speed: 2.4ms preprocess, 38.2ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   0%|▏                             | 3/600 [00:00<00:56, 10.57it/s, dets=80]

Frame 4: 20 detections

0: 384x640 21 persons, 37.7ms
Speed: 2.2ms preprocess, 37.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   1%|▎                             | 5/600 [00:00<00:50, 11.85it/s, dets=100]

Frame 5: 20 detections

0: 384x640 21 persons, 35.5ms
Speed: 2.5ms preprocess, 35.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)


Processing:   1%|▎                             | 5/600 [00:00<00:50, 11.85it/s, dets=120]


... Processing remaining frames ...


0: 384x640 21 persons, 41.3ms
Speed: 2.6ms preprocess, 41.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   1%|▎                             | 7/600 [00:00<00:47, 12.38it/s, dets=139]


0: 384x640 20 persons, 37.8ms
Speed: 2.2ms preprocess, 37.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   1%|▎                             | 7/600 [00:00<00:47, 12.38it/s, dets=158]


0: 384x640 21 persons, 37.3ms
Speed: 2.2ms preprocess, 37.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   2%|▍                             | 9/600 [00:00<00:46, 12.68it/s, dets=178]


0: 384x640 21 persons, 38.0ms
Speed: 2.1ms preprocess, 38.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   2%|▍                             | 9/600 [00:00<00:46, 12.68it/s, dets=197]


0: 384x640 21 persons, 35.7ms
Speed: 2.3ms preprocess, 35.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   2%|▌                             | 11/600 [00:00<00:45, 13.02it/s, dets=217]


0: 384x640 21 persons, 35.8ms
Speed: 2.2ms preprocess, 35.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   2%|▌                             | 11/600 [00:00<00:45, 13.02it/s, dets=236]


0: 384x640 23 persons, 35.3ms
Speed: 2.1ms preprocess, 35.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   2%|▋                             | 13/600 [00:01<00:44, 13.21it/s, dets=257]


0: 384x640 23 persons, 36.0ms
Speed: 2.1ms preprocess, 36.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   2%|▋                             | 13/600 [00:01<00:44, 13.21it/s, dets=277]


0: 384x640 24 persons, 36.7ms
Speed: 2.5ms preprocess, 36.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   2%|▊                             | 15/600 [00:01<00:43, 13.35it/s, dets=297]


0: 384x640 24 persons, 36.4ms
Speed: 2.5ms preprocess, 36.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   2%|▊                             | 15/600 [00:01<00:43, 13.35it/s, dets=317]


0: 384x640 24 persons, 35.2ms
Speed: 2.2ms preprocess, 35.2ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   3%|▊                             | 17/600 [00:01<00:43, 13.39it/s, dets=336]


0: 384x640 23 persons, 38.0ms
Speed: 2.4ms preprocess, 38.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)


Processing:   3%|▊                             | 17/600 [00:01<00:43, 13.39it/s, dets=355]


0: 384x640 26 persons, 35.8ms
Speed: 2.2ms preprocess, 35.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   3%|▉                             | 19/600 [00:01<00:43, 13.44it/s, dets=375]


0: 384x640 22 persons, 35.8ms
Speed: 2.2ms preprocess, 35.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   3%|▉                             | 19/600 [00:01<00:43, 13.44it/s, dets=395]


0: 384x640 24 persons, 36.4ms
Speed: 2.2ms preprocess, 36.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   4%|█                             | 21/600 [00:01<00:42, 13.51it/s, dets=415]


0: 384x640 23 persons, 36.1ms
Speed: 2.5ms preprocess, 36.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   4%|█                             | 21/600 [00:01<00:42, 13.51it/s, dets=435]


0: 384x640 24 persons, 37.5ms
Speed: 2.2ms preprocess, 37.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   4%|█▏                            | 23/600 [00:01<00:42, 13.48it/s, dets=455]


0: 384x640 23 persons, 36.5ms
Speed: 2.4ms preprocess, 36.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   4%|█▏                            | 23/600 [00:01<00:42, 13.48it/s, dets=475]


0: 384x640 22 persons, 35.9ms
Speed: 2.2ms preprocess, 35.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   4%|█▎                            | 25/600 [00:01<00:42, 13.54it/s, dets=495]


0: 384x640 23 persons, 37.8ms
Speed: 2.2ms preprocess, 37.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   4%|█▎                            | 25/600 [00:02<00:42, 13.54it/s, dets=517]


0: 384x640 22 persons, 35.5ms
Speed: 2.5ms preprocess, 35.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   4%|█▎                            | 27/600 [00:02<00:42, 13.60it/s, dets=538]


0: 384x640 22 persons, 35.4ms
Speed: 2.3ms preprocess, 35.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   4%|█▎                            | 27/600 [00:02<00:42, 13.60it/s, dets=559]


0: 384x640 21 persons, 35.9ms
Speed: 2.5ms preprocess, 35.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   5%|█▍                            | 29/600 [00:02<00:41, 13.65it/s, dets=579]


0: 384x640 22 persons, 35.4ms
Speed: 2.2ms preprocess, 35.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   5%|█▍                            | 29/600 [00:02<00:41, 13.65it/s, dets=599]


0: 384x640 21 persons, 35.6ms
Speed: 2.5ms preprocess, 35.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:   5%|█▌                            | 31/600 [00:02<00:41, 13.65it/s, dets=619]


0: 384x640 22 persons, 37.8ms
Speed: 2.2ms preprocess, 37.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:   5%|█▌                            | 31/600 [00:02<00:41, 13.65it/s, dets=639]


0: 384x640 21 persons, 31.4ms
Speed: 1.9ms preprocess, 31.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   6%|█▋                            | 33/600 [00:02<00:40, 14.02it/s, dets=659]


0: 384x640 21 persons, 27.5ms
Speed: 2.2ms preprocess, 27.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   6%|█▋                            | 33/600 [00:02<00:40, 14.02it/s, dets=679]


0: 384x640 21 persons, 26.8ms
Speed: 1.8ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:   6%|█▊                            | 35/600 [00:02<00:37, 14.96it/s, dets=699]


0: 384x640 24 persons, 26.8ms
Speed: 1.6ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   6%|█▊                            | 35/600 [00:02<00:37, 14.96it/s, dets=719]


0: 384x640 24 persons, 26.5ms
Speed: 1.5ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   6%|█▊                            | 37/600 [00:02<00:35, 15.84it/s, dets=739]


0: 384x640 23 persons, 32.8ms
Speed: 2.0ms preprocess, 32.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:   6%|█▊                            | 37/600 [00:02<00:35, 15.84it/s, dets=759]


0: 384x640 24 persons, 26.6ms
Speed: 2.0ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:   6%|█▉                            | 39/600 [00:02<00:34, 16.28it/s, dets=780]


0: 384x640 25 persons, 26.7ms
Speed: 1.5ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   6%|█▉                            | 39/600 [00:02<00:34, 16.28it/s, dets=800]


0: 384x640 24 persons, 24.5ms
Speed: 2.0ms preprocess, 24.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:   7%|██                            | 41/600 [00:02<00:33, 16.94it/s, dets=821]


0: 384x640 25 persons, 24.5ms
Speed: 1.5ms preprocess, 24.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:   7%|██                            | 41/600 [00:02<00:33, 16.94it/s, dets=841]


0: 384x640 24 persons, 25.2ms
Speed: 1.6ms preprocess, 25.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:   7%|██▏                           | 43/600 [00:03<00:31, 17.47it/s, dets=862]


0: 384x640 25 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   7%|██▏                           | 43/600 [00:03<00:31, 17.47it/s, dets=882]


0: 384x640 25 persons, 26.7ms
Speed: 1.9ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   8%|██▎                           | 45/600 [00:03<00:31, 17.73it/s, dets=901]


0: 384x640 25 persons, 25.2ms
Speed: 1.5ms preprocess, 25.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   8%|██▎                           | 45/600 [00:03<00:31, 17.73it/s, dets=920]


0: 384x640 23 persons, 24.5ms
Speed: 1.5ms preprocess, 24.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:   8%|██▎                           | 47/600 [00:03<00:30, 18.17it/s, dets=937]


0: 384x640 24 persons, 25.7ms
Speed: 1.5ms preprocess, 25.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   8%|██▎                           | 47/600 [00:03<00:30, 18.17it/s, dets=954]


0: 384x640 24 persons, 27.0ms
Speed: 2.2ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:   8%|██▍                           | 49/600 [00:03<00:30, 18.15it/s, dets=973]


0: 384x640 24 persons, 26.3ms
Speed: 2.0ms preprocess, 26.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:   8%|██▍                           | 49/600 [00:03<00:30, 18.15it/s, dets=991]


0: 384x640 22 persons, 25.9ms
Speed: 1.5ms preprocess, 25.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   8%|██▌                           | 51/600 [00:03<00:29, 18.32it/s, dets=1008]


0: 384x640 22 persons, 24.8ms
Speed: 1.5ms preprocess, 24.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   8%|██▌                           | 51/600 [00:03<00:29, 18.32it/s, dets=1030]


0: 384x640 24 persons, 25.2ms
Speed: 1.5ms preprocess, 25.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:   9%|██▋                           | 53/600 [00:03<00:29, 18.50it/s, dets=1051]


0: 384x640 22 persons, 26.5ms
Speed: 1.5ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   9%|██▋                           | 53/600 [00:03<00:29, 18.50it/s, dets=1071]


0: 384x640 25 persons, 26.8ms
Speed: 1.9ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:   9%|██▊                           | 55/600 [00:03<00:29, 18.45it/s, dets=1091]


0: 384x640 25 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:   9%|██▊                           | 55/600 [00:03<00:29, 18.45it/s, dets=1111]


0: 384x640 26 persons, 25.1ms
Speed: 1.5ms preprocess, 25.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  10%|██▊                           | 57/600 [00:03<00:29, 18.62it/s, dets=1132]


0: 384x640 27 persons, 26.4ms
Speed: 1.5ms preprocess, 26.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  10%|██▊                           | 57/600 [00:03<00:29, 18.62it/s, dets=1153]


0: 384x640 24 persons, 25.8ms
Speed: 1.5ms preprocess, 25.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  10%|██▉                           | 59/600 [00:03<00:29, 18.50it/s, dets=1173]


0: 384x640 26 persons, 26.8ms
Speed: 1.9ms preprocess, 26.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  10%|██▉                           | 59/600 [00:03<00:29, 18.50it/s, dets=1193]


0: 384x640 24 persons, 26.5ms
Speed: 1.5ms preprocess, 26.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  10%|███                           | 61/600 [00:04<00:29, 18.38it/s, dets=1213]


0: 384x640 26 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  10%|███                           | 61/600 [00:04<00:29, 18.38it/s, dets=1232]


0: 384x640 23 persons, 26.8ms
Speed: 1.9ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  10%|███▏                          | 63/600 [00:04<00:29, 18.28it/s, dets=1251]


0: 384x640 23 persons, 27.5ms
Speed: 1.9ms preprocess, 27.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  10%|███▏                          | 63/600 [00:04<00:29, 18.28it/s, dets=1270]


0: 384x640 23 persons, 25.4ms
Speed: 1.8ms preprocess, 25.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  11%|███▎                          | 65/600 [00:04<00:29, 18.23it/s, dets=1288]


0: 384x640 25 persons, 27.4ms
Speed: 1.5ms preprocess, 27.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  11%|███▎                          | 65/600 [00:04<00:29, 18.23it/s, dets=1307]


0: 384x640 24 persons, 25.4ms
Speed: 1.5ms preprocess, 25.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  11%|███▎                          | 67/600 [00:04<00:28, 18.41it/s, dets=1327]


0: 384x640 23 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  11%|███▎                          | 67/600 [00:04<00:28, 18.41it/s, dets=1347]


0: 384x640 27 persons, 25.4ms
Speed: 2.3ms preprocess, 25.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  12%|███▍                          | 69/600 [00:04<00:28, 18.43it/s, dets=1367]


0: 384x640 24 persons, 25.2ms
Speed: 1.6ms preprocess, 25.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  12%|███▍                          | 69/600 [00:04<00:28, 18.43it/s, dets=1387]


0: 384x640 23 persons, 24.3ms
Speed: 1.5ms preprocess, 24.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  12%|███▌                          | 71/600 [00:04<00:28, 18.73it/s, dets=1406]


0: 384x640 25 persons, 26.2ms
Speed: 2.0ms preprocess, 26.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  12%|███▌                          | 71/600 [00:04<00:28, 18.73it/s, dets=1426]


0: 384x640 26 persons, 25.3ms
Speed: 1.5ms preprocess, 25.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  12%|███▋                          | 73/600 [00:04<00:28, 18.70it/s, dets=1444]


0: 384x640 28 persons, 26.8ms
Speed: 2.0ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  12%|███▋                          | 73/600 [00:04<00:28, 18.70it/s, dets=1464]


0: 384x640 28 persons, 26.2ms
Speed: 1.8ms preprocess, 26.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  12%|███▊                          | 75/600 [00:04<00:28, 18.66it/s, dets=1486]


0: 384x640 28 persons, 26.8ms
Speed: 1.8ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  12%|███▊                          | 75/600 [00:04<00:28, 18.66it/s, dets=1508]


0: 384x640 29 persons, 24.8ms
Speed: 1.5ms preprocess, 24.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  13%|███▊                          | 77/600 [00:04<00:27, 18.95it/s, dets=1529]


0: 384x640 29 persons, 26.2ms
Speed: 1.5ms preprocess, 26.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  13%|███▊                          | 77/600 [00:04<00:27, 18.95it/s, dets=1550]


0: 384x640 28 persons, 26.9ms
Speed: 2.2ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  13%|███▉                          | 79/600 [00:04<00:27, 18.67it/s, dets=1571]


0: 384x640 29 persons, 26.7ms
Speed: 2.2ms preprocess, 26.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  13%|███▉                          | 79/600 [00:05<00:27, 18.67it/s, dets=1591]


0: 384x640 28 persons, 25.6ms
Speed: 1.9ms preprocess, 25.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  14%|████                          | 81/600 [00:05<00:27, 18.66it/s, dets=1611]


0: 384x640 28 persons, 26.5ms
Speed: 1.6ms preprocess, 26.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  14%|████                          | 81/600 [00:05<00:27, 18.66it/s, dets=1630]


0: 384x640 27 persons, 25.8ms
Speed: 1.9ms preprocess, 25.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  14%|████▏                         | 83/600 [00:05<00:27, 18.52it/s, dets=1650]


0: 384x640 26 persons, 27.0ms
Speed: 2.1ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  14%|████▏                         | 83/600 [00:05<00:27, 18.52it/s, dets=1670]


0: 384x640 25 persons, 26.0ms
Speed: 1.5ms preprocess, 26.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  14%|████▎                         | 85/600 [00:05<00:27, 18.45it/s, dets=1690]


0: 384x640 26 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  14%|████▎                         | 85/600 [00:05<00:27, 18.45it/s, dets=1710]


0: 384x640 26 persons, 25.8ms
Speed: 1.9ms preprocess, 25.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  14%|████▎                         | 87/600 [00:05<00:29, 17.46it/s, dets=1730]


0: 384x640 26 persons, 37.5ms
Speed: 2.3ms preprocess, 37.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  14%|████▎                         | 87/600 [00:05<00:29, 17.46it/s, dets=1750]


0: 384x640 29 persons, 33.0ms
Speed: 1.9ms preprocess, 33.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  15%|████▍                         | 89/600 [00:05<00:31, 16.42it/s, dets=1770]


0: 384x640 30 persons, 35.6ms
Speed: 2.1ms preprocess, 35.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  15%|████▍                         | 89/600 [00:05<00:31, 16.42it/s, dets=1790]


0: 384x640 26 persons, 35.8ms
Speed: 2.2ms preprocess, 35.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  15%|████▌                         | 91/600 [00:05<00:32, 15.55it/s, dets=1810]


0: 384x640 28 persons, 37.2ms
Speed: 2.2ms preprocess, 37.2ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  15%|████▌                         | 91/600 [00:05<00:32, 15.55it/s, dets=1830]


0: 384x640 29 persons, 36.9ms
Speed: 2.4ms preprocess, 36.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  16%|████▋                         | 93/600 [00:05<00:34, 14.85it/s, dets=1851]


0: 384x640 30 persons, 38.7ms
Speed: 2.2ms preprocess, 38.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  16%|████▋                         | 93/600 [00:05<00:34, 14.85it/s, dets=1873]


0: 384x640 31 persons, 36.1ms
Speed: 2.4ms preprocess, 36.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  16%|████▊                         | 95/600 [00:06<00:35, 14.40it/s, dets=1894]


0: 384x640 29 persons, 37.9ms
Speed: 2.5ms preprocess, 37.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  16%|████▊                         | 95/600 [00:06<00:35, 14.40it/s, dets=1915]


0: 384x640 30 persons, 37.3ms
Speed: 2.4ms preprocess, 37.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  16%|████▊                         | 97/600 [00:06<00:35, 14.04it/s, dets=1935]


0: 384x640 31 persons, 36.8ms
Speed: 2.1ms preprocess, 36.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)


Processing:  16%|████▊                         | 97/600 [00:06<00:35, 14.04it/s, dets=1956]


0: 384x640 26 persons, 36.1ms
Speed: 2.5ms preprocess, 36.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  16%|████▉                         | 99/600 [00:06<00:35, 13.95it/s, dets=1977]


0: 384x640 28 persons, 37.5ms
Speed: 2.2ms preprocess, 37.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  16%|████▉                         | 99/600 [00:06<00:35, 13.95it/s, dets=1997]


0: 384x640 28 persons, 37.2ms
Speed: 2.2ms preprocess, 37.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  17%|█████                         | 101/600 [00:06<00:36, 13.75it/s, dets=2016]


0: 384x640 30 persons, 36.8ms
Speed: 2.5ms preprocess, 36.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  17%|█████                         | 101/600 [00:06<00:36, 13.75it/s, dets=2036]


0: 384x640 28 persons, 35.9ms
Speed: 2.2ms preprocess, 35.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  17%|█████▏                        | 103/600 [00:06<00:36, 13.79it/s, dets=2055]


0: 384x640 28 persons, 35.9ms
Speed: 2.2ms preprocess, 35.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  17%|█████▏                        | 103/600 [00:06<00:36, 13.79it/s, dets=2074]


0: 384x640 30 persons, 35.7ms
Speed: 2.1ms preprocess, 35.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  18%|█████▎                        | 105/600 [00:06<00:35, 13.77it/s, dets=2092]


0: 384x640 26 persons, 37.3ms
Speed: 2.2ms preprocess, 37.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  18%|█████▎                        | 105/600 [00:06<00:35, 13.77it/s, dets=2110]


0: 384x640 29 persons, 35.7ms
Speed: 2.2ms preprocess, 35.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  18%|█████▎                        | 107/600 [00:06<00:35, 13.74it/s, dets=2129]


0: 384x640 29 persons, 36.6ms
Speed: 2.1ms preprocess, 36.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  18%|█████▎                        | 107/600 [00:06<00:35, 13.74it/s, dets=2150]


0: 384x640 31 persons, 35.7ms
Speed: 2.4ms preprocess, 35.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  18%|█████▍                        | 109/600 [00:07<00:35, 13.75it/s, dets=2172]


0: 384x640 31 persons, 37.8ms
Speed: 2.2ms preprocess, 37.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  18%|█████▍                        | 109/600 [00:07<00:35, 13.75it/s, dets=2195]


0: 384x640 30 persons, 37.1ms
Speed: 2.5ms preprocess, 37.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)


Processing:  18%|█████▌                        | 111/600 [00:07<00:35, 13.69it/s, dets=2217]


0: 384x640 26 persons, 36.8ms
Speed: 2.2ms preprocess, 36.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  18%|█████▌                        | 111/600 [00:07<00:35, 13.69it/s, dets=2236]


0: 384x640 30 persons, 35.8ms
Speed: 2.6ms preprocess, 35.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  19%|█████▋                        | 113/600 [00:07<00:35, 13.75it/s, dets=2256]


0: 384x640 28 persons, 36.4ms
Speed: 2.2ms preprocess, 36.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  19%|█████▋                        | 113/600 [00:07<00:35, 13.75it/s, dets=2276]


0: 384x640 29 persons, 37.5ms
Speed: 2.2ms preprocess, 37.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  19%|█████▊                        | 115/600 [00:07<00:35, 13.67it/s, dets=2296]


0: 384x640 26 persons, 37.4ms
Speed: 2.2ms preprocess, 37.4ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)


Processing:  19%|█████▊                        | 115/600 [00:07<00:35, 13.67it/s, dets=2316]


0: 384x640 29 persons, 36.9ms
Speed: 2.6ms preprocess, 36.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  20%|█████▊                        | 117/600 [00:07<00:35, 13.65it/s, dets=2335]


0: 384x640 29 persons, 36.3ms
Speed: 2.2ms preprocess, 36.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  20%|█████▊                        | 117/600 [00:07<00:35, 13.65it/s, dets=2353]


0: 384x640 29 persons, 36.5ms
Speed: 2.7ms preprocess, 36.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  20%|█████▉                        | 119/600 [00:07<00:35, 13.68it/s, dets=2371]


0: 384x640 29 persons, 36.6ms
Speed: 2.2ms preprocess, 36.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  20%|█████▉                        | 119/600 [00:07<00:35, 13.68it/s, dets=2388]


0: 384x640 30 persons, 36.0ms
Speed: 2.2ms preprocess, 36.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  20%|██████                        | 121/600 [00:07<00:34, 13.69it/s, dets=2406]


0: 384x640 27 persons, 37.1ms
Speed: 2.2ms preprocess, 37.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  20%|██████                        | 121/600 [00:07<00:34, 13.69it/s, dets=2425]


0: 384x640 27 persons, 36.3ms
Speed: 2.6ms preprocess, 36.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  20%|██████▏                       | 123/600 [00:08<00:34, 13.65it/s, dets=2444]


0: 384x640 30 persons, 37.2ms
Speed: 2.4ms preprocess, 37.2ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  20%|██████▏                       | 123/600 [00:08<00:34, 13.65it/s, dets=2463]


0: 384x640 30 persons, 36.4ms
Speed: 2.1ms preprocess, 36.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  21%|██████▎                       | 125/600 [00:08<00:34, 13.61it/s, dets=2482]


0: 384x640 29 persons, 36.0ms
Speed: 2.2ms preprocess, 36.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  21%|██████▎                       | 125/600 [00:08<00:34, 13.61it/s, dets=2501]


0: 384x640 26 persons, 38.7ms
Speed: 2.4ms preprocess, 38.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  21%|██████▎                       | 127/600 [00:08<00:34, 13.63it/s, dets=2519]


0: 384x640 28 persons, 31.4ms
Speed: 1.8ms preprocess, 31.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  21%|██████▎                       | 127/600 [00:08<00:34, 13.63it/s, dets=2537]


0: 384x640 30 persons, 27.1ms
Speed: 1.5ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  22%|██████▍                       | 129/600 [00:08<00:32, 14.56it/s, dets=2556]


0: 384x640 29 persons, 26.4ms
Speed: 1.9ms preprocess, 26.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  22%|██████▍                       | 129/600 [00:08<00:32, 14.56it/s, dets=2574]


0: 384x640 29 persons, 24.7ms
Speed: 1.5ms preprocess, 24.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  22%|██████▌                       | 131/600 [00:08<00:29, 15.68it/s, dets=2593]


0: 384x640 30 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  22%|██████▌                       | 131/600 [00:08<00:29, 15.68it/s, dets=2613]


0: 384x640 32 persons, 33.2ms
Speed: 1.5ms preprocess, 33.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  22%|██████▋                       | 133/600 [00:08<00:29, 16.10it/s, dets=2633]


0: 384x640 30 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  22%|██████▋                       | 133/600 [00:08<00:29, 16.10it/s, dets=2653]


0: 384x640 31 persons, 26.1ms
Speed: 1.8ms preprocess, 26.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  22%|██████▊                       | 135/600 [00:08<00:27, 16.73it/s, dets=2673]


0: 384x640 32 persons, 26.8ms
Speed: 1.6ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  22%|██████▊                       | 135/600 [00:08<00:27, 16.73it/s, dets=2695]


0: 384x640 34 persons, 26.2ms
Speed: 1.9ms preprocess, 26.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  23%|██████▊                       | 137/600 [00:08<00:26, 17.17it/s, dets=2718]


0: 384x640 34 persons, 27.2ms
Speed: 2.1ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  23%|██████▊                       | 137/600 [00:08<00:26, 17.17it/s, dets=2741]


0: 384x640 32 persons, 25.4ms
Speed: 2.3ms preprocess, 25.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  23%|██████▉                       | 139/600 [00:09<00:26, 17.41it/s, dets=2764]


0: 384x640 33 persons, 27.0ms
Speed: 1.7ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  23%|██████▉                       | 139/600 [00:09<00:26, 17.41it/s, dets=2786]


0: 384x640 32 persons, 26.5ms
Speed: 1.6ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  24%|███████                       | 141/600 [00:09<00:25, 17.69it/s, dets=2808]


0: 384x640 32 persons, 27.0ms
Speed: 1.6ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  24%|███████                       | 141/600 [00:09<00:25, 17.69it/s, dets=2830]


0: 384x640 36 persons, 25.9ms
Speed: 1.8ms preprocess, 25.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  24%|███████▏                      | 143/600 [00:09<00:25, 17.88it/s, dets=2854]


0: 384x640 33 persons, 27.2ms
Speed: 1.5ms preprocess, 27.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  24%|███████▏                      | 143/600 [00:09<00:25, 17.88it/s, dets=2878]


0: 384x640 34 persons, 26.9ms
Speed: 1.6ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  24%|███████▎                      | 145/600 [00:09<00:25, 17.87it/s, dets=2904]


0: 384x640 36 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  24%|███████▎                      | 145/600 [00:09<00:25, 17.87it/s, dets=2930]


0: 384x640 35 persons, 26.5ms
Speed: 1.5ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  24%|███████▎                      | 147/600 [00:09<00:25, 18.07it/s, dets=2955]


0: 384x640 36 persons, 26.5ms
Speed: 2.0ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  24%|███████▎                      | 147/600 [00:09<00:25, 18.07it/s, dets=2979]


0: 384x640 36 persons, 25.3ms
Speed: 1.5ms preprocess, 25.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  25%|███████▍                      | 149/600 [00:09<00:24, 18.18it/s, dets=3002]


0: 384x640 35 persons, 27.5ms
Speed: 1.6ms preprocess, 27.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  25%|███████▍                      | 149/600 [00:09<00:24, 18.18it/s, dets=3026]


0: 384x640 36 persons, 26.5ms
Speed: 1.5ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  25%|███████▌                      | 151/600 [00:09<00:24, 18.09it/s, dets=3050]


0: 384x640 33 persons, 27.0ms
Speed: 1.8ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  25%|███████▌                      | 151/600 [00:09<00:24, 18.09it/s, dets=3073]


0: 384x640 34 persons, 34.3ms
Speed: 1.6ms preprocess, 34.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  26%|███████▋                      | 153/600 [00:09<00:25, 17.61it/s, dets=3096]


0: 384x640 34 persons, 27.3ms
Speed: 1.5ms preprocess, 27.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  26%|███████▋                      | 153/600 [00:09<00:25, 17.61it/s, dets=3119]


0: 384x640 34 persons, 27.3ms
Speed: 2.0ms preprocess, 27.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  26%|███████▊                      | 155/600 [00:09<00:25, 17.62it/s, dets=3142]


0: 384x640 32 persons, 26.8ms
Speed: 1.9ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  26%|███████▊                      | 155/600 [00:09<00:25, 17.62it/s, dets=3165]


0: 384x640 31 persons, 27.4ms
Speed: 2.0ms preprocess, 27.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  26%|███████▊                      | 157/600 [00:10<00:25, 17.63it/s, dets=3187]


0: 384x640 33 persons, 26.4ms
Speed: 1.6ms preprocess, 26.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  26%|███████▊                      | 157/600 [00:10<00:25, 17.63it/s, dets=3209]


0: 384x640 34 persons, 25.9ms
Speed: 1.8ms preprocess, 25.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  26%|███████▉                      | 159/600 [00:10<00:24, 17.84it/s, dets=3230]


0: 384x640 31 persons, 27.5ms
Speed: 2.0ms preprocess, 27.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  26%|███████▉                      | 159/600 [00:10<00:24, 17.84it/s, dets=3251]


0: 384x640 32 persons, 24.3ms
Speed: 1.7ms preprocess, 24.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  27%|████████                      | 161/600 [00:10<00:24, 18.14it/s, dets=3272]


0: 384x640 30 persons, 25.2ms
Speed: 1.9ms preprocess, 25.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  27%|████████                      | 161/600 [00:10<00:24, 18.14it/s, dets=3294]


0: 384x640 32 persons, 24.3ms
Speed: 1.5ms preprocess, 24.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  27%|████████▏                     | 163/600 [00:10<00:23, 18.35it/s, dets=3316]


0: 384x640 31 persons, 25.6ms
Speed: 1.5ms preprocess, 25.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  27%|████████▏                     | 163/600 [00:10<00:23, 18.35it/s, dets=3338]


0: 384x640 29 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  28%|████████▎                     | 165/600 [00:10<00:23, 18.28it/s, dets=3361]


0: 384x640 29 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  28%|████████▎                     | 165/600 [00:10<00:23, 18.28it/s, dets=3384]


0: 384x640 29 persons, 25.6ms
Speed: 2.0ms preprocess, 25.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  28%|████████▎                     | 167/600 [00:10<00:23, 18.38it/s, dets=3406]


0: 384x640 31 persons, 26.7ms
Speed: 1.4ms preprocess, 26.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  28%|████████▎                     | 167/600 [00:10<00:23, 18.38it/s, dets=3428]


0: 384x640 30 persons, 27.1ms
Speed: 1.5ms preprocess, 27.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  28%|████████▍                     | 169/600 [00:10<00:23, 18.13it/s, dets=3450]


0: 384x640 30 persons, 26.8ms
Speed: 2.3ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  28%|████████▍                     | 169/600 [00:10<00:23, 18.13it/s, dets=3473]


0: 384x640 31 persons, 25.4ms
Speed: 1.9ms preprocess, 25.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  28%|████████▌                     | 171/600 [00:10<00:23, 18.31it/s, dets=3494]


0: 384x640 30 persons, 25.1ms
Speed: 1.9ms preprocess, 25.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  28%|████████▌                     | 171/600 [00:10<00:23, 18.31it/s, dets=3516]


0: 384x640 32 persons, 25.3ms
Speed: 1.5ms preprocess, 25.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  29%|████████▋                     | 173/600 [00:10<00:23, 18.50it/s, dets=3539]


0: 384x640 32 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  29%|████████▋                     | 173/600 [00:10<00:23, 18.50it/s, dets=3561]


0: 384x640 33 persons, 25.0ms
Speed: 1.6ms preprocess, 25.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  29%|████████▊                     | 175/600 [00:11<00:22, 18.54it/s, dets=3585]


0: 384x640 32 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  29%|████████▊                     | 175/600 [00:11<00:22, 18.54it/s, dets=3610]


0: 384x640 33 persons, 26.3ms
Speed: 1.9ms preprocess, 26.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  30%|████████▊                     | 177/600 [00:11<00:22, 18.48it/s, dets=3633]


0: 384x640 35 persons, 27.1ms
Speed: 2.1ms preprocess, 27.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  30%|████████▊                     | 177/600 [00:11<00:22, 18.48it/s, dets=3654]


0: 384x640 33 persons, 26.8ms
Speed: 1.9ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  30%|████████▉                     | 179/600 [00:11<00:23, 18.20it/s, dets=3675]


0: 384x640 32 persons, 25.9ms
Speed: 1.5ms preprocess, 25.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  30%|████████▉                     | 179/600 [00:11<00:23, 18.20it/s, dets=3696]


0: 384x640 33 persons, 27.2ms
Speed: 1.9ms preprocess, 27.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  30%|█████████                     | 181/600 [00:11<00:23, 18.16it/s, dets=3717]


0: 384x640 32 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  30%|█████████                     | 181/600 [00:11<00:23, 18.16it/s, dets=3736]


0: 384x640 32 persons, 26.7ms
Speed: 1.5ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  30%|█████████▏                    | 183/600 [00:11<00:22, 18.27it/s, dets=3756]


0: 384x640 33 persons, 25.8ms
Speed: 1.5ms preprocess, 25.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  30%|█████████▏                    | 183/600 [00:11<00:22, 18.27it/s, dets=3776]


0: 384x640 30 persons, 24.3ms
Speed: 1.5ms preprocess, 24.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  31%|█████████▎                    | 185/600 [00:11<00:22, 18.59it/s, dets=3795]


0: 384x640 30 persons, 25.2ms
Speed: 1.5ms preprocess, 25.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  31%|█████████▎                    | 185/600 [00:11<00:22, 18.59it/s, dets=3817]


0: 384x640 31 persons, 24.4ms
Speed: 1.5ms preprocess, 24.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  31%|█████████▎                    | 187/600 [00:11<00:21, 18.81it/s, dets=3839]


0: 384x640 29 persons, 26.7ms
Speed: 1.8ms preprocess, 26.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  31%|█████████▎                    | 187/600 [00:11<00:21, 18.81it/s, dets=3861]


0: 384x640 30 persons, 27.5ms
Speed: 1.6ms preprocess, 27.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  32%|█████████▍                    | 189/600 [00:11<00:22, 18.49it/s, dets=3882]


0: 384x640 31 persons, 27.1ms
Speed: 1.9ms preprocess, 27.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  32%|█████████▍                    | 189/600 [00:11<00:22, 18.49it/s, dets=3903]


0: 384x640 31 persons, 26.7ms
Speed: 1.5ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  32%|█████████▌                    | 191/600 [00:11<00:22, 18.35it/s, dets=3926]


0: 384x640 34 persons, 27.4ms
Speed: 1.6ms preprocess, 27.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  32%|█████████▌                    | 191/600 [00:11<00:22, 18.35it/s, dets=3947]


0: 384x640 33 persons, 26.6ms
Speed: 2.1ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  32%|█████████▋                    | 193/600 [00:11<00:22, 18.23it/s, dets=3970]


0: 384x640 34 persons, 27.1ms
Speed: 1.9ms preprocess, 27.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  32%|█████████▋                    | 193/600 [00:12<00:22, 18.23it/s, dets=3992]


0: 384x640 34 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  32%|█████████▊                    | 195/600 [00:12<00:22, 18.22it/s, dets=4016]


0: 384x640 33 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  32%|█████████▊                    | 195/600 [00:12<00:22, 18.22it/s, dets=4040]


0: 384x640 35 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  33%|█████████▊                    | 197/600 [00:12<00:22, 18.31it/s, dets=4062]


0: 384x640 39 persons, 27.2ms
Speed: 1.6ms preprocess, 27.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  33%|█████████▊                    | 197/600 [00:12<00:22, 18.31it/s, dets=4084]


0: 384x640 39 persons, 26.4ms
Speed: 1.5ms preprocess, 26.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  33%|█████████▉                    | 199/600 [00:12<00:21, 18.31it/s, dets=4107]


0: 384x640 36 persons, 26.3ms
Speed: 1.6ms preprocess, 26.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  33%|█████████▉                    | 199/600 [00:12<00:21, 18.31it/s, dets=4128]


0: 384x640 40 persons, 26.9ms
Speed: 2.0ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  34%|██████████                    | 201/600 [00:12<00:21, 18.22it/s, dets=4149]


0: 384x640 37 persons, 27.5ms
Speed: 1.5ms preprocess, 27.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  34%|██████████                    | 201/600 [00:12<00:21, 18.22it/s, dets=4170]


0: 384x640 35 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  34%|██████████▏                   | 203/600 [00:12<00:22, 18.03it/s, dets=4190]


0: 384x640 35 persons, 27.8ms
Speed: 1.9ms preprocess, 27.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  34%|██████████▏                   | 203/600 [00:12<00:22, 18.03it/s, dets=4209]


0: 384x640 31 persons, 24.7ms
Speed: 1.5ms preprocess, 24.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  34%|██████████▎                   | 205/600 [00:12<00:21, 18.10it/s, dets=4231]


0: 384x640 34 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  34%|██████████▎                   | 205/600 [00:12<00:21, 18.10it/s, dets=4252]


0: 384x640 36 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  34%|██████████▎                   | 207/600 [00:12<00:21, 18.09it/s, dets=4272]


0: 384x640 39 persons, 33.4ms
Speed: 1.7ms preprocess, 33.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  34%|██████████▎                   | 207/600 [00:12<00:21, 18.09it/s, dets=4295]


0: 384x640 40 persons, 27.0ms
Speed: 2.1ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  35%|██████████▍                   | 209/600 [00:12<00:22, 17.77it/s, dets=4321]


0: 384x640 38 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  35%|██████████▍                   | 209/600 [00:12<00:22, 17.77it/s, dets=4345]


0: 384x640 39 persons, 26.3ms
Speed: 1.7ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  35%|██████████▌                   | 211/600 [00:12<00:21, 17.89it/s, dets=4366]


0: 384x640 41 persons, 27.0ms
Speed: 2.0ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  35%|██████████▌                   | 211/600 [00:13<00:21, 17.89it/s, dets=4388]


0: 384x640 40 persons, 25.2ms
Speed: 1.5ms preprocess, 25.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  36%|██████████▋                   | 213/600 [00:13<00:21, 18.04it/s, dets=4414]


0: 384x640 42 persons, 26.8ms
Speed: 1.9ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  36%|██████████▋                   | 213/600 [00:13<00:21, 18.04it/s, dets=4439]


0: 384x640 40 persons, 24.1ms
Speed: 1.5ms preprocess, 24.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  36%|██████████▊                   | 215/600 [00:13<00:21, 18.24it/s, dets=4463]


0: 384x640 40 persons, 24.9ms
Speed: 1.6ms preprocess, 24.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  36%|██████████▊                   | 215/600 [00:13<00:21, 18.24it/s, dets=4486]


0: 384x640 42 persons, 24.6ms
Speed: 1.5ms preprocess, 24.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  36%|██████████▊                   | 217/600 [00:13<00:20, 18.41it/s, dets=4508]


0: 384x640 41 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  36%|██████████▊                   | 217/600 [00:13<00:20, 18.41it/s, dets=4529]


0: 384x640 42 persons, 25.9ms
Speed: 1.5ms preprocess, 25.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  36%|██████████▉                   | 219/600 [00:13<00:20, 18.53it/s, dets=4550]


0: 384x640 40 persons, 26.1ms
Speed: 1.5ms preprocess, 26.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  36%|██████████▉                   | 219/600 [00:13<00:20, 18.53it/s, dets=4571]


0: 384x640 38 persons, 24.7ms
Speed: 1.5ms preprocess, 24.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  37%|███████████                   | 221/600 [00:13<00:20, 18.65it/s, dets=4595]


0: 384x640 40 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  37%|███████████                   | 221/600 [00:13<00:20, 18.65it/s, dets=4617]


0: 384x640 38 persons, 25.7ms
Speed: 1.8ms preprocess, 25.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  37%|███████████▏                  | 223/600 [00:13<00:20, 18.48it/s, dets=4641]


0: 384x640 36 persons, 27.6ms
Speed: 2.2ms preprocess, 27.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  37%|███████████▏                  | 223/600 [00:13<00:20, 18.48it/s, dets=4665]


0: 384x640 39 persons, 25.8ms
Speed: 1.5ms preprocess, 25.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  38%|███████████▎                  | 225/600 [00:13<00:20, 18.29it/s, dets=4689]


0: 384x640 36 persons, 26.8ms
Speed: 2.0ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  38%|███████████▎                  | 225/600 [00:13<00:20, 18.29it/s, dets=4713]


0: 384x640 35 persons, 27.2ms
Speed: 1.5ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  38%|███████████▎                  | 227/600 [00:13<00:20, 18.01it/s, dets=4736]


0: 384x640 35 persons, 27.4ms
Speed: 1.7ms preprocess, 27.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  38%|███████████▎                  | 227/600 [00:13<00:20, 18.01it/s, dets=4760]


0: 384x640 33 persons, 25.1ms
Speed: 2.1ms preprocess, 25.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  38%|███████████▍                  | 229/600 [00:13<00:20, 17.96it/s, dets=4783]


0: 384x640 35 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  38%|███████████▍                  | 229/600 [00:14<00:20, 17.96it/s, dets=4805]


0: 384x640 33 persons, 26.9ms
Speed: 1.9ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  38%|███████████▌                  | 231/600 [00:14<00:20, 18.04it/s, dets=4829]


0: 384x640 32 persons, 25.3ms
Speed: 2.0ms preprocess, 25.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  38%|███████████▌                  | 231/600 [00:14<00:20, 18.04it/s, dets=4853]


0: 384x640 33 persons, 27.2ms
Speed: 1.9ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  39%|███████████▋                  | 233/600 [00:14<00:20, 18.12it/s, dets=4877]


0: 384x640 34 persons, 26.5ms
Speed: 1.5ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  39%|███████████▋                  | 233/600 [00:14<00:20, 18.12it/s, dets=4902]


0: 384x640 31 persons, 24.2ms
Speed: 1.5ms preprocess, 24.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  39%|███████████▊                  | 235/600 [00:14<00:19, 18.31it/s, dets=4927]


0: 384x640 29 persons, 26.7ms
Speed: 1.5ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  39%|███████████▊                  | 235/600 [00:14<00:19, 18.31it/s, dets=4952]


0: 384x640 35 persons, 25.7ms
Speed: 2.1ms preprocess, 25.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  40%|███████████▊                  | 237/600 [00:14<00:19, 18.28it/s, dets=4976]


0: 384x640 29 persons, 27.8ms
Speed: 2.0ms preprocess, 27.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  40%|███████████▊                  | 237/600 [00:14<00:19, 18.28it/s, dets=4999]


0: 384x640 31 persons, 25.6ms
Speed: 1.6ms preprocess, 25.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  40%|███████████▉                  | 239/600 [00:14<00:19, 18.22it/s, dets=5020]


0: 384x640 33 persons, 27.2ms
Speed: 1.6ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  40%|███████████▉                  | 239/600 [00:14<00:19, 18.22it/s, dets=5041]


0: 384x640 35 persons, 27.2ms
Speed: 1.5ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  40%|████████████                  | 241/600 [00:14<00:19, 18.07it/s, dets=5066]


0: 384x640 34 persons, 26.7ms
Speed: 1.8ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  40%|████████████                  | 241/600 [00:14<00:19, 18.07it/s, dets=5089]


0: 384x640 30 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  40%|████████████▏                 | 243/600 [00:14<00:19, 18.03it/s, dets=5112]


0: 384x640 38 persons, 26.1ms
Speed: 1.5ms preprocess, 26.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  40%|████████████▏                 | 243/600 [00:14<00:19, 18.03it/s, dets=5135]


0: 384x640 34 persons, 26.2ms
Speed: 1.6ms preprocess, 26.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  41%|████████████▎                 | 245/600 [00:14<00:19, 18.17it/s, dets=5157]


0: 384x640 33 persons, 27.2ms
Speed: 1.6ms preprocess, 27.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  41%|████████████▎                 | 245/600 [00:14<00:19, 18.17it/s, dets=5180]


0: 384x640 30 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  41%|████████████▎                 | 247/600 [00:14<00:19, 18.12it/s, dets=5201]


0: 384x640 32 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  41%|████████████▎                 | 247/600 [00:15<00:19, 18.12it/s, dets=5221]


0: 384x640 32 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  42%|████████████▍                 | 249/600 [00:15<00:19, 18.21it/s, dets=5242]


0: 384x640 37 persons, 26.7ms
Speed: 1.6ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  42%|████████████▍                 | 249/600 [00:15<00:19, 18.21it/s, dets=5265]


0: 384x640 34 persons, 25.6ms
Speed: 1.5ms preprocess, 25.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  42%|████████████▌                 | 251/600 [00:15<00:19, 18.09it/s, dets=5286]


0: 384x640 31 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  42%|████████████▌                 | 251/600 [00:15<00:19, 18.09it/s, dets=5309]


0: 384x640 32 persons, 26.2ms
Speed: 2.1ms preprocess, 26.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  42%|████████████▋                 | 253/600 [00:15<00:19, 18.06it/s, dets=5333]


0: 384x640 31 persons, 27.1ms
Speed: 2.0ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  42%|████████████▋                 | 253/600 [00:15<00:19, 18.06it/s, dets=5356]


0: 384x640 31 persons, 25.5ms
Speed: 1.8ms preprocess, 25.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  42%|████████████▊                 | 255/600 [00:15<00:19, 18.09it/s, dets=5380]


0: 384x640 30 persons, 27.5ms
Speed: 1.8ms preprocess, 27.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  42%|████████████▊                 | 255/600 [00:15<00:19, 18.09it/s, dets=5403]


0: 384x640 31 persons, 25.3ms
Speed: 1.8ms preprocess, 25.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  43%|████████████▊                 | 257/600 [00:15<00:18, 18.19it/s, dets=5427]


0: 384x640 33 persons, 24.6ms
Speed: 1.8ms preprocess, 24.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  43%|████████████▊                 | 257/600 [00:15<00:18, 18.19it/s, dets=5452]


0: 384x640 34 persons, 27.4ms
Speed: 1.5ms preprocess, 27.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  43%|████████████▉                 | 259/600 [00:15<00:18, 18.06it/s, dets=5477]


0: 384x640 36 persons, 25.7ms
Speed: 1.6ms preprocess, 25.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  43%|████████████▉                 | 259/600 [00:15<00:18, 18.06it/s, dets=5501]


0: 384x640 29 persons, 27.1ms
Speed: 1.6ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  44%|█████████████                 | 261/600 [00:15<00:18, 18.07it/s, dets=5523]


0: 384x640 32 persons, 26.8ms
Speed: 1.8ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  44%|█████████████                 | 261/600 [00:15<00:18, 18.07it/s, dets=5544]


0: 384x640 29 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  44%|█████████████▏                | 263/600 [00:15<00:18, 18.01it/s, dets=5564]


0: 384x640 29 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  44%|█████████████▏                | 263/600 [00:15<00:18, 18.01it/s, dets=5584]


0: 384x640 29 persons, 26.0ms
Speed: 1.6ms preprocess, 26.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  44%|█████████████▎                | 265/600 [00:15<00:18, 18.05it/s, dets=5604]


0: 384x640 30 persons, 26.2ms
Speed: 1.8ms preprocess, 26.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  44%|█████████████▎                | 265/600 [00:16<00:18, 18.05it/s, dets=5624]


0: 384x640 27 persons, 26.8ms
Speed: 1.8ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  44%|█████████████▎                | 267/600 [00:16<00:18, 18.13it/s, dets=5645]


0: 384x640 29 persons, 26.4ms
Speed: 1.5ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  44%|█████████████▎                | 267/600 [00:16<00:18, 18.13it/s, dets=5665]


0: 384x640 31 persons, 26.6ms
Speed: 1.9ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  45%|█████████████▍                | 269/600 [00:16<00:18, 18.03it/s, dets=5688]


0: 384x640 32 persons, 27.6ms
Speed: 1.9ms preprocess, 27.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  45%|█████████████▍                | 269/600 [00:16<00:18, 18.03it/s, dets=5710]


0: 384x640 30 persons, 25.3ms
Speed: 1.6ms preprocess, 25.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  45%|█████████████▌                | 271/600 [00:16<00:18, 18.07it/s, dets=5732]


0: 384x640 34 persons, 27.6ms
Speed: 2.2ms preprocess, 27.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  45%|█████████████▌                | 271/600 [00:16<00:18, 18.07it/s, dets=5755]


0: 384x640 35 persons, 26.5ms
Speed: 2.0ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  46%|█████████████▋                | 273/600 [00:16<00:18, 17.98it/s, dets=5778]


0: 384x640 35 persons, 33.8ms
Speed: 1.5ms preprocess, 33.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  46%|█████████████▋                | 273/600 [00:16<00:18, 17.98it/s, dets=5801]


0: 384x640 35 persons, 25.6ms
Speed: 2.0ms preprocess, 25.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  46%|█████████████▊                | 275/600 [00:16<00:18, 17.72it/s, dets=5824]


0: 384x640 34 persons, 27.4ms
Speed: 1.5ms preprocess, 27.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  46%|█████████████▊                | 275/600 [00:16<00:18, 17.72it/s, dets=5847]


0: 384x640 33 persons, 26.0ms
Speed: 1.8ms preprocess, 26.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  46%|█████████████▊                | 277/600 [00:16<00:18, 17.82it/s, dets=5870]


0: 384x640 34 persons, 26.5ms
Speed: 1.6ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  46%|█████████████▊                | 277/600 [00:16<00:18, 17.82it/s, dets=5893]


0: 384x640 36 persons, 27.2ms
Speed: 1.5ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  46%|█████████████▉                | 279/600 [00:16<00:17, 17.84it/s, dets=5915]


0: 384x640 37 persons, 24.6ms
Speed: 1.5ms preprocess, 24.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  46%|█████████████▉                | 279/600 [00:16<00:17, 17.84it/s, dets=5938]


0: 384x640 39 persons, 24.8ms
Speed: 1.5ms preprocess, 24.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  47%|██████████████                | 281/600 [00:16<00:17, 18.15it/s, dets=5961]


0: 384x640 38 persons, 26.7ms
Speed: 1.5ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  47%|██████████████                | 281/600 [00:16<00:17, 18.15it/s, dets=5983]


0: 384x640 37 persons, 27.2ms
Speed: 2.1ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  47%|██████████████▏               | 283/600 [00:16<00:17, 18.16it/s, dets=6006]


0: 384x640 36 persons, 26.5ms
Speed: 1.6ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  47%|██████████████▏               | 283/600 [00:17<00:17, 18.16it/s, dets=6029]


0: 384x640 39 persons, 26.6ms
Speed: 1.9ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  48%|██████████████▎               | 285/600 [00:17<00:17, 18.14it/s, dets=6050]


0: 384x640 38 persons, 27.8ms
Speed: 1.6ms preprocess, 27.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  48%|██████████████▎               | 285/600 [00:17<00:17, 18.14it/s, dets=6072]


0: 384x640 40 persons, 25.3ms
Speed: 2.0ms preprocess, 25.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  48%|██████████████▎               | 287/600 [00:17<00:17, 18.11it/s, dets=6094]


0: 384x640 40 persons, 27.6ms
Speed: 1.8ms preprocess, 27.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  48%|██████████████▎               | 287/600 [00:17<00:17, 18.11it/s, dets=6117]


0: 384x640 40 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  48%|██████████████▍               | 289/600 [00:17<00:17, 18.02it/s, dets=6141]


0: 384x640 40 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  48%|██████████████▍               | 289/600 [00:17<00:17, 18.02it/s, dets=6165]


0: 384x640 43 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  48%|██████████████▌               | 291/600 [00:17<00:17, 18.00it/s, dets=6189]


0: 384x640 43 persons, 27.2ms
Speed: 1.9ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  48%|██████████████▌               | 291/600 [00:17<00:17, 18.00it/s, dets=6215]


0: 384x640 40 persons, 33.8ms
Speed: 1.6ms preprocess, 33.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  49%|██████████████▋               | 293/600 [00:17<00:17, 17.58it/s, dets=6241]


0: 384x640 39 persons, 26.7ms
Speed: 1.5ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  49%|██████████████▋               | 293/600 [00:17<00:17, 17.58it/s, dets=6265]


0: 384x640 38 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  49%|██████████████▊               | 295/600 [00:17<00:17, 17.60it/s, dets=6289]


0: 384x640 39 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  49%|██████████████▊               | 295/600 [00:17<00:17, 17.60it/s, dets=6313]


0: 384x640 38 persons, 26.8ms
Speed: 1.6ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  50%|██████████████▊               | 297/600 [00:17<00:17, 17.70it/s, dets=6337]


0: 384x640 41 persons, 27.1ms
Speed: 2.1ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  50%|██████████████▊               | 297/600 [00:17<00:17, 17.70it/s, dets=6363]


0: 384x640 39 persons, 26.8ms
Speed: 2.2ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  50%|██████████████▉               | 299/600 [00:17<00:17, 17.63it/s, dets=6389]


0: 384x640 42 persons, 27.4ms
Speed: 1.6ms preprocess, 27.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  50%|██████████████▉               | 299/600 [00:17<00:17, 17.63it/s, dets=6415]


0: 384x640 41 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  50%|███████████████               | 301/600 [00:17<00:16, 17.63it/s, dets=6440]


0: 384x640 40 persons, 27.8ms
Speed: 1.9ms preprocess, 27.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  50%|███████████████               | 301/600 [00:18<00:16, 17.63it/s, dets=6465]


0: 384x640 40 persons, 27.0ms
Speed: 1.9ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  50%|███████████████▏              | 303/600 [00:18<00:16, 17.70it/s, dets=6490]


0: 384x640 39 persons, 27.1ms
Speed: 2.1ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  50%|███████████████▏              | 303/600 [00:18<00:16, 17.70it/s, dets=6514]


0: 384x640 42 persons, 24.8ms
Speed: 1.5ms preprocess, 24.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  51%|███████████████▎              | 305/600 [00:18<00:16, 17.92it/s, dets=6539]


0: 384x640 42 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  51%|███████████████▎              | 305/600 [00:18<00:16, 17.92it/s, dets=6565]


0: 384x640 44 persons, 27.1ms
Speed: 2.0ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  51%|███████████████▎              | 307/600 [00:18<00:16, 17.86it/s, dets=6590]


0: 384x640 44 persons, 27.1ms
Speed: 2.1ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  51%|███████████████▎              | 307/600 [00:18<00:16, 17.86it/s, dets=6615]


0: 384x640 41 persons, 27.0ms
Speed: 1.6ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  52%|███████████████▍              | 309/600 [00:18<00:16, 17.81it/s, dets=6640]


0: 384x640 39 persons, 27.9ms
Speed: 1.6ms preprocess, 27.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  52%|███████████████▍              | 309/600 [00:18<00:16, 17.81it/s, dets=6667]


0: 384x640 40 persons, 26.7ms
Speed: 1.7ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  52%|███████████████▌              | 311/600 [00:18<00:16, 17.63it/s, dets=6690]


0: 384x640 43 persons, 26.5ms
Speed: 1.6ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  52%|███████████████▌              | 311/600 [00:18<00:16, 17.63it/s, dets=6712]


0: 384x640 42 persons, 27.3ms
Speed: 2.0ms preprocess, 27.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  52%|███████████████▋              | 313/600 [00:18<00:16, 17.65it/s, dets=6734]


0: 384x640 38 persons, 26.4ms
Speed: 1.5ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  52%|███████████████▋              | 313/600 [00:18<00:16, 17.65it/s, dets=6758]


0: 384x640 41 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  52%|███████████████▊              | 315/600 [00:18<00:16, 17.74it/s, dets=6784]


0: 384x640 39 persons, 27.2ms
Speed: 2.0ms preprocess, 27.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  52%|███████████████▊              | 315/600 [00:18<00:16, 17.74it/s, dets=6809]


0: 384x640 37 persons, 26.5ms
Speed: 1.8ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  53%|███████████████▊              | 317/600 [00:18<00:15, 17.78it/s, dets=6834]


0: 384x640 39 persons, 27.3ms
Speed: 1.5ms preprocess, 27.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  53%|███████████████▊              | 317/600 [00:18<00:15, 17.78it/s, dets=6859]


0: 384x640 36 persons, 25.7ms
Speed: 1.5ms preprocess, 25.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  53%|███████████████▉              | 319/600 [00:18<00:15, 17.93it/s, dets=6881]


0: 384x640 35 persons, 27.1ms
Speed: 1.8ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  53%|███████████████▉              | 319/600 [00:19<00:15, 17.93it/s, dets=6903]


0: 384x640 37 persons, 24.5ms
Speed: 1.5ms preprocess, 24.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  54%|████████████████              | 321/600 [00:19<00:15, 18.08it/s, dets=6927]


0: 384x640 38 persons, 26.4ms
Speed: 1.9ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  54%|████████████████              | 321/600 [00:19<00:15, 18.08it/s, dets=6951]


0: 384x640 38 persons, 27.4ms
Speed: 2.3ms preprocess, 27.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  54%|████████████████▏             | 323/600 [00:19<00:15, 17.94it/s, dets=6974]


0: 384x640 41 persons, 26.8ms
Speed: 1.9ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  54%|████████████████▏             | 323/600 [00:19<00:15, 17.94it/s, dets=6999]


0: 384x640 37 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  54%|████████████████▎             | 325/600 [00:19<00:15, 17.91it/s, dets=7022]


0: 384x640 42 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  54%|████████████████▎             | 325/600 [00:19<00:15, 17.91it/s, dets=7047]


0: 384x640 42 persons, 27.1ms
Speed: 1.5ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  55%|████████████████▎             | 327/600 [00:19<00:15, 17.93it/s, dets=7072]


0: 384x640 41 persons, 26.8ms
Speed: 1.8ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  55%|████████████████▎             | 327/600 [00:19<00:15, 17.93it/s, dets=7098]


0: 384x640 43 persons, 25.3ms
Speed: 1.5ms preprocess, 25.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  55%|████████████████▍             | 329/600 [00:19<00:15, 18.04it/s, dets=7125]


0: 384x640 42 persons, 27.1ms
Speed: 1.8ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  55%|████████████████▍             | 329/600 [00:19<00:15, 18.04it/s, dets=7149]


0: 384x640 40 persons, 25.3ms
Speed: 1.6ms preprocess, 25.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  55%|████████████████▌             | 331/600 [00:19<00:14, 18.09it/s, dets=7174]


0: 384x640 40 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  55%|████████████████▌             | 331/600 [00:19<00:14, 18.09it/s, dets=7200]


0: 384x640 43 persons, 27.2ms
Speed: 1.8ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  56%|████████████████▋             | 333/600 [00:19<00:14, 17.88it/s, dets=7227]


0: 384x640 37 persons, 27.0ms
Speed: 1.8ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  56%|████████████████▋             | 333/600 [00:19<00:14, 17.88it/s, dets=7252]


0: 384x640 38 persons, 27.5ms
Speed: 1.6ms preprocess, 27.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  56%|████████████████▊             | 335/600 [00:19<00:14, 17.75it/s, dets=7277]


0: 384x640 39 persons, 27.6ms
Speed: 1.8ms preprocess, 27.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  56%|████████████████▊             | 335/600 [00:19<00:14, 17.75it/s, dets=7302]


0: 384x640 40 persons, 27.4ms
Speed: 1.9ms preprocess, 27.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  56%|████████████████▊             | 337/600 [00:19<00:14, 17.65it/s, dets=7328]


0: 384x640 42 persons, 27.0ms
Speed: 1.8ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  56%|████████████████▊             | 337/600 [00:20<00:14, 17.65it/s, dets=7354]


0: 384x640 42 persons, 27.1ms
Speed: 1.8ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  56%|████████████████▉             | 339/600 [00:20<00:14, 17.73it/s, dets=7380]


0: 384x640 44 persons, 26.4ms
Speed: 1.8ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  56%|████████████████▉             | 339/600 [00:20<00:14, 17.73it/s, dets=7403]


0: 384x640 43 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  57%|█████████████████             | 341/600 [00:20<00:14, 17.92it/s, dets=7426]


0: 384x640 41 persons, 27.1ms
Speed: 1.5ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  57%|█████████████████             | 341/600 [00:20<00:14, 17.92it/s, dets=7450]


0: 384x640 42 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  57%|█████████████████▏            | 343/600 [00:20<00:14, 17.82it/s, dets=7477]


0: 384x640 41 persons, 27.1ms
Speed: 1.8ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  57%|█████████████████▏            | 343/600 [00:20<00:14, 17.82it/s, dets=7503]


0: 384x640 38 persons, 25.6ms
Speed: 2.0ms preprocess, 25.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  57%|█████████████████▎            | 345/600 [00:20<00:14, 17.87it/s, dets=7530]


0: 384x640 42 persons, 27.3ms
Speed: 1.5ms preprocess, 27.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  57%|█████████████████▎            | 345/600 [00:20<00:14, 17.87it/s, dets=7558]


0: 384x640 41 persons, 25.5ms
Speed: 1.5ms preprocess, 25.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  58%|█████████████████▎            | 347/600 [00:20<00:14, 18.01it/s, dets=7586]


0: 384x640 40 persons, 27.3ms
Speed: 1.5ms preprocess, 27.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  58%|█████████████████▎            | 347/600 [00:20<00:14, 18.01it/s, dets=7613]


0: 384x640 42 persons, 27.3ms
Speed: 1.5ms preprocess, 27.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  58%|█████████████████▍            | 349/600 [00:20<00:14, 17.81it/s, dets=7641]


0: 384x640 45 persons, 26.7ms
Speed: 1.5ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  58%|█████████████████▍            | 349/600 [00:20<00:14, 17.81it/s, dets=7666]


0: 384x640 41 persons, 26.5ms
Speed: 1.8ms preprocess, 26.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  58%|█████████████████▌            | 351/600 [00:20<00:13, 17.99it/s, dets=7692]


0: 384x640 43 persons, 27.0ms
Speed: 1.8ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  58%|█████████████████▌            | 351/600 [00:20<00:13, 17.99it/s, dets=7720]


0: 384x640 43 persons, 27.2ms
Speed: 1.5ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  59%|█████████████████▋            | 353/600 [00:20<00:13, 17.82it/s, dets=7744]


0: 384x640 47 persons, 27.2ms
Speed: 2.1ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  59%|█████████████████▋            | 353/600 [00:20<00:13, 17.82it/s, dets=7772]


0: 384x640 43 persons, 27.2ms
Speed: 1.9ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  59%|█████████████████▊            | 355/600 [00:21<00:13, 17.75it/s, dets=7797]


0: 384x640 46 persons, 27.2ms
Speed: 2.1ms preprocess, 27.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  59%|█████████████████▊            | 355/600 [00:21<00:13, 17.75it/s, dets=7819]


0: 384x640 45 persons, 25.0ms
Speed: 1.5ms preprocess, 25.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  60%|█████████████████▊            | 357/600 [00:21<00:13, 17.82it/s, dets=7840]


0: 384x640 49 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  60%|█████████████████▊            | 357/600 [00:21<00:13, 17.82it/s, dets=7865]


0: 384x640 48 persons, 27.0ms
Speed: 1.6ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  60%|█████████████████▉            | 359/600 [00:21<00:13, 17.79it/s, dets=7891]


0: 384x640 49 persons, 27.0ms
Speed: 2.1ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  60%|█████████████████▉            | 359/600 [00:21<00:13, 17.79it/s, dets=7914]


0: 384x640 46 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  60%|██████████████████            | 361/600 [00:21<00:13, 17.75it/s, dets=7937]


0: 384x640 47 persons, 27.1ms
Speed: 1.9ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  60%|██████████████████            | 361/600 [00:21<00:13, 17.75it/s, dets=7959]


0: 384x640 49 persons, 26.9ms
Speed: 2.0ms preprocess, 26.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  60%|██████████████████▏           | 363/600 [00:21<00:13, 17.71it/s, dets=7980]


0: 384x640 47 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  60%|██████████████████▏           | 363/600 [00:21<00:13, 17.71it/s, dets=7999]


0: 384x640 44 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  61%|██████████████████▎           | 365/600 [00:21<00:13, 17.76it/s, dets=8020]


0: 384x640 45 persons, 27.2ms
Speed: 1.7ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  61%|██████████████████▎           | 365/600 [00:21<00:13, 17.76it/s, dets=8038]


0: 384x640 44 persons, 26.4ms
Speed: 1.5ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  61%|██████████████████▎           | 367/600 [00:21<00:13, 17.84it/s, dets=8057]


0: 384x640 46 persons, 26.9ms
Speed: 1.6ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  61%|██████████████████▎           | 367/600 [00:21<00:13, 17.84it/s, dets=8074]


0: 384x640 47 persons, 27.0ms
Speed: 1.6ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  62%|██████████████████▍           | 369/600 [00:21<00:12, 17.91it/s, dets=8094]


0: 384x640 42 persons, 27.0ms
Speed: 2.2ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  62%|██████████████████▍           | 369/600 [00:21<00:12, 17.91it/s, dets=8113]


0: 384x640 43 persons, 26.7ms
Speed: 1.8ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  62%|██████████████████▌           | 371/600 [00:21<00:12, 17.83it/s, dets=8131]


0: 384x640 39 persons, 27.2ms
Speed: 1.5ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  62%|██████████████████▌           | 371/600 [00:21<00:12, 17.83it/s, dets=8151]


0: 384x640 41 persons, 26.3ms
Speed: 1.8ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  62%|██████████████████▋           | 373/600 [00:22<00:12, 17.92it/s, dets=8170]


0: 384x640 37 persons, 27.3ms
Speed: 1.9ms preprocess, 27.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  62%|██████████████████▋           | 373/600 [00:22<00:12, 17.92it/s, dets=8188]


0: 384x640 38 persons, 26.5ms
Speed: 1.5ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  62%|██████████████████▊           | 375/600 [00:22<00:12, 17.79it/s, dets=8206]


0: 384x640 37 persons, 27.5ms
Speed: 2.0ms preprocess, 27.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  62%|██████████████████▊           | 375/600 [00:22<00:12, 17.79it/s, dets=8226]


0: 384x640 42 persons, 26.2ms
Speed: 1.8ms preprocess, 26.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  63%|██████████████████▊           | 377/600 [00:22<00:12, 17.74it/s, dets=8246]


0: 384x640 41 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  63%|██████████████████▊           | 377/600 [00:22<00:12, 17.74it/s, dets=8265]


0: 384x640 44 persons, 24.8ms
Speed: 1.9ms preprocess, 24.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  63%|██████████████████▉           | 379/600 [00:22<00:12, 17.97it/s, dets=8285]


0: 384x640 45 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  63%|██████████████████▉           | 379/600 [00:22<00:12, 17.97it/s, dets=8304]


0: 384x640 40 persons, 25.2ms
Speed: 1.5ms preprocess, 25.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  64%|███████████████████           | 381/600 [00:22<00:12, 18.09it/s, dets=8325]


0: 384x640 41 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  64%|███████████████████           | 381/600 [00:22<00:12, 18.09it/s, dets=8345]


0: 384x640 44 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  64%|███████████████████▏          | 383/600 [00:22<00:12, 17.98it/s, dets=8367]


0: 384x640 41 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  64%|███████████████████▏          | 383/600 [00:22<00:12, 17.98it/s, dets=8389]


0: 384x640 46 persons, 26.2ms
Speed: 1.5ms preprocess, 26.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  64%|███████████████████▎          | 385/600 [00:22<00:11, 18.08it/s, dets=8411]


0: 384x640 42 persons, 26.3ms
Speed: 1.9ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  64%|███████████████████▎          | 385/600 [00:22<00:11, 18.08it/s, dets=8433]


0: 384x640 44 persons, 26.0ms
Speed: 1.5ms preprocess, 26.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  64%|███████████████████▎          | 387/600 [00:22<00:11, 18.14it/s, dets=8455]


0: 384x640 43 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  64%|███████████████████▎          | 387/600 [00:22<00:11, 18.14it/s, dets=8477]


0: 384x640 46 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  65%|███████████████████▍          | 389/600 [00:22<00:11, 18.14it/s, dets=8501]


0: 384x640 44 persons, 26.8ms
Speed: 1.9ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  65%|███████████████████▍          | 389/600 [00:22<00:11, 18.14it/s, dets=8525]


0: 384x640 45 persons, 27.0ms
Speed: 1.9ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  65%|███████████████████▌          | 391/600 [00:23<00:11, 18.04it/s, dets=8546]


0: 384x640 43 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  65%|███████████████████▌          | 391/600 [00:23<00:11, 18.04it/s, dets=8570]


0: 384x640 43 persons, 25.7ms
Speed: 1.5ms preprocess, 25.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  66%|███████████████████▋          | 393/600 [00:23<00:11, 18.17it/s, dets=8595]


0: 384x640 44 persons, 26.8ms
Speed: 1.9ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  66%|███████████████████▋          | 393/600 [00:23<00:11, 18.17it/s, dets=8621]


0: 384x640 44 persons, 24.3ms
Speed: 1.9ms preprocess, 24.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  66%|███████████████████▊          | 395/600 [00:23<00:11, 18.24it/s, dets=8647]


0: 384x640 44 persons, 25.9ms
Speed: 1.5ms preprocess, 25.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  66%|███████████████████▊          | 395/600 [00:23<00:11, 18.24it/s, dets=8671]


0: 384x640 45 persons, 27.3ms
Speed: 1.6ms preprocess, 27.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  66%|███████████████████▊          | 397/600 [00:23<00:11, 18.19it/s, dets=8693]


0: 384x640 47 persons, 24.8ms
Speed: 2.0ms preprocess, 24.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  66%|███████████████████▊          | 397/600 [00:23<00:11, 18.19it/s, dets=8714]


0: 384x640 46 persons, 26.1ms
Speed: 2.0ms preprocess, 26.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  66%|███████████████████▉          | 399/600 [00:23<00:10, 18.31it/s, dets=8736]


0: 384x640 44 persons, 27.1ms
Speed: 1.6ms preprocess, 27.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  66%|███████████████████▉          | 399/600 [00:23<00:10, 18.31it/s, dets=8758]


0: 384x640 40 persons, 25.3ms
Speed: 1.5ms preprocess, 25.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  67%|████████████████████          | 401/600 [00:23<00:10, 18.28it/s, dets=8780]


0: 384x640 41 persons, 26.3ms
Speed: 1.8ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  67%|████████████████████          | 401/600 [00:23<00:10, 18.28it/s, dets=8803]


0: 384x640 44 persons, 25.8ms
Speed: 1.5ms preprocess, 25.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  67%|████████████████████▏         | 403/600 [00:23<00:10, 18.22it/s, dets=8825]


0: 384x640 42 persons, 27.1ms
Speed: 1.5ms preprocess, 27.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  67%|████████████████████▏         | 403/600 [00:23<00:10, 18.22it/s, dets=8846]


0: 384x640 41 persons, 24.3ms
Speed: 1.5ms preprocess, 24.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  68%|████████████████████▎         | 405/600 [00:23<00:10, 18.25it/s, dets=8868]


0: 384x640 42 persons, 25.2ms
Speed: 1.8ms preprocess, 25.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  68%|████████████████████▎         | 405/600 [00:23<00:10, 18.25it/s, dets=8890]


0: 384x640 46 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  68%|████████████████████▎         | 407/600 [00:23<00:10, 18.26it/s, dets=8910]


0: 384x640 44 persons, 26.5ms
Speed: 1.5ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  68%|████████████████████▎         | 407/600 [00:23<00:10, 18.26it/s, dets=8934]


0: 384x640 45 persons, 26.8ms
Speed: 2.0ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  68%|████████████████████▍         | 409/600 [00:23<00:10, 18.15it/s, dets=8955]


0: 384x640 47 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  68%|████████████████████▍         | 409/600 [00:24<00:10, 18.15it/s, dets=8973]


0: 384x640 45 persons, 32.2ms
Speed: 2.0ms preprocess, 32.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  68%|████████████████████▌         | 411/600 [00:24<00:10, 17.85it/s, dets=8991]


0: 384x640 45 persons, 30.4ms
Speed: 1.5ms preprocess, 30.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  68%|████████████████████▌         | 411/600 [00:24<00:10, 17.85it/s, dets=9010]


0: 384x640 38 persons, 27.1ms
Speed: 1.5ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  69%|████████████████████▋         | 413/600 [00:24<00:10, 17.75it/s, dets=9029]


0: 384x640 41 persons, 27.0ms
Speed: 1.8ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  69%|████████████████████▋         | 413/600 [00:24<00:10, 17.75it/s, dets=9052]


0: 384x640 42 persons, 25.2ms
Speed: 1.5ms preprocess, 25.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  69%|████████████████████▊         | 415/600 [00:24<00:10, 17.92it/s, dets=9071]


0: 384x640 42 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  69%|████████████████████▊         | 415/600 [00:24<00:10, 17.92it/s, dets=9092]


0: 384x640 43 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  70%|████████████████████▊         | 417/600 [00:24<00:10, 18.06it/s, dets=9113]


0: 384x640 43 persons, 24.6ms
Speed: 2.3ms preprocess, 24.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  70%|████████████████████▊         | 417/600 [00:24<00:10, 18.06it/s, dets=9134]


0: 384x640 40 persons, 25.0ms
Speed: 1.5ms preprocess, 25.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  70%|████████████████████▉         | 419/600 [00:24<00:09, 18.29it/s, dets=9151]


0: 384x640 42 persons, 27.0ms
Speed: 2.1ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  70%|████████████████████▉         | 419/600 [00:24<00:09, 18.29it/s, dets=9170]


0: 384x640 38 persons, 26.7ms
Speed: 2.0ms preprocess, 26.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  70%|█████████████████████         | 421/600 [00:24<00:09, 18.14it/s, dets=9189]


0: 384x640 42 persons, 26.7ms
Speed: 1.8ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  70%|█████████████████████         | 421/600 [00:24<00:09, 18.14it/s, dets=9209]


0: 384x640 42 persons, 27.0ms
Speed: 1.6ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  70%|█████████████████████▏        | 423/600 [00:24<00:09, 18.22it/s, dets=9229]


0: 384x640 43 persons, 26.4ms
Speed: 1.9ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  70%|█████████████████████▏        | 423/600 [00:24<00:09, 18.22it/s, dets=9249]


0: 384x640 44 persons, 32.4ms
Speed: 1.5ms preprocess, 32.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  71%|█████████████████████▎        | 425/600 [00:24<00:09, 18.06it/s, dets=9269]


0: 384x640 49 persons, 29.5ms
Speed: 1.5ms preprocess, 29.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  71%|█████████████████████▎        | 425/600 [00:24<00:09, 18.06it/s, dets=9290]


0: 384x640 49 persons, 26.5ms
Speed: 1.6ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  71%|█████████████████████▎        | 427/600 [00:24<00:09, 17.96it/s, dets=9311]


0: 384x640 45 persons, 27.4ms
Speed: 2.1ms preprocess, 27.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  71%|█████████████████████▎        | 427/600 [00:25<00:09, 17.96it/s, dets=9330]


0: 384x640 39 persons, 25.0ms
Speed: 2.3ms preprocess, 25.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  72%|█████████████████████▍        | 429/600 [00:25<00:09, 17.86it/s, dets=9350]


0: 384x640 40 persons, 27.3ms
Speed: 2.2ms preprocess, 27.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  72%|█████████████████████▍        | 429/600 [00:25<00:09, 17.86it/s, dets=9371]


0: 384x640 41 persons, 25.5ms
Speed: 1.5ms preprocess, 25.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  72%|█████████████████████▌        | 431/600 [00:25<00:09, 17.81it/s, dets=9394]


0: 384x640 40 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  72%|█████████████████████▌        | 431/600 [00:25<00:09, 17.81it/s, dets=9415]


0: 384x640 38 persons, 25.6ms
Speed: 2.0ms preprocess, 25.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  72%|█████████████████████▋        | 433/600 [00:25<00:09, 17.90it/s, dets=9436]


0: 384x640 41 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  72%|█████████████████████▋        | 433/600 [00:25<00:09, 17.90it/s, dets=9459]


0: 384x640 41 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  72%|█████████████████████▊        | 435/600 [00:25<00:09, 17.82it/s, dets=9482]


0: 384x640 41 persons, 27.3ms
Speed: 2.0ms preprocess, 27.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  72%|█████████████████████▊        | 435/600 [00:25<00:09, 17.82it/s, dets=9504]


0: 384x640 43 persons, 26.4ms
Speed: 1.8ms preprocess, 26.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  73%|█████████████████████▊        | 437/600 [00:25<00:09, 17.67it/s, dets=9524]


0: 384x640 45 persons, 27.4ms
Speed: 1.6ms preprocess, 27.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  73%|█████████████████████▊        | 437/600 [00:25<00:09, 17.67it/s, dets=9545]


0: 384x640 38 persons, 26.9ms
Speed: 2.0ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  73%|█████████████████████▉        | 439/600 [00:25<00:09, 17.54it/s, dets=9565]


0: 384x640 45 persons, 27.2ms
Speed: 1.8ms preprocess, 27.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  73%|█████████████████████▉        | 439/600 [00:25<00:09, 17.54it/s, dets=9583]


0: 384x640 36 persons, 26.7ms
Speed: 2.1ms preprocess, 26.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  74%|██████████████████████        | 441/600 [00:25<00:09, 17.52it/s, dets=9600]


0: 384x640 40 persons, 27.0ms
Speed: 2.2ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  74%|██████████████████████        | 441/600 [00:25<00:09, 17.52it/s, dets=9618]


0: 384x640 36 persons, 24.6ms
Speed: 1.8ms preprocess, 24.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  74%|██████████████████████▏       | 443/600 [00:25<00:08, 17.70it/s, dets=9636]


0: 384x640 30 persons, 27.9ms
Speed: 2.4ms preprocess, 27.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  74%|██████████████████████▏       | 443/600 [00:25<00:08, 17.70it/s, dets=9654]


0: 384x640 26 persons, 26.4ms
Speed: 1.5ms preprocess, 26.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  74%|██████████████████████▎       | 445/600 [00:26<00:08, 17.77it/s, dets=9671]


0: 384x640 28 persons, 25.9ms
Speed: 1.6ms preprocess, 25.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  74%|██████████████████████▎       | 445/600 [00:26<00:08, 17.77it/s, dets=9689]


0: 384x640 30 persons, 27.1ms
Speed: 1.5ms preprocess, 27.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  74%|██████████████████████▎       | 447/600 [00:26<00:08, 17.88it/s, dets=9706]


0: 384x640 36 persons, 25.1ms
Speed: 1.6ms preprocess, 25.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  74%|██████████████████████▎       | 447/600 [00:26<00:08, 17.88it/s, dets=9722]


0: 384x640 40 persons, 33.4ms
Speed: 1.9ms preprocess, 33.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  75%|██████████████████████▍       | 449/600 [00:26<00:08, 17.62it/s, dets=9737]


0: 384x640 41 persons, 27.5ms
Speed: 1.5ms preprocess, 27.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  75%|██████████████████████▍       | 449/600 [00:26<00:08, 17.62it/s, dets=9753]


0: 384x640 36 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  75%|██████████████████████▌       | 451/600 [00:26<00:08, 17.70it/s, dets=9769]


0: 384x640 43 persons, 25.7ms
Speed: 2.2ms preprocess, 25.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  75%|██████████████████████▌       | 451/600 [00:26<00:08, 17.70it/s, dets=9785]


0: 384x640 44 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  76%|██████████████████████▋       | 453/600 [00:26<00:08, 17.73it/s, dets=9802]


0: 384x640 43 persons, 26.5ms
Speed: 1.9ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  76%|██████████████████████▋       | 453/600 [00:26<00:08, 17.73it/s, dets=9820]


0: 384x640 42 persons, 27.4ms
Speed: 2.0ms preprocess, 27.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  76%|██████████████████████▊       | 455/600 [00:26<00:08, 17.61it/s, dets=9838]


0: 384x640 44 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  76%|██████████████████████▊       | 455/600 [00:26<00:08, 17.61it/s, dets=9858]


0: 384x640 44 persons, 27.0ms
Speed: 2.0ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  76%|██████████████████████▊       | 457/600 [00:26<00:08, 17.61it/s, dets=9879]


0: 384x640 42 persons, 26.4ms
Speed: 1.8ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  76%|██████████████████████▊       | 457/600 [00:26<00:08, 17.61it/s, dets=9897]


0: 384x640 47 persons, 27.2ms
Speed: 1.6ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  76%|██████████████████████▉       | 459/600 [00:26<00:07, 17.71it/s, dets=9917]


0: 384x640 48 persons, 26.3ms
Speed: 1.9ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  76%|██████████████████████▉       | 459/600 [00:26<00:07, 17.71it/s, dets=9936]


0: 384x640 45 persons, 27.9ms
Speed: 2.3ms preprocess, 27.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  77%|███████████████████████       | 461/600 [00:26<00:07, 17.59it/s, dets=9954]


0: 384x640 46 persons, 26.6ms
Speed: 1.8ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  77%|███████████████████████       | 461/600 [00:26<00:07, 17.59it/s, dets=9971]


0: 384x640 46 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  77%|███████████████████████▏      | 463/600 [00:27<00:07, 17.65it/s, dets=9986]


0: 384x640 39 persons, 26.8ms
Speed: 1.6ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  77%|███████████████████████▏      | 463/600 [00:27<00:07, 17.65it/s, dets=1e+4]


0: 384x640 38 persons, 27.1ms
Speed: 1.8ms preprocess, 27.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  78%|███████████████████████▎      | 465/600 [00:27<00:07, 17.68it/s, dets=1e+4]


0: 384x640 36 persons, 26.6ms
Speed: 1.8ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  78%|███████████████████████▎      | 465/600 [00:27<00:07, 17.68it/s, dets=1e+4]


0: 384x640 36 persons, 27.0ms
Speed: 2.1ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  78%|███████████████████████▎      | 467/600 [00:27<00:07, 17.66it/s, dets=10064]


0: 384x640 38 persons, 26.0ms
Speed: 1.5ms preprocess, 26.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  78%|███████████████████████▎      | 467/600 [00:27<00:07, 17.66it/s, dets=10084]


0: 384x640 38 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  78%|███████████████████████▍      | 469/600 [00:27<00:07, 17.79it/s, dets=10104]


0: 384x640 36 persons, 26.1ms
Speed: 1.4ms preprocess, 26.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  78%|███████████████████████▍      | 469/600 [00:27<00:07, 17.79it/s, dets=10125]


0: 384x640 36 persons, 26.5ms
Speed: 1.6ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  78%|███████████████████████▌      | 471/600 [00:27<00:07, 17.86it/s, dets=10148]


0: 384x640 43 persons, 26.7ms
Speed: 1.8ms preprocess, 26.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  78%|███████████████████████▌      | 471/600 [00:27<00:07, 17.86it/s, dets=10173]


0: 384x640 40 persons, 26.9ms
Speed: 1.8ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  79%|███████████████████████▋      | 473/600 [00:27<00:07, 17.78it/s, dets=10197]


0: 384x640 39 persons, 27.0ms
Speed: 1.8ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  79%|███████████████████████▋      | 473/600 [00:27<00:07, 17.78it/s, dets=10219]


0: 384x640 39 persons, 27.0ms
Speed: 1.9ms preprocess, 27.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  79%|███████████████████████▊      | 475/600 [00:27<00:07, 17.69it/s, dets=10243]


0: 384x640 38 persons, 27.2ms
Speed: 1.9ms preprocess, 27.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  79%|███████████████████████▊      | 475/600 [00:27<00:07, 17.69it/s, dets=10267]


0: 384x640 38 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  80%|███████████████████████▊      | 477/600 [00:27<00:06, 17.66it/s, dets=10290]


0: 384x640 35 persons, 27.1ms
Speed: 1.5ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  80%|███████████████████████▊      | 477/600 [00:27<00:06, 17.66it/s, dets=10313]


0: 384x640 40 persons, 26.4ms
Speed: 1.9ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  80%|███████████████████████▉      | 479/600 [00:27<00:06, 17.60it/s, dets=10338]


0: 384x640 36 persons, 27.7ms
Speed: 1.9ms preprocess, 27.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  80%|███████████████████████▉      | 479/600 [00:27<00:06, 17.60it/s, dets=10360]


0: 384x640 40 persons, 26.8ms
Speed: 2.0ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  80%|████████████████████████      | 481/600 [00:28<00:06, 17.48it/s, dets=10385]


0: 384x640 37 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  80%|████████████████████████      | 481/600 [00:28<00:06, 17.48it/s, dets=10408]


0: 384x640 39 persons, 32.1ms
Speed: 1.6ms preprocess, 32.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  80%|████████████████████████▏     | 483/600 [00:28<00:06, 17.23it/s, dets=10433]


0: 384x640 33 persons, 27.3ms
Speed: 1.8ms preprocess, 27.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  80%|████████████████████████▏     | 483/600 [00:28<00:06, 17.23it/s, dets=10456]


0: 384x640 31 persons, 26.4ms
Speed: 1.9ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  81%|████████████████████████▎     | 485/600 [00:28<00:06, 17.31it/s, dets=10480]


0: 384x640 30 persons, 27.5ms
Speed: 1.5ms preprocess, 27.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  81%|████████████████████████▎     | 485/600 [00:28<00:06, 17.31it/s, dets=10504]


0: 384x640 26 persons, 26.6ms
Speed: 1.8ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  81%|████████████████████████▎     | 487/600 [00:28<00:06, 17.33it/s, dets=10526]


0: 384x640 25 persons, 27.4ms
Speed: 1.6ms preprocess, 27.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  81%|████████████████████████▎     | 487/600 [00:28<00:06, 17.33it/s, dets=10549]


0: 384x640 24 persons, 25.2ms
Speed: 1.5ms preprocess, 25.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  82%|████████████████████████▍     | 489/600 [00:28<00:06, 17.57it/s, dets=10571]


0: 384x640 28 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  82%|████████████████████████▍     | 489/600 [00:28<00:06, 17.57it/s, dets=10594]


0: 384x640 28 persons, 25.7ms
Speed: 2.2ms preprocess, 25.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  82%|████████████████████████▌     | 491/600 [00:28<00:06, 17.60it/s, dets=10617]


0: 384x640 32 persons, 27.0ms
Speed: 1.8ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  82%|████████████████████████▌     | 491/600 [00:28<00:06, 17.60it/s, dets=10639]


0: 384x640 30 persons, 26.1ms
Speed: 1.5ms preprocess, 26.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  82%|████████████████████████▋     | 493/600 [00:28<00:06, 17.66it/s, dets=10661]


0: 384x640 31 persons, 27.1ms
Speed: 2.0ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  82%|████████████████████████▋     | 493/600 [00:28<00:06, 17.66it/s, dets=10683]


0: 384x640 30 persons, 25.8ms
Speed: 1.8ms preprocess, 25.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  82%|████████████████████████▊     | 495/600 [00:28<00:05, 17.69it/s, dets=10705]


0: 384x640 31 persons, 27.1ms
Speed: 1.8ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  82%|████████████████████████▊     | 495/600 [00:28<00:05, 17.69it/s, dets=10726]


0: 384x640 28 persons, 26.0ms
Speed: 1.5ms preprocess, 26.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  83%|████████████████████████▊     | 497/600 [00:28<00:05, 17.85it/s, dets=10748]


0: 384x640 30 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  83%|████████████████████████▊     | 497/600 [00:29<00:05, 17.85it/s, dets=10770]


0: 384x640 29 persons, 26.5ms
Speed: 1.9ms preprocess, 26.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  83%|████████████████████████▉     | 499/600 [00:29<00:05, 17.88it/s, dets=10791]


0: 384x640 29 persons, 27.4ms
Speed: 2.1ms preprocess, 27.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  83%|████████████████████████▉     | 499/600 [00:29<00:05, 17.88it/s, dets=10810]


0: 384x640 30 persons, 26.5ms
Speed: 1.5ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  84%|█████████████████████████     | 501/600 [00:29<00:05, 17.92it/s, dets=10830]


0: 384x640 25 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  84%|█████████████████████████     | 501/600 [00:29<00:05, 17.92it/s, dets=10850]


0: 384x640 29 persons, 25.9ms
Speed: 1.5ms preprocess, 25.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  84%|█████████████████████████▏    | 503/600 [00:29<00:05, 17.99it/s, dets=10871]


0: 384x640 27 persons, 26.4ms
Speed: 2.2ms preprocess, 26.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  84%|█████████████████████████▏    | 503/600 [00:29<00:05, 17.99it/s, dets=10892]


0: 384x640 28 persons, 27.5ms
Speed: 2.0ms preprocess, 27.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  84%|█████████████████████████▎    | 505/600 [00:29<00:05, 17.89it/s, dets=10914]


0: 384x640 29 persons, 25.5ms
Speed: 1.5ms preprocess, 25.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  84%|█████████████████████████▎    | 505/600 [00:29<00:05, 17.89it/s, dets=10936]


0: 384x640 33 persons, 27.1ms
Speed: 2.4ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  84%|█████████████████████████▎    | 507/600 [00:29<00:05, 17.92it/s, dets=10957]


0: 384x640 34 persons, 26.8ms
Speed: 1.9ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  84%|█████████████████████████▎    | 507/600 [00:29<00:05, 17.92it/s, dets=10978]


0: 384x640 34 persons, 26.5ms
Speed: 1.6ms preprocess, 26.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  85%|█████████████████████████▍    | 509/600 [00:29<00:05, 17.92it/s, dets=11001]


0: 384x640 31 persons, 27.4ms
Speed: 2.0ms preprocess, 27.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  85%|█████████████████████████▍    | 509/600 [00:29<00:05, 17.92it/s, dets=11024]


0: 384x640 27 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  85%|█████████████████████████▌    | 511/600 [00:29<00:04, 17.85it/s, dets=11045]


0: 384x640 28 persons, 27.4ms
Speed: 1.8ms preprocess, 27.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  85%|█████████████████████████▌    | 511/600 [00:29<00:04, 17.85it/s, dets=11065]


0: 384x640 29 persons, 27.0ms
Speed: 2.1ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  86%|█████████████████████████▋    | 513/600 [00:29<00:04, 17.74it/s, dets=11086]


0: 384x640 28 persons, 27.3ms
Speed: 1.5ms preprocess, 27.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  86%|█████████████████████████▋    | 513/600 [00:29<00:04, 17.74it/s, dets=11107]


0: 384x640 28 persons, 24.4ms
Speed: 1.5ms preprocess, 24.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  86%|█████████████████████████▊    | 515/600 [00:29<00:04, 17.90it/s, dets=11127]


0: 384x640 30 persons, 26.7ms
Speed: 1.5ms preprocess, 26.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  86%|█████████████████████████▊    | 515/600 [00:30<00:04, 17.90it/s, dets=11148]


0: 384x640 30 persons, 27.4ms
Speed: 2.1ms preprocess, 27.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  86%|█████████████████████████▊    | 517/600 [00:30<00:04, 17.88it/s, dets=11170]


0: 384x640 29 persons, 25.7ms
Speed: 1.5ms preprocess, 25.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  86%|█████████████████████████▊    | 517/600 [00:30<00:04, 17.88it/s, dets=11193]


0: 384x640 27 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  86%|█████████████████████████▉    | 519/600 [00:30<00:04, 18.10it/s, dets=11216]


0: 384x640 26 persons, 27.2ms
Speed: 2.1ms preprocess, 27.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  86%|█████████████████████████▉    | 519/600 [00:30<00:04, 18.10it/s, dets=11237]


0: 384x640 27 persons, 26.4ms
Speed: 1.9ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  87%|██████████████████████████    | 521/600 [00:30<00:04, 18.12it/s, dets=11257]


0: 384x640 27 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  87%|██████████████████████████    | 521/600 [00:30<00:04, 18.12it/s, dets=11278]


0: 384x640 27 persons, 27.5ms
Speed: 2.1ms preprocess, 27.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  87%|██████████████████████████▏   | 523/600 [00:30<00:04, 18.06it/s, dets=11300]


0: 384x640 28 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  87%|██████████████████████████▏   | 523/600 [00:30<00:04, 18.06it/s, dets=11321]


0: 384x640 31 persons, 26.9ms
Speed: 1.7ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  88%|██████████████████████████▎   | 525/600 [00:30<00:04, 17.91it/s, dets=11341]


0: 384x640 30 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  88%|██████████████████████████▎   | 525/600 [00:30<00:04, 17.91it/s, dets=11361]


0: 384x640 30 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  88%|██████████████████████████▎   | 527/600 [00:30<00:04, 18.05it/s, dets=11381]


0: 384x640 30 persons, 26.1ms
Speed: 1.5ms preprocess, 26.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  88%|██████████████████████████▎   | 527/600 [00:30<00:04, 18.05it/s, dets=11401]


0: 384x640 29 persons, 27.3ms
Speed: 1.8ms preprocess, 27.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  88%|██████████████████████████▍   | 529/600 [00:30<00:03, 17.87it/s, dets=11421]


0: 384x640 30 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  88%|██████████████████████████▍   | 529/600 [00:30<00:03, 17.87it/s, dets=11440]


0: 384x640 30 persons, 27.0ms
Speed: 1.6ms preprocess, 27.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  88%|██████████████████████████▌   | 531/600 [00:30<00:03, 17.90it/s, dets=11461]


0: 384x640 30 persons, 27.1ms
Speed: 1.5ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  88%|██████████████████████████▌   | 531/600 [00:30<00:03, 17.90it/s, dets=11482]


0: 384x640 31 persons, 26.5ms
Speed: 1.5ms preprocess, 26.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  89%|██████████████████████████▋   | 533/600 [00:30<00:03, 17.95it/s, dets=11502]


0: 384x640 33 persons, 27.2ms
Speed: 1.5ms preprocess, 27.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  89%|██████████████████████████▋   | 533/600 [00:31<00:03, 17.95it/s, dets=11521]


0: 384x640 29 persons, 34.8ms
Speed: 1.8ms preprocess, 34.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  89%|██████████████████████████▊   | 535/600 [00:31<00:04, 15.56it/s, dets=11542]


0: 384x640 31 persons, 36.4ms
Speed: 1.6ms preprocess, 36.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  89%|██████████████████████████▊   | 535/600 [00:31<00:04, 15.56it/s, dets=11561]


0: 384x640 31 persons, 34.2ms
Speed: 1.6ms preprocess, 34.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  90%|██████████████████████████▊   | 537/600 [00:31<00:04, 15.63it/s, dets=11581]


0: 384x640 30 persons, 29.6ms
Speed: 2.0ms preprocess, 29.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  90%|██████████████████████████▊   | 537/600 [00:31<00:04, 15.63it/s, dets=11601]


0: 384x640 28 persons, 25.4ms
Speed: 1.8ms preprocess, 25.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  90%|██████████████████████████▉   | 539/600 [00:31<00:03, 16.19it/s, dets=11621]


0: 384x640 31 persons, 26.6ms
Speed: 1.5ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  90%|██████████████████████████▉   | 539/600 [00:31<00:03, 16.19it/s, dets=11640]


0: 384x640 29 persons, 27.1ms
Speed: 2.0ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  90%|███████████████████████████   | 541/600 [00:31<00:03, 16.60it/s, dets=11659]


0: 384x640 27 persons, 26.2ms
Speed: 1.5ms preprocess, 26.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  90%|███████████████████████████   | 541/600 [00:31<00:03, 16.60it/s, dets=11679]


0: 384x640 32 persons, 26.4ms
Speed: 1.8ms preprocess, 26.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  90%|███████████████████████████▏  | 543/600 [00:31<00:03, 17.12it/s, dets=11698]


0: 384x640 31 persons, 26.4ms
Speed: 1.5ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  90%|███████████████████████████▏  | 543/600 [00:31<00:03, 17.12it/s, dets=11718]


0: 384x640 32 persons, 24.6ms
Speed: 1.5ms preprocess, 24.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  91%|███████████████████████████▎  | 545/600 [00:31<00:03, 17.54it/s, dets=11736]


0: 384x640 33 persons, 25.9ms
Speed: 1.5ms preprocess, 25.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  91%|███████████████████████████▎  | 545/600 [00:31<00:03, 17.54it/s, dets=11757]


0: 384x640 35 persons, 24.1ms
Speed: 1.5ms preprocess, 24.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  91%|███████████████████████████▎  | 547/600 [00:31<00:02, 18.00it/s, dets=11776]


0: 384x640 35 persons, 34.8ms
Speed: 2.0ms preprocess, 34.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  91%|███████████████████████████▎  | 547/600 [00:31<00:02, 18.00it/s, dets=11797]


0: 384x640 33 persons, 28.2ms
Speed: 1.5ms preprocess, 28.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  92%|███████████████████████████▍  | 549/600 [00:31<00:02, 17.62it/s, dets=11819]


0: 384x640 33 persons, 27.2ms
Speed: 1.5ms preprocess, 27.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  92%|███████████████████████████▍  | 549/600 [00:31<00:02, 17.62it/s, dets=11839]


0: 384x640 33 persons, 24.5ms
Speed: 1.5ms preprocess, 24.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  92%|███████████████████████████▌  | 551/600 [00:32<00:02, 17.89it/s, dets=11859]


0: 384x640 32 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  92%|███████████████████████████▌  | 551/600 [00:32<00:02, 17.89it/s, dets=11879]


0: 384x640 35 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  92%|███████████████████████████▋  | 553/600 [00:32<00:02, 17.97it/s, dets=11898]


0: 384x640 31 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


Processing:  92%|███████████████████████████▋  | 553/600 [00:32<00:02, 17.97it/s, dets=11917]


0: 384x640 31 persons, 24.7ms
Speed: 1.5ms preprocess, 24.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  92%|███████████████████████████▊  | 555/600 [00:32<00:02, 18.14it/s, dets=11937]


0: 384x640 30 persons, 27.2ms
Speed: 1.6ms preprocess, 27.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  92%|███████████████████████████▊  | 555/600 [00:32<00:02, 18.14it/s, dets=11957]


0: 384x640 29 persons, 24.7ms
Speed: 1.5ms preprocess, 24.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  93%|███████████████████████████▊  | 557/600 [00:32<00:02, 18.38it/s, dets=11977]


0: 384x640 29 persons, 26.1ms
Speed: 2.2ms preprocess, 26.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  93%|███████████████████████████▊  | 557/600 [00:32<00:02, 18.38it/s, dets=11997]


0: 384x640 30 persons, 26.3ms
Speed: 1.5ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  93%|███████████████████████████▉  | 559/600 [00:32<00:02, 18.55it/s, dets=12018]


0: 384x640 30 persons, 25.4ms
Speed: 1.6ms preprocess, 25.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  93%|███████████████████████████▉  | 559/600 [00:32<00:02, 18.55it/s, dets=12038]


0: 384x640 30 persons, 25.3ms
Speed: 1.5ms preprocess, 25.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  94%|████████████████████████████  | 561/600 [00:32<00:02, 18.64it/s, dets=12060]


0: 384x640 30 persons, 24.7ms
Speed: 1.5ms preprocess, 24.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  94%|████████████████████████████  | 561/600 [00:32<00:02, 18.64it/s, dets=12081]


0: 384x640 29 persons, 26.4ms
Speed: 2.0ms preprocess, 26.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  94%|████████████████████████████▏ | 563/600 [00:32<00:01, 18.54it/s, dets=12103]


0: 384x640 30 persons, 27.3ms
Speed: 1.8ms preprocess, 27.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  94%|████████████████████████████▏ | 563/600 [00:32<00:01, 18.54it/s, dets=12124]


0: 384x640 26 persons, 25.0ms
Speed: 1.9ms preprocess, 25.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  94%|████████████████████████████▎ | 565/600 [00:32<00:01, 18.46it/s, dets=12143]


0: 384x640 28 persons, 27.1ms
Speed: 1.5ms preprocess, 27.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  94%|████████████████████████████▎ | 565/600 [00:32<00:01, 18.46it/s, dets=12161]


0: 384x640 28 persons, 26.9ms
Speed: 1.5ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  94%|████████████████████████████▎ | 567/600 [00:32<00:01, 18.40it/s, dets=12180]


0: 384x640 29 persons, 26.7ms
Speed: 1.8ms preprocess, 26.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  94%|████████████████████████████▎ | 567/600 [00:32<00:01, 18.40it/s, dets=12199]


0: 384x640 30 persons, 26.9ms
Speed: 2.0ms preprocess, 26.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  95%|████████████████████████████▍ | 569/600 [00:33<00:01, 18.29it/s, dets=12218]


0: 384x640 27 persons, 26.0ms
Speed: 1.4ms preprocess, 26.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  95%|████████████████████████████▍ | 569/600 [00:33<00:01, 18.29it/s, dets=12238]


0: 384x640 27 persons, 25.7ms
Speed: 1.7ms preprocess, 25.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  95%|████████████████████████████▌ | 571/600 [00:33<00:01, 18.28it/s, dets=12257]


0: 384x640 29 persons, 26.8ms
Speed: 1.9ms preprocess, 26.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  95%|████████████████████████████▌ | 571/600 [00:33<00:01, 18.28it/s, dets=12276]


0: 384x640 26 persons, 26.8ms
Speed: 2.2ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  96%|████████████████████████████▋ | 573/600 [00:33<00:01, 18.19it/s, dets=12294]


0: 384x640 29 persons, 25.1ms
Speed: 1.5ms preprocess, 25.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  96%|████████████████████████████▋ | 573/600 [00:33<00:01, 18.19it/s, dets=12314]


0: 384x640 27 persons, 25.0ms
Speed: 2.2ms preprocess, 25.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  96%|████████████████████████████▊ | 575/600 [00:33<00:01, 18.37it/s, dets=12334]


0: 384x640 27 persons, 27.7ms
Speed: 1.6ms preprocess, 27.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  96%|████████████████████████████▊ | 575/600 [00:33<00:01, 18.37it/s, dets=12353]


0: 384x640 23 persons, 24.3ms
Speed: 1.5ms preprocess, 24.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  96%|████████████████████████████▊ | 577/600 [00:33<00:01, 18.41it/s, dets=12373]


0: 384x640 26 persons, 26.3ms
Speed: 2.0ms preprocess, 26.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  96%|████████████████████████████▊ | 577/600 [00:33<00:01, 18.41it/s, dets=12392]


0: 384x640 26 persons, 26.6ms
Speed: 1.7ms preprocess, 26.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  96%|████████████████████████████▉ | 579/600 [00:33<00:01, 18.35it/s, dets=12410]


0: 384x640 25 persons, 26.8ms
Speed: 1.5ms preprocess, 26.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  96%|████████████████████████████▉ | 579/600 [00:33<00:01, 18.35it/s, dets=12426]


0: 384x640 24 persons, 24.5ms
Speed: 2.0ms preprocess, 24.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  97%|█████████████████████████████ | 581/600 [00:33<00:01, 18.47it/s, dets=12442]


0: 384x640 26 persons, 25.9ms
Speed: 2.2ms preprocess, 25.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  97%|█████████████████████████████ | 581/600 [00:33<00:01, 18.47it/s, dets=12457]


0: 384x640 23 persons, 24.3ms
Speed: 2.1ms preprocess, 24.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  97%|█████████████████████████████▏| 583/600 [00:33<00:00, 18.55it/s, dets=12472]


0: 384x640 24 persons, 26.1ms
Speed: 1.6ms preprocess, 26.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  97%|█████████████████████████████▏| 583/600 [00:33<00:00, 18.55it/s, dets=12487]


0: 384x640 22 persons, 24.9ms
Speed: 1.8ms preprocess, 24.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  98%|█████████████████████████████▎| 585/600 [00:33<00:00, 18.70it/s, dets=12503]


0: 384x640 21 persons, 27.0ms
Speed: 1.5ms preprocess, 27.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  98%|█████████████████████████████▎| 585/600 [00:33<00:00, 18.70it/s, dets=12520]


0: 384x640 20 persons, 27.1ms
Speed: 1.6ms preprocess, 27.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  98%|█████████████████████████████▎| 587/600 [00:33<00:00, 18.49it/s, dets=12537]


0: 384x640 19 persons, 26.6ms
Speed: 1.8ms preprocess, 26.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing:  98%|█████████████████████████████▎| 587/600 [00:34<00:00, 18.49it/s, dets=12552]


0: 384x640 20 persons, 25.9ms
Speed: 1.5ms preprocess, 25.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  98%|█████████████████████████████▍| 589/600 [00:34<00:00, 18.52it/s, dets=12567]


0: 384x640 22 persons, 26.9ms
Speed: 1.8ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  98%|█████████████████████████████▍| 589/600 [00:34<00:00, 18.52it/s, dets=12583]


0: 384x640 22 persons, 26.1ms
Speed: 2.0ms preprocess, 26.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing:  98%|█████████████████████████████▌| 591/600 [00:34<00:00, 18.52it/s, dets=12597]


0: 384x640 23 persons, 25.4ms
Speed: 1.9ms preprocess, 25.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  98%|█████████████████████████████▌| 591/600 [00:34<00:00, 18.52it/s, dets=12612]


0: 384x640 21 persons, 24.7ms
Speed: 1.9ms preprocess, 24.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  99%|█████████████████████████████▋| 593/600 [00:34<00:00, 18.78it/s, dets=12628]


0: 384x640 22 persons, 25.5ms
Speed: 1.5ms preprocess, 25.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  99%|█████████████████████████████▋| 593/600 [00:34<00:00, 18.78it/s, dets=12643]


0: 384x640 23 persons, 24.2ms
Speed: 1.9ms preprocess, 24.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  99%|█████████████████████████████▊| 595/600 [00:34<00:00, 19.05it/s, dets=12659]


0: 384x640 22 persons, 24.6ms
Speed: 1.5ms preprocess, 24.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing:  99%|█████████████████████████████▊| 595/600 [00:34<00:00, 19.05it/s, dets=12673]


0: 384x640 20 persons, 24.5ms
Speed: 2.0ms preprocess, 24.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing: 100%|█████████████████████████████▊| 597/600 [00:34<00:00, 19.31it/s, dets=12686]


0: 384x640 21 persons, 26.0ms
Speed: 1.8ms preprocess, 26.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


Processing: 100%|█████████████████████████████▊| 597/600 [00:34<00:00, 19.31it/s, dets=12699]


0: 384x640 21 persons, 24.2ms
Speed: 1.5ms preprocess, 24.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Processing: 100%|█████████████████████████████▉| 599/600 [00:34<00:00, 19.48it/s, dets=12712]


0: 384x640 22 persons, 25.2ms
Speed: 1.5ms preprocess, 25.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


Processing: 100%|██████████████████████████████| 600/600 [00:34<00:00, 17.31it/s, dets=12725]


Detection Pipeline Summary
Input type: MOT17 Sequence
Total frames: 600
Total detections: 12725
Average detections per frame: 21.2

Outputs saved to:
- Detections: /csehome/b22ai025/Project/Scratch/Detections/sequence_output/detections.pkl
- Video: /csehome/b22ai025/Project/Scratch/Detections/sequence_output/detections.avi

Pipeline completed successfully!





## 8. DeepSORT Implementation
Custom DeepSORT tracker with:
- Kalman filtering
- Hungarian matching
- Appearance features
- Track management
- ID assignment

In [30]:
# Import required libraries
import cv2
import torch
from ultralytics import YOLO
import os
import pickle
import logging
import numpy as np
from datetime import datetime
import colorsys
import shutil
import glob
from tqdm import tqdm
import pickle
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from scipy.optimize import linear_sum_assignment
from filterpy.kalman import KalmanFilter

In [31]:
def initialize_kalman_filter():
    """Initialize a Kalman Filter for tracking bounding box motion."""
    kf = KalmanFilter(dim_x=8, dim_z=4)  # State: [x, y, w, h, vx, vy, vw, vh], Measurement: [x, y, w, h]
    
    # State transition matrix (constant velocity model)
    kf.F = np.array([[1, 0, 0, 0, 1, 0, 0, 0],
                     [0, 1, 0, 0, 0, 1, 0, 0],
                     [0, 0, 1, 0, 0, 0, 1, 0],
                     [0, 0, 0, 1, 0, 0, 0, 1],
                     [0, 0, 0, 0, 1, 0, 0, 0],
                     [0, 0, 0, 0, 0, 1, 0, 0],
                     [0, 0, 0, 0, 0, 0, 1, 0],
                     [0, 0, 0, 0, 0, 0, 0, 1]])
    
    # Measurement function (observe only position and size)
    kf.H = np.array([[1, 0, 0, 0, 0, 0, 0, 0],
                     [0, 1, 0, 0, 0, 0, 0, 0],
                     [0, 0, 1, 0, 0, 0, 0, 0],
                     [0, 0, 0, 1, 0, 0, 0, 0]])
    
    # Noise covariance matrices
    kf.R = np.eye(4) * 5  # Measurement noise
    kf.P *= 1000.0  # Initial uncertainty
    kf.Q = np.eye(8) * 0.1  # Process noise
    return kf

In [32]:
class Track:
    """Class to manage the state of a single tracked object."""
    def __init__(self, track_id, bbox, appearance):
        # Ensure bbox is a numpy array
        bbox = np.array(bbox, dtype=np.float32)
        
        # Validate bbox shape
        if bbox.size != 4:
            raise ValueError(f"Invalid bbox shape. Expected 4 elements, got {bbox.shape}")
        
        self.track_id = track_id
        self.kf = initialize_kalman_filter()
        
        # Reshape to column vector if needed
        self.kf.x[:4] = bbox.reshape(4, 1)  
        
        self.appearance = appearance  # Appearance descriptor
        self.time_since_update = 0  
        self.hits = 1  
        logging.info(f"Initialized track {track_id} with bbox {bbox}")

    def predict(self):
        """Predict the next state using the Kalman Filter."""
        self.kf.predict()
        self.time_since_update += 1
        return self.kf.x[:4].flatten()

    def update(self, bbox, appearance):
        """Update the track with a new detection."""
        # Ensure bbox is a numpy array
        bbox = np.array(bbox, dtype=np.float32)
        
        if bbox.size != 4:
            logging.warning(f"Invalid bbox for track {self.track_id}. Skipping update.")
            return
        
        self.kf.update(bbox.reshape(4, 1))
        self.appearance = appearance
        self.time_since_update = 0
        self.hits += 1
        logging.debug(f"Updated track {self.track_id} with bbox {bbox}")




In [33]:
class DeepSORT:
    """DeepSORT tracker implementation with improved error handling."""
    def __init__(self, max_age=30, n_init=3, nn_budget=100, 
                 iou_threshold=0.3, appearance_threshold=0.95):
        self.max_age = max_age
        self.n_init = n_init
        self.nn_budget = nn_budget
        self.iou_threshold = iou_threshold
        self.appearance_threshold = appearance_threshold
        self.tracks = []
        self.next_id = 1
        logging.info("Initialized DeepSORT tracker")

    def update(self, detections, frame):
        """
        Update the tracker with new detections for the current frame.
        
        Args:
            detections (list): List of detection dictionaries 
                               with 'bbox' and optional 'appearance' keys
            frame (numpy.ndarray): Current video frame
        
        Returns:
            list: Confirmed tracks with (track_id, bbox)
        """
        # Validate inputs
        if not detections:
            logging.info("No detections in current frame")
            return []
        
        try:
            # Extract bounding boxes
            det_boxes = [
                det.get('bbox') if isinstance(det, dict) else det 
                for det in detections
            ]
            
            # Validate detection boxes
            det_boxes = [
                np.array(box, dtype=np.float32).flatten()[:4] 
                for box in det_boxes if box is not None
            ]
            
            # Compute appearances (histogram-based)
            det_appearances = [
                cv2.calcHist(
                    [frame[int(box[1]):int(box[3]), int(box[0]):int(box[2])]], 
                    [0], None, [256], [0, 256]
                ) for box in det_boxes
            ]

            # Predict existing tracks
            predicted_boxes = [track.predict() for track in self.tracks]

            # Associate detections to tracks
            matches, unmatched_dets, unmatched_tracks = self.associate_detections_to_tracks(
                predicted_boxes, det_boxes, det_appearances
            )

            # Update matched tracks
            for track_idx, det_idx in matches:
                self.tracks[track_idx].update(det_boxes[det_idx], det_appearances[det_idx])

            # Create new tracks for unmatched detections
            for det_idx in unmatched_dets:
                try:
                    new_track = Track(
                        self.next_id, 
                        det_boxes[det_idx], 
                        det_appearances[det_idx]
                    )
                    self.tracks.append(new_track)
                    self.next_id += 1
                except ValueError as e:
                    logging.error(f"Failed to create track: {e}")

            # Remove old unmatched tracks
            self.tracks = [
                t for t in self.tracks 
                if t.time_since_update <= self.max_age
            ]

            # Return confirmed tracks
            confirmed_tracks = [
                (t.track_id, t.kf.x[:4].flatten()) 
                for t in self.tracks 
                if t.hits >= self.n_init
            ]

            logging.debug(f"Active tracks: {len(self.tracks)}")
            return confirmed_tracks

        except Exception as e:
            logging.error(f"Error in tracking update: {e}")
            return []

    def compute_iou(self, box1, box2):
        """Compute Intersection over Union between two bounding boxes."""
        x1, y1, x2, y2 = box1
        x1_, y1_, x2_, y2_ = box2
        xi1, yi1 = max(x1, x1_), max(y1, y1_)
        xi2, yi2 = min(x2, x2_), min(y2, y2_)
        inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)
        box1_area = (x2 - x1) * (y2 - y1)
        box2_area = (x2_ - x1_) * (y2_ - y1_)
        union_area = box1_area + box2_area - inter_area
        iou = inter_area / union_area if union_area > 0 else 0
        return iou

    def compute_appearance_distance(self, track_appearance, det_appearance):
        """Compute cosine distance between appearance features."""
        try:
            distance = cdist(track_appearance.reshape(1, -1), det_appearance.reshape(1, -1), metric='cosine')[0, 0]
            return distance
        except Exception as e:
            logging.error(f"Error computing appearance distance: {e}")
            return 1.0  # Default to max distance on error

    def associate_detections_to_tracks(self, predicted_boxes, det_boxes, det_appearances):
        """Associate detections to tracks using motion and appearance costs."""
        if not predicted_boxes or not det_boxes:
            return [], set(range(len(det_boxes))), set(range(len(self.tracks)))

        # Compute motion cost (1 - IoU)
        motion_cost = np.array([[1 - self.compute_iou(p, d) for d in det_boxes] for p in predicted_boxes])
        
        # Compute appearance cost
        appearance_cost = np.array([[self.compute_appearance_distance(t.appearance, d) 
                                   for d in det_appearances] for t in self.tracks])
        
        # Combine costs (weighted sum)
        cost_matrix = 0.5 * motion_cost + 0.5 * appearance_cost

        # Hungarian assignment
        row_ind, col_ind = linear_sum_assignment(cost_matrix)
        
        # Filter matches based on thresholds
        matches = []
        unmatched_dets = set(range(len(det_boxes)))
        unmatched_tracks = set(range(len(self.tracks)))
        for r, c in zip(row_ind, col_ind):
            if (motion_cost[r, c] < 1 - self.iou_threshold and 
                appearance_cost[r, c] < self.appearance_threshold):
                matches.append((r, c))
                unmatched_dets.discard(c)
                unmatched_tracks.discard(r)

        return matches, unmatched_dets, unmatched_tracks

    def update(self, detections, frame):
        """Update the tracker with new detections for the current frame."""
        # Predict existing tracks
        predicted_boxes = [track.predict() for track in self.tracks]

        # Extract detections
        det_boxes = [det["bbox"] for det in detections]
        det_appearances = [cv2.calcHist([frame[int(box[1]):int(box[3]), int(box[0]):int(box[2])]], 
                                        [0], None, [256], [0, 256]) for box in det_boxes]

        # Associate detections to tracks
        matches, unmatched_dets, unmatched_tracks = self.associate_detections_to_tracks(
            predicted_boxes, det_boxes, det_appearances
        )

        # Update matched tracks
        for track_idx, det_idx in matches:
            self.tracks[track_idx].update(det_boxes[det_idx], det_appearances[det_idx])

        # Create new tracks for unmatched detections
        for det_idx in unmatched_dets:
            new_track = Track(self.next_id, det_boxes[det_idx], det_appearances[det_idx])
            self.tracks.append(new_track)
            self.next_id += 1
            logging.info(f"Created new track {new_track.track_id}")

        # Remove old unmatched tracks
        self.tracks = [t for t in self.tracks if t.time_since_update <= self.max_age]
        logging.debug(f"Active tracks: {len(self.tracks)}")

        # Return confirmed tracks
        confirmed_tracks = [(t.track_id, t.kf.x[:4].flatten()) for t in self.tracks if t.hits >= self.n_init]
        return confirmed_tracks


## 9. MOT Metrics
Evaluation metrics implementation:
- MOTA (Multi-Object Tracking Accuracy)
- MOTP (Multi-Object Tracking Precision)
- IDF1 (ID F1 Score)
- Track statistics

In [34]:
import numpy as np
from collections import defaultdict
import pandas as pd

class MOTMetrics:
    def __init__(self):
        self.reset()
    
    def reset(self):
        self.matches = 0
        self.misses = 0
        self.false_positives = 0
        self.id_switches = 0
        self.gt_tracks = set()
        self.pred_tracks = set()
        self.prev_matches = {}
    
    def compute_iou(self, bbox1, bbox2):
        """Compute IOU between two bounding boxes [x,y,w,h]"""
        x1, y1, w1, h1 = bbox1
        x2, y2, w2, h2 = bbox2
        
        # Convert to x1,y1,x2,y2 format
        box1 = [x1, y1, x1+w1, y1+h1]
        box2 = [x2, y2, x2+w2, y2+h2]
        
        xi1 = max(box1[0], box2[0])
        yi1 = max(box1[1], box2[1])
        xi2 = min(box1[2], box2[2])
        yi2 = min(box1[3], box2[3])
        
        intersection = max(0, xi2 - xi1) * max(0, yi2 - yi1)
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union = box1_area + box2_area - intersection
        
        return intersection / (union + 1e-6)
    
    def update(self, gt_frame, pred_frame, iou_threshold=0.5):
        """Update metrics for one frame"""
        matched_gt = set()
        matched_pred = set()
        
        # Convert frame data to dictionaries
        gt_dict = {int(row[1]): row[2:6] for row in gt_frame}
        pred_dict = {int(row[1]): row[2:6] for row in pred_frame}
        
        # Update track sets
        self.gt_tracks.update(gt_dict.keys())
        self.pred_tracks.update(pred_dict.keys())
        
        # Compute IOUs between all pairs
        for gt_id, gt_bbox in gt_dict.items():
            for pred_id, pred_bbox in pred_dict.items():
                if pred_id in matched_pred:
                    continue
                    
                iou = self.compute_iou(gt_bbox, pred_bbox)
                if iou >= iou_threshold:
                    matched_gt.add(gt_id)
                    matched_pred.add(pred_id)
                    self.matches += 1
                    
                    # Check for ID switch
                    if gt_id in self.prev_matches and self.prev_matches[gt_id] != pred_id:
                        self.id_switches += 1
                    self.prev_matches[gt_id] = pred_id
                    break
        
        # Count misses and false positives
        self.misses += len(gt_dict) - len(matched_gt)
        self.false_positives += len(pred_dict) - len(matched_pred)
    
    def compute_metrics(self):
        """Compute final MOT metrics"""
        total_gt = self.matches + self.misses
        total_pred = self.matches + self.false_positives
        
        # MOTA (Multi-Object Tracking Accuracy)
        mota = 1 - (self.misses + self.false_positives + self.id_switches) / (total_gt + 1e-6)
        
        # MOTP (Multi-Object Tracking Precision)
        precision = self.matches / (total_pred + 1e-6)
        
        # IDF1 (ID F1 Score)
        idf1 = 2 * self.matches / (total_gt + total_pred + 1e-6)
        
        # Track statistics
        num_tracks = len(self.gt_tracks)
        
        return {
            'MOTA': mota,
            'MOTP': precision,
            'IDF1': idf1,
            'Misses': self.misses,
            'False Positives': self.false_positives,
            'ID Switches': self.id_switches,
            'Total Tracks': num_tracks
        }

## 10. Tracking Pipeline
Complete tracking system that:
- Loads detections
- Applies DeepSORT tracking
- Generates visualizations
- Computes metrics

In [35]:
def evaluate_tracking(gt_path, tracking_results_path):
    """Evaluate tracking results against ground truth"""
    # Load ground truth and tracking results
    gt_data = np.loadtxt(gt_path, delimiter=',')
    pred_data = np.loadtxt(tracking_results_path, delimiter=',')
    
    # Group by frame
    gt_frames = defaultdict(list)
    pred_frames = defaultdict(list)
    
    for row in gt_data:
        if row[6] == 1:  # Consider only ground truth objects marked as visible
            gt_frames[int(row[0])].append(row)
    for row in pred_data:
        pred_frames[int(row[0])].append(row)
    
    # Initialize metrics
    metrics = MOTMetrics()
    
    # Process each frame
    all_frames = sorted(set(gt_frames.keys()) | set(pred_frames.keys()))
    for frame_id in all_frames:
        gt_frame = gt_frames.get(frame_id, [])
        pred_frame = pred_frames.get(frame_id, [])
        metrics.update(gt_frame, pred_frame)
    
    # Compute final metrics
    results = metrics.compute_metrics()
    
    # Print results
    print("\nTracking Evaluation Results:")
    print("-" * 30)
    for metric, value in results.items():
        print(f"{metric}: {value:.3f}")
    
    # Save results to CSV
    df = pd.DataFrame([results])
    results_csv = tracking_results_path.replace('.txt', '_metrics.csv')
    df.to_csv(results_csv, index=False)
    print(f"\nMetrics saved to: {results_csv}")
    
    return results

In [36]:
def plot_metrics(metrics, output_path):
    """Plot tracking metrics visualization"""
    plt.figure(figsize=(12, 6))
    
    # Create bar plot for main metrics
    main_metrics = ['MOTA', 'MOTP', 'IDF1']
    values = [metrics[m] for m in main_metrics]
    
    bars = plt.bar(main_metrics, values, color=['#2ecc71', '#3498db', '#e74c3c'])
    plt.title('Multiple Object Tracking Metrics', fontsize=14, pad=20)
    plt.ylabel('Score', fontsize=12)
    plt.ylim(0, 1)
    
    # Add value labels on bars
    for bar, val in zip(bars, values):
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height,
                f'{val:.3f}',
                ha='center', va='bottom')
    
    # Add additional metrics table
    other_metrics = ['Misses', 'False Positives', 'ID Switches']
    other_values = [metrics[m] for m in other_metrics]
    table_data = [[metric, f"{value:.0f}"] for metric, value in zip(other_metrics, other_values)]
    
    plt.table(cellText=table_data,
              cellLoc='center',
              loc='bottom',
              bbox=[0.2, -0.3, 0.6, 0.2])
    
    plt.tight_layout()
    plt.savefig(output_path, bbox_inches='tight', dpi=300)
    plt.close()

In [37]:
def run_tracking(input_path, detections_pkl, output_path, gt_path=None):
    """Run tracking pipeline for both sequence and video inputs"""
    # Load detections
    with open(detections_pkl, 'rb') as f:
        all_detections = pickle.load(f)
    
    # Initialize tracker
    tracker = DeepSORT(
        max_age=30,
        n_init=3,
        nn_budget=100,
        iou_threshold=0.3,
        appearance_threshold=0.95
    )
    
    # Determine input type
    is_video = input_path.endswith(('.mp4', '.avi', '.mov'))
    
    if is_video:
        # For video input, read frames directly
        cap = cv2.VideoCapture(input_path)
        if not cap.isOpened():
            raise ValueError(f"Failed to open video: {input_path}")
        
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        ret, first_frame = cap.read()
        if not ret:
            raise ValueError("Failed to read first frame")
        
        frame_size = (first_frame.shape[1], first_frame.shape[0])
    else:
        # For sequence input, read from img1 directory
        img_dir = os.path.join(input_path, 'img1')
        if not os.path.exists(img_dir):
            raise ValueError(f"Invalid sequence directory: {img_dir}")
        
        img_files = sorted(os.listdir(img_dir))
        total_frames = len(img_files)
        fps = 30  # Default for MOT17
        first_frame = cv2.imread(os.path.join(img_dir, img_files[0]))
        frame_size = (first_frame.shape[1], first_frame.shape[0])
    
    # Initialize video writer
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frame_size)
    
    # Initialize tracking results
    tracking_results = []
    
    # Process frames
    print("\nRunning tracking...")
    with tqdm(total=total_frames, desc="Tracking", bar_format='{l_bar}{bar:30}{r_bar}') as pbar:
        frame_id = 0
        
        while True:
            if is_video:
                ret, frame = cap.read()
                if not ret:
                    break
            else:
                if frame_id >= total_frames:
                    break
                frame = cv2.imread(os.path.join(img_dir, img_files[frame_id]))
            
            # Get detections for current frame
            frame_dets = [d for d in all_detections if d['frame'] == frame_id + 1]
            
            if frame_dets:
                # Update tracker
                tracks = tracker.update(frame_dets, frame)
                
                # Process tracks
                for track_id, bbox in tracks:
                    x1, y1, x2, y2 = bbox
                    w, h = x2 - x1, y2 - y1
                    
                    # Store results
                    tracking_results.append([
                        frame_id + 1, track_id, x1, y1, w, h, 1, -1, -1, -1
                    ])
                    
                    # Visualize
                    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), 
                                (0, 255, 0), 2)
                    cv2.putText(frame, f'ID: {track_id}', (int(x1), int(y1)-10),
                              cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            
            # Write frame
            out.write(frame)
            frame_id += 1
            pbar.update(1)
    
    # Cleanup
    out.release()
    if is_video:
        cap.release()
    
    # Save tracking results
    if tracking_results:
        results_path = output_path.replace('.avi', '.txt')
        np.savetxt(results_path, np.array(tracking_results),
                  delimiter=',',
                  fmt='%d,%d,%.2f,%.2f,%.2f,%.2f,%d,%d,%d,%d')
    
    # Evaluate if ground truth is provided
    if gt_path and os.path.exists(gt_path):
        print("\nEvaluating tracking results...")
        metrics = evaluate_tracking(gt_path, results_path)
        return metrics
    
    return None

In [38]:
def main():
    """Main function for tracking with multi-input support"""
    print("\n" + "="*50)
    print("Multi-Object Tracking Pipeline")
    print("="*50)
    
    # Select input type
    print("\nSelect input type:")
    print("1. MOT17 Sequence")
    print("2. Video file")
    
    while True:
        choice = input("\nEnter choice (1 or 2): ").strip()
        if choice in ['1', '2']:
            break
        print("Invalid choice. Please enter 1 or 2.")
    
    # Handle input based on choice
    if choice == '1':
        # MOT17 Sequence
        sequence_path = input("\nEnter MOT17 sequence path (or press Enter for default): ").strip()
        if not sequence_path:
            sequence_path = "/scratch/b22ai025/MOT17/train/MOT17-02-DPM"
        
        if not os.path.exists(sequence_path):
            raise ValueError(f"Invalid sequence path: {sequence_path}")
            
        input_path = sequence_path
        detections_pkl = os.path.join("/csehome/b22ai025/Project/Scratch/Detections", "sequence_output", "detections.pkl")
        output_path = os.path.join("/csehome/b22ai025/Project/Scratch/Detections", "sequence_output", "tracked.avi")
        gt_path = os.path.join(sequence_path, "gt/gt.txt")
        
    else:
        # Video file
        video_path = input("\nEnter video file path (.mp4/.avi): ").strip()
        if not video_path.endswith(('.mp4', '.avi', '.mov')):
            raise ValueError("Invalid video format. Supported formats: .mp4, .avi, .mov")
        if not os.path.exists(video_path):
            raise FileNotFoundError(f"Video file not found: {video_path}")
            
        input_path = video_path
        detections_pkl = os.path.join("/csehome/b22ai025/Project/Scratch/Detections", "video_output", "detections.pkl")
        output_path = os.path.join("/csehome/b22ai025/Project/Scratch/Detections", "video_output", "tracked.avi")
        gt_path = None  # No ground truth for video input
    
    # Create output directory
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    
    # Verify detections file exists
    if not os.path.exists(detections_pkl):
        raise FileNotFoundError(
            f"Detections file not found: {detections_pkl}\n"
            "Please run detection pipeline first!"
        )
    
    # Run tracking pipeline
    print("\nStarting tracking pipeline...")
    print(f"Using detections from: {detections_pkl}")
    
    try:
        metrics = run_tracking(input_path, detections_pkl, output_path, gt_path)
        
        print("\n" + "="*50)
        print("Tracking Pipeline Summary")
        print("="*50)
        print(f"Input type: {'MOT17 Sequence' if choice == '1' else 'Video'}")
        print(f"Output saved to: {output_path}")
        
        if metrics:
            print("\nTracking Metrics:")
            print("-"*20)
            for metric, value in metrics.items():
                print(f"{metric}: {value:.3f}")
        else:
            print("\nNote: Evaluation skipped (no ground truth available)")
        
        print("="*50)
        
    except Exception as e:
        print(f"\nError during tracking: {str(e)}")
        raise

if __name__ == "__main__":
    main()


Multi-Object Tracking Pipeline

Select input type:
1. MOT17 Sequence
2. Video file
Invalid choice. Please enter 1 or 2.
Invalid choice. Please enter 1 or 2.
Invalid choice. Please enter 1 or 2.
Invalid choice. Please enter 1 or 2.

Starting tracking pipeline...
Using detections from: /csehome/b22ai025/Project/Scratch/Detections/sequence_output/detections.pkl

Running tracking...


Tracking: 100%|██████████████████████████████| 600/600 [00:21<00:00, 27.74it/s]



Evaluating tracking results...

Tracking Evaluation Results:
------------------------------
MOTA: 0.529
MOTP: 0.783
IDF1: 0.769
Misses: 4529.000
False Positives: 3905.000
ID Switches: 325.000
Total Tracks: 62.000

Metrics saved to: /csehome/b22ai025/Project/Scratch/Detections/sequence_output/tracked_metrics.csv

Tracking Pipeline Summary
Input type: MOT17 Sequence
Output saved to: /csehome/b22ai025/Project/Scratch/Detections/sequence_output/tracked.avi

Tracking Metrics:
--------------------
MOTA: 0.529
MOTP: 0.783
IDF1: 0.769
Misses: 4529.000
False Positives: 3905.000
ID Switches: 325.000
Total Tracks: 62.000
