In [3]:
import os
import re
import torch
import cv2
import numpy as np
from tqdm import tqdm
from ultralytics import YOLO
import mediapipe as mp
from torchvision import transforms
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Load YOLOv8 model
yolo_model = YOLO("yolov8x.pt")

# Load Mediapipe Pose
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

def detect_humans(frame):
    """Detect humans using YOLOv8."""
    results = yolo_model(frame)[0]
    humans = [box.xyxy.tolist() for box in results.boxes if int(box.cls) == 0]
    return humans

def extract_keypoints(frame):
    """Extract human keypoints using Mediapipe."""
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Ensure frame is uint8 with values in range [0, 255]
    frame_rgb = (frame_rgb * 255).astype(np.uint8) if frame_rgb.dtype != np.uint8 else frame_rgb

    results = pose.process(frame_rgb)
    keypoints = []
    if results.pose_landmarks:
        for landmark in results.pose_landmarks.landmark:
            keypoints.append([landmark.x, landmark.y, landmark.z, landmark.visibility])
    
    return np.array(keypoints).flatten()


def extract_features(frame):
    """Extract YOLO & Mediapipe features from a frame."""
    humans = detect_humans(frame)
    keypoints = extract_keypoints(frame)

    human_bbox = np.array(humans).flatten()[:8]  # First 8 values (x1, y1, x2, y2, confidence, class, etc.)
    if len(human_bbox) < 8:
        human_bbox = np.pad(human_bbox, (0, 8 - len(human_bbox)), mode="constant")

    # Ensure keypoints do not exceed 75
    keypoints = keypoints[:75]  # Trim excess if necessary

    # Pad keypoints only if they are fewer than 75
    if len(keypoints) < 75:
        keypoints = np.pad(keypoints, (0, 75 - len(keypoints)), mode="constant")

    return np.concatenate((human_bbox, keypoints))


class C3D(nn.Module):
    def __init__(self, num_classes=5):
        super(C3D, self).__init__()
        self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.bn1 = nn.BatchNorm3d(64)
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.bn2 = nn.BatchNorm3d(128)
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv3 = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.bn3 = nn.BatchNorm3d(256)
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self._feature_size = self._get_conv_output()

        self.fc1 = nn.Linear(self._feature_size, 512)
        self.dropout = nn.Dropout(0.4)
        self.fc2 = nn.Linear(512, num_classes)

    def _get_conv_output(self):
        with torch.no_grad():
            x = torch.zeros(1, 3, 16, 112, 112)
            x = self.pool1(torch.relu(self.bn1(self.conv1(x))))
            x = self.pool2(torch.relu(self.bn2(self.conv2(x))))
            x = self.pool3(torch.relu(self.bn3(self.conv3(x))))
            return int(np.prod(x.shape[1:]))

    def forward(self, x):
        x = self.pool1(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool2(torch.relu(self.bn2(self.conv2(x))))
        x = self.pool3(torch.relu(self.bn3(self.conv3(x))))
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

class VideoDataset(Dataset):
    def __init__(self, video_paths, labels, sequence_length=16):
        self.video_paths = video_paths
        self.labels = labels
        self.sequence_length = sequence_length
        self.label_to_idx = {label: idx for idx, label in enumerate(set(labels))}

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_path = self.video_paths[idx]
        label = self.labels[idx]
        frames = self.load_frames(video_path)

        frames = np.stack(frames, axis=0)
        frames = frames.transpose(3, 0, 1, 2)
        frames = torch.tensor(frames, dtype=torch.float32) / 255.0

        feature_vector = extract_features(frames[:, -1, :, :].permute(1, 2, 0).numpy())

        return frames, torch.tensor(feature_vector, dtype=torch.float32), self.label_to_idx[label]

    def load_frames(self, video_path):
        cap = cv2.VideoCapture(video_path)
        frames = []
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (112, 112))
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
            if len(frames) == self.sequence_length:
                break
        cap.release()

        while len(frames) < self.sequence_length:
            frames.append(frames[-1])

        return frames

def extract_label(filename):
    match = re.match(r"^[^\d]+", filename)
    return match.group(0).strip() if match else None

def load_video_paths_and_labels(folder_path):
    video_paths, labels = [], []
    for root, _, files in os.walk(folder_path):
        for file in files:
            if file.endswith(('.mp4', '.avi', '.mov')):
                video_path = os.path.join(root, file)
                label = extract_label(file)
                if label:
                    video_paths.append(video_path)
                    labels.append(label)
    return video_paths, labels

folder_path = "D:/CLASS NOTES/EPICS/Model Dataset/Anomaly-Videos"
video_paths, labels = load_video_paths_and_labels(folder_path)

train_paths, val_paths, train_labels, val_labels = train_test_split(video_paths, labels, test_size=0.2, random_state=42)

train_dataset = VideoDataset(train_paths, train_labels)
val_dataset = VideoDataset(val_paths, val_labels)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

num_classes = len(set(labels))
c3d_model = C3D(num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
c3d_model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(c3d_model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)

num_epochs = 10
for epoch in range(num_epochs):
    c3d_model.train()
    running_loss = 0.0
    total_batches = len(train_loader)
    pbar = tqdm(enumerate(train_loader), total=total_batches, desc=f"Epoch {epoch+1}/{num_epochs}")

    for batch_idx, (inputs, features, labels) in pbar:
        inputs, features, labels = inputs.to(device), features.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = c3d_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        pbar.set_postfix(loss=running_loss / (batch_idx + 1))

    scheduler.step()

    c3d_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, features, labels in val_loader:
            inputs, features, labels = inputs.to(device), features.to(device), labels.to(device)
            outputs = c3d_model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Validation Accuracy: {100 * correct / total}%")

torch.save(c3d_model.state_dict(), 'c3d_anomaly_detection.pth')


Epoch 1/10:   0%|          | 0/25 [00:00<?, ?it/s]


0: 640x640 (no detections), 2428.3ms
Speed: 7.7ms preprocess, 2428.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2422.5ms
Speed: 9.0ms preprocess, 2422.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2382.4ms
Speed: 8.4ms preprocess, 2382.4ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2347.7ms
Speed: 7.0ms preprocess, 2347.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2352.3ms
Speed: 8.0ms preprocess, 2352.3ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2379.2ms
Speed: 8.0ms preprocess, 2379.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2348.4ms
Speed: 7.7ms preprocess, 2348.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2349.9ms
Spe

Epoch 1/10:   4%|▍         | 1/25 [00:27<10:49, 27.06s/it, loss=1.39]


0: 640x640 (no detections), 2431.8ms
Speed: 9.0ms preprocess, 2431.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2416.9ms
Speed: 6.3ms preprocess, 2416.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2415.3ms
Speed: 7.0ms preprocess, 2415.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2417.1ms
Speed: 8.9ms preprocess, 2417.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2419.1ms
Speed: 7.4ms preprocess, 2419.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2423.1ms
Speed: 6.4ms preprocess, 2423.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2412.4ms
Speed: 9.4ms preprocess, 2412.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2418.6ms
Spe

Epoch 1/10:   8%|▊         | 2/25 [00:51<09:48, 25.57s/it, loss=88]  


0: 640x640 (no detections), 2415.8ms
Speed: 9.0ms preprocess, 2415.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2417.3ms
Speed: 8.0ms preprocess, 2417.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2415.7ms
Speed: 7.1ms preprocess, 2415.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2412.6ms
Speed: 8.0ms preprocess, 2412.6ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2416.7ms
Speed: 8.0ms preprocess, 2416.7ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2415.2ms
Speed: 7.5ms preprocess, 2415.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2412.3ms
Speed: 8.5ms preprocess, 2412.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2422.6ms
Spe

Epoch 1/10:  12%|█▏        | 3/25 [01:16<09:11, 25.08s/it, loss=113]


0: 640x640 (no detections), 2422.1ms
Speed: 8.0ms preprocess, 2422.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2419.1ms
Speed: 8.1ms preprocess, 2419.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2422.2ms
Speed: 9.0ms preprocess, 2422.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2420.3ms
Speed: 6.0ms preprocess, 2420.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2420.5ms
Speed: 7.6ms preprocess, 2420.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2420.8ms
Speed: 7.4ms preprocess, 2420.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2419.9ms
Speed: 7.0ms preprocess, 2419.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2423.5ms
Spe

Epoch 1/10:  16%|█▌        | 4/25 [01:40<08:42, 24.87s/it, loss=153]


0: 640x640 (no detections), 2423.5ms
Speed: 9.0ms preprocess, 2423.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2416.1ms
Speed: 8.3ms preprocess, 2416.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2418.3ms
Speed: 8.0ms preprocess, 2418.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2417.4ms
Speed: 8.0ms preprocess, 2417.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2418.8ms
Speed: 7.8ms preprocess, 2418.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2416.8ms
Speed: 9.0ms preprocess, 2416.8ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2426.0ms
Speed: 8.6ms preprocess, 2426.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2422.3ms
Spe

Epoch 1/10:  20%|██        | 5/25 [02:05<08:15, 24.76s/it, loss=145]


0: 640x640 (no detections), 2425.6ms
Speed: 9.0ms preprocess, 2425.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2426.3ms
Speed: 8.0ms preprocess, 2426.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2425.1ms
Speed: 7.5ms preprocess, 2425.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2419.4ms
Speed: 9.2ms preprocess, 2419.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2420.6ms
Speed: 8.0ms preprocess, 2420.6ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2494.9ms
Speed: 9.0ms preprocess, 2494.9ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2513.1ms
Speed: 9.0ms preprocess, 2513.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2497.6ms
Spe

Epoch 1/10:  24%|██▍       | 6/25 [02:30<07:52, 24.85s/it, loss=133]


0: 640x640 (no detections), 2498.2ms
Speed: 10.7ms preprocess, 2498.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2491.9ms
Speed: 19.6ms preprocess, 2491.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2501.5ms
Speed: 8.2ms preprocess, 2501.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2488.7ms
Speed: 9.0ms preprocess, 2488.7ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2495.0ms
Speed: 8.1ms preprocess, 2495.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2495.7ms
Speed: 8.4ms preprocess, 2495.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2469.5ms
Speed: 10.2ms preprocess, 2469.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2407.9ms


Epoch 1/10:  28%|██▊       | 7/25 [02:55<07:27, 24.88s/it, loss=132]


0: 640x640 (no detections), 2353.8ms
Speed: 9.0ms preprocess, 2353.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2379.4ms
Speed: 8.3ms preprocess, 2379.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.3ms
Speed: 8.3ms preprocess, 2363.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2358.2ms
Speed: 8.2ms preprocess, 2358.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.8ms
Speed: 9.0ms preprocess, 2363.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2360.9ms
Speed: 10.0ms preprocess, 2360.9ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2358.6ms
Speed: 15.0ms preprocess, 2358.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2359.5ms
S

Epoch 1/10:  32%|███▏      | 8/25 [03:19<06:59, 24.65s/it, loss=123]


0: 640x640 (no detections), 2362.2ms
Speed: 13.0ms preprocess, 2362.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2358.6ms
Speed: 14.0ms preprocess, 2358.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2362.3ms
Speed: 13.0ms preprocess, 2362.3ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2365.6ms
Speed: 13.0ms preprocess, 2365.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2359.9ms
Speed: 13.0ms preprocess, 2359.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2359.9ms
Speed: 14.0ms preprocess, 2359.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2410.8ms
Speed: 12.0ms preprocess, 2410.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2385.

Epoch 1/10:  36%|███▌      | 9/25 [03:43<06:33, 24.57s/it, loss=127]


0: 640x640 (no detections), 2373.7ms
Speed: 11.0ms preprocess, 2373.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2360.1ms
Speed: 16.0ms preprocess, 2360.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2360.4ms
Speed: 11.3ms preprocess, 2360.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2362.3ms
Speed: 12.0ms preprocess, 2362.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2365.9ms
Speed: 14.0ms preprocess, 2365.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2365.2ms
Speed: 12.0ms preprocess, 2365.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.9ms
Speed: 12.0ms preprocess, 2363.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2353.

Epoch 1/10:  40%|████      | 10/25 [04:07<06:07, 24.48s/it, loss=129]


0: 640x640 (no detections), 2364.9ms
Speed: 12.0ms preprocess, 2364.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2360.8ms
Speed: 19.5ms preprocess, 2360.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2361.2ms
Speed: 11.0ms preprocess, 2361.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2364.4ms
Speed: 13.5ms preprocess, 2364.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2357.5ms
Speed: 13.4ms preprocess, 2357.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2360.8ms
Speed: 12.7ms preprocess, 2360.8ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.0ms
Speed: 11.5ms preprocess, 2363.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2361.

Epoch 1/10:  44%|████▍     | 11/25 [04:32<05:41, 24.41s/it, loss=126]


0: 640x640 (no detections), 2363.8ms
Speed: 14.0ms preprocess, 2363.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.7ms
Speed: 11.6ms preprocess, 2363.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2369.1ms
Speed: 11.5ms preprocess, 2369.1ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2362.8ms
Speed: 12.0ms preprocess, 2362.8ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2360.4ms
Speed: 12.6ms preprocess, 2360.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2370.8ms
Speed: 12.0ms preprocess, 2370.8ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2438.4ms
Speed: 13.0ms preprocess, 2438.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2415.

Epoch 1/10:  48%|████▊     | 12/25 [04:56<05:17, 24.44s/it, loss=125]


0: 640x640 (no detections), 2425.2ms
Speed: 10.0ms preprocess, 2425.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2413.8ms
Speed: 9.4ms preprocess, 2413.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2408.3ms
Speed: 8.5ms preprocess, 2408.3ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2417.2ms
Speed: 9.0ms preprocess, 2417.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2437.8ms
Speed: 9.3ms preprocess, 2437.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2436.5ms
Speed: 9.0ms preprocess, 2436.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2436.8ms
Speed: 10.0ms preprocess, 2436.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2415.6ms
S

Epoch 1/10:  52%|█████▏    | 13/25 [05:21<04:53, 24.46s/it, loss=120]


0: 640x640 (no detections), 2412.9ms
Speed: 9.0ms preprocess, 2412.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2387.9ms
Speed: 8.1ms preprocess, 2387.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2364.2ms
Speed: 7.1ms preprocess, 2364.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2366.3ms
Speed: 8.0ms preprocess, 2366.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.9ms
Speed: 7.8ms preprocess, 2363.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2361.0ms
Speed: 10.0ms preprocess, 2361.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2367.3ms
Speed: 8.0ms preprocess, 2367.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2362.6ms
Sp

Epoch 1/10:  56%|█████▌    | 14/25 [05:45<04:27, 24.33s/it, loss=116]


0: 640x640 (no detections), 2362.7ms
Speed: 10.0ms preprocess, 2362.7ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2359.2ms
Speed: 9.5ms preprocess, 2359.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2361.7ms
Speed: 8.3ms preprocess, 2361.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2362.6ms
Speed: 8.0ms preprocess, 2362.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2360.9ms
Speed: 8.2ms preprocess, 2360.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2361.1ms
Speed: 9.0ms preprocess, 2361.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2362.4ms
Speed: 10.0ms preprocess, 2362.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2357.5ms
S

Epoch 1/10:  60%|██████    | 15/25 [06:09<04:02, 24.24s/it, loss=112]


0: 640x640 (no detections), 2362.3ms
Speed: 10.0ms preprocess, 2362.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2369.5ms
Speed: 7.2ms preprocess, 2369.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.7ms
Speed: 7.0ms preprocess, 2363.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2361.1ms
Speed: 9.0ms preprocess, 2361.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2361.5ms
Speed: 10.0ms preprocess, 2361.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2361.8ms
Speed: 8.6ms preprocess, 2361.8ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2360.0ms
Speed: 8.4ms preprocess, 2360.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.0ms
S

Epoch 1/10:  64%|██████▍   | 16/25 [06:33<03:37, 24.17s/it, loss=110]


0: 640x640 (no detections), 2367.1ms
Speed: 9.1ms preprocess, 2367.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2360.6ms
Speed: 9.0ms preprocess, 2360.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2362.2ms
Speed: 9.0ms preprocess, 2362.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.9ms
Speed: 7.6ms preprocess, 2363.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2360.5ms
Speed: 9.0ms preprocess, 2360.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.0ms
Speed: 8.0ms preprocess, 2363.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.3ms
Speed: 8.0ms preprocess, 2363.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2359.8ms
Spe

Epoch 1/10:  68%|██████▊   | 17/25 [06:57<03:12, 24.12s/it, loss=106]


0: 640x640 (no detections), 2363.7ms
Speed: 9.0ms preprocess, 2363.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.7ms
Speed: 7.0ms preprocess, 2363.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2358.5ms
Speed: 8.0ms preprocess, 2358.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2358.1ms
Speed: 9.3ms preprocess, 2358.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2357.6ms
Speed: 7.0ms preprocess, 2357.6ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2359.2ms
Speed: 9.3ms preprocess, 2359.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2364.5ms
Speed: 9.0ms preprocess, 2364.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.8ms
Spe

Epoch 1/10:  72%|███████▏  | 18/25 [07:21<02:48, 24.08s/it, loss=106]


0: 640x640 (no detections), 2356.1ms
Speed: 8.0ms preprocess, 2356.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2363.2ms
Speed: 8.0ms preprocess, 2363.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2356.8ms
Speed: 9.1ms preprocess, 2356.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2372.3ms
Speed: 8.0ms preprocess, 2372.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2359.9ms
Speed: 9.0ms preprocess, 2359.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2353.2ms
Speed: 9.0ms preprocess, 2353.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2356.4ms
Speed: 8.4ms preprocess, 2356.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2358.4ms
Spe

Epoch 1/10:  76%|███████▌  | 19/25 [07:45<02:24, 24.03s/it, loss=103]


0: 640x640 (no detections), 2362.5ms
Speed: 8.4ms preprocess, 2362.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2364.9ms
Speed: 7.0ms preprocess, 2364.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2356.4ms
Speed: 9.2ms preprocess, 2356.4ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2360.3ms
Speed: 8.0ms preprocess, 2360.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2390.3ms
Speed: 7.5ms preprocess, 2390.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2421.0ms
Speed: 9.0ms preprocess, 2421.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2434.8ms
Speed: 8.0ms preprocess, 2434.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2413.2ms
Spe

Epoch 1/10:  80%|████████  | 20/25 [08:09<02:00, 24.13s/it, loss=101]


0: 640x640 (no detections), 2421.2ms
Speed: 8.0ms preprocess, 2421.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2400.9ms
Speed: 8.0ms preprocess, 2400.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2386.9ms
Speed: 8.3ms preprocess, 2386.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2445.5ms
Speed: 8.0ms preprocess, 2445.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2391.3ms
Speed: 9.1ms preprocess, 2391.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2395.2ms
Speed: 10.5ms preprocess, 2395.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2394.1ms
Speed: 8.0ms preprocess, 2394.1ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2403.3ms
Sp

Epoch 1/10:  84%|████████▍ | 21/25 [08:34<01:36, 24.24s/it, loss=98.7]


0: 640x640 (no detections), 2493.0ms
Speed: 11.5ms preprocess, 2493.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2458.9ms
Speed: 9.0ms preprocess, 2458.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2468.5ms
Speed: 7.9ms preprocess, 2468.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2436.3ms
Speed: 8.0ms preprocess, 2436.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2461.6ms
Speed: 8.5ms preprocess, 2461.6ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2451.0ms
Speed: 11.3ms preprocess, 2451.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2447.4ms
Speed: 10.0ms preprocess, 2447.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2491.9ms


Epoch 1/10:  88%|████████▊ | 22/25 [08:59<01:13, 24.45s/it, loss=95.9]


0: 640x640 (no detections), 2483.6ms
Speed: 9.8ms preprocess, 2483.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2430.1ms
Speed: 12.0ms preprocess, 2430.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2420.3ms
Speed: 9.0ms preprocess, 2420.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)



Epoch 1/10:  88%|████████▊ | 22/25 [09:09<01:14, 24.96s/it, loss=95.9]


KeyboardInterrupt: 