### Imports
---

In [1]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import AutoImageProcessor, VideoMAEForVideoClassification, TrainingArguments, Trainer
import evaluate
from PIL import Image
import numpy as np
from pathlib import Path
from tqdm import tqdm

from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

import cv2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pytorchvideo.data

from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    RemoveKey,
    ShortSideScale,
    UniformTemporalSubsample,
)

from torchvision.transforms import (
    Compose,
    Lambda,
    RandomCrop,
    RandomHorizontalFlip,
    Resize,
)


### Data Directories
---

In [3]:
train_root = r"C:\Users\rayaa\Downloads\ucf_crime_v2\Train"
test_root = r"C:\Users\rayaa\Downloads\ucf_crime_v2\Test"
val_root = r"C:\Users\rayaa\Downloads\ucf_crime_v2\Validation"

output_dir = r"C:\Users\rayaa\Downloads\ucf_crime_v2\output"
os.makedirs(output_dir, exist_ok=True)


### Verify Data
---

In [4]:
def count_videos(root_dir):
    total = 0
    for cls in os.listdir(root_dir):
        cls_path = os.path.join(root_dir, cls)
        if not os.path.isdir(cls_path):
            continue
        # Count subdirectories (each subdirectory is a video folder)
        num_videos = sum(
            1 for file in os.listdir(cls_path)
            if file.lower().endswith('.mp4')
        )
        print(f"{cls}: {num_videos} videos")
        total += num_videos
    return total

train_count = count_videos(train_root)
test_count = count_videos(test_root)
val_count = count_videos(val_root)

print(f"\nTotal Train videos: {train_count}")
print(f"Total Test videos: {test_count}")
print(f"Total Validation videos: {val_count}")

Abuse: 40 videos
Arrest: 40 videos
Arson: 40 videos
Assault: 40 videos
Burglary: 80 videos
Explosion: 40 videos
Fighting: 40 videos
NormalVideos: 170 videos
RoadAccidents: 120 videos
Robbery: 120 videos
Shooting: 40 videos
Shoplifting: 40 videos
Stealing: 80 videos
Vandalism: 40 videos
Abuse: 5 videos
Arrest: 5 videos
Arson: 5 videos
Assault: 5 videos
Burglary: 10 videos
Explosion: 5 videos
Fighting: 5 videos
NormalVideos: 150 videos
RoadAccidents: 15 videos
Robbery: 15 videos
Shooting: 5 videos
Shoplifting: 5 videos
Stealing: 10 videos
Vandalism: 5 videos
Abuse: 5 videos
Arrest: 5 videos
Arson: 5 videos
Assault: 5 videos
Burglary: 10 videos
Explosion: 5 videos
Fighting: 5 videos
NormalVideos: 19 videos
RoadAccidents: 15 videos
Robbery: 15 videos
Shooting: 5 videos
Shoplifting: 5 videos
Stealing: 10 videos
Vandalism: 5 videos

Total Train videos: 930
Total Test videos: 245
Total Validation videos: 114


### Data Processing
---

In [5]:
class_labels = sorted(os.listdir(train_root))
num_classes = len(class_labels)
label2id = {label: idx for idx, label in enumerate(class_labels)}
id2label = {idx: label for label, idx in label2id.items()}

print(label2id)

{'Abuse': 0, 'Arrest': 1, 'Arson': 2, 'Assault': 3, 'Burglary': 4, 'Explosion': 5, 'Fighting': 6, 'NormalVideos': 7, 'RoadAccidents': 8, 'Robbery': 9, 'Shooting': 10, 'Shoplifting': 11, 'Stealing': 12, 'Vandalism': 13}


In [6]:
def MapAndPath(root, label2id):
    videopaths = []

    for clss in sorted(os.listdir(root)):
        class_dir = os.path.join(root, clss)
        if not os.path.isdir(class_dir):
            continue

        for file in os.listdir(class_dir):
            if file.lower().endswith('.mp4'):
                filepath = os.path.join(class_dir, file)
                label_id = label2id[clss]
                videopaths.append((filepath, {"label": label_id, "video_path": filepath}))
                
    return videopaths
    

### Dataset Class
---

In [7]:
import numpy as np
import cv2
import torch
from torch.utils.data import Dataset

class SafeLabeledVideoDataset(Dataset):
    def __init__(self, labeled_video_paths, transform=None, num_frames=16, frame_size=224):
        self.labeled_video_paths = labeled_video_paths
        self.transform = transform
        self.num_frames = num_frames
        self.frame_size = frame_size
        self.length = len(labeled_video_paths)

    def __len__(self):
        return self.length

    def _load_video_cv2(self, video_path):
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise ValueError("Could not open video")

        frames = []
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_indices = np.linspace(0, total_frames - 1, self.num_frames, dtype=int)

        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, (self.frame_size, self.frame_size))
            # Store as (H, W, C) first
            frames.append(frame)

        cap.release()

        if len(frames) == 0:
            raise ValueError("No frames extracted")

        # Convert to numpy array and normalize
        video_array = np.array(frames).astype(np.float32) / 255.0  # (T, H, W, C)
        # Convert to tensor and permute to (C, T, H, W) for transforms
        video_tensor = torch.from_numpy(video_array).permute(3, 0, 1, 2)  # (C, T, H, W)
        
        return video_tensor

    def __getitem__(self, idx):
        video_path, metadata = self.labeled_video_paths[idx]
        try:
            video_tensor = self._load_video_cv2(video_path)
            print(f"After load: {video_tensor.shape}")

            if self.transform:
                video_dict = {"video": video_tensor}
                print(f"Before transform: {video_dict['video'].shape}")
                video_dict = self.transform(video_dict)
                print(f"After transform: {video_dict['video'].shape}")
                video_tensor = video_dict["video"]

            return {
                "video": video_tensor,
                "label": metadata["label"],
                "video_path": video_path,
            }

        except Exception as e:
            print(f"[LOAD FAILED] {video_path} | Error: {e}")
            import traceback
            traceback.print_exc()
            # Create dummy tensor in correct format
            dummy = torch.zeros(3, self.num_frames, self.frame_size, self.frame_size)
            return {"video": dummy, "label": metadata["label"], "video_path": video_path}

### Load VideoMAE
---

In [8]:
import transformers.utils as utils
utils.is_torch_available = lambda: True

In [9]:
model_name = "MCG-NJU/videomae-base"
image_processor = AutoImageProcessor.from_pretrained(model_name)
model = VideoMAEForVideoClassification.from_pretrained(model_name, num_labels=num_classes, label2id=label2id, id2label=id2label, ignore_mismatched_sizes=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

device

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Some weights of VideoMAEForVideoClassification were not initialized from the model checkpoint at MCG-NJU/videomae-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


device(type='cuda')

### Data Transforms & Dataset
---

In [10]:
mean = image_processor.image_mean
std = image_processor.image_std
if "shortest_edge" in image_processor.size:
    height = width = image_processor.size["shortest_edge"]
else:
    height = image_processor.size["height"]
    width = image_processor.size["width"]
resize_to = (height, width)

num_frames_to_sample = model.config.num_frames
sample_rate = 4
fps = 30
clip_duration = num_frames_to_sample * sample_rate / fps

train_transform = Compose(
    [
        ApplyTransformToKey(
            key="video",
            transform=Compose(
                [
                    UniformTemporalSubsample(num_frames_to_sample),
                    Lambda(lambda x: x / 255.0),
                    Normalize(mean, std),
                    RandomShortSideScale(min_size=256, max_size=320),
                    RandomCrop(resize_to),
                    RandomHorizontalFlip(p=0.5),
                ]
            ),
        ),
    ]
)

val_transform = Compose(
    [
        ApplyTransformToKey(
            key="video",
            transform=Compose(
                [
                    UniformTemporalSubsample(num_frames_to_sample),
                    Lambda(lambda x: x / 255.0),
                    Normalize(mean, std),
                    Resize(resize_to),
                ]
            ),
        ),
    ]
)




In [11]:
train_paths = MapAndPath(train_root, label2id)
val_paths = MapAndPath(val_root, label2id)
test_paths = MapAndPath(test_root, label2id)

print(f"Train: {len(train_paths)}, Test: {len(test_paths)}, Val: {len(val_paths)}")

train_dataset = SafeLabeledVideoDataset(
    labeled_video_paths=train_paths,
    transform=train_transform
    
)

test_dataset = SafeLabeledVideoDataset(
    labeled_video_paths=test_paths,
    transform=val_transform
    
)

val_dataset = SafeLabeledVideoDataset(
    labeled_video_paths=val_paths,
    transform=val_transform
    
)


Train: 930, Test: 245, Val: 114


### Trainere (NO TRAINING)
---

In [12]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    preds = np.argmax(eval_pred.predictions, axis=1)
    return metric.compute(predictions=preds, references=eval_pred.label_ids)

def collate_fn(examples):
    pixel_values = torch.stack([ex["video"] for ex in examples]) 
    # Ensure the tensor is in the right format for VideoMAE
    # VideoMAE expects: (batch_size, num_frames, num_channels, height, width)
    pixel_values = pixel_values.permute(0, 2, 1, 3, 4)  # from (B, C, T, H, W) to (B, T, C, H, W)
    
    labels = torch.tensor([ex["label"] for ex in examples])
    paths = [ex.get("video_path", "unknown") for ex in examples]
    return {"pixel_values": pixel_values, "labels": labels, "video_path": paths}

num_epochs = 10
batch_size = 3

training_args = TrainingArguments(
        output_dir=str(output_dir),
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        eval_strategy="epoch",
        save_strategy="epoch",
        logging_steps=10,
        learning_rate=5e-5,
        warmup_ratio=0.1,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        num_train_epochs=num_epochs,
        fp16=torch.cuda.is_available(),
        push_to_hub=False,
        remove_unused_columns=False,
        max_steps=(len(train_dataset) // batch_size) * num_epochs,
    )

trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=image_processor,
        compute_metrics=compute_metrics,
        data_collator=collate_fn,
    )


  trainer = Trainer(


In [13]:
# Check what the model expects
print(f"Model num_frames: {model.config.num_frames}")
print(f"Model image size: {model.config.image_size}")
print(f"Model num channels: {model.config.num_channels}")

# Make sure your transforms match the model config
num_frames_to_sample = model.config.num_frames

Model num_frames: 16
Model image size: 224
Model num channels: 3


### Testing Loop
---

In [14]:
model.eval()
all_preds, all_labels, failed_paths = [], [], []

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    num_workers=0,
    shuffle=False,
    collate_fn=collate_fn,
)

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Evaluating"):
        try:
            inputs = {k: v.to(device) for k, v in batch.items() if k not in ["labels", "video_path"]}
            labels = batch["labels"].to(device)
            outputs = model(**inputs)
            preds = outputs.logits.argmax(dim=1).cpu().tolist()
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().tolist())
        except Exception as e:
            print(f"Batch failed: {batch.get('video_path', [])} | {e}")
            failed_paths.extend(batch.get("video_path", []))
            # Add dummy preds to keep length
            all_preds.extend([0] * len(labels))
            all_labels.extend(labels.cpu().tolist())

print(f"Failed videos: {len(failed_paths)}")
if failed_paths:
    print("First 5 failed paths:", failed_paths[:5])

cm = confusion_matrix(all_labels, all_preds, labels=np.arange(num_classes))
plt.figure(figsize=(12, 10))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Test Set Confusion Matrix")
plt.tight_layout()
cm_path = output_dir / "confusion_matrix.png"
plt.savefig(cm_path)
plt.close()
print(f"Confusion matrix saved to {cm_path}")

# Classification report
report = classification_report(
    all_labels, all_preds, target_names=class_labels, digits=4
)
report_path = output_dir / "classification_report.txt"
with open(report_path, "w") as f:
    f.write(report)
print("\nClassification report:\n", report)
print(f"Report saved to {report_path}")

acc = np.trace(cm) / np.sum(cm)
print(f"\nOverall Test Accuracy: {acc:.4f}")

Evaluating:   0%|          | 0/82 [00:00<?, ?it/s]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:   1%|          | 1/82 [00:02<03:01,  2.24s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:   2%|▏         | 2/82 [00:04<02:42,  2.03s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:   4%|▎         | 3/82 [00:06<02:39,  2.01s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:   5%|▍         | 4/82 [00:07<02:29,  1.92s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:   6%|▌         | 5/82 [00:09<02:24,  1.88s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:   7%|▋         | 6/82 [00:11<02:31,  1.99s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:   9%|▊         | 7/82 [00:13<02:29,  1.99s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  10%|▉         | 8/82 [00:16<02:33,  2.07s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  11%|█         | 9/82 [00:18<02:26,  2.01s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  12%|█▏        | 10/82 [00:20<02:25,  2.02s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  13%|█▎        | 11/82 [00:22<02:24,  2.04s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  15%|█▍        | 12/82 [00:24<02:23,  2.05s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  16%|█▌        | 13/82 [00:26<02:23,  2.08s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  17%|█▋        | 14/82 [00:28<02:20,  2.06s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  18%|█▊        | 15/82 [00:30<02:14,  2.01s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  20%|█▉        | 16/82 [00:32<02:13,  2.02s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  21%|██        | 17/82 [00:34<02:09,  1.99s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  22%|██▏       | 18/82 [00:36<02:10,  2.03s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  23%|██▎       | 19/82 [00:38<02:07,  2.03s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  24%|██▍       | 20/82 [00:40<02:04,  2.01s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  26%|██▌       | 21/82 [00:42<02:01,  2.00s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  27%|██▋       | 22/82 [00:44<01:57,  1.96s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  28%|██▊       | 23/82 [00:46<01:56,  1.97s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  29%|██▉       | 24/82 [00:48<01:54,  1.97s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  30%|███       | 25/82 [00:50<01:52,  1.98s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  32%|███▏      | 26/82 [00:52<01:48,  1.95s/it]

After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])
After load: torch.Size([3, 16, 224, 224])
Before transform: torch.Size([3, 16, 224, 224])
After transform: torch.Size([3, 16, 224, 224])


Evaluating:  32%|███▏      | 26/82 [00:54<01:56,  2.08s/it]


KeyboardInterrupt: 