In [10]:
import os
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [11]:
# Ana dataset klasörü
dataset_path = "./dataset/Videos/Videos/"

# Normal ve Abnormal klasörlerini al
normal_path = os.path.join(dataset_path, "normal")
abnormal_path = os.path.join(dataset_path, "abnormal")

# Tüm sınıfları belirleyelim
abnormal_classes = [folder for folder in os.listdir(abnormal_path) if os.path.isdir(os.path.join(abnormal_path, folder))]
class_to_index = {cls: idx + 1 for idx, cls in enumerate(abnormal_classes)}  # Abnormal sınıfları 1, 2, 3... olarak etiketliyoruz
class_to_index["normal"] = 0  # Normal aktiviteleri 0 olarak etiketliyoruz

# DataFrame oluşturmak için liste
data = []

# Normal videoları ekleyelim
for subfolder in os.listdir(normal_path):
    subfolder_path = os.path.join(normal_path, subfolder)
    if os.path.isdir(subfolder_path):
        videos = [os.path.join(subfolder_path, v) for v in os.listdir(subfolder_path) if v.endswith(('.mp4', '.avi', '.mov'))]
        data.extend([(video, "normal", 0) for video in videos])  # Label = 0

# Abnormal videoları ekleyelim
for subfolder in abnormal_classes:
    subfolder_path = os.path.join(abnormal_path, subfolder)
    videos = [os.path.join(subfolder_path, v) for v in os.listdir(subfolder_path) if v.endswith(('.mp4', '.avi', '.mov'))]
    data.extend([(video, subfolder, class_to_index[subfolder]) for video in videos])  # Label = 1, 2, 3, ...

# DataFrame oluştur
df = pd.DataFrame(data, columns=["video_path", "category", "label"])


In [12]:
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df["label"],random_state=42)

print(f"Train set {len(train_df)} videos")
print(f"Test set {len(test_df)} videos")

Train set 2224 videos
Test set 556 videos


# Buraya preprocessing adımı olarak Veri döndürme, zoomout, kontrast vs eklenebilir

In [13]:
import torch
import torchvision
import torch.nn.functional as F
from torch.utils.data import Dataset

class VideoDataset(Dataset):
    def __init__(self, df, num_frames=8, transform=None, resize_size=(192, 192)):
        self.df = df
        self.num_frames = num_frames
        self.transform = transform
        self.resize_size = resize_size  # Sabit video boyutu (H, W)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        video_path = self.df.iloc[idx]["video_path"]
        label = self.df.iloc[idx]["label"]

        # GPU hızlandırmalı video okuma
        frames = self.extract_frames(video_path, self.num_frames)

        # Eğer transform varsa uygula
        if self.transform:
            frames = self.transform(frames)

        return frames, torch.tensor(label, dtype=torch.long)

    def extract_frames(self, video_path, num_frames):
        vr = torchvision.io.VideoReader(video_path, "video")
        vr.set_current_stream("video")  # Video stream'ini seç

        frames = []
        for frame in vr:
            frame = frame['data'].float() / 255.0  # Normalize (C, H, W)

            # Frame boyutlarını MoviNet'e uygun hale getir
            frame = F.interpolate(frame.unsqueeze(0), size=self.resize_size, mode="bilinear", align_corners=False)
            frame = frame.squeeze(0).permute(1, 2, 0)  # (C, H, W) → (H, W, C)

            frames.append(frame)

            if len(frames) >= num_frames:
                break

        # Eğer video kısa ise son frame ile tamamla
        while len(frames) < num_frames:
            frames.append(frames[-1].clone())

        frames = torch.stack(frames)  # (T, H, W, C) olacak şekilde birleştir
        frames = frames[:num_frames]  # Sabit sayıda frame al (T, H, W, C)

        frames = frames.permute(3, 0, 1, 2)  # (T, H, W, C) → (C, T, H, W)

        return frames.to("cpu", dtype=torch.float16)  # 🔥 **Veriyi float16 yap!**

In [14]:
train_dataset = VideoDataset(train_df, num_frames=32)
test_dataset = VideoDataset(test_df, num_frames=32)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

print(f"Train loader {len(train_loader)} batches")
print(f"Test loader {len(test_loader)} batches")

Train loader 139 batches
Test loader 35 batches


In [15]:
for videos, labels in train_loader:
    print(f"Videos shape:{videos.shape}, Labels shape: {labels.shape}, dtype: {videos.dtype}")
    break;

Videos shape:torch.Size([16, 3, 32, 192, 192]), Labels shape: torch.Size([16]), dtype: torch.float16


In [16]:
# Movinet Modelini Yükleme
import torch
from movinets import MoViNet
from movinets.config import _C as config

model = MoViNet(config.MODEL.MoViNetA0, causal = True, pretrained = True )
model.to("cpu")


MoViNet(
  (conv1): ConvBlock3D(
    (conv_1): Conv2dBNActivation(
      (conv2d): Conv2d(3, 8, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (norm): BatchNorm2d(8, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (act): Hardswish()
    )
  )
  (blocks): Sequential(
    (b0_l0): BasicBneck(
      (expand): ConvBlock3D(
        (conv_1): Conv2dBNActivation(
          (conv2d): Conv2d(8, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (act): Hardswish()
        )
      )
      (deep): ConvBlock3D(
        (conv_1): Conv2dBNActivation(
          (conv2d): Conv2d(24, 24, kernel_size=(5, 5), stride=(2, 2), groups=24, bias=False)
          (norm): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (act): Hardswish()
        )
      )
      (se): SqueezeExcitation(
        (temporal_cumualtive_GAvg3D): TemporalCGAvgPo

In [17]:
#Çıkış katmanlarını düzenleme
num_classes = len(train_df["category"].unique())

model.classifier[3] = torch.nn.Linear(in_features=2048, out_features=num_classes)

print(model.blocks[-1].proj)
import torch

model = model.to("cpu")
dummy_input = torch.randn(1, 3, 16, 160, 160)

torch.onnx.export(model, dummy_input, "x3d_xs.onnx", opset_version=11
                  ,do_constant_folding=True, input_names=["input"], output_names=["output"])

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim

# 🎯 Loss & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-3)  # Weight decay ile overfitting'i önle

# ⚡ Yeni PyTorch Formatına Uygun AMP (Automatic Mixed Precision)
scaler = torch.amp.GradScaler()

# 🛑 Early Stopping Parametreleri
patience = 3  # Kaç epoch boyunca iyileşme olmazsa durdursun
min_delta = 0.01  # İyileşme için gereken minimum fark
best_loss = float("inf")  # En iyi validation loss başta sonsuz olarak ayarlanır
counter = 0  # İyileşme olmayan epoch sayısı
accuracy_threshold = 85.0  # Accuracy'nin geçmesi gereken eşik

# 📌 MoviNet INPUT FORMAT: (B, C, T, H, W)
num_frames = 8  # MoviNet genelde 8 frame kullanıyor, bunu **dataloader ile eşleştir**
input_size = (192, 192)  # MoviNet için standart giriş boyutu

for epoch in range(40):
    model.train()
    running_loss = 0.0

    for videos, labels in train_loader:
        videos, labels = videos.to("cpu"), labels.to("cpu")

        optimizer.zero_grad()
        with torch.amp.autocast("cpu"):
            outputs = model(videos)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

    train_loss = running_loss / len(train_loader)  # Train loss'u hesapla

    # 📌 VALIDATION AŞAMASI
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for videos, labels in test_loader:
            videos, labels = videos.to("cpu"), labels.to("cpu")
            outputs = model(videos)
            loss = criterion(outputs, labels)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32768x1 and 2048x12)