In [1]:
from datasets import load_dataset
from torchvision.transforms import Compose, Resize, ToTensor, Lambda
import torchvision.transforms.functional as TF
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np
from torch.nn.utils.rnn import pad_sequence
from torchvision.transforms.functional import to_pil_image


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Directly load with splits if supported
datasets = load_dataset("jinmang2/ucf_crime")
datasets = datasets['train'].shuffle(seed=42)


Using the latest cached version of the module from C:\Users\belkh\.cache\huggingface\modules\datasets_modules\datasets\jinmang2--ucf_crime\b97c17ec177f7e377de2b363616b940b64939f4e0766504732b45efc5a69139b (last modified on Mon Apr 22 17:51:43 2024) since it couldn't be found locally at jinmang2/ucf_crime, or remotely on the Hugging Face Hub.


In [4]:
train_test_split = datasets.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split['train']
test_dataset = train_test_split['test']

train_val_split = train_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_val_split['train']
val_dataset = train_val_split['test']


In [6]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import cv2

class VideoDataset(Dataset):
    def __init__(self, dataset, target_fps=1, frame_skip=3, transform=None):
        self.dataset = dataset
        self.target_fps = target_fps
        self.frame_skip = frame_skip  # Skip every 'frame_skip' frames to reduce temporal resolution
        self.transform = transform or transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((64, 64)),  # Resize frames to a smaller dimension
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        self.feature_extractor = models.mobilenet_v2(pretrained=True)
        self.feature_extractor.classifier[1] = torch.nn.Identity()  # Removing the final classifier layer
        self.feature_extractor.eval()

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        video_path = self.dataset[idx]['video_path']
        frames = self.load_video(video_path, self.target_fps, self.frame_skip)
        features = []
        with torch.no_grad():
            for frame in frames:
                frame = self.transform(frame)
                frame = frame.unsqueeze(0)  # Add batch dimension
                feature = self.feature_extractor(frame)
                features.append(feature.squeeze(0))
        features = torch.stack(features)
        label = self.dataset[idx]['anomaly']
        return features, label

    def load_video(self, video_path, target_fps, frame_skip):
        cap = cv2.VideoCapture(video_path)
        frames = []
        native_fps = cap.get(cv2.CAP_PROP_FPS)
        frame_skip_ratio = max(1, round(native_fps / target_fps)) * frame_skip

        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx % frame_skip_ratio == 0:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(frame)
            frame_idx += 1
        cap.release()
        return frames


In [7]:
from torch.utils.data import DataLoader

# Assuming 'datasets' is your loaded dataset, e.g., from Hugging Face or another source
train_dataset = VideoDataset(train_dataset)
val_dataset = VideoDataset(val_dataset)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)



In [8]:
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        self.input_size = 1280  # Number of input features
        self.hidden_size = 256  # Number of features in hidden state
        self.num_layers = 1  # Number of LSTM layers
        self.num_classes = 2  # Number of output classes
        self.fc = nn.Linear(self.hidden_size, self.num_classes)  # Output layer assumes last hidden state as input
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=True)


    def forward(self, x):
        # Forward pass through LSTM layer
        # x of shape (batch, seq, feature)
        output, (hn, cn) = self.lstm(x)
        # Assuming using the last hidden state
        out = self.fc(hn[-1])
        return out



In [9]:
def validate(model, val_loader, device):
    model.eval()  # Set the model to evaluation mode
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            # Assuming outputs are logits and you are doing a classification task
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    accuracy = correct_predictions / total_samples
    model.train()  # Set the model back to training mode
    return accuracy


In [10]:
import matplotlib.pyplot as plt

def train(model, data_loader, val_loader, criterion, optimizer, num_epochs, device, save_path='best_model.pth'):
    model = model.to(device)
    previous_val_accuracy= float('inf')
    best_val_accuracy = float('inf')

    for epoch in range(num_epochs):
        model.train()
        total_correct = 0
        total_samples = 0
        batch_losses = []
        batch_accuracies = []

        for i, (inputs, labels) in enumerate(data_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            batch_losses.append(loss.item())

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Calculate batch accuracy
            _, predicted = torch.max(outputs, 1)
            correct = (predicted == labels).sum().item()
            total_correct += correct
            total_samples += labels.size(0)

            if (i + 1) % 100 == 0:
                batch_accuracy = 100.0 * total_correct / total_samples
                batch_accuracies.append(batch_accuracy)
                print(f'Epoch {epoch+1}, Step {i+1}, Loss: {sum(batch_losses) / len(batch_losses):.4f}, '
                      f'Accuracy: {batch_accuracy:.2f}%')
                total_correct = 0
                total_samples = 0
                batch_losses = []

        # Validation after each epoch
        val_accuracy = validate(model, val_loader, criterion, device)
        print(f'Epoch {epoch+1}: Validation Loss: {val_accuracy:.4f}')

        # Saving the model if it has the best validation loss
        if val_accuracy < best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), save_path)
            print(f'Saved best model to {save_path}')

        # Early stopping condition (less than 10% decrease)
        if previous_val_accuracy - val_accuracy < 0.1 * previous_val_accuracy:
            print("Stopping early due to less than 10% decrease in validation loss.")
            break
        previous_val_accuracy = val_accuracy

        # Plotting
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plt.plot(batch_accuracies, label='Accuracy per 100 examples')
        plt.title('Accuracy per 100 examples')
        plt.xlabel('Batch')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.show()

    # Optionally save the final model state
    final_model_path = 'final_model.pth'
    torch.save(model.state_dict(), final_model_path)
    print(f'Saved final model state to {final_model_path}')

# Assumptions about other components of your setup
model = LSTM()  # Your LSTM model
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Assuming 'train_loader' and 'val_loader' are defined (your DataLoader instances)
train(model, train_loader, val_loader, criterion, optimizer, num_epochs=5, device=device)



Epoch 1, Step 100, Loss: 0.7284, Accuracy: 56.00%


In [9]:
# Load the test dataset
test_dataset = VideoDataset(test_dataset)  # Assuming datasets['test'] is your test set

# Create a DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)




In [10]:
def evaluate_model(model, data_loader, device):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    correct_predictions = 0
    total_predictions = 0
    counter = 0
    with torch.no_grad():  # Disable gradient computation
            for inputs, labels in data_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                total_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_predictions += (predicted == labels).sum().item()
                total_predictions += labels.size(0)
                counter += 1
                print(predicted, labels, correct_predictions, total_predictions)
    
    avg_loss = total_loss / len(data_loader)
    accuracy = correct_predictions / total_predictions
    return avg_loss, accuracy

# Evaluate the model
test_loss, test_accuracy = evaluate_model(model, test_loader, device)
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')


tensor([0], device='cuda:0') tensor([0], device='cuda:0') 1 1
tensor([0], device='cuda:0') tensor([12], device='cuda:0') 1 2
tensor([0], device='cuda:0') tensor([1], device='cuda:0') 1 3
tensor([0], device='cuda:0') tensor([13], device='cuda:0') 1 4
tensor([0], device='cuda:0') tensor([0], device='cuda:0') 2 5
tensor([0], device='cuda:0') tensor([0], device='cuda:0') 3 6
tensor([0], device='cuda:0') tensor([7], device='cuda:0') 3 7
tensor([0], device='cuda:0') tensor([0], device='cuda:0') 4 8
tensor([0], device='cuda:0') tensor([5], device='cuda:0') 4 9
tensor([3], device='cuda:0') tensor([3], device='cuda:0') 5 10
tensor([0], device='cuda:0') tensor([0], device='cuda:0') 6 11
tensor([0], device='cuda:0') tensor([0], device='cuda:0') 7 12
tensor([0], device='cuda:0') tensor([10], device='cuda:0') 7 13
tensor([0], device='cuda:0') tensor([9], device='cuda:0') 7 14
tensor([0], device='cuda:0') tensor([9], device='cuda:0') 7 15
tensor([0], device='cuda:0') tensor([0], device='cuda:0') 8 1

In [11]:

import time

def train(model, data_loader, criterion, optimizer, num_epochs, device):
    model = model.to(device)
    for epoch in range(num_epochs):
        for i, (inputs, labels) in enumerate(data_loader):
            print(inputs.shape, labels)
            start_time = time.time()  # Start time for processing one batch
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            elapsed_time = time.time() - start_time
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(data_loader)}], Loss: {loss.item():.4f}, Batch Time: {elapsed_time:.2f} sec')

# Model configuration
input_size = 2048  # Number of input features
hidden_size = 256  # Number of features in hidden state
num_layers = 1  # Number of LSTM layers
num_classes = 2  # Number of output classes

model = SimpleLSTM(input_size, hidden_size, num_layers, num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Number of epochs
num_epochs = 5

# Assuming 'train_loader' is defined (your DataLoader instance)
train(model, data_loader, criterion, optimizer, num_epochs, device)


NameError: name 'SimpleLSTM' is not defined

In [None]:
class CRNNModel(nn.Module):
    def __init__(self):
        super(CRNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.lstm = nn.LSTM(16 * 112 * 112, 128, 2, batch_first=True)
        self.fc = nn.Linear(128, 2)

    def forward(self, x):
        # x: tensor of dimensions (batch_size, sequence_length, C, H, W)
        batch_size, seq_length, C, H, W = x.size()
        c_in = x.view(batch_size * seq_length, C, H, W)
        c_out = self.maxpool(self.relu(self.conv1(c_in)))
        r_in = c_out.view(batch_size, seq_length, -1)
        r_out, _ = self.lstm(r_in)
        r_out = self.fc(r_out[:, -1, :])
        return r_out

In [None]:
def train_model(model, data_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        for frames, labels in data_loader:
            optimizer.zero_grad()
            outputs = model(frames)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Example: Training CRNN
model = CRNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
train_model(model, data_loader, criterion, optimizer)
