In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import time

# Per-channel Kinetics statistics, shaped to broadcast over (C, T, H, W) clips
_CLIP_MEAN = torch.tensor([0.43216, 0.394666, 0.37645]).view(3, 1, 1, 1)
_CLIP_STD = torch.tensor([0.22803, 0.22145, 0.216989]).view(3, 1, 1, 1)


def transform(video, size=(112, 112)):
    """Turn a raw video clip into a normalised float tensor for a 3D CNN.

    The torchvision image transforms (``Resize`` / ``ToTensor``) operate on
    single PIL images, whereas the Kinetics400 dataset hands its transform a
    whole clip as one uint8 tensor, so the clip is processed with plain
    tensor ops instead.

    Args:
        video: uint8 tensor of shape (T, H, W, C) with C == 3 and values in
            [0, 255].
        size: target (height, width) of every frame.

    Returns:
        float32 tensor of shape (C, T, size[0], size[1]), scaled to [0, 1]
        and then normalised per channel with the Kinetics mean / std.

    Raises:
        ValueError: if ``video`` is not a 4-D tensor with 3 channels last.
    """
    if video.dim() != 4 or video.shape[-1] != 3:
        raise ValueError(
            f"expected a (T, H, W, 3) video tensor, got shape {tuple(video.shape)}"
        )

    # (T, H, W, C) uint8 -> (C, T, H, W) float in [0, 1]
    clip = video.permute(3, 0, 1, 2).float() / 255.0

    # interpolate() sees (C, T, H, W) as (batch, channels, H, W), so every
    # frame of every colour channel is resized independently.
    clip = torch.nn.functional.interpolate(
        clip, size=size, mode="bilinear", align_corners=False
    )

    return (clip - _CLIP_MEAN) / _CLIP_STD

def _collate_without_audio(batch):
    """Collate Kinetics samples into (videos, labels), discarding audio.

    Each Kinetics400 sample is a (video, audio, label) triple. The training
    loop only consumes videos and labels, and the audio tensors are not
    guaranteed to share a length, so they are dropped before the default
    collation stacks the batch.
    """
    video_label_pairs = [(video, label) for video, _audio, label in batch]
    return torch.utils.data.dataloader.default_collate(video_label_pairs)


# Load the Kinetics-400 dataset as 16-frame clips; `transform` is applied to
# the video tensor of every clip.
kinetics_dataset = datasets.Kinetics400(root='path_to_kinetics400_dataset', frames_per_clip=16, step_between_clips=1, transform=transform)

# Create the training data loader (no validation loader is built here)
train_loader = DataLoader(kinetics_dataset, batch_size=32, shuffle=True, num_workers=4, pin_memory=True, collate_fn=_collate_without_audio)

# Pick the compute device first: GPU when one is available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network: r3d_18 is the 18-layer 3D ResNet for video, loaded with
# its pre-trained weights, then given a fresh 400-way classification head.
# NOTE(review): the pre-trained r3d_18 checkpoint is itself a Kinetics-400
# model, so this replaces its trained head with a randomly initialised one of
# the same size - confirm that is intended.
num_classes = 400
model = models.video.r3d_18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# Classification objective and optimiser (Adam, learning rate 0.001)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop: one full pass over train_loader per epoch, reporting the mean
# loss of every 100-step window and the wall-clock time of each epoch.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # training mode (affects layers such as batch norm)
    window_loss = 0.0  # loss accumulated since the last progress report
    epoch_started = time.time()

    for step, (inputs, labels) in enumerate(train_loader, start=1):
        # Move the batch onto the same device as the model
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass and loss
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        window_loss += loss.item()

        # Only report on every 100th step
        if step % 100 != 0:
            continue
        print(f'Epoch [{epoch+1}/{num_epochs}], Step [{step}/{len(train_loader)}], Loss: {window_loss/100:.4f}')
        window_loss = 0.0

    elapsed = time.time() - epoch_started
    print(f'Training time for epoch {epoch+1}: {elapsed} seconds')

print('Finished Training')

# Save the trained model if needed



In [None]:
import torch.onnx

# Switch to inference mode explicitly so the exported graph uses the
# batch-norm running statistics regardless of the exporter's defaults.
model.eval()

# dummy input: a single clip laid out as (batch, channels, frames, height, width)
dummy_input = torch.randn(1, 3, 16, 112, 112).to(device)

# Export the model to ONNX format. The batch axis is declared dynamic so the
# exported model is not locked to the batch size of the dummy input.
# NOTE(review): the file name says "resnet34" although the network is r3d_18;
# it is kept unchanged so anything that loads this path keeps working.
torch.onnx.export(
    model,
    dummy_input,
    'kinetics400_resnet34.onnx',
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={'input': {0: 'batch'}, 'output': {0: 'batch'}},
)

print('Model saved in ONNX format.')
