In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [None]:

# Define a dummy ST-GCN model (simplified version)
class DummySTGCN(nn.Module):
    """Minimal stand-in for an ST-GCN skeleton action classifier.

    The network applies a single temporal convolution (9 frames wide, one
    joint at a time), global-average-pools over frames and joints, and feeds
    the resulting 64-dimensional feature to a linear classifier.

    Args:
        in_channels: number of coordinate channels per joint (C).
        num_classes: number of output logits.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        # Kernel (9, 1) only mixes along the frame axis; padding (4, 0) keeps T unchanged.
        self.conv = nn.Conv2d(in_channels, 64, kernel_size=(9, 1), padding=(4, 0))
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of skeleton sequences.

        Args:
            x: tensor of shape (N, C, T, V, M) - batch, channels, frames,
               joints, persons.

        Returns:
            Tensor of shape (N, num_classes).

        Raises:
            ValueError: if ``x`` is not 5-dimensional.
        """
        if x.dim() != 5:
            raise ValueError(
                f"expected input of shape (N, C, T, V, M), got {tuple(x.shape)}"
            )
        N, C, T, V, M = x.size()
        # Fold the person axis into the batch axis so every person is processed
        # independently. For M == 1 this is exactly the original squeeze; for
        # M > 1 the original `view(N, C, T, V)` raised a size-mismatch error.
        x = x.permute(0, 4, 1, 2, 3).reshape(N * M, C, T, V)
        x = self.conv(x)
        x = self.pool(x)  # (N * M, 64, 1, 1)
        # Average the per-person features back into one feature per sample.
        x = x.reshape(N, M, self.fc.in_features).mean(dim=1)
        out = self.fc(x)
        return out

In [None]:

# Create a synthetic dataset of videos with skeleton data
class SkeletonVideoDataset(Dataset):
    """Synthetic map-style dataset of random skeleton sequences.

    Each item is generated on demand from a generator seeded with
    ``seed + idx``, so a given index always yields the same (data, label)
    pair and the global torch RNG state is left untouched.
    """

    def __init__(self, num_videos, num_classes, T=30, V=25, C=2, M=1, seed=0):
        """
        Args:
            num_videos: number of videos in the dataset.
            num_classes: number of action classes.
            T: number of frames per video.
            V: number of joints per frame.
            C: number of coordinate channels (e.g., x, y).
            M: number of persons (typically 1 for single-person actions).
            seed: base seed; item ``idx`` is generated from ``seed + idx``.
        """
        self.num_videos = num_videos
        self.num_classes = num_classes
        self.T = T
        self.V = V
        self.C = C
        self.M = M
        self.seed = seed

    def __len__(self):
        return self.num_videos

    def __getitem__(self, idx):
        """Return the ``(skeleton_data, label)`` pair for video ``idx``.

        Returns:
            skeleton_data: float tensor of shape (C, T, V, M).
            label: Python int in ``[0, num_classes)``.

        Raises:
            IndexError: if ``idx`` is outside ``[-num_videos, num_videos)``.
        """
        idx = int(idx)
        if idx < 0:
            idx += self.num_videos
        # Without this check, `for item in dataset` / `list(dataset)` never
        # terminates: sequence-style iteration stops only on IndexError.
        if not 0 <= idx < self.num_videos:
            raise IndexError(
                f"index out of range for dataset of size {self.num_videos}"
            )
        # A per-item generator makes the sample a pure function of (seed, idx).
        gen = torch.Generator().manual_seed(self.seed + idx)
        # Random skeleton data for one video: (C, T, V, M)
        skeleton_data = torch.randn(self.C, self.T, self.V, self.M, generator=gen)
        # Random label between 0 and (num_classes - 1)
        label = torch.randint(0, self.num_classes, (1,), generator=gen).item()
        return skeleton_data, label

In [None]:
# Hyperparameters
num_videos = 100        # Total videos in the dataset
num_classes = 10        # Number of action classes
batch_size = 8          # Videos per optimisation step
learning_rate = 0.001   # Step size passed to the optimizer
num_epochs = 5          # Full passes over the dataset
seed = 42               # Seed for torch's global random number generator

# Seed torch's global RNG so results are reproducible after
# Restart Kernel -> Run All.
torch.manual_seed(seed)

In [None]:
# Build the synthetic dataset, then wrap it in a loader that serves
# shuffled mini-batches of `batch_size` videos.
dataset = SkeletonVideoDataset(
    num_videos=num_videos,
    num_classes=num_classes,
)
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Create the model instance. The input channel count is read from the dataset
# so the model always matches the data instead of duplicating a magic number.
model = DummySTGCN(in_channels=dataset.C, num_classes=num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # expects raw logits and integer class labels
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Training loop
log_interval = 5                # Report the mean loss every `log_interval` batches
num_batches = len(dataloader)   # Loop-invariant, so computed once

model.train()
for epoch in range(num_epochs):
    running_loss = 0.0      # Loss summed since the last report
    window_batches = 0      # Batches accumulated since the last report
    epoch_loss = 0.0        # Loss summed over the whole epoch
    for i, (inputs, labels) in enumerate(dataloader):
        # inputs shape: (batch_size, C, T, V, M)
        optimizer.zero_grad()  # Zero the parameter gradients

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        window_batches += 1

        # Also report on the last batch: otherwise a trailing partial window
        # (num_batches not divisible by log_interval) is never logged.
        if window_batches == log_interval or (i + 1) == num_batches:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{num_batches}], Loss: {running_loss/window_batches:.4f}")
            running_loss = 0.0
            window_batches = 0

    # Guard against an empty dataloader to avoid dividing by zero.
    if num_batches:
        print(f"Epoch [{epoch+1}/{num_epochs}] mean loss: {epoch_loss/num_batches:.4f}")

print("Training completed.")
