In [1]:
import os
import json
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random


## Dataloader

In [2]:

def fill_collate_fn(batch):
    # Unpack the batch
    data, labels = zip(*batch)

    # Convert data to list of lists to use append
    data = [x.tolist() for x in data]

    # Find the maximum length of sequences in the batch
    max_len = max(len(item) for sequence in data for item in sequence)

    filled_data = []
    for sequence in data:
        # Handle the case where the sequence is shorter than the maximum length
        while len(sequence) < max_len:
            random_sequence_index = random.randint(0, len(data) - 1)
            random_frame_index = random.randint(0, len(data[random_sequence_index]) - 1)
            random_point = data[random_sequence_index][random_frame_index]
            sequence.append(random_point)
        filled_data.append(np.array(sequence))  # Convert back to numpy array after padding

    # Convert filled_data and labels to PyTorch tensors
    # Since filled_data is a list of numpy arrays, we need to convert each numpy array to a tensor and then stack
    # filled_data = torch.stack([torch.tensor(seq, dtype=torch.float32) for seq in filled_data])
    filled_data = torch.tensor(filled_data)
    print(labels)
    # labels = torch.tensor(labels, dtype=torch.float32)

    return filled_data, labels



In [21]:
import os
import glob

class ShotDataset(Dataset):
    def __init__(self, shot_dir, background_dir, segment_length=20):
        self.shot_dir = shot_dir
        self.background_dir = background_dir
        self.segment_length = segment_length
        
        # Load JSON paths and sort them by frame numbers in file names
        self.shot_files = sorted(glob.glob(os.path.join(shot_dir, '*.json')), key=lambda x: int(x.split('_')[-1].split('.')[0]))
        self.background_files = sorted(glob.glob(os.path.join(background_dir, '*.json')), key=lambda x: int(x.split('_')[-1].split('.')[0]))
        
        # Combine paths and labels
        self.data_files = [(path, 1) for path in self.shot_files] + [(path, 0) for path in self.background_files]
        self.data_files = sorted(self.data_files, key=lambda x: int(x[0].split('_')[-1].split('.')[0]))  # Ensure global order if needed


    def load_json(self, file_path):
        with open(file_path, 'r') as file:
            data = json.load(file)
        return data

    def group_frames(self, data):
        grouped_values = []
        current_segment = []
        start_frame = 0
        end_frame = max(int(key) for key in data.keys())
        
        for frame in range(start_frame, end_frame + 1):
            if str(frame) in data:
                current_segment.append(data[str(frame)]['nearest_person_keypoints'])

            if len(current_segment) == self.segment_length:
                grouped_values.append(current_segment)
                current_segment = []

        # Handle padding if the last group is not full
        if len(current_segment) > 0:
            while len(current_segment) < self.segment_length:
                random_index = random.randint(0, len(current_segment) - 1)
                current_segment.insert(random_index + 1, current_segment[random_index])

            grouped_values.append(current_segment)

        return grouped_values

    def __getitem__(self, idx):
            file_path, label = self.data_files[idx]
            print('file_path', file_path)
            json_data = self.load_json(file_path)
            grouped_frames = self.group_frames(json_data)
            
            if grouped_frames:
                return np.array(grouped_frames), np.array([label] * len(grouped_frames))
            else:
                # Assuming no data corresponds to segment_length keypoints all zeros
                return np.zeros((self.segment_length, 17, 2)), np.array([label])
            
    def __len__(self):
        return len(self.data_files)


In [22]:

# Assume `df` is your dataframe loaded with pandas
shot_dir = 'datasets/Front-All/jsons_keypoints_classwise/Shot'
background_dir = 'datasets/Front-All/jsons_keypoints_classwise/Background'

shot_dataset = ShotDataset(shot_dir=shot_dir, background_dir=background_dir)
shot_loader = DataLoader(shot_dataset, batch_size=1, shuffle=True)#, collate_fn= fill_collate_fn)




In [31]:
# Create an iterator
data_iter = iter(shot_loader)

# Get the first batch
batch_data, batch_labels = next(data_iter)

# Print the data and labels
print("Batch Data Shape:", batch_data.shape)
print("Batch Labels:", batch_labels)

file_path datasets/Front-All/jsons_keypoints_classwise/Background/Front-All_15112_15227.json
Batch Data Shape: torch.Size([1, 5, 20, 17, 2])
Batch Labels: tensor([[0, 0, 0, 0, 0]])


In [15]:
len(batch_labels[0])

1

In [16]:
count = 0
for batch_data, labels in shot_loader:
    # batch_data.shape[0] will now give you the batch size
    batch_size = batch_data.shape[0]
    for j in range(batch_size):  # Loop over each sample in the batch
        num_groups = batch_data.shape[1]
        for i in range(num_groups):  # Iterate over each group in the sample
            single_group = batch_data[j, i]  # Access the i-th group in the j-th sample
            label = labels[j, i]             # Access the corresponding label
            print(single_group.shape, "label:", label)
            

torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 17, 2]) label: tensor(0)
torch.Size([20, 

## Model Training

In [17]:
# Define the PyTorch model
class ConvLSTMModel(nn.Module):
    def __init__(self, input_shape, hidden_dim, num_classes):
        super(ConvLSTMModel, self).__init__()
        self.segment_length, self.keypoints, self.coords = input_shape
        # Conv1D layer: input channels = keypoints * coords
        self.conv1d = nn.Conv1d(in_channels=self.keypoints * self.coords, out_channels=32, kernel_size=3, padding=1)
        # LSTM layer
        self.lstm = nn.LSTM(input_size=32, hidden_size=hidden_dim, batch_first=True)
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        # Reshape x to (batch_size, keypoints * coords, segment_length) for Conv1D
        x = x.view(x.size(0), self.keypoints * self.coords, self.segment_length)
        # Pass through Conv1D
        x = self.conv1d(x)
        x = torch.relu(x)
        # Transpose x to (batch_size, segment_length, 32) for LSTM
        x = x.transpose(1, 2)
        # Pass through LSTM
        x, (hn, cn) = self.lstm(x)
        # Use only the last hidden state
        x = x[:, -1, :]
        # Pass through fully connected layer
        x = self.fc(x)
        # Apply sigmoid for binary classification
        x = torch.sigmoid(x)
        return x



In [18]:
# Training script
def train_model(dataloader, input_shape, hidden_dim, num_classes, num_epochs=10):
    # Initialize model, loss function, and optimizer
    model = ConvLSTMModel(input_shape, hidden_dim, num_classes)
    criterion = nn.BCELoss()  # Binary cross-entropy loss
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Set the model to training mode
    model.train()

    # Training loop
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_predictions = 0
        total_predictions = 0
        
        for batch_data, batch_labels in dataloader:
            
            for i in range(batch_data.shape[1]):  # Iterate over each group in the batch
                batch_size = batch_data.shape[0]
                for j in range(batch_size):  # Loop over each sample in the batch
                    num_groups = batch_data.shape[1]
                    for i in range(num_groups):  # Iterate over each group in the sample
                        single_group = batch_data[j, i]  # Access the i-th group in the j-th sample
                        label = batch_labels[j, i]             # Access the corresponding label
                        # print(single_group.shape, "label:", label)
                # data = batch_data[:, i, :, :, :]  # Adjust shape as necessary
                # label = batch_labels[:, i]
                # print(data.shape)
                # print(label.shape)

                        # Ensure data and label are correct shape
                        data = single_group.view(1, *single_group.shape)  # Reshape data for the model if necessary
                        label = label.view(1, -1)  # Ensure label is the right shape


                        # Zero the parameter gradients
                        optimizer.zero_grad()

                        # Forward pass
                        output = model(data.float())
                        loss = criterion(output, label.float())

                        # Backward pass and optimize
                        loss.backward()
                        optimizer.step()

                        # Update running loss
                        running_loss += loss.item()

                        # Convert outputs to binary predictions
                        predicted = (output > 0.5).float()
                        correct_predictions += (predicted == label).sum().item()
                        total_predictions += label.numel()

        # Print the loss and accuracy for this epoch
        epoch_loss = running_loss / len(dataloader)
        epoch_accuracy = correct_predictions / total_predictions
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')

    print("Training complete.")
    return model


In [19]:
input_shape = (20, 17, 2)  # (segment_length, keypoints, coords)
hidden_dim = 64
num_classes = 1 

model = train_model(shot_loader, input_shape, hidden_dim, num_classes, num_epochs=50)


Epoch [1/50], Loss: 6.3226, Accuracy: 0.8867
Epoch [2/50], Loss: 6.1672, Accuracy: 0.8829
Epoch [3/50], Loss: 6.3043, Accuracy: 0.8859
Epoch [4/50], Loss: 6.0500, Accuracy: 0.8848
Epoch [5/50], Loss: 6.3362, Accuracy: 0.8860
Epoch [6/50], Loss: 6.2649, Accuracy: 0.8872
Epoch [7/50], Loss: 6.3141, Accuracy: 0.8862
Epoch [8/50], Loss: 6.1529, Accuracy: 0.8832
Epoch [9/50], Loss: 6.2872, Accuracy: 0.8857
Epoch [10/50], Loss: 6.2517, Accuracy: 0.8864
Epoch [11/50], Loss: 6.2531, Accuracy: 0.8871
Epoch [12/50], Loss: 6.2397, Accuracy: 0.8873
Epoch [13/50], Loss: 6.2316, Accuracy: 0.8852
Epoch [14/50], Loss: 6.2288, Accuracy: 0.8857
Epoch [15/50], Loss: 6.0885, Accuracy: 0.8852
Epoch [16/50], Loss: 6.1777, Accuracy: 0.8843
Epoch [17/50], Loss: 6.1680, Accuracy: 0.8893
Epoch [18/50], Loss: 6.0719, Accuracy: 0.8890
Epoch [19/50], Loss: 6.2317, Accuracy: 0.8893
Epoch [20/50], Loss: 6.2663, Accuracy: 0.8893
Epoch [21/50], Loss: 6.1443, Accuracy: 0.8881
Epoch [22/50], Loss: 6.2044, Accuracy: 0.88