In [1]:
import os
import warnings

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from neo4j import GraphDatabase
from pymongo import MongoClient
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
warnings.filterwarnings('ignore')

In [2]:
 #MongoDB connection
# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook. The fallbacks are the local
# development defaults this notebook used before.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://admin:password@localhost:27017/")
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")

mongo_client = MongoClient(MONGO_URI)
mongo_db = mongo_client["SportsAnalysis"]
# Collection queried below for each video's `video_id` and `label`.
labels_collection = mongo_db["metadata"]

# Neo4j connection
neo4j_driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

In [3]:
def fetch_all_video_ids():
    """Return the ``video_id`` of every document in ``labels_collection``.

    Returns:
        list: one entry per document, in the order the cursor yields them.

    Raises:
        KeyError: if a document has no ``video_id`` field.
    """
    # The projection limits the payload to the id field (Mongo still adds `_id`).
    cursor = labels_collection.find({}, {'video_id': 1})
    ids = []
    for entry in cursor:
        ids.append(entry['video_id'])
    return ids

def fetch_label(video_id):
    """Look up the class label stored for ``video_id`` in ``labels_collection``.

    Args:
        video_id: value matched against the document's ``video_id`` field.

    Returns:
        The document's ``label`` value, or ``-1`` when there is no usable
        label: no matching document, no ``label`` field, or a null label.
        ``-1`` is the "unlabelled" sentinel that ``transform_to_tabular``
        filters out.
    """
    doc = labels_collection.find_one({'video_id': video_id})
    if not doc:
        return -1

    # Compare against None explicitly: 0 is a legitimate class label and must
    # not be mistaken for "missing".
    label = doc.get('label')
    return -1 if label is None else label

In [4]:
def fetch_graphs_from_neo4j(video_id):
    """Load the per-time-step pose graphs of one video from Neo4j.

    For every distinct ``time_index`` of the video's ``PoseNode`` nodes, one
    query fetches the node angles (ordered by ``node_index``) and another the
    outgoing ``CONNECTED_TO`` edges.

    Args:
        video_id: value of the ``video_id`` property on the video's nodes.

    Returns:
        list[dict]: one dict per time step, in ascending ``time_index`` order,
        with keys:

        * ``edge_index``  - long tensor of shape ``(2, E)`` (src row, dst row)
        * ``edge_attr``   - float tensor of shape ``(E, 1)`` (edge weights)
        * ``angle_features`` - float tensor of shape ``(N, 1)`` (node angles)
        * ``node_features``  - independent copy of ``angle_features``
        * ``time``        - ``time`` property of the last node read
        * ``source_video`` - the ``video_id`` argument
        * ``label``       - result of ``fetch_label(video_id)``
        * ``node_mapping`` / ``reverse_mapping`` - empty dicts

        The list is empty when the video has no ``PoseNode`` nodes.
    """
    # The label belongs to the video, not to a time step: look it up once
    # instead of issuing one MongoDB query per time step.
    label = fetch_label(video_id)

    with neo4j_driver.session() as session:
        # Get all unique time steps
        result = session.run("""
            MATCH (n:PoseNode {video_id: $video_id})
            RETURN DISTINCT n.time_index AS timestep
            ORDER BY timestep ASC
        """, video_id=video_id)
        time_steps = [record["timestep"] for record in result]

        graphs = []
        for t in time_steps:
            # Fetch nodes
            node_query = session.run("""
                MATCH (n:PoseNode {video_id: $video_id, time_index: $t})
                RETURN n.node_index AS idx, n.angle AS angle, n.time AS time
                ORDER BY idx
            """, video_id=video_id, t=t)

            node_data = []
            time_value = 0
            for record in node_query:
                node_data.append(float(record["angle"]))
                # Every node of a time step is expected to carry the same
                # `time`; the last one read wins.
                time_value = float(record["time"])

            x = torch.tensor(node_data, dtype=torch.float).view(-1, 1)

            # Fetch edges
            # NOTE(review): the destination node `b` is not constrained to the
            # same video_id/time_index, so edges that leave this time step
            # would be included with their foreign node_index - confirm that
            # is intended.
            edge_query = session.run("""
                MATCH (a:PoseNode {video_id: $video_id, time_index: $t})-[r:CONNECTED_TO]->(b:PoseNode)
                RETURN a.node_index AS src, b.node_index AS dst, r.weight AS weight
            """, video_id=video_id, t=t)

            edge_pairs = []
            edge_weights = []
            for record in edge_query:
                edge_pairs.append([int(record["src"]), int(record["dst"])])
                edge_weights.append([float(record["weight"])])

            # The explicit reshape keeps the (2, E) / (E, 1) layout even for a
            # time step without edges; without it an empty list would yield
            # 1-D tensors of shape (0,).
            edge_index = torch.tensor(edge_pairs, dtype=torch.long).reshape(-1, 2).t().contiguous()
            edge_attr = torch.tensor(edge_weights, dtype=torch.float).reshape(-1, 1)

            graphs.append({
                "edge_index": edge_index,
                "edge_attr": edge_attr,
                "angle_features": x,
                "time": time_value,
                "source_video": video_id,
                "label": label,
                "node_mapping": {},
                "reverse_mapping": {},
                "node_features": x.clone()
            })
        return graphs


In [5]:
def load_graph_sequences_from_db():
    """Fetch the graph sequence of every known video.

    Videos are enumerated with ``fetch_all_video_ids`` and loaded one by one
    with ``fetch_graphs_from_neo4j``. Loading is best-effort: a video whose
    fetch raises is reported on stdout and skipped, and videos that come back
    with an empty sequence are dropped.

    Returns:
        list: one non-empty graph sequence per successfully loaded video.
    """
    all_data = []

    for vid in fetch_all_video_ids():
        try:
            graph_sequence = fetch_graphs_from_neo4j(vid)
        except Exception as e:
            # Keep going: one broken video must not abort the whole load.
            print(f"Error loading video {vid}: {e}")
        else:
            if graph_sequence:
                all_data.append(graph_sequence)

    print(f"✅ Loaded {len(all_data)} videos from DB")
    return all_data

In [6]:
# Load every video's graph sequence from the databases. This issues several
# Neo4j queries per time step, so it is the slow step of the notebook.
raw_data = load_graph_sequences_from_db()
# The loader already prints the same count; this line repeats it for the cell output.
print(f"Loaded {len(raw_data)} video sequences")

✅ Loaded 88 videos from DB
Loaded 88 video sequences


In [7]:
def transform_to_tabular(raw_data):
    """
    Flatten per-video graph sequences into per-video feature matrices.

    Each time-step graph becomes one row: its flattened ``angle_features``
    followed by its ``time`` value. Videos whose label is ``-1`` (the
    "unlabelled" sentinel) and empty sequences are left out.

    Returns: sequences (list of 2-D arrays, one row per time step),
             labels (list), video_ids (list) - all three aligned by index
    """
    sequences, labels, video_ids = [], [], []

    for video_sequence in raw_data:
        if not video_sequence:
            continue

        rows = []
        video_label, video_id = None, None

        for graph in video_sequence:
            # Row layout: [angle_0, ..., angle_k, time]
            angle_row = graph['angle_features'].flatten().numpy()
            rows.append(np.append(angle_row, graph['time']))

            # Label and id are per video; remember the first label that is set.
            if video_label is None:
                video_label = graph['label']
                video_id = graph['source_video']

        if not rows or video_label == -1:
            continue

        sequences.append(np.array(rows))
        labels.append(video_label)
        video_ids.append(video_id)

    return sequences, labels, video_ids

In [8]:
# Convert the graph sequences into per-video (timesteps, features) arrays and
# drop videos without a usable label.
sequences, labels, video_ids = transform_to_tabular(raw_data)
print(f"Transformed {len(sequences)} sequences")
# Feature dimension = number of angle values per time step + 1 (the time column).
print(f"Feature dimension: {sequences[0].shape[1] if sequences else 'N/A'}")
# np.bincount gives the number of videos per integer class label.
print(f"Label distribution: {np.bincount(labels) if labels else 'N/A'}")

Transformed 88 sequences
Feature dimension: 34
Label distribution: [19 69]


In [9]:
def pad_sequences(sequences, max_length=None):
    """Bring every sequence to a common number of time steps.

    Sequences shorter than the target get rows of zeros appended; sequences
    that are longer are cut off after ``max_length`` rows.

    Args:
        sequences: list of 2-D arrays shaped (timesteps, features).
        max_length: target number of time steps; defaults to the length of the
            longest sequence.

    Returns:
        (padded, max_length): the stacked array of all sequences and the
        target length that was used.
    """
    target = max_length
    if target is None:
        target = max(map(len, sequences))

    def _fit(seq):
        # Number of zero rows needed to reach the target length.
        missing = target - len(seq)
        if missing <= 0:
            return seq[:target]
        filler = np.zeros((missing, seq.shape[1]))
        return np.concatenate((seq, filler), axis=0)

    return np.array([_fit(seq) for seq in sequences]), target

In [10]:
# Zero-pad every video to the length of the longest one so they stack into a
# single (videos, timesteps, features) array.
X, max_seq_length = pad_sequences(sequences)
y = np.array(labels)

print(f"Padded sequence shape: {X.shape}")
print(f"Max sequence length: {max_seq_length}")

Padded sequence shape: (88, 44, 34)
Max sequence length: 44


In [11]:
class BasketballDataset(Dataset):
    """In-memory dataset of padded feature sequences with one label each.

    Args:
        sequences: array-like of shape (num_samples, timesteps, features);
            stored as a float32 tensor.
        labels: array-like of integer class ids, one per sample; stored as an
            int64 tensor (the dtype ``nn.CrossEntropyLoss`` expects for
            class-index targets).

    Raises:
        ValueError: if ``sequences`` and ``labels`` differ in length.
    """

    def __init__(self, sequences, labels):
        # torch.as_tensor with an explicit dtype replaces the legacy
        # FloatTensor/LongTensor constructors and accepts lists, NumPy arrays
        # and tensors of any dtype alike.
        self.sequences = torch.as_tensor(sequences, dtype=torch.float32)
        self.labels = torch.as_tensor(labels, dtype=torch.long)

        if len(self.sequences) != len(self.labels):
            raise ValueError(
                f"sequences and labels must have the same length, "
                f"got {len(self.sequences)} and {len(self.labels)}"
            )

    def __len__(self):
        """Number of samples."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return the ``(sequence, label)`` pair at position ``idx``."""
        return self.sequences[idx], self.labels[idx]

In [34]:
# Stratified 70/30 split: `stratify=y` keeps the class ratio the same in both
# parts, and the fixed `random_state` makes the split itself repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Create datasets
train_dataset = BasketballDataset(X_train, y_train)
test_dataset = BasketballDataset(X_test, y_test)

# Create data loaders
batch_size = 16
# Training batches are reshuffled every epoch; the test loader yields one
# sample at a time in a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

print(f"Training samples: {len(train_dataset)}")
print(f"Testing samples: {len(test_dataset)}")
print(f"Feature dimensions: {X.shape[2]}")

Training samples: 61
Testing samples: 27
Feature dimensions: 34


In [35]:
class BasketballRNN(nn.Module):
    """LSTM sequence classifier: LSTM -> dropout -> linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output logits.
        dropout: dropout probability applied to the read-out vector and, when
            ``num_layers > 1``, between the LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout=0.2):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # RNN layer (nn.LSTM only applies dropout between layers, so it is
        # switched off for a single-layer network).
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0
        )

        # Dropout layer
        self.dropout = nn.Dropout(dropout)

        # Output layer
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x, lengths=None):
        """Compute class logits for a batch of sequences.

        Args:
            x: tensor of shape (batch, timesteps, input_size).
            lengths: optional per-sample count of valid (unpadded) time steps.
                When given, the LSTM output at each sample's last valid step
                is used, so trailing padding does not affect the result.
                When omitted, the output at the final time step is used.

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised logits.
        """
        # No explicit (h0, c0): nn.LSTM then starts from zero states created
        # on the input's device and dtype. Building them by hand with
        # torch.zeros(...) pinned them to the CPU/float32 and failed as soon
        # as the model ran on CUDA or in another precision.
        out, _ = self.rnn(x)

        if lengths is None:
            # Take the output from the last time step
            last = out[:, -1, :]
        else:
            steps = torch.as_tensor(lengths, dtype=torch.long, device=out.device)
            # Convert lengths to 0-based indices and keep them inside the sequence.
            steps = (steps - 1).clamp(min=0, max=out.size(1) - 1)
            rows = torch.arange(out.size(0), device=out.device)
            last = out[rows, steps]

        # Apply dropout
        last = self.dropout(last)

        # Apply final linear layer
        return self.fc(last)

In [36]:
# Seed PyTorch's global RNG before the model is built so that the weight
# initialisation, dropout masks and the training loader's shuffle order are
# the same on every Restart & Run All (42 matches the split's random_state).
torch.manual_seed(42)

input_size = X.shape[2]  # Number of features per timestep
hidden_size = 64
num_layers = 2
# NOTE(review): assumes the labels are exactly 0..K-1; a gap in the label ids
# would make this smaller than the largest class index - confirm.
num_classes = len(np.unique(y))

# Initialize model
model = BasketballRNN(input_size, hidden_size, num_layers, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [37]:
num_epochs = 10
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Moves the parameters in place; as the cell's last expression it also
# displays the model summary.
model.to(device)

BasketballRNN(
  (rnn): LSTM(34, 64, num_layers=2, batch_first=True, dropout=0.2)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=64, out_features=2, bias=True)
)

In [38]:
def train_model(model, train_loader, criterion, optimizer, num_epochs, device):
    """Run the training loop and report per-epoch metrics.

    Args:
        model: network to optimise; put into training mode once at the start.
        train_loader: iterable of ``(inputs, labels)`` batches that supports ``len()``.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimiser that updates ``model``'s parameters.
        num_epochs: number of passes over ``train_loader``.
        device: device every batch is moved to before the forward pass.

    Returns:
        (train_losses, train_accuracies): two lists with one value per epoch -
        the mean of the batch losses and the accuracy in percent.
    """
    model.train()
    train_losses, train_accuracies = [], []

    for epoch in range(num_epochs):
        running_loss = 0
        n_hit = 0
        n_seen = 0

        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # One optimisation step: reset gradients, forward, backward, update.
            optimizer.zero_grad()
            logits = model(batch_x)
            loss = criterion(logits, batch_y)
            loss.backward()
            optimizer.step()

            # Bookkeeping for the epoch summary (no gradient tracking needed).
            running_loss += loss.item()
            predicted = logits.detach().max(dim=1).indices
            n_seen += batch_y.size(0)
            n_hit += (predicted == batch_y).sum().item()

        # Loss is averaged over batches, accuracy over individual samples.
        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100 * n_hit / n_seen

        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_acc)

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')

    return train_losses, train_accuracies

In [39]:
# Train for `num_epochs` epochs; the per-epoch loss and accuracy lists are kept for later inspection.
train_losses, train_accuracies = train_model(model, train_loader, criterion, optimizer, num_epochs, device)

Epoch [1/10], Loss: 0.7362, Accuracy: 21.31%
Epoch [2/10], Loss: 0.6950, Accuracy: 47.54%
Epoch [3/10], Loss: 0.6483, Accuracy: 78.69%
Epoch [4/10], Loss: 0.5937, Accuracy: 78.69%
Epoch [5/10], Loss: 0.5369, Accuracy: 78.69%
Epoch [6/10], Loss: 0.4974, Accuracy: 78.69%
Epoch [7/10], Loss: 0.5318, Accuracy: 78.69%
Epoch [8/10], Loss: 0.5067, Accuracy: 78.69%
Epoch [9/10], Loss: 0.5010, Accuracy: 78.69%
Epoch [10/10], Loss: 0.4998, Accuracy: 78.69%


In [40]:
def test_model(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    all_predictions = []
    all_labels = []
    
    with torch.no_grad():
        for sequences, labels in test_loader:
            sequences, labels = sequences.to(device), labels.to(device)
            
            outputs = model(sequences)
            _, predicted = torch.max(outputs, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    accuracy = 100 * correct / total
    return accuracy, all_predictions, all_labels

In [41]:
# Evaluate on the held-out split.
# NOTE(review): with the label distribution printed earlier ([19 69]) and a
# stratified 70/30 split, always predicting the majority class would score
# about 21/27 = 77.78% on the test set and 48/61 = 78.69% on the training set -
# the same figures this notebook reports. Inspect `predictions` (e.g. with a
# confusion matrix) to confirm the model is not predicting a single class.
test_accuracy, predictions, true_labels = test_model(model, test_loader, device)
print(f'Test Accuracy: {test_accuracy:.2f}%')

Test Accuracy: 77.78%
