In [148]:
import torch
import numpy as np
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, global_mean_pool
import torch.nn.functional as F

In [149]:
# Define the original GCN model
class PoseGCN(torch.nn.Module):
    """Two-layer GCN classifier for pose graphs.

    Pipeline: GCNConv -> ReLU -> dropout(0.5) -> GCNConv -> global mean pool
    -> linear -> log-softmax.

    Args:
        num_node_features: Size of each node's input feature vector.
        hidden_channels: Width of both graph-convolution layers. Defaults to
            64, the value that was previously hard-coded, so the default
            layer shapes are unchanged.
        num_classes: Number of output classes. Defaults to 2
            (correct / incorrect).
    """

    def __init__(self, num_node_features, hidden_channels=64, num_classes=2):
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept stable
        # to stay loadable from previously saved weights.
        # GCN layers
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)

        # Classification layer
        self.linear = torch.nn.Linear(hidden_channels, num_classes)

    def forward(self, data):
        """Compute class log-probabilities for the graph(s) in ``data``.

        Args:
            data: Object exposing ``x`` (node features), ``edge_index`` and
                ``batch`` (node-to-graph assignment passed to the pooling).

        Returns:
            Tensor of log-probabilities, normalised along dim 1.
        """
        x, edge_index = data.x, data.edge_index

        # First GCN layer; dropout is only active in training mode.
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)

        # Second GCN layer (no activation before pooling)
        x = self.conv2(x, edge_index)

        # Global pooling (average all node features per graph)
        x = global_mean_pool(x, data.batch)

        # Classification layer
        x = self.linear(x)

        return F.log_softmax(x, dim=1)

# Define the deep GCN model
class DeepPoseGCN(torch.nn.Module):
    """Four-layer GCN classifier with batch normalisation and skip connections.

    The first three graph convolutions are each followed by batch norm, ReLU
    and dropout(0.2); the second and third add their input back as a residual.
    A fourth, bare convolution feeds a global mean pool and a two-layer
    classification head that ends in log-softmax.

    Args:
        num_node_features: Size of each node's input feature vector.
        hidden_channels: Width of every graph-convolution layer.
        num_classes: Number of output classes.
    """

    def __init__(self, num_node_features, hidden_channels=64, num_classes=2):
        super().__init__()
        width = hidden_channels

        # Graph convolutions: the first lifts the input to `width`, the rest keep it.
        self.conv1 = GCNConv(num_node_features, width)
        self.conv2 = GCNConv(width, width)
        self.conv3 = GCNConv(width, width)
        self.conv4 = GCNConv(width, width)

        # One batch norm per normalised block (conv4 has none).
        self.bn1 = torch.nn.BatchNorm1d(width)
        self.bn2 = torch.nn.BatchNorm1d(width)
        self.bn3 = torch.nn.BatchNorm1d(width)

        # Classification head: width -> width // 2 -> num_classes.
        self.linear1 = torch.nn.Linear(width, width // 2)
        self.linear2 = torch.nn.Linear(width // 2, num_classes)

    def _conv_block(self, conv, norm, features, edge_index):
        """Apply conv -> batch norm -> ReLU -> dropout(p=0.2) to ``features``."""
        activated = F.relu(norm(conv(features, edge_index)))
        return F.dropout(activated, p=0.2, training=self.training)

    def forward(self, data):
        """Compute class log-probabilities for the graph(s) in ``data``.

        Args:
            data: Object exposing ``x`` (node features), ``edge_index`` and
                ``batch`` (node-to-graph assignment passed to the pooling).

        Returns:
            Tensor of log-probabilities, normalised along dim 1.
        """
        edge_index = data.edge_index

        # Block 1 has no skip; blocks 2 and 3 add their own input back.
        h1 = self._conv_block(self.conv1, self.bn1, data.x, edge_index)
        h2 = self._conv_block(self.conv2, self.bn2, h1, edge_index) + h1
        h3 = self._conv_block(self.conv3, self.bn3, h2, edge_index) + h2

        # Final convolution is left un-normalised and un-activated before pooling.
        pooled = global_mean_pool(self.conv4(h3, edge_index), data.batch)

        # Classification head with a heavier dropout than the conv blocks.
        hidden = F.relu(self.linear1(pooled))
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return F.log_softmax(self.linear2(hidden), dim=1)

In [150]:
def load_trained_model(model_class, model_path, device, num_node_features=2):
    """Instantiate ``model_class`` on ``device`` and load weights from ``model_path``.

    The file may hold either a bare state dict or a checkpoint dict that
    stores the weights under the ``'model_state_dict'`` key.

    Args:
        model_class: Model class to build; it is called as
            ``model_class(num_node_features=...)``.
        model_path: Path of the saved state dict / checkpoint file.
        device: Device the model is moved to and the weights are mapped to.
        num_node_features: Input feature size forwarded to ``model_class``.
            Defaults to 2, the value that was previously hard-coded.

    Returns:
        The model with the weights loaded. Its train/eval mode is not changed
        here.

    Raises:
        Any error raised by ``torch.load`` (e.g. missing file) or by
        ``load_state_dict`` (e.g. keys that do not match the model).
    """
    # Initialize model
    model = model_class(num_node_features=num_node_features).to(device)

    # SECURITY: torch.load unpickles the file, which can execute arbitrary
    # code. Only load checkpoints from a trusted source (or pass
    # weights_only=True on PyTorch versions that support it).
    checkpoint = torch.load(model_path, map_location=device)

    # Only dict payloads can be probed for the wrapper key; anything else is
    # handed to load_state_dict as-is so it reports the problem itself.
    if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
        state_dict = checkpoint['model_state_dict']
    else:
        state_dict = checkpoint  # Assume it's a direct state dict

    # Load the state dictionary into the model
    model.load_state_dict(state_dict)
    print(f"{model_class.__name__} loaded successfully")

    return model

def prepare_single_pose_input(keypoints, visibility=None):
    """
    Build the graph object for one 17-keypoint pose.

    Args:
        keypoints: numpy array or list of shape [17, 2] containing x,y coordinates
        visibility: optional numpy array or list of shape [17] containing
            visibility flags; when omitted every keypoint gets flag 2

    Returns:
        A ``Data`` object with ``x`` (float coordinates, one row per keypoint),
        ``edge_index`` (2 x num_edges, both directions of every bone) and
        ``vis`` (float visibility flags as a column).

    Raises:
        ValueError: if ``keypoints`` is not (17, 2) or ``visibility`` is not (17,).
    """
    # Validate the coordinates first so shape errors surface before any tensor work.
    coords = np.array(keypoints)
    if coords.shape != (17, 2):
        raise ValueError(f"Expected keypoints shape (17, 2), got {coords.shape}")

    if visibility is not None:
        flags = np.array(visibility)
        if flags.shape != (17,):
            raise ValueError(f"Expected visibility shape (17,), got {flags.shape}")
    else:
        flags = np.full(17, 2)  # All keypoints visible

    node_features = torch.tensor(coords, dtype=torch.float)
    vis_column = torch.tensor(flags, dtype=torch.float).view(-1, 1)

    # Undirected skeleton, listed once per bone.
    skeleton = (
        (0, 1),    # nose - left eye
        (0, 2),    # nose - right eye
        (1, 3),    # left eye - left ear
        (2, 4),    # right eye - right ear
        (5, 6),    # left shoulder - right shoulder
        (5, 7),    # left shoulder - left elbow
        (6, 8),    # right shoulder - right elbow
        (7, 9),    # left elbow - left wrist
        (8, 10),   # right elbow - right wrist
        (5, 11),   # left shoulder - left hip
        (6, 12),   # right shoulder - right hip
        (11, 12),  # left hip - right hip
        (11, 13),  # left hip - left knee
        (12, 14),  # right hip - right knee
        (13, 15),  # left knee - left ankle
        (14, 16),  # right knee - right ankle
    )
    # Emit each bone as (a, b) immediately followed by (b, a).
    directed = [pair for a, b in skeleton for pair in ([a, b], [b, a])]
    edge_index = torch.tensor(directed, dtype=torch.long)

    # Create PyG Data object (edge list transposed to 2 x num_edges)
    return Data(x=node_features, edge_index=edge_index.t().contiguous(), vis=vis_column)

In [151]:
def predict_single(model, data, device):
    """Run ``model`` on one pose and return its predicted class and probabilities.

    The model is switched to eval mode and the forward pass runs without
    gradient tracking. The model output is treated as log-probabilities.

    Args:
        model: Callable module producing log-probabilities with classes on dim 1.
        data: Input object; it is moved to ``device`` via ``data.to(device)``.
        device: Device to run the forward pass on.

    Returns:
        ``(pred, probs)``: the predicted class index as a Python int and the
        first row of class probabilities as a numpy array.
    """
    model.eval()
    with torch.no_grad():
        log_probs = model(data.to(device))
        pred = log_probs.max(dim=1).indices
        prob = log_probs.exp()  # log-probabilities -> probabilities
    return pred.item(), prob[0].cpu().numpy()

In [152]:
def predict_pose(model_class, model_path, keypoints, visibility=None):
    """
    Classify one pose as "Correct" or "Incorrect" with the given model.

    The model is rebuilt and its weights are re-read from ``model_path`` on
    every call; CUDA is used when available, otherwise the CPU.

    Args:
        model_class: The class of the model to use (PoseGCN or DeepPoseGCN)
        model_path: Path to the model's state dict file
        keypoints: numpy array or list of shape [17, 2] containing x,y coordinates in COCO format
        visibility: optional numpy array or list of shape [17] containing visibility flags

    Returns:
        prediction: "Correct" when the predicted class index is 1, else "Incorrect"
        confidence: the largest class probability
    """
    # Pick the device once and reuse it for loading and inference.
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # Load weights first, then build the graph input, then run the model.
    model = load_trained_model(model_class, model_path, device)
    pose_graph = prepare_single_pose_input(keypoints, visibility)
    class_index, class_probs = predict_single(model, pose_graph, device)

    # Class index 1 is reported as "Correct"; any other index as "Incorrect".
    if class_index == 1:
        pred_class = "Correct"
    else:
        pred_class = "Incorrect"

    return pred_class, max(class_probs)

In [153]:
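# NOTE: every line of this cell is commented out, so running it defines nothing.
# # Predict Input - YOLO Pose output
# # keypoint = result.keypoints.data[0]
# # Example keypoints from the ultralytics object
# # (one row per keypoint: x, y, and a third column that is thresholded at 0.5 below)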
# keypoints_tensor = torch.tensor([[ 25.3896,  44.3985,   0.9876],
#         [ 19.5475,  37.6896,   0.9656],
#         [ 20.2674,  39.5152,   0.9838],
#         [ 25.3273,  26.4775,   0.9327],
#         [ 26.9300,  28.1562,   0.9810],
#         [ 67.0843,  26.7597,   0.9872],
#         [ 68.6909,  30.5649,   0.9859],
#         [105.2338,  39.4868,   0.9922],
#         [107.9833,  44.5882,   0.9917],
#         [ 93.6414,  76.5510,   0.9988],
#         [ 94.0114,  83.4342,   0.9986],
#         [125.6109,  63.7715,   0.9641],
#         [128.7283,  67.5244,   0.9530],
#         [ 82.6642, 132.5754,   0.9990],
#         [ 83.2223, 141.3123,   0.9987],
#         [ 84.7002, 193.8848,   0.9298],
#         [ 89.8226, 205.7054,   0.7688]])
# # Extract x, y coordinates and visibility
# keypoints = keypoints_tensor[:, :2].numpy()
# visibility = (keypoints_tensor[:, 2] > 0.5).int().numpy() * 2  # Convert to visibility flags

In [154]:
# Define the inputs explicitly so this cell (and the prediction cells after
# it) run on a fresh kernel instead of relying on leftover kernel state.
# The values are reconstructed from this cell's recorded output.
keypoints = np.array([
    [23.38, 45.06],
    [18.12, 39.27],
    [18.78, 37.79],
    [25.84, 18.90],
    [28.26, 18.24],
    [67.08, 68.70],
    [77.33, 40.00],
    [78.44, 101.45],
    [88.28, 85.78],
    [78.23, 136.18],
    [85.99, 113.58],
    [132.19, 63.82],
    [150.18, 57.63],
    [99.56, 147.38],
    [107.83, 143.60],
    [105.16, 212.56],
    [112.12, 213.67],
])
# Float dtype matches the recorded output ("2." / "1." entries).
visibility = np.array(
    [2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2], dtype=float
)

print(keypoints)
print(visibility)

[[ 23.38  45.06]
 [ 18.12  39.27]
 [ 18.78  37.79]
 [ 25.84  18.9 ]
 [ 28.26  18.24]
 [ 67.08  68.7 ]
 [ 77.33  40.  ]
 [ 78.44 101.45]
 [ 88.28  85.78]
 [ 78.23 136.18]
 [ 85.99 113.58]
 [132.19  63.82]
 [150.18  57.63]
 [ 99.56 147.38]
 [107.83 143.6 ]
 [105.16 212.56]
 [112.12 213.67]]
[2. 1. 2. 1. 2. 1. 2. 1. 1. 1. 1. 1. 2. 1. 1. 1. 2.]


In [155]:
# Classify the current keypoints with the two-layer PoseGCN weights.
pred_class_original, confidence_original = predict_pose(
    model_class=PoseGCN,
    model_path='models/model-7/original/model_PoseGCN_best.pth',
    keypoints=keypoints,
    visibility=visibility,
)
print(f"Original Model Prediction: {pred_class_original}, Confidence: {confidence_original:.2%}")

PoseGCN loaded successfully
Original Model Prediction: Correct, Confidence: 100.00%


In [156]:
# Classify the same keypoints with the deeper DeepPoseGCN weights.
pred_class_deep, confidence_deep = predict_pose(
    model_class=DeepPoseGCN,
    model_path='models/model-7/deep/model_DeepPoseGCN_best.pth',
    keypoints=keypoints,
    visibility=visibility,
)
print(f"Deep Model Prediction: {pred_class_deep}, Confidence: {confidence_deep:.2%}")

DeepPoseGCN loaded successfully
Deep Model Prediction: Correct, Confidence: 100.00%


In [157]:
# Example pose as a flat list, laid out one keypoint per row: x, y, visibility.
flat_keypoints = [
    28.85, 49.25, 2,
    23.59, 43.46, 1,
    24.26, 41.98, 2,
    31.31, 23.08, 1,
    33.74, 22.43, 2,
    75.53, 49.52, 1,
    85.37, 14.73, 1,
    100.41, 69.53, 1,
    118.81, 22.88, 1,
    95.06, 104.4, 1,
    103.65, 57.29, 1,
    128.42, 64.95, 1,
    155.05, 60.26, 1,
    98.46, 147.89, 1,
    104.67, 148.01, 2,
    110.63, 224.03, 1,
    115.85, 226.6, 2,
]

In [158]:
def process_keypoints(flat_keypoints):
    """
    Split a flat keypoint list into a coordinate array and a visibility array.

    Args:
        flat_keypoints: List of keypoints in the format [x1, y1, v1, x2, y2, v2, ..., x17, y17, v17]

    Returns:
        keypoints: numpy array with one (x, y) row per keypoint — (17, 2) for 17 keypoints
        visibility: numpy array with one flag per keypoint — (17,) for 17 keypoints
    """
    # One row per keypoint: columns are x, y, visibility.
    triples = np.reshape(np.array(flat_keypoints), (-1, 3))

    # The first two columns are the coordinates, the last is the visibility flag.
    return triples[:, :2], triples[:, 2]

In [159]:
# Split the flat [x, y, v, ...] example into coordinate and visibility arrays.
# This rebinds `keypoints` and `visibility`, the same names an earlier cell prints.
keypoints, visibility = process_keypoints(flat_keypoints)

In [160]:
# Feed the arrays produced by process_keypoints to the two-layer PoseGCN.
pred_class, confidence = predict_pose(
    model_class=PoseGCN,
    model_path='models/model-7/original/model_PoseGCN_best.pth',
    keypoints=keypoints,
    visibility=visibility,
)
print(f"Prediction: {pred_class}, Confidence: {confidence:.2%}")

PoseGCN loaded successfully
Prediction: Correct, Confidence: 100.00%


In [161]:
# Classify the process_keypoints arrays with the deeper DeepPoseGCN weights.
pred_class_deep, confidence_deep = predict_pose(
    model_class=DeepPoseGCN,
    model_path='models/model-7/deep/model_DeepPoseGCN_best.pth',
    keypoints=keypoints,
    visibility=visibility,
)
print(f"Deep Model Prediction: {pred_class_deep}, Confidence: {confidence_deep:.2%}")

DeepPoseGCN loaded successfully


Deep Model Prediction: Correct, Confidence: 100.00%
