# Pose Estimation and Mesh Generation

#### Pre-process

##### Understanding Dataset Properties
###### File Types:
###### _image.png: Original image, used for input.
###### _joints.npy: Contains 2D joint positions (keypoints); in this notebook they are the input to the PosePrior model.
###### _render_light.png: Rendered version of the image, can be used for additional input channels or visualization.
###### _quality_info.txt, _fit_crop_info.txt, _dataset_info.txt: Contain metadata that may help in filtering or augmenting the data.
###### _body.pkl: Pickled body-model fit; the loader below reads its `pose` and `betas` entries (SMPL pose and shape parameters), useful for 3D pose estimation or mesh reconstruction.
###### Data Splits:
###### train.txt, val.txt, test.txt, trainval.txt: Lists of images for training, validation, and testing splits.

### Data preprocessing

In [None]:
import os
import numpy as np
import pickle
from sklearn.model_selection import train_test_split

def load_dataset(data_dir, file_list):
    """
    Load 2D keypoints and SMPL parameters for every sample in ``file_list``.

    For each sample name the function reads ``{name}_joints.npy`` and
    ``{name}_body.pkl`` from ``data_dir``.

    Args:
        data_dir (str): The directory containing the data files.
        file_list (list): Sample names, i.e. the part of each file name that
            precedes ``_joints.npy`` / ``_body.pkl``.

    Returns:
        tuple: ``(keypoints_2d, pose_params, shape_params)`` as NumPy arrays.
        Each array stacks the per-sample values along a new leading axis, in
        ``file_list`` order.

    Raises:
        FileNotFoundError: If a joints or body file is missing.
        KeyError: If a body pickle has no ``'pose'`` or ``'betas'`` entry.
    """
    keypoints_2d, pose_params, shape_params = [], [], []

    for file_name in file_list:
        # 2D keypoints
        joints_file = os.path.join(data_dir, f"{file_name}_joints.npy")
        keypoints_2d.append(np.load(joints_file))

        # SMPL parameters
        body_file = os.path.join(data_dir, f"{file_name}_body.pkl")
        # SECURITY: unpickling can execute arbitrary code; only point this
        # loader at dataset files obtained from a trusted source.
        with open(body_file, 'rb') as f:
            # encoding='latin1' lets pickles written by Python 2 (including
            # ones that hold NumPy arrays) load under Python 3; pickles
            # written by Python 3 are unaffected by this argument.
            smpl_data = pickle.load(f, encoding='latin1')

        pose_params.append(smpl_data['pose'])
        shape_params.append(smpl_data['betas'])

    return np.array(keypoints_2d), np.array(pose_params), np.array(shape_params)

def load_file_list(file_path):
    """
    Load a list of file names from a text file, one name per line.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped, so a trailing empty line in the manifest does not turn into an
    empty sample name.

    Args:
        file_path (str): The path to the text file containing file names.

    Returns:
        file_list (list): The non-empty names, in file order.
    """
    with open(file_path, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        return [name for name in stripped_lines if name]

# Dataset root and the split manifests stored inside it
data_dir = 'Pose_Estimation_and_Mesh_Generation/UP3D_FP/up-3d'
train_file, val_file, test_file = (
    os.path.join(data_dir, manifest_name)
    for manifest_name in ('train.txt', 'val.txt', 'test.txt')
)

# Read the sample names listed in each manifest
train_files = load_file_list(train_file)
val_files = load_file_list(val_file)
test_files = load_file_list(test_file)

# Load keypoints, pose parameters and shape parameters for every split
train_keypoints, train_pose, train_shape = load_dataset(
    data_dir, train_files
)
val_keypoints, val_pose, val_shape = load_dataset(
    data_dir, val_files
)
test_keypoints, test_pose, test_shape = load_dataset(
    data_dir, test_files
)

### Model Training

#### Training the PosePrior Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Define PosePrior model
class PosePriorModel(nn.Module):
    """Fully connected network with two ReLU hidden layers (128 and 256 units)."""

    def __init__(self, input_size, output_size):
        """Create the three linear layers and the shared ReLU activation."""
        super().__init__()
        first_width, second_width = 128, 256
        self.fc1 = nn.Linear(input_size, first_width)
        self.fc2 = nn.Linear(first_width, second_width)
        self.fc3 = nn.Linear(second_width, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply fc1 and fc2 with ReLU, then fc3 without an activation."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Prepare data for training
def prepare_data(keypoints, pose_params):
    """
    Wrap inputs and targets in a TensorDataset of float32 tensors.

    Each input sample is flattened to one row; the targets keep their shape.
    """
    sample_count = len(keypoints)
    flat_inputs = torch.tensor(keypoints, dtype=torch.float32).reshape(sample_count, -1)
    target_tensor = torch.tensor(pose_params, dtype=torch.float32)
    return TensorDataset(flat_inputs, target_tensor)

# Training function
def train_model(train_loader, model, criterion, optimizer, num_epochs=20):
    """
    Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Every batch runs a forward pass, a backward pass and one optimizer step.
    After each epoch the mean of the per-batch losses is printed. Nothing is
    returned; the model is updated in place.
    """
    for epoch in range(num_epochs):
        model.train()
        batch_losses = []
        for batch_inputs, batch_targets in train_loader:
            optimizer.zero_grad()
            batch_loss = criterion(model(batch_inputs), batch_targets)
            batch_loss.backward()
            optimizer.step()
            batch_losses.append(batch_loss.item())
        total_loss = sum(batch_losses)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")

# Wrap the training and validation arrays as keypoints-to-pose datasets
train_dataset = prepare_data(train_keypoints, train_pose)
val_dataset = prepare_data(val_keypoints, val_pose)

# Only the training batches are shuffled; validation keeps its stored order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=32)

# Layer sizes: one flattened keypoint array per sample in, one pose vector out
input_size = train_keypoints.shape[1] * train_keypoints.shape[2]
output_size = train_pose.shape[1]

# Network, loss and optimizer
pose_model = PosePriorModel(input_size=input_size, output_size=output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(pose_model.parameters(), lr=1e-3)

# Fit the PosePrior model with the default number of epochs
train_model(
    train_loader=train_loader,
    model=pose_model,
    criterion=criterion,
    optimizer=optimizer,
)

#### Training the ShapePrior Model

In [None]:
# Define ShapePrior model
class ShapePriorModel(nn.Module):
    """Fully connected network with two ReLU hidden layers (128 and 256 units)."""

    def __init__(self, input_size, output_size):
        """Create the three linear layers and the shared ReLU activation."""
        super().__init__()
        first_width, second_width = 128, 256
        self.fc1 = nn.Linear(input_size, first_width)
        self.fc2 = nn.Linear(first_width, second_width)
        self.fc3 = nn.Linear(second_width, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply fc1 and fc2 with ReLU, then fc3 without an activation."""
        activated = self.relu(self.fc1(x))
        activated = self.relu(self.fc2(activated))
        return self.fc3(activated)

# Pose-to-shape datasets: pose parameters are the inputs for this model
train_dataset_shape = prepare_data(train_pose, train_shape)
val_dataset_shape = prepare_data(val_pose, val_shape)

# Only the training batches are shuffled
train_loader_shape = DataLoader(train_dataset_shape, shuffle=True, batch_size=32)
val_loader_shape = DataLoader(val_dataset_shape, shuffle=False, batch_size=32)

# Layer sizes come from the second axis of the pose and shape arrays
input_size_shape = train_pose.shape[1]
output_size_shape = train_shape.shape[1]

# Network and its own optimizer
shape_model = ShapePriorModel(
    input_size=input_size_shape,
    output_size=output_size_shape,
)
optimizer_shape = optim.Adam(shape_model.parameters(), lr=1e-3)

# Fit the ShapePrior model; `criterion` is not created here, it is the loss
# object assigned in an earlier cell
train_model(
    train_loader=train_loader_shape,
    model=shape_model,
    criterion=criterion,
    optimizer=optimizer_shape,
)

### Evaluation and Results

In [None]:
def evaluate_model(test_loader, model, criterion=None):
    """
    Compute the mean per-batch loss of ``model`` over ``test_loader``.

    The model is switched to evaluation mode and gradients are disabled for
    the duration of the pass.

    Args:
        test_loader: Sized iterable of ``(inputs, targets)`` batches.
        model: The network to evaluate; it is left in evaluation mode.
        criterion: Loss callable taking ``(outputs, targets)``. Defaults to
            ``nn.MSELoss()``, the loss used for training in this file, so
            the function no longer depends on a module-level ``criterion``.

    Returns:
        float: Sum of the batch losses divided by the number of batches.

    Raises:
        ZeroDivisionError: If ``test_loader`` contains no batches.
    """
    if criterion is None:
        criterion = nn.MSELoss()

    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            total_loss += criterion(model(inputs), targets).item()
    return total_loss / len(test_loader)

# PosePrior: keypoints-to-pose test set, evaluated in stored order
test_dataset_pose = prepare_data(test_keypoints, test_pose)
test_loader_pose = DataLoader(test_dataset_pose, shuffle=False, batch_size=32)

pose_loss = evaluate_model(test_loader=test_loader_pose, model=pose_model)
print(f"PosePrior Model Test Loss: {pose_loss:.4f}")

# ShapePrior: pose-to-shape test set, evaluated in stored order
test_dataset_shape = prepare_data(test_pose, test_shape)
test_loader_shape = DataLoader(test_dataset_shape, shuffle=False, batch_size=32)

shape_loss = evaluate_model(test_loader=test_loader_shape, model=shape_model)
print(f"ShapePrior Model Test Loss: {shape_loss:.4f}")

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def _pose_as_points(pose):
    """Return ``pose`` as an array; a flat vector is regrouped into rows of three."""
    points = np.asarray(pose)
    if points.ndim == 1:
        # The models in this file emit one flat vector per sample, which
        # cannot be indexed by column; group its values three at a time.
        return points.reshape(-1, 3)
    return points


def visualize_predictions(test_images, predicted_poses, actual_poses):
    """
    Scatter-plot predicted and ground-truth poses side by side in 3D.

    One subplot is drawn per entry of ``test_images``; only the number of
    entries is used, the entries themselves are not displayed.

    Args:
        test_images: Sized collection with one entry per pose to plot.
        predicted_poses: Indexable collection of predicted poses.
        actual_poses: Indexable collection of ground-truth poses.

    Each pose may be a 2-D array whose first three columns are plotted as
    x, y, z, or a flat vector whose length is a multiple of 3. Tensors must
    be convertible with ``np.asarray`` (detached, on the CPU).

    Raises:
        ValueError: If a flat pose has a length that is not a multiple of 3.
    """
    num_plots = len(test_images)
    fig = plt.figure(figsize=(15, 5))
    for i in range(num_plots):
        predicted = _pose_as_points(predicted_poses[i])
        actual = _pose_as_points(actual_poses[i])

        ax = fig.add_subplot(1, num_plots, i + 1, projection='3d')
        ax.scatter(predicted[:, 0], predicted[:, 1], predicted[:, 2], color='r', label='Predicted Pose')
        ax.scatter(actual[:, 0], actual[:, 1], actual[:, 2], color='g', label='Ground Truth')
        ax.set_title(f"Pose {i+1}")
        ax.legend()
    plt.show()

# Predict poses for the test split with the trained PosePrior model, reusing
# the flattened keypoint inputs already stored in test_dataset_pose.
pose_model.eval()
with torch.no_grad():
    predicted_pose_outputs = pose_model(test_dataset_pose.tensors[0]).numpy()

# visualize_predictions draws one subplot per entry of test_images and only
# uses the number of entries, so the first few test sample names stand in for
# image data and keep the figure to a handful of subplots.
test_images = test_files[:5]
visualize_predictions(test_images, predicted_pose_outputs, test_pose)

### Mesh Rendering

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import open3d as o3d

def project_3d_to_2d(points_3d, intrinsic_matrix):
    """
    Project 3D points to 2D using intrinsic camera parameters.

    Args:
        points_3d: Array-like with one 3D point per row, shape (N, 3).
            Nested lists are accepted as well as NumPy arrays.
        intrinsic_matrix: Array-like camera matrix that is multiplied with
            the transposed points (3 x 3 in this file's example).

    Returns:
        np.ndarray: Shape (N, 2); the first two rows of the product divided
        by its third row, one projected point per row.

    Note:
        A point whose third projected coordinate is 0 yields inf/nan under
        NumPy's division rules; no exception is raised.
    """
    points_3d = np.asarray(points_3d)
    intrinsic_matrix = np.asarray(intrinsic_matrix)

    homogeneous = intrinsic_matrix @ points_3d.T
    # Normalize by the third coordinate, then return one point per row
    return (homogeneous[:2, :] / homogeneous[2, :]).T

def render_mesh_with_keypoints(pose_params, shape_params, intrinsic_matrix, keypoints_2d):
    """
    Show a body mesh in an Open3D window with a small sphere per keypoint.

    Args:
        pose_params: Pose parameters passed to ``smpl_model``.
        shape_params: Shape parameters passed to ``smpl_model``.
        intrinsic_matrix: Camera matrix used to project the mesh vertices.
        keypoints_2d: Iterable of keypoints. Each is placed at its first
            (up to three) coordinates; a 2-component keypoint gets z = 0.

    The call blocks until the viewer window is closed. The window is
    destroyed even if adding geometry or running the viewer fails.
    """
    # Generate SMPL mesh based on pose and shape parameters (pseudo-code).
    # NOTE(review): smpl_model is not defined in the visible source; it must
    # return an object exposing .vertices and .faces -- supply an implementation.
    mesh = smpl_model(pose_params, shape_params)

    # Convert mesh to Open3D format
    o3d_mesh = o3d.geometry.TriangleMesh()
    o3d_mesh.vertices = o3d.utility.Vector3dVector(mesh.vertices)
    o3d_mesh.triangles = o3d.utility.Vector3iVector(mesh.faces)

    # Project 3D mesh vertices to 2D.
    # NOTE(review): the projection is computed but not consumed below.
    projected_vertices = project_3d_to_2d(mesh.vertices, intrinsic_matrix)

    vis = o3d.visualization.Visualizer()
    vis.create_window()
    try:
        vis.add_geometry(o3d_mesh)

        for keypoint in keypoints_2d:
            # translate() needs a 3-component vector, so pad shorter
            # keypoints with zeros instead of passing them through as-is.
            coords = np.asarray(keypoint, dtype=np.float64).ravel()
            position = np.zeros(3)
            position[:min(coords.size, 3)] = coords[:3]

            sphere = o3d.geometry.TriangleMesh.create_sphere(radius=0.02)
            sphere.translate(position)  # Move sphere to keypoint position
            vis.add_geometry(sphere)

        vis.run()
    finally:
        # Always release the window, including on errors above
        vis.destroy_window()

# Example usage
# Define intrinsic matrix (example values)
# NOTE(review): presumably the usual pinhole layout, i.e. focal lengths of 800
# on the diagonal and a principal point at (320, 240) -- confirm against the
# camera actually used.
intrinsic_matrix = np.array([[800, 0, 320],
                              [0, 800, 240],
                              [0, 0, 1]])

# Assume predicted_pose_outputs[0] and predicted_shape_outputs[0] are available
# Assume keypoints_2d is the predicted 2D keypoints from the model
# NOTE(review): predicted_pose_outputs, predicted_shape_outputs and
# keypoints_2d must all be defined before this cell runs; no assignment to
# predicted_shape_outputs or keypoints_2d appears in the visible source.
render_mesh_with_keypoints(predicted_pose_outputs[0], predicted_shape_outputs[0], intrinsic_matrix, keypoints_2d)