## 0. Install Dependecies

In [1]:
pip install torch

Collecting torch
  Downloading torch-2.4.1-cp38-cp38-manylinux1_x86_64.whl (797.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m797.1/797.1 MB[0m [31m?[0m eta [36m0:00:00[0m
[?25hCollecting filelock (from torch)
  Downloading filelock-3.16.1-py3-none-any.whl (16 kB)
Collecting typing-extensions>=4.8.0 (from torch)
  Downloading typing_extensions-4.12.2-py3-none-any.whl (37 kB)
Collecting sympy (from torch)
  Downloading sympy-1.13.3-py3-none-any.whl (6.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.2/6.2 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting networkx (from torch)
  Downloading networkx-3.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
Collecting fsspec (from torch)
  Downloading fsspec-2024.10.0-py3-none-any.whl (179 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.6/179.6 kB[0m [31m7.0 MB

**Set device to GPU if is available otherwise set device as cpu**

In [36]:
import torch
# Check if GPU is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


**Import libraries**

In [58]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.base import BaseEstimator, RegressorMixin

##1. Dataset##

- The Datatset used in this project was generated using the Mujoco simulator with three different configurations:
- 2D (2 joints)
- 2D (3 joints)
- 3D (5 joints)

The format of the data is in CSV format, including information about Joint angles, fingertip position, and orientation.


1.1. Visualise data from the simulator

In [38]:
!head -5 logfiler2.csv

j0;j1;cos(j0);cos(j1);sin(j0);sin(j1);ft_x;ft_y;ft_qw;ft_qz
 0.055; -0.012;  0.998;  1.000;  0.055; -0.012;  0.210;  0.010;  1.000;  0.021
 0.076; -0.017;  0.997;  1.000;  0.076; -0.017;  0.210;  0.014;  1.000;  0.030
 0.148; -0.011;  0.989;  1.000;  0.147; -0.011;  0.208;  0.030;  0.998;  0.068
 0.214;  0.048;  0.977;  0.999;  0.212;  0.048;  0.204;  0.050;  0.991;  0.131


In [None]:
!head -5 logfiler3.csv

j0;j1;j2;cos(j0);cos(j1);cos(j2);sin(j0);sin(j1);sin(j2);ft_x;ft_y;ft_qw;ft_qz
 0.055; -0.012;  0.072;  0.998;  1.000;  0.997;  0.055; -0.012;  0.072;  0.309;  0.022;  0.998;  0.057
 0.076; -0.017;  0.100;  0.997;  1.000;  0.995;  0.076; -0.017;  0.100;  0.308;  0.031;  0.997;  0.080
 0.135; -0.059;  0.194;  0.991;  0.998;  0.981;  0.135; -0.059;  0.193;  0.305;  0.050;  0.991;  0.135
 0.228; -0.110;  0.295;  0.974;  0.994;  0.957;  0.226; -0.109;  0.290;  0.297;  0.079;  0.979;  0.205


In [None]:
!head -5 logfiler5.csv

j0;j1;j2;j3;j4;cos(j0);cos(j1);cos(j2);cos(j3);cos(j4);sin(j0);sin(j1);sin(j2);sin(j3);sin(j4);ft_x;ft_y;ft_z;ft_qw;ft_qx;ft_qy;ft_qz
 0.000;  0.000;  0.000;  0.000;  0.000;  1.000;  1.000;  1.000;  1.000;  1.000;  0.000;  0.000;  0.000;  0.000;  0.000;  0.000;  0.000;  0.590;  1.000;  0.000;  0.000;  0.000
 0.022; -0.005;  0.028;  0.016; -0.032;  1.000;  1.000;  1.000;  1.000;  0.999;  0.022; -0.005;  0.028;  0.016; -0.032;  0.011;  0.004;  0.590;  1.000; -0.016;  0.019;  0.011
 0.103;  0.005;  0.107;  0.017; -0.100;  0.995;  1.000;  0.994;  1.000;  0.995;  0.102;  0.005;  0.106;  0.017; -0.099;  0.041;  0.016;  0.587;  0.995; -0.053;  0.061;  0.054
 0.209;  0.067;  0.216;  0.013; -0.174;  0.978;  0.998;  0.977;  1.000;  0.985;  0.208;  0.067;  0.215;  0.013; -0.173;  0.100;  0.042;  0.573;  0.979; -0.101;  0.138;  0.116


1.2. Preprocess the data


2R Robot

In [39]:
# Load dataset
data = pd.read_csv('logfiler2.csv', delimiter=';')

# Preprocessing: Extract inputs (joint angles and their trigonometric functions) and outputs (fingertip positions and quaternions)
X = data[['j0', 'j1', 'cos(j0)', 'cos(j1)', 'sin(j0)', 'sin(j1)']].values
y = data[['ft_x', 'ft_y', 'ft_qw', 'ft_qz']].values

# Normalize input features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

- Split the data into training and testing sets

In [80]:
# Split data into training, validation, and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

##2. Train Forward Kinematics Models##



### 2.1. Robot 2R

- Define the architecture of the model (Feedforward Neural Network) to learn forward kinematics.




- Define the loss function and optimizer

In [200]:
# Define the Neural Network model with Dropout
class ForwardKinematicsModel(nn.Module):
    def __init__(self):
        super(ForwardKinematicsModel, self).__init__()
        # Define a simple feedforward network with 2 hidden layers and Dropout
        self.fc1 = nn.Linear(6, 64)  # Input layer (6 features)
        self.dropout1 = nn.Dropout(p=0.3)  # Dropout with 30% probability after first hidden layer
        self.fc2 = nn.Linear(64, 64)       # Hidden layer
        self.dropout2 = nn.Dropout(p=0.3)  # Dropout with 30% probability after second hidden layer
        self.fc3 = nn.Linear(64, 4)        # Output layer (2 outputs)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)  # Apply dropout after first hidden layer
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)  # Apply dropout after second hidden layer
        x = self.fc3(x)
        return x

- Hyperparameter Search

Using Grid Search

In [201]:
# Wrapper class for PyTorch model to use with scikit-learn GridSearchCV
class PyTorchRegressor(BaseEstimator, RegressorMixin):
    def __init__(self, hidden_size=hidden_size, lr=learning_rate, epochs=100):
        self.hidden_size = hidden_size
        self.lr = lr
        self.epochs = epochs
        self.model = ForwardKinematicsModel(hidden_size=self.hidden_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        self.criterion = nn.MSELoss()

    def fit(self, X, y):
        # Convert data to torch tensors
        X_train = torch.tensor(X, dtype=torch.float32)
        y_train = torch.tensor(y, dtype=torch.float32)

        # Training loop
        for epoch in range(self.epochs):
            self.model.train()
            self.optimizer.zero_grad()
            output = self.model(X_train)
            loss = self.criterion(output, y_train)
            loss.backward()
            self.optimizer.step()

        return self

    def predict(self, X):
        # Convert data to torch tensor and return predictions
        X_test = torch.tensor(X, dtype=torch.float32)
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(X_test)
        return predictions.numpy()

# Load your dataset (replace this with your actual dataset)
df = pd.read_csv('logfiler2.csv', sep=';')
X = df[['j0', 'j1', 'cos(j0)', 'cos(j1)', 'sin(j0)', 'sin(j1)']].values  # Joint angles
y = df[['ft_x', 'ft_y', 'ft_qw', 'ft_qz']].values  # Fingertip positions

# Split dataset into training and test sets (80/20 split)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Set up hyperparameters to search
param_grid = {
    'hidden_size': [16, 32, 64],       # Number of hidden units in the hidden layers
    'lr': [0.001, 0.01],            # Learning rates
    'epochs': [100, 200, 500],           # Number of epochs to train
}

# Instantiate the PyTorch model wrapper for GridSearchCV
pytorch_model = PyTorchRegressor()

# Use GridSearchCV with the model wrapper
grid_search = GridSearchCV(estimator=pytorch_model, param_grid=param_grid, cv=3, verbose=2, n_jobs=1)

# Perform the grid search
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and the best model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Evaluate the best model on the test set
y_pred = best_model.predict(X_test)
test_loss = mean_squared_error(y_test, y_pred)
print(f"Best Hyperparameters: {best_params}")
print(f"Test MSE: {test_loss:.4f}")


TypeError: __init__() got an unexpected keyword argument 'hidden_size'

- Train the models on joint angle inputs to predict fingertip positions.

In [202]:
# Hyperparameters
num_epochs = 500
learning_rate = 0.001
hidden_size = 64

# Initialize the model
model = ForwardKinematicsModel()

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [198]:
# Convert to PyTorch tensors only if necessary
X_train = X_train.clone().detach().float() if isinstance(X_train, torch.Tensor) else torch.tensor(X_train, dtype=torch.float32)
y_train = y_train.clone().detach().float() if isinstance(y_train, torch.Tensor) else torch.tensor(y_train, dtype=torch.float32)
X_val = X_val.clone().detach().float() if isinstance(X_val, torch.Tensor) else torch.tensor(X_val, dtype=torch.float32)
y_val = y_val.clone().detach().float() if isinstance(y_val, torch.Tensor) else torch.tensor(y_val, dtype=torch.float32)
X_test = X_test.clone().detach().float() if isinstance(X_test, torch.Tensor) else torch.tensor(X_test, dtype=torch.float32)
y_test = y_test.clone().detach().float() if isinstance(y_test, torch.Tensor) else torch.tensor(y_test, dtype=torch.float32)

In [203]:
# Initialize variables for early stopping
best_val_loss = float('inf')
patience = 5
no_improvement_epochs = 0

# Training loop with early stopping
for epoch in range(num_epochs):
    model.train()

    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Evaluate on validation set
    if (epoch + 1) % 10 == 0:
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val)
            val_loss = criterion(val_outputs, y_val)

            # Calculate additional metrics
            mae = mean_absolute_error(y_val.numpy(), val_outputs.numpy())
            r2 = r2_score(y_val.numpy(), val_outputs.numpy())

            print(f"Epoch [{epoch+1}/{num_epochs}], "
                  f"Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, "
                  f"MAE: {mae:.4f}, R^2: {r2:.4f}")

            # Early stopping check
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                no_improvement_epochs = 0
            else:
                no_improvement_epochs += 1

            if no_improvement_epochs >= patience:
                print(f"Early stopping triggered at epoch {epoch+1}. Best Val Loss: {best_val_loss:.4f}")
                break

        model.train()  # Switch back to training mode

TypeError: linear(): argument 'input' (position 1) must be Tensor, not numpy.ndarray

In [134]:
# Test the model
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    test_loss = criterion(test_outputs, y_test)
    mae_test = mean_absolute_error(y_test.numpy(), test_outputs.numpy())
    r2_test = r2_score(y_test.numpy(), test_outputs.numpy())

    print(f"Test Loss: {test_loss.item():.4f}")
    print(f"Test MAE: {mae_test:.4f}")
    print(f"Test R^2: {r2_test:.4f}")


Test Loss: 0.0001
Test MAE: 0.0076
Test R^2: 0.9969


##3. Compare Jacobians##



3.1. Compute the Jacobian matrix for the learned forward kinematics using automatic differentiation.



In [204]:
# Example input: [j0, j1, cos(j0), cos(j1), sin(j0), sin(j1)]
X_test = torch.tensor([[-3.097, 0.5, 0.3, 0.1, 0.4, 1.0]], dtype=torch.float32)  # 1 sample

# Initialize the model
model = ForwardKinematicsModel()

# Define the FK and FK_Jacobian functions
def FK(model, theta):
    # Reshape to batch size 1
    t = theta.view(1, -1)  # Ensure input is in the correct shape
    out = model(t)
    # Reshape to 1D vector (4D output for 4 outputs)
    out = out.view(-1)
    return out

def FK_Jacobian(model, x):
    # Enable gradient tracking for the input
    x = x.requires_grad_(True)
    y = FK(model, x)

    # Initialize an empty list to store Jacobian rows
    jacobian = []

    # Compute the gradient of each output with respect to each input
    for i in range(len(y)):  # len(y) = 4 (since we have 4 outputs)
        grad_outputs = torch.zeros_like(y)
        grad_outputs[i] = 1.0  # Create a vector with 1.0 at the i-th position

        # Get the gradient of y[i] with respect to the inputs (j0, j1, etc.)
        jacobian_row = torch.autograd.grad(y, x, grad_outputs=grad_outputs, retain_graph=True, create_graph=True)[0]

        # For each output, we are interested in the gradient with respect to j0 and j1 (the first two inputs)
        jacobian.append(jacobian_row[:2])  # Only keep gradients with respect to the first two inputs (j0, j1)

    # Stack the rows to create a 4x2 Jacobian matrix
    jacobian_matrix = torch.stack(jacobian, dim=0)  # This will give a 4x2 matrix
    return jacobian_matrix

# Use the FK and FK_Jacobian functions to compute the Jacobian
jacobian = FK_Jacobian(model, X_test)
print("Jacobian:\n", jacobian)

Jacobian:
 tensor([[[ 0.0338, -0.0060,  0.0148,  0.0256,  0.0068, -0.0353]],

        [[-0.0208,  0.0027,  0.0047, -0.0187, -0.0611, -0.0080]],

        [[ 0.0960, -0.1164, -0.0169,  0.0709,  0.1335, -0.0523]],

        [[-0.0054,  0.0261, -0.1101, -0.0339,  0.0750,  0.0561]]],
       grad_fn=<StackBackward0>)


3.2. Compare the computed Jacobian with the analytical Jacobian for the 2-joint robot.

In [180]:
import numpy as np

# Analytical Jacobian for a 2-DOF robot
def analytical_jacobian_from_test(data):
    # Extract joint angles from data
    j0 = data[0]  # j0
    j1 = data[1]  # j1
    l1 = 1.0  # Link 1 length
    l2 = 1.0  # Link 2 length

    # Calculate the Jacobian elements using the analytical formula
    J = np.array([
        [-l1 * np.sin(j0) - l2 * np.sin(j0 + j1), -l2 * np.sin(j0 + j1)],
        [l1 * np.cos(j0) + l2 * np.cos(j0 + j1), l2 * np.cos(j0 + j1)]
    ])
    return J

# Example input from X_test[0]
# j0, j1, cos(j0), cos(j1), sin(j0)
X_test = X_test[0]
# Compute analytical Jacobian
J_analytical = analytical_jacobian_from_test(X_test_0)
print(f"Analytical Jacobian:\n{J_analytical}")

Analytical Jacobian:
[[-0.39535362 -0.29552021]
 [ 1.95034065  0.95533649]]
