In [1]:
import cv2
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import os
import csv
import pandas as pd
import torch.nn as nn
import torch
from transformers import get_cosine_schedule_with_warmup

  from .autonotebook import tqdm as notebook_tqdm


**Load Data**

In [2]:
# Load the data
data = pd.read_csv('points.csv')

# Remove the 'tensor' prefix and convert to float
data = data.applymap(lambda x: float(x.replace('tensor(', '').replace(')', '')))

# Split the data into 3D and 2D points
input_3d = data[['x_3d', 'y_3d', 'z_3d']].values
target_2d = data[['x_2d', 'y_2d']].values

# Convert to tensors
input_3d = torch.tensor(input_3d, dtype=torch.float32)
target_2d = torch.tensor(target_2d, dtype=torch.float32)

# Print tensor shape
print('input_3d.shape:', input_3d.shape)
print('target_2d.shape:', target_2d.shape)
# print warning if any nan or inf
if torch.isnan(input_3d).any() or torch.isnan(target_2d).any() or torch.isinf(input_3d).any() or torch.isinf(target_2d).any():
    print('Warning: nan or inf found in input_3d or target_2d')

input_3d.shape: torch.Size([197, 3])
target_2d.shape: torch.Size([197, 2])


**Camera Model**

In [3]:
def apply_camera_model(predicted_params, points_3d):
    """
    Apply the camera model to project 3D points to 2D using predicted camera parameters.
    
    :param predicted_params: Tensor containing the predicted camera parameters [f_x, f_y, c_x, c_y, k1, k2, k3, p_1, p_2]
    :param points_3d: Tensor containing 3D points in camera coordinates, shape [N, 3]
    :return: Tensor containing the projected 2D points, shape [N, 2]
    """
    predicted_params = predicted_params.squeeze()

    f_x, f_y, c_x, c_y, k1, k2, k3, p_1, p_2 = predicted_params

    f_x =  f_x
    f_y = f_y 
    c_x = c_x 
    c_y = c_y 
    #print(f_x, f_y, c_x, c_y, k1, k2, k3, p_1, p_2)

    # Unpack K matrix
    
    # Normalize 3D points (X_c/Z_c, Y_c/Z_c)
    X_c = points_3d[:, 0]
    Y_c = points_3d[:, 1]
    Z_c = points_3d[:, 2] + 1e-6
    x_normalized = X_c / Z_c
    y_normalized = Y_c / Z_c
    
    # Compute r^2 = x_normalized^2 + y_normalized^2
    r_squared = x_normalized**2 + y_normalized**2
    
    # Apply radial distortion correction
    radial_factor = 1 + k1 * r_squared + k2 * r_squared**2 + k3 * r_squared**3
    x_radial = x_normalized * radial_factor
    y_radial = y_normalized * radial_factor
    
    # Apply tangential distortion correction
    x_tangential = 2 * p_1 * x_normalized * y_normalized + p_2 * (r_squared + 2 * x_normalized**2)
    y_tangential = p_1 * (r_squared + 2 * y_normalized**2) + 2 * p_2 * x_normalized * y_normalized
    
    # Combine radial and tangential distortions
    x_distorted = x_radial + x_tangential
    y_distorted = y_radial + y_tangential
    
    # Convert distorted coordinates back to pixel coordinates
    u = f_x * x_distorted + c_x
    v = f_y * y_distorted + c_y
    
    # Stack the 2D points into a single tensor
    points_2d = torch.stack([u, v], dim=-1)

    img_width = 240
    img_height = 520

    # Normalise the points
    points_2d[:, 0] = points_2d[:, 0] / img_width
    points_2d[:, 1] = points_2d[:, 1] / img_height

    
    return points_2d

In [4]:
# Make neural network
class CameraModel(nn.Module):
    def __init__(self):
        super(CameraModel, self).__init__()
        # Input is 3D points, output is predicted params size 9
        self.fc1 = nn.Linear(3, 9)
        self.fc2 = nn.Linear(9, 9)

        # initial guesses [500, 500, 300, 300, 0.01, 0.01, 0.01, 0.01, 0.01] of output change bias
        #self.fc1.bias.data = torch.tensor([500, 500, 300, 300, 0.01, 0.01, 0.01, 0.01, 0.01], dtype=torch.float32)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)

        # First 4 outputs between 0 and 1000
        x[:, 0] = torch.sigmoid(x[:, 0]) * 1000
        x[:, 1] = torch.sigmoid(x[:, 1]) * 1000
        x[:, 2] = torch.sigmoid(x[:, 2]) * 1000
        x[:, 3] = torch.sigmoid(x[:, 3]) * 1000

        # Last 5 outputs between -1 and 1
        x[:, 4] = torch.tanh(x[:, 4])
        x[:, 5] = torch.tanh(x[:, 5])
        x[:, 6] = torch.tanh(x[:, 6])
        x[:, 7] = torch.tanh(x[:, 7])
        x[:, 8] = torch.tanh(x[:, 8])

        return x
    

# Create the model
model = CameraModel()
# After model initialization, check the initial predictions
initial_pred_params = model(input_3d)
print("Initial Predicted Parameters:", initial_pred_params)

Initial Predicted Parameters: tensor([[ 4.2576e+02,  2.7939e+02,  4.0960e+02,  ..., -6.3437e-01,
         -4.3137e-01,  2.1490e-01],
        [ 4.2635e+02,  2.8092e+02,  4.1316e+02,  ..., -6.2891e-01,
         -4.3124e-01,  2.0730e-01],
        [ 4.2666e+02,  2.8183e+02,  4.1561e+02,  ..., -6.2528e-01,
         -4.3159e-01,  2.0228e-01],
        ...,
        [ 2.3313e+02,  1.4894e+02,  5.4689e+02,  ..., -3.3611e-01,
         -2.3022e-01,  7.1777e-01],
        [ 2.4413e+02,  1.5559e+02,  5.2744e+02,  ..., -3.8133e-01,
         -2.3334e-01,  6.9938e-01],
        [ 2.4951e+02,  1.5869e+02,  5.3910e+02,  ..., -3.5205e-01,
         -2.2979e-01,  6.9266e-01]], grad_fn=<CopySlices>)


**Neural Network**

In [5]:

# Define the loss function
criterion = nn.MSELoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Define the learning rate scheduler
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=100, num_training_steps=1000)

# Define the maximum norm for the gradients
max_grad_norm = 1.0

# Train the model
num_epochs = 1000
for epoch in range(num_epochs):
    # Print model weights and biases
    print("Model Weights and Biases:")
    for name, param in model.named_parameters():
        print(name, param.data)

    predicted_parameters = model(input_3d)
    if torch.isnan(predicted_parameters).any():
        print(f"NaNs in predicted parameters at epoch due to predparams {epoch}")
        break
    
    predicted_parameters_mean = torch.mean(predicted_parameters, dim=0)
    print(' predicted params mean', predicted_parameters_mean)
    predicted_2d = apply_camera_model(predicted_parameters_mean, input_3d)
    if torch.isnan(predicted_2d).any():
        print(f"NaNs in predicted 2D points at epoch {epoch}")
        break

    # print shapes of predicted_2d and target_2d
    print('predicted_2d.shape:', predicted_2d.shape)
    print('target_2d.shape:', target_2d.shape)
    loss = criterion(predicted_2d, target_2d)
    if torch.isnan(loss):
        print(f"NaNs in loss at epoch {epoch}")
        break

    # Backward pass
    optimizer.zero_grad()

    # Perform backpropagation
    loss.backward()

    # Gradient clipping
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

    # Update the weights
    optimizer.step()

    # Update the learning rate
    scheduler.step()

    # Print the loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}')




Model Weights and Biases:
fc1.weight tensor([[ 0.3998, -0.2866,  0.3898],
        [-0.1662,  0.0182,  0.3792],
        [-0.5714,  0.0038,  0.0243],
        [ 0.3180,  0.2138, -0.1072],
        [ 0.2047, -0.1865, -0.0146],
        [-0.4077, -0.0153,  0.2559],
        [ 0.3794,  0.3467,  0.3448],
        [ 0.1361, -0.0402,  0.2228],
        [-0.2950,  0.0619, -0.5731]])
fc1.bias tensor([-0.4851,  0.0483, -0.5558,  0.1374, -0.0189, -0.5002,  0.2897, -0.4367,
        -0.2480])
fc2.weight tensor([[-0.2944, -0.0159,  0.0388,  0.3086,  0.1204,  0.0346, -0.2327, -0.2463,
          0.0847],
        [-0.2147, -0.3289,  0.0657,  0.2757, -0.2158, -0.2080, -0.3190,  0.1232,
         -0.1537],
        [ 0.0015,  0.0793, -0.0657,  0.1321,  0.2011,  0.2203, -0.2893,  0.3308,
         -0.0745],
        [-0.1420,  0.0158,  0.2514, -0.3319, -0.2378, -0.1294, -0.3252,  0.3045,
          0.2267],
        [ 0.1668, -0.1102,  0.2499,  0.1753, -0.0994, -0.0689,  0.3255, -0.2859,
          0.0018],
        [ 0