In [1]:
import torch
import casadi_on_gpu as cog
import os, numpy as np

In [2]:
# Location of the raw posterior dump. The default is the original developer
# path; set COG_POSTERIOR_PATH to run the notebook elsewhere without editing it.
path = os.environ.get(
    "COG_POSTERIOR_PATH",
    "/home/mr-robot/sci_ws/casadi-on-gpu/src/posterior.bin",
)
param_dim = cog.DYNAMICS_PARAM_DIM

# The file is a flat float64 dump that is viewed as rows of `param_dim` values.
params_f64 = np.fromfile(path, dtype=np.float64)

# Fail early with a readable message instead of an opaque reshape error (or a
# silent N == 0 batch) when the file is empty, truncated, or was written for a
# different parameter dimension.
if params_f64.size == 0 or params_f64.size % param_dim != 0:
    raise ValueError(
        f"{path}: found {params_f64.size} float64 values, "
        f"expected a non-zero multiple of param_dim={param_dim}"
    )

# the file is float64, we cast to float32 for GPU.
params = params_f64.reshape(-1, param_dim).astype(np.float32)

In [3]:
# Upload the whole parameter batch to the GPU in one go.
# N is the number of rows in `params`.
device = "cuda"
N = len(params)
sim_p_all = torch.from_numpy(params).to(device).contiguous()

# Sanity checks on the uploaded buffer: on the GPU, float32, no NaN/Inf.
assert sim_p_all.is_cuda
assert sim_p_all.dtype == torch.float32
assert bool(torch.isfinite(sim_p_all).all())

In [4]:
# Same initial conditions as the C++ demo: the state is the ramp
# 0.1, 0.2, ... and the control is the ramp 0.05, 0.10, ...
sim_x = torch.tensor(
    [0.1 * k for k in range(1, cog.DYNAMICS_STATE_DIM + 1)],
    device=device,
    dtype=torch.float32,
)
sim_u = torch.tensor(
    [0.05 * k for k in range(1, cog.DYNAMICS_CONTROL_DIM + 1)],
    device=device,
    dtype=torch.float32,
)
# All-zero vector with DYNAMICS_CONTROL_DIM entries.
# NOTE(review): presumably an external force/disturbance input - confirm
# the expected length against the binding.
f_ext = torch.zeros(cog.DYNAMICS_CONTROL_DIM, device=device, dtype=torch.float32)

# Output buffer, one row of DYNAMICS_OUT_DIM values per parameter row.
sim_x_next_all = torch.zeros(N, cog.DYNAMICS_OUT_DIM, device=device, dtype=torch.float32)

# Raw handle of the stream PyTorch is currently using, passed to the binding.
stream = torch.cuda.current_stream().cuda_stream

# The binding receives raw device addresses, so every tensor above must stay
# alive (and keep its storage) for the duration of the call.
# NOTE(review): 0.04 is presumably the integration step in seconds, and
# sync=True presumably blocks until the kernel has finished - confirm both.
cog.dynamics_forward(
    sim_x.data_ptr(), sim_u.data_ptr(), sim_p_all.data_ptr(),
    0.04,
    f_ext.data_ptr(), sim_x_next_all.data_ptr(),
    N,
    threads_per_block=128, stream_ptr=stream, sync=True,
)

# Quick health check: global finiteness flag plus the first output row,
# followed by the (abbreviated) full result.
print(torch.isfinite(sim_x_next_all).all(), sim_x_next_all[0])
print(sim_x_next_all)

tensor(False, device='cuda:0') tensor([0.1262, 0.2371, 0.3277, 0.4756, 0.5371, 0.6842, 0.5045, 0.7987, 0.8608,
        0.7026, 1.9532, 1.3321], device='cuda:0')
tensor([[ 0.1262,  0.2371,  0.3277,  ...,  0.7026,  1.9532,  1.3321],
        [ 0.1263,  0.2387,  0.3274,  ..., -3.0556,  1.4371,  1.2419],
        [ 0.1246,  0.2367,  0.3289,  ..., -0.3697,  2.4083,  1.1635],
        ...,
        [ 0.1259,  0.2366,  0.3277,  ..., -1.2767,  2.0363,  1.2118],
        [ 0.1246,  0.2362,  0.3288,  ..., -0.3268,  2.8331,  1.2396],
        [ 0.1269,  0.2373,  0.3270,  ..., -1.0844,  1.5446,  1.2502]],
       device='cuda:0')


In [5]:
# Locate every output row that contains at least one NaN/Inf entry.
bad_mask = ~torch.isfinite(sim_x_next_all)

# nonzero() on the 1-D per-row flag returns shape (k, 1). Squeeze only that
# trailing axis so the result is always 1-D: a bare .squeeze() would collapse
# the k == 1 case to a 0-d tensor and make .tolist() return an int, not a list.
bad_rows = bad_mask.any(dim=1).nonzero().squeeze(1)

print("num bad rows:", bad_rows.numel())
print("first bad rows:", bad_rows.tolist())

num bad rows: 27
first bad rows: [5335, 14711, 14948, 20024, 23836, 26868, 27361, 30188, 30728, 31097, 32035, 32367, 33293, 34664, 38133, 38204, 41643, 49828, 54433, 55219, 56256, 57265, 58309, 59475, 71954, 76139, 77676]
