In [1]:
import cupy as cp

def calculate_forces(positions, G):
    positions_diff = positions[:, cp.newaxis, :] - positions
    distances = cp.linalg.norm(positions_diff, axis=2)
    
    inv_distances_cubed = cp.nan_to_num(1.0 / distances ** 3, nan=0)
    
    forces = cp.sum(inv_distances_cubed[:, :, cp.newaxis] * (positions_diff), axis=1)
    forces *= G
    
    return forces

def update_positions(positions, velocities, forces, dt):
    accelerations = forces 
    new_velocities = velocities + accelerations * dt
    new_positions = positions + new_velocities * dt
    
    return new_positions, new_velocities

def simulate_n_body(positions, velocities, G, dt, num_steps):
    for _ in range(num_steps):
        forces = calculate_forces(positions, G)
        positions, velocities = update_positions(positions, velocities, forces, dt)
    
    return positions, velocities

In [2]:
import numpy as np

def numpy_calculate_forces(positions, G):    
    positions_diff = positions[:, np.newaxis, :] - positions
    distances = np.linalg.norm(positions_diff, axis=2)
    
    inv_distances_cubed = np.nan_to_num(1.0 / distances ** 3, nan=0)
    
    forces = np.sum(inv_distances_cubed[:, :, np.newaxis] * (positions_diff), axis=1)
    forces *= G 
    
    return forces

def numpy_update_positions(positions, velocities, forces, dt):
    accelerations = forces 
    new_velocities = velocities + accelerations * dt
    new_positions = positions + new_velocities * dt
    
    return new_positions, new_velocities

def numpy_simulate_n_body(positions, velocities, G, dt, num_steps):
    for _ in range(num_steps):
        forces = numpy_calculate_forces(positions, G)
        positions, velocities = numpy_update_positions(positions, velocities, forces, dt)
    
    return positions, velocities

In [3]:
# Example usage
num_bodies = 1000
num_dimensions = 3
G = 6.67430e-11  # gravitational constant
dt = 0.01  # time step
num_steps = 1000

positions = np.random.rand(num_bodies, num_dimensions)
velocities = np.random.rand(num_bodies, num_dimensions)

# copy of the same data to device
pos_gpu = cp.asarray(positions)
vel_gpu = cp.asarray(velocities)

# Generate random initial positions, velocities
def cupy_run(positions, velocities):
    # Run the simulation
    final_positions, final_velocities = simulate_n_body(positions, velocities, G, dt, num_steps)

    return final_positions, final_velocities

def numpy_run(positions, velocities):
    # Run the simulation
    final_positions, final_velocities = numpy_simulate_n_body(positions, velocities, G, dt, num_steps)

    return final_positions, final_velocities

In [4]:
# %%timeit
cp_pos, cp_vel = cupy_run(pos_gpu, vel_gpu)

In [5]:
# %%timeit
np_pos, np_vel = numpy_run(positions, velocities)

  inv_distances_cubed = np.nan_to_num(1.0 / distances ** 3, nan=0)


In [6]:
cpu_cp_pos = cp_pos.get()
cpu_cp_vel = cp_vel.get()

In [7]:
np.allclose(np_pos, cpu_cp_pos)

True

## Agnostic code

In [10]:
def agnostic_run(positions, velocities):
    # copy data to device
    xp = cp.get_array_module(positions)
    print(f'using: {xp.__name__}')

    def calculate_forces(positions, G):    
        positions_diff = positions[:, cp.newaxis, :] - positions
        distances = xp.linalg.norm(positions_diff, axis=2)

        inv_distances_cubed = xp.nan_to_num(1.0 / distances ** 3, nan=0)

        forces = xp.sum(inv_distances_cubed[:, :, xp.newaxis] * (positions_diff), axis=1)
        forces *= G

        return forces

    def update_positions(positions, velocities, forces, dt):
        accelerations = forces
        new_velocities = velocities + accelerations * dt
        new_positions = positions + new_velocities * dt

        return new_positions, new_velocities

    def simulate_n_body(positions, velocities, G, dt, num_steps):
        for _ in range(num_steps):
            forces = calculate_forces(positions, G)
            positions, velocities = update_positions(positions, velocities, forces, dt)

        return positions, velocities
    
    pos, vel = simulate_n_body(positions, velocities, G, dt, num_steps)

    return pos, vel

In [11]:
ag_pos, ag_vel = agnostic_run(pos_gpu, vel_gpu)

using: cupy


In [12]:
cpu_cp_pos = ag_pos.get()
cpu_cp_vel = ag_vel.get()

In [13]:
np.allclose(np_pos, cpu_cp_pos)

True