# Script Runge Kutta 4(5)

## Import libraries and overlay

### Import Python libraries

In [1]:
import numpy as np
import timeit
import math
import time
import os
import errno
import pandas as pd

### Import Pynq libraries

In [2]:
from pynq import Overlay
from pynq import allocate

### Import overlay

In [3]:
# Load the FPGA overlay from the bitstream that sits next to this notebook.
# TODO after bitstream generation, copy these two Vivado outputs into the Pynq folder of this notebook:
#   - `VIVADO_ROOT/euler_propagator_vivado/euler_propagator_vivado.gen/sources_1/bd/design_1/hw_handoff/design_1.hwh` --> rename into `design_1_wrapper.hwh`
#   - `VIVADO_ROOT/euler_propagator_vivado/euler_propagator_vivado.runs/impl_1/design_1_wrapper.bit`
# NOTE(review): the paths mention `euler_propagator_vivado` although this notebook drives the
# Runge-Kutta 4(5) IP -- presumably left over from an earlier project; confirm the actual Vivado project name.

overlay = Overlay("./design_1_wrapper.bit")

In [4]:
# overlay?

## Initialization

## Utils

In [5]:
def write_to_csv(X, filename):
    """Write ``X`` as a header-less CSV file inside ``<cwd>/orbit_csv``.

    The ``orbit_csv`` directory is created under the current working
    directory when it does not exist yet. The destination directory and the
    name of the written file are printed as progress messages.

    Parameters
    ----------
    X : array-like
        Data accepted by ``pandas.DataFrame`` (e.g. a 1-D or 2-D numpy array).
    filename : str
        Name of the CSV file to create or overwrite inside ``orbit_csv``.

    Raises
    ------
    FileNotFoundError
        If the ``orbit_csv`` directory cannot be created (including the case
        where a regular file with that name already exists). The underlying
        ``OSError`` is attached as ``__cause__``.
    """
    # Path to the output directory, relative to where the notebook is running
    dir_path = os.path.join(os.getcwd(), "orbit_csv")

    print(dir_path)

    # exist_ok=True makes an already existing *directory* a no-op, while any
    # other failure (permissions, a plain file in the way, ...) is still reported.
    try:
        os.makedirs(dir_path, exist_ok=True)
    except OSError as e:
        raise FileNotFoundError("Error: Unable to create directory " + dir_path) from e

    # No index column and no header row: the file contains only the values of X
    pd.DataFrame(X).to_csv(os.path.join(dir_path, filename), index=False, header=None)

    print(filename + " updated.")

### Declare IP 

In [14]:
# Handle to the Runge-Kutta 4(5) IP core, exposed by the overlay as attribute `runge_kutta_45_0`
rk45_ip = overlay.runge_kutta_45_0

In [15]:
# IPython introspection of the IP handle (development aid only)
rk45_ip?

### Inspect the register map of the rk45_ip IP

In [16]:
# Display every register of the IP: control/interrupt registers plus one pair of
# 32-bit words (`*_1`, `*_2`) per kernel argument and the `size` output
rk45_ip.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0),
  yy_1 = Register(yy=write-only),
  yy_2 = Register(yy=write-only),
  tt_1 = Register(tt=write-only),
  tt_2 = Register(tt=write-only),
  tf_1 = Register(tf=write-only),
  tf_2 = Register(tf=write-only),
  h0_1 = Register(h0=write-only),
  h0_2 = Register(h0=write-only),
  tol_1 = Register(tol=write-only),
  tol_2 = Register(tol=write-only),
  mu_1 = Register(mu=write-only),
  mu_2 = Register(mu=write-only),
  size = Register(size=0),
  size_ctrl = Register(size_ap_vld=0, RESERVED=0)
}

In [17]:
# Display the individual fields of the control register
rk45_ip.register_map.CTRL

Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0)

In [18]:
# Raw control word at offset 0x00, shown in binary.
# Presumably bit 0 = AP_START, bit 1 = AP_DONE, bit 2 = AP_IDLE (field order of CTRL) -- confirm against the driver header.
bin(rk45_ip.read(0x00))

'0b100'

### Constant declaration and initialization

In [19]:
# Problem size: N = 2*D is the number of columns of the state buffer (position + velocity components)
D = 3
N = 2*D

# Final integration time is N_REV times T_REV
# (presumably T_REV is the orbital period in seconds -- TODO confirm units)
T_REV = 5828.516637686026
N_REV = 5
TF = T_REV*N_REV # Beyond 1284 s the error drives h down to fractions of a second. This does not happen in simulation
# Constant passed to the ODE as `mu` (presumably Earth's gravitational parameter in km^3/s^2 -- TODO confirm)
MU = 398600.4418
# Tolerance handed to both the CPU integrator and the FPGA IP
TOL = 1e-09

# h0: initial step written to the FPGA IP; h_min: step used to size the buffers
# and as the initial step of the CPU run
h0 = 15.0
h_min = 0.1
# Number of rows allocated for the FPGA output buffers: enough for TF/h_min steps plus the initial row
max_rows = math.ceil(TF/h_min) + 1

# Initial position and velocity; concatenated they form the 6-element initial state
r0 = np.array([6893.65420319622, 607.768615848904, 1052.68612189611])
v0 = np.array([-1.31035840240472, 3.71570593010086, 6.43579145691966])

## CPU computation

### Functions definition

In [20]:
# RK45 CONSTANTS (Butcher tableau of the embedded Runge-Kutta-Fehlberg 4(5) pair)

# Nodes: stage i is evaluated at time t + C[i] * h
C = np.array([0, 1/4, 3/8, 12/13, 1, 1/2])

# Stage coefficients: row i weights the previous slopes k_0..k_{i-1} used to build stage i
A = np.array([[0, 0, 0, 0, 0],
              [1/4, 0, 0, 0, 0],
              [3/32, 9/32, 0, 0, 0],
              [1932/2197, -7200/2197, 7296/2197, 0, 0],
              [439/216, -8, 3680/513, -845/4104, 0],
              [-8/27, 2, -3544/2565, 1859/4104, -11/40]])

B = np.array([16/135, 0, 6656/12825, 28561/56430, -9/50, 2/55]) # 5th order
Bs = np.array([25/216, 0, 1408/2565, 2197/4104, -1/5, 0])   # 4th order

def rk_45(f, t0, tf, y0, h, tol):
    """Integrate ``y' = f(t, y)`` from ``t0`` to ``tf`` with an adaptive RK 4(5) scheme.

    Each trial step computes six slopes, advances with the 5th-order weights
    ``B`` and estimates the local error from the difference between the 5th-
    and 4th-order weights (``B - Bs``). A step is accepted when the Euclidean
    norm of that estimate does not exceed ``tol * h / (tf - t0)``. The step
    size is multiplied by 1.11 after an accepted step and by 0.99 after a
    rejected one.

    Parameters
    ----------
    f : callable
        Right-hand side ``f(t, y)``; receives the scalar stage time and the
        stage state (1-D array) and returns the derivative (same length).
    t0, tf : float
        Start and end of the integration interval.
    y0 : array-like
        Initial state (any length; 6 in this notebook).
    h : float
        Initial step size.
    tol : float
        Tolerance over the whole interval; scaled per step as described above.

    Returns
    -------
    t_h : ndarray, shape (n, 1)
        Times of the stored states, starting with ``t0``.
    u_h : ndarray, shape (n, len(y0))
        Stored states, row 0 being ``y0``.
    h_h : ndarray, shape (n, 1)
        Row 0 is the initial ``h``; row i > 0 is the step that produced row i.
    """
    t = float(t0)
    y = np.asarray(y0, dtype=float)

    # Histories are kept in Python lists and stacked once at the end:
    # growing numpy arrays with vstack inside the loop costs O(n^2) copies.
    t_hist = [t]
    u_hist = [y]
    h_hist = [float(h)]

    while t < tf:

        # Shorten the last step so that the integration ends exactly on tf
        hits_end = t + h > tf
        if hits_end:
            h = tf - t

        # Slopes of the six stages, each evaluated at the *current* time t
        # plus its node offset C[i] * h
        k_0 = np.asarray(f(t + C[0] * h, y))
        k_1 = np.asarray(f(t + C[1] * h, y + h * (A[1,0]*k_0)))
        k_2 = np.asarray(f(t + C[2] * h, y + h * (A[2,0]*k_0 + A[2,1]*k_1)))
        k_3 = np.asarray(f(t + C[3] * h, y + h * (A[3,0]*k_0 + A[3,1]*k_1 + A[3,2]*k_2)))
        k_4 = np.asarray(f(t + C[4] * h, y + h * (A[4,0]*k_0 + A[4,1]*k_1 + A[4,2]*k_2 + A[4,3]*k_3)))
        k_5 = np.asarray(f(t + C[5] * h, y + h * (A[5,0]*k_0 + A[5,1]*k_1 + A[5,2]*k_2 + A[5,3]*k_3 + A[5,4]*k_4)))

        # Local error estimate: difference between the 5th- and 4th-order updates
        e = h * ( (B[0] - Bs[0])*k_0 + (B[1] - Bs[1])*k_1 + (B[2] - Bs[2])*k_2 + (B[3] - Bs[3])*k_3 + (B[4] - Bs[4])*k_4 + (B[5] - Bs[5])*k_5 )

        # The error is measured with the Euclidean norm of the estimate
        err = np.linalg.norm(e)
        # Per-step share of the tolerance, proportional to the step length
        tol_step = tol * h / (tf - t0)

        if err <= tol_step:
            # Accept: advance with the 5th-order weights and record the step
            y = y + h * (B[0]*k_0 + B[1]*k_1 + B[2]*k_2 + B[3]*k_3 + B[4]*k_4 + B[5]*k_5)
            # On the clamped step land exactly on tf, so that round-off in
            # t + (tf - t) cannot trigger an extra, vanishingly small step
            t = tf if hits_end else t + h

            t_hist.append(t)
            u_hist.append(y)
            h_hist.append(float(h))

            scale = 1.11

        else:
            # Reject: retry from the same (t, y) with a slightly smaller step
            scale = 0.99

        # # compute the optimal step size
        # scale = (2*tol_step/ (err + tol_step))**0.2

        h *= scale

    t_h = np.array(t_hist, dtype=float).reshape(-1, 1)
    u_h = np.vstack(u_hist)
    h_h = np.array(h_hist, dtype=float).reshape(-1, 1)

    return t_h, u_h, h_h


def ode(t, y, mu):
    """Right-hand side of the equations of motion: returns d/dt of the state.

    Parameters
    ----------
    t : float
        Time; not used by the equations, kept so the function fits the
        ``f(t, y)`` signature expected by the integrator.
    y : ndarray
        State vector: position in ``y[0:3]``, velocity in ``y[3:6]``.
    mu : float
        Constant scaling the inverse-square attraction towards the origin.

    Returns
    -------
    ndarray
        Velocity followed by the acceleration ``-mu * r / |r|**3``.
    """
    position, velocity = y[0:3], y[3:6]

    # Acceleration points towards the origin and decays with the square of the distance
    distance = np.linalg.norm(position)
    acceleration = - position * mu / distance**3

    return np.concatenate((velocity, acceleration))

### Computation

In [15]:
# Bind MU so the right-hand side has the f(t, y) signature expected by rk_45
ode_wrapper = lambda t, y: ode(t, y, MU)

# Wall-clock timing of the CPU integration from 0 to TF.
# `t`, `y`, `h` are the histories returned by rk_45 (times, states, step sizes).
# NOTE(review): the initial step passed here is h_min, whereas the FPGA IP is given h0 -- confirm this is intended.
tic = time.time()
t, y, h = rk_45(ode_wrapper, 0.0, TF, np.concatenate((r0, v0)), h_min, TOL) 
toc = time.time()
print(str(toc-tic) + "s")

[0.1]
[0.211]
[0.33421]
[0.4709731]
[0.62278014]
[0.79128596]
[0.97832741]
[1.18594343]
[1.4163972]
[1.6722009]
[1.956143]
[2.27131872]
[2.62116378]
[3.0094918]
[3.4405359]
[3.91899485]
[4.45008428]
[5.03959355]
[5.69394884]
[6.42028321]
[7.22651437]
[8.12143095]
[9.11478835]
[10.21741507]
[11.44133073]
[12.70758473]
[13.97873168]
[15.24202987]
[16.51020958]
[17.78328967]
[19.04850908]
[20.31861742]
[21.58088344]
[22.84802698]
[24.12006689]
[25.38425254]
[26.65332314]
[27.91455782]
[29.18066603]
[30.45166662]
[31.71481937]
[32.98285307]
[34.25578658]
[35.52086032]
[36.79082243]
[38.05294311]
[39.31994076]
[40.59183422]
[41.85587432]
[43.1247988]
[44.38588827]
[45.65185071]
[46.92270496]
[48.18571228]
[49.45359998]
[50.72638694]
[51.99131502]
[53.26113092]
[54.52310629]
[55.78995806]
[57.06170508]
[58.32559965]
[59.59437803]
[60.8553223]
[62.12113899]
[63.39184692]
[64.65470882]
[65.92245055]
[67.19509096]
[68.45987341]
[69.72954311]
[70.99137318]
[72.25807909]
[73.52967969]
[74.7934287

KeyboardInterrupt: 

### Write result of CPU in a csv file

In [15]:
# Disabled rescaling step; R and V are not defined anywhere in this notebook
# y_anormalized = np.concatenate((y[:, :3] * R, y[:, 3:] * V), axis=-1)

# Store the CPU state history and time history as CSV files in ./orbit_csv
write_to_csv(y, "y_rk5_tol09_jupyter_cpu.csv")
write_to_csv(t, "t_rk5_tol09_jupyter_cpu.csv")

/home/davide/Projects/runge_kutta_5/jupyter/adimensional/orbit_csv
rk5_adim_jupyter_cpu_h4.0.csv updated.
/home/davide/Projects/runge_kutta_5/jupyter/adimensional/orbit_csv
t_rk5_adim_jupyter_cpu_h4.0.csv updated.


## FPGA computation

### Prepare memory buffer

In [21]:
# Buffers whose physical addresses are later handed to the IP:
# one row of N float64 state components per stored step, and one float64 time per stored step
buffer_y_FPGA = allocate(( max_rows, N ), np.float64)
buffer_t_FPGA = allocate(( max_rows, ), np.float64)

In [22]:
# Seed row 0 with the initial condition: state (r0, v0) at time 0.0
buffer_y_FPGA[0] = np.concatenate(((r0, v0)))
buffer_t_FPGA[0] = 0.0

In [23]:
# Sanity check before starting the IP: only the first row / first element should be non-zero
print(buffer_y_FPGA)
print(buffer_t_FPGA)

[[ 6.89365420e+03  6.07768616e+02  1.05268612e+03 -1.31035840e+00
   3.71570593e+00  6.43579146e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 ...
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00]]
[0. 0. 0. ... 0. 0. 0.]


In [24]:
# https://stackoverflow.com/questions/16444726/binary-representation-of-float-in-python-bits-not-hex

import struct
def float64_to_binary(num):
    """Return the 64-character bit string of ``num`` encoded as a big-endian IEEE-754 double.

    The first character is the sign bit; the first 32 characters are the most
    significant word and the last 32 the least significant word.
    """
    # Reinterpret the 8 packed bytes as one unsigned 64-bit integer, then
    # print it zero-padded to 64 binary digits
    (as_uint64,) = struct.unpack('!Q', struct.pack('!d', num))
    return format(as_uint64, '064b')

In [25]:
import struct

def binary_to_float64(binary_str):
    """Decode a bit string (most significant bit first) into a float64.

    The string is parsed as a base-2 integer, laid out on 8 big-endian bytes
    and reinterpreted as an IEEE-754 double, so strings shorter than 64
    characters behave as if they were left-padded with zeros.
    """
    raw_bytes = int(binary_str, base=2).to_bytes(length=8, byteorder='big')
    (value,) = struct.unpack('!d', raw_bytes)
    return value

In [26]:
# Program the IP over AXI-Lite: buffer addresses first, then the four float64 arguments,
# and finally read the arguments back to check that they were stored correctly.
# TODO change addresses accordingly to `VITIS_ROOT/solution1/impl/misc/drivers/euler_propagator_v1_0/src/xeuler_propagator_hw.h`
# NOTE(review): the path above names the `euler_propagator` driver; presumably the header
# generated for the runge_kutta_45 IP is the one to check -- confirm.

# Register offsets of the kernel arguments (low word; the high word is at offset + 0x04)
ADDR_YY   = 0x10
ADDR_TT   = 0x1c
ADDR_TF   = 0x28
ADDR_H0   = 0x34
ADDR_TOL  = 0x40
ADDR_MU   = 0x4c
ADDR_SIZE = 0x58

# Each float64 argument is converted to its 64-bit pattern and split into two 32-bit
# integers: MSB_* = first 32 bits (most significant), LSB_* = last 32 bits
tf_bin = float64_to_binary(TF)
MSB_tf = int(tf_bin[:32], 2)
LSB_tf = int(tf_bin[32:], 2)

h0_bin = float64_to_binary(h0)
MSB_h = int(h0_bin[:32], 2)
LSB_h = int(h0_bin[32:], 2)

tol_bin = float64_to_binary(TOL)
MSB_tol = int(tol_bin[:32], 2)
LSB_tol = int(tol_bin[32:], 2)

mu_bin = float64_to_binary(MU)
MSB_mu = int(mu_bin[:32], 2)
LSB_mu = int(mu_bin[32:], 2)

# Physical addresses of the output buffers.
# NOTE(review): only the word at ADDR_YY / ADDR_TT is written; the register map also lists a
# second word (yy_2 / tt_2). Presumably fine while physical addresses fit in 32 bits -- confirm for the target board.
rk45_ip.write(ADDR_YY, buffer_y_FPGA.physical_address)
rk45_ip.write(ADDR_TT, buffer_t_FPGA.physical_address)

# Low word at the base offset, high word at base + 0x04
rk45_ip.write(ADDR_TF       , LSB_tf)
rk45_ip.write(ADDR_TF + 0x04, MSB_tf)

rk45_ip.write(ADDR_H0       , LSB_h)
rk45_ip.write(ADDR_H0 + 0x04, MSB_h)

rk45_ip.write(ADDR_TOL       , LSB_tol)
rk45_ip.write(ADDR_TOL + 0x04, MSB_tol)

rk45_ip.write(ADDR_MU       , LSB_mu)
rk45_ip.write(ADDR_MU + 0x04, MSB_mu)

# Check correctness: read each argument back as one 8-byte value
# Information for reading and writing more than 32 bit https://discuss.pynq.io/t/how-can-i-write-a-64-bit-number-in-control-register-from-python/5519/3?u=davide-giacomini
tf_memory_raw     = rk45_ip.mmio.read(ADDR_TF,  length=8)
h_memory_raw      = rk45_ip.mmio.read(ADDR_H0,   length=8)
mu_memory_raw     = rk45_ip.mmio.read(ADDR_MU,  length=8)
tol_memory_raw    = rk45_ip.mmio.read(ADDR_TOL,  length=8)

# Prints True only if every value read back decodes to exactly the float that was written
print(      
      TF          == binary_to_float64(bin(tf_memory_raw)[2:].zfill(64))      and
      h0          == binary_to_float64(bin(h_memory_raw)[2:].zfill(64))       and
      MU          == binary_to_float64(bin(mu_memory_raw)[2:].zfill(64))      and
      TOL         == binary_to_float64(bin(tol_memory_raw)[2:].zfill(64))
      )

True


### Declare function

In [27]:
def runge_kutta_5_fpga():
    """Run the RK45 IP once and block until it has finished.

    Writes 1 to the control register (offset 0x00) to start the core, then
    polls the same register until bit 2 (mask 0x04) is set. Afterwards both
    output buffers are invalidated so that the processor fetches the data
    written by the FPGA instead of stale cached content.
    """
    ctrl_offset = 0x00
    idle_mask = 0x04

    rk45_ip.write(ctrl_offset, 1)

    # Busy-wait while the masked bit is still clear
    while not (rk45_ip.read(ctrl_offset) & idle_mask):
        pass

    # Mark this content invalid, so the processor fetches the data from the FPGA
    for fpga_buffer in (buffer_y_FPGA, buffer_t_FPGA):
        fpga_buffer.invalidate()

In [28]:
# The commented code uses the buttons of the FPGA
iterations = 1
time = timeit.timeit(lambda: runge_kutta_5_fpga(), number=iterations)
print('Average of ' + str(time/iterations) + ' seconds')

Average of 110.34538149499895 seconds


In [29]:
# Read the 4-byte `size` register of the IP; it is used below as the number of valid rows in the output buffers
size = rk45_ip.mmio.read(ADDR_SIZE,  length=4)

In [30]:
# Copy the buffers into plain numpy arrays and keep only the first `size` rows before writing the CSV files
write_to_csv(np.array(buffer_y_FPGA)[:size], "y_fpga_tol09_jupyter.csv")
write_to_csv(np.array(buffer_t_FPGA)[:size], "t_fpga_tol09_jupyter.csv")

/home/xilinx/jupyter_notebooks/runge_kutta_45/orbit_csv
y_fpga_tol09_jupyter.csv updated.
/home/xilinx/jupyter_notebooks/runge_kutta_45/orbit_csv
t_fpga_tol09_jupyter.csv updated.


## Check FPGA vs CPU

In [None]:
# correct = np.allclose(X_FPGA[:], X_CPU[:], rtol=1e-02)

# for i in range(8):
#     rgb[i].off()

# if (correct):
#     rgb[1].on()
#     rgb[4].on()
#     print("Yeee")
# else:
#     rgb[2].on()
#     rgb[5].on()
#     print("Nope")

In [None]:
# Clear the rgb

# for i in range(8):
#     rgb[i].off()

In [None]:
# buffer_X_FPGA.freebuffer()