In [None]:
!pip install pycuda

In [None]:
import pycuda.autoinit
import numpy as np
from pycuda import gpuarray
from pycuda.compiler import SourceModule
import math

In [None]:
class MinimumEnergyControl:
    """Regularized least-squares control problem for a 6-state point mass.

    The state is ``[x, y, z, vx, vy, vz]`` and is advanced with the undamped
    model ``x[k+1] = A x[k] + B u[k] + g`` whose matrices are spelled out in
    ``__init__``.  After ``step`` steps the state equals
    ``A^step x_0 + G u + Q``, where ``u`` stacks the ``step`` 3-axis inputs.

    The object function is ``norm(self.A @ u - self.b)`` with
    ``self.A = [G; sqrt(rho) * I]`` and ``self.b = [C; 0]``,
    ``C = x_des - Q - A^step x_0``.

    Two ways of building the problem data are offered:

    * ``define_object_function`` builds everything with NumPy on the host.
    * ``define_object_function_at_kernel`` builds ``G``, ``Q`` and the
      regularized Gram matrix on the GPU through PyCUDA kernels.
    """

    ## gravity, criterion: moon (N/kg), acting along the third axis
    GRAVITY = -1.62

    ## The Q kernel keeps one float per thread in a statically sized shared
    ## array and is launched with ``step`` threads per block, so ``step`` must
    ## not exceed this capacity on the GPU path.
    _MAX_GPU_STEP = 1024

    def __init__(self, x_des, x_0, step=50, dt=0.05, damping=False):
        """Store the problem data and upload it to the GPU.

        Args:
            x_des: desired final state, 6 values.
            x_0: initial state, 6 values.
            step: number of control steps.
            dt: duration of one step.
            damping: must be falsy; no damped model is defined.

        Raises:
            NotImplementedError: if ``damping`` is truthy.  Previously the
                object was left without its model matrices in that case.
        """
        if damping:
            raise NotImplementedError(
                "damping=True is not supported: no damped model is defined")

        gravity = self.GRAVITY

        ## A: the first three states integrate the last three over one step
        self.state_transition_matrix = \
            np.array([[ 1, 0, 0,dt, 0, 0],
                      [ 0, 1, 0, 0,dt, 0],
                      [ 0, 0, 1, 0, 0,dt],
                      [ 0, 0, 0, 1, 0, 0],
                      [ 0, 0, 0, 0, 1, 0],
                      [ 0, 0, 0, 0, 0, 1]])

        ## B = [[dt*dt/2 * I3], [dt * I3]]; only its two distinct entries are
        ## uploaded, the kernels place them where they belong.
        self.input_matrix = gpuarray.to_gpu(
            np.array([0.5*dt*dt, dt], dtype=np.float32))

        ## g = [0, 0, gravity*dt*dt/2, 0, 0, gravity*dt]^T, stored the same way
        self.gravity_matrix = gpuarray.to_gpu(
            np.array([0.5*gravity*dt*dt, gravity*dt], dtype=np.float32))

        ## desired state: x_des
        self.x_des = gpuarray.to_gpu(np.asarray(x_des, dtype=np.float32))

        ## initial state: x_0
        self.x_0 = gpuarray.to_gpu(np.asarray(x_0, dtype=np.float32))

        self.dt = np.float32(dt)

        self.step = np.int32(step)

        ## weight
        self.rho = np.int32(3)

        ## G, gram_G, Q
        self.G = gpuarray.to_gpu(np.float32(np.zeros((6,3*self.step))))
        self.gram_G = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,3*self.step))))
        self.Q = gpuarray.to_gpu(np.float32(np.zeros((6,1))))

    @staticmethod
    def _as_host_column(vector):
        """Return ``vector`` as a 6 x 1 float64 NumPy column.

        Objects exposing ``get()`` (such as PyCUDA device arrays) are copied
        to the host first; anything else is handed to ``np.asarray``.
        """
        host = vector.get() if hasattr(vector, "get") else vector
        return np.asarray(host, dtype=np.float64).reshape(6, 1)

    def define_object_function(self):
        """Build the problem data on the host with NumPy.

        Reads ``self.step``, ``self.dt``, ``self.rho``, ``self.x_des`` and
        ``self.x_0`` and (re)binds the following attributes to NumPy arrays:

        * ``G`` (6 x 3*step): column block ``n`` is ``A^(step-1-n) B``.
        * ``Q`` (6 x 1): accumulated gravity term ``sum_n A^n g``.
        * ``C`` (6 x 1): ``x_des - Q - A^step x_0``.
        * ``rho_matrix``, ``A``, ``b``: the stacked least-squares system.
        * ``gram_G`` (3*step x 3*step): ``G.T @ G + rho * I``.
        * ``G_C`` (3*step x 1): zero-initialized, as in the original code.

        Raises:
            ValueError: if ``x_des`` or ``x_0`` does not hold 6 values.
        """
        step = int(self.step)
        dt = float(self.dt)
        rho = float(self.rho)
        n_inputs = 3 * step

        ## 6 x 3*step matrix...
        ## Input n still passes through (step - 1 - n) transitions, each of
        ## which adds dt times the velocity gain (dt) to the position gain.
        remaining = step - 1 - np.arange(step)
        position_gain = 0.5 * dt * dt + remaining * dt * dt

        self.G = np.zeros((6, n_inputs))
        for axis in range(3):
            self.G[axis, axis::3] = position_gain
            self.G[axis + 3, axis::3] = dt

        ## 6 x 1 matrix...
        g_position = 0.5 * self.GRAVITY * dt * dt
        g_velocity = self.GRAVITY * dt
        elapsed = np.arange(step) * dt

        self.Q = np.zeros((6, 1))
        self.Q[2, 0] = np.sum(g_position + elapsed * g_velocity)
        self.Q[5, 0] = step * g_velocity

        ## 6 x 6 matrix: A^step only differs from A in the coupling term
        A_power_n = np.identity(6)
        A_power_n[:3, 3:] = step * dt * np.identity(3)

        ## 6 x 1 matrix...
        x_des = self._as_host_column(self.x_des)
        x_0 = self._as_host_column(self.x_0)
        self.C = x_des - self.Q - A_power_n @ x_0

        ## object function: norm(Ax - b)
        self.rho_matrix = math.sqrt(rho) * np.identity(n_inputs)
        self.A = np.vstack((self.G, self.rho_matrix))
        self.b = np.vstack((self.C, np.zeros((n_inputs, 1))))

        ## get gradient: (gram(G) + rho * I) @ u - G.T @ C
        ## rho belongs on the diagonal only, matching A.T @ A above and the
        ## bx == by test in the Gram kernel.
        self.gram_G = self.G.T @ self.G + rho * np.identity(n_inputs)

        ## NOTE(review): left as zeros exactly like the original; presumably
        ## meant to hold G.T @ C -- confirm before filling it in.
        self.G_C = np.zeros((n_inputs, 1))

    def define_object_function_at_kernel(self):
        """Build ``G``, ``Q`` and the regularized Gram matrix on the GPU.

        Compiles the kernels through ``ker_function``, rebinds ``self.G``,
        ``self.Q`` and ``self.gram_G`` to freshly zeroed device arrays and
        launches the three kernels with the launch shapes they are written
        for.

        The kernels store ``G`` column by column (column ``c`` occupies the
        six floats starting at offset ``6*c``), so the host view of the
        result is ``self.G.get().reshape(3*step, 6).T``.

        Raises:
            ValueError: if ``step`` is not in ``1 .. _MAX_GPU_STEP``.
        """
        step = int(self.step)
        if not 0 < step <= self._MAX_GPU_STEP:
            raise ValueError(
                "step must be between 1 and %d for the GPU path, got %d"
                % (self._MAX_GPU_STEP, step))

        self.ker_function()

        ## Fresh zeroed buffers: the G kernel writes only the nonzero entries,
        ## and define_object_function may have rebound these names to host
        ## arrays, which cannot be passed to a kernel as pointers.
        n_inputs = 3 * step
        self.G = gpuarray.to_gpu(np.zeros((6, n_inputs), dtype=np.float32))
        self.gram_G = gpuarray.to_gpu(
            np.zeros((n_inputs, n_inputs), dtype=np.float32))
        self.Q = gpuarray.to_gpu(np.zeros((6, 1), dtype=np.float32))

        ## scalars must be NumPy-typed so they are packed as C floats
        dt = np.float32(self.dt)
        rho = np.float32(self.rho)

        self.get_G_matrix(self.input_matrix, dt, self.G,
                          block=(6, 1, 1), grid=(step, 1, 1))

        self.get_Q_matrix(self.gravity_matrix, dt, self.x_des, self.x_0, self.Q,
                          block=(step, 1, 1), grid=(2, 1, 1))

        ## launched after get_G_matrix on the same stream, so G is complete
        self.get_G_gram_matrix(self.G, rho, self.gram_G,
                               block=(3, 1, 1), grid=(step, step, 1))

    def ker_function(self):
        """Compile the CUDA kernels and bind them as callables.

        Sets ``self.get_G_matrix``, ``self.get_Q_matrix`` and
        ``self.get_G_gram_matrix``.  The launch shape each kernel expects is
        noted above its source.
        """

        ## block=(6,1,1), grid=(self.step,1,1)
        get_G_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)
        #define step (gridDim.x)

        __global__ void get_G_matrix(float* input_matrix, float dt, float* G) {
            // Row tx of column (3*bx + tx%3); 6: DOF, 18: DOF*axis
            int index = tx + (tx%3) * 6 + bx * 18;

            if (tx < 3) {
                // input bx still passes through (step - bx - 1) transitions
                G[index] = input_matrix[0] + (step - bx - 1) * dt * input_matrix[1];
            }
            else {
                G[index] = dt;
            }
        }
        """
        get_G_matrix_ker = SourceModule(get_G_matrix_ker_function)

        ## block=(self.step,1,1), grid=(2,1,1)
        get_Q_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)
        #define step (blockDim.x)

        __global__ void get_Q_matrix(float* gravity, float dt, float* x_des, float* x_0, float* Q) {

            __shared__ float value[%(capacity)d];

            if (bx == 0) {
                value[tx] = gravity[0] + (tx * dt) * gravity[1];
            }
            else {
                value[tx] = gravity[1];
            }

            __syncthreads();

            // block 0 produces Q[2], block 1 produces Q[5]
            if (tx == 0) {
                float total = 0.0f;

                for (int i = 0; i < step; i++) {
                    total += value[i];
                }

                Q[bx == 0 ? 2 : 5] = total;
            }
        }
        """ % {"capacity": self._MAX_GPU_STEP}
        get_Q_matrix_ker = SourceModule(get_Q_matrix_ker_function)

        ## block=(3,1,1), grid=(self.step,self.step,1)
        get_G_gram_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define by (blockIdx.y)
        #define tx (threadIdx.x)
        #define step (gridDim.x)

        __global__ void get_G_gram_matrix(float* G, float rho, float* gram_G) {
            // Entry (3*by + tx, 3*bx + tx) of the row-major 3*step x 3*step matrix
            int index1 = tx * (3 * step + 1) + bx * 3 + by * (9 * step);

            // Row tx of columns (3*by + tx) and (3*bx + tx) of G; 7: DOF+1, 18: DOF*axis
            int col_p = tx * 7 + by * 18;
            int col_q = tx * 7 + bx * 18;

            float value = G[col_p] * G[col_q] + G[col_p+3] * G[col_q+3];

            if (bx == by) {
                value += rho;
            }

            gram_G[index1] = value;
        }
        """
        get_G_gram_matrix_ker = SourceModule(get_G_gram_matrix_ker_function)

        self.get_G_matrix = get_G_matrix_ker.get_function("get_G_matrix")
        self.get_Q_matrix = get_Q_matrix_ker.get_function("get_Q_matrix")
        self.get_G_gram_matrix = get_G_gram_matrix_ker.get_function("get_G_gram_matrix")