In [1]:
!pip install pycuda

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pycuda
  Downloading pycuda-2022.1.tar.gz (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 35.9 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting mako
  Downloading Mako-1.2.1-py3-none-any.whl (78 kB)
[K     |████████████████████████████████| 78 kB 7.9 MB/s 
[?25hCollecting pytools>=2011.2
  Downloading pytools-2022.1.12.tar.gz (70 kB)
[K     |████████████████████████████████| 70 kB 8.1 MB/s 
Collecting platformdirs>=2.2.0
  Downloading platformdirs-2.5.2-py3-none-any.whl (14 kB)
Building wheels for collected packages: pycuda, pytools
  Building wheel for pycuda (PEP 517) ... [?25l[?25hdone
  Created wheel for pycuda: filename=pycuda-2022.1-cp37-cp37m-linux_x86_64.whl size=629484 sha256=7f6c699b6c0df31a977795d27569e07d6dd8a801109d

In [2]:
import pycuda.autoinit
from pycuda.compiler import SourceModule
from pycuda import gpuarray
import numpy as np
import math

In [6]:
import pycuda.driver as drv

drv.init()

for i in range(drv.Device.count()):
    gpu_device = drv.Device(i)
    print(f"Device {i}: {gpu_device.name()}")
    compute_capability = float("%d.%d" % gpu_device.compute_capability())
    print(f"\t Compute Capability: {compute_capability}")
    print(f"\t Total Memory: {gpu_device.total_memory() // (1024 ** 2)} megabytes")

Device 0: Tesla T4
	 Compute Capability: 7.5
	 Total Memory: 15109 megabytes


In [7]:
a = gpuarray.to_gpu(np.float32(np.zeros((1,1))))
a.nbytes

4

# Past function

## MEC(minimum energy control)

In [None]:
class MinimumEnergyControl:
    def __init__(self, x_des, x_0, step=50, dt=0.05, damping=False):

        ## gravity, criterion: moon
        gravity = -1.62     # N/kg

        ## no drag or something disturb movement
        if not damping:
            ## A
            self.state_transition_matrix = \
                np.array([[ 1, 0, 0,dt, 0, 0],
                          [ 0, 1, 0, 0,dt, 0],
                          [ 0, 0, 1, 0, 0,dt],
                          [ 0, 0, 0, 1, 0, 0],
                          [ 0, 0, 0, 0, 1, 0],
                          [ 0, 0, 0, 0, 0, 1]])

            ## B
            input_matrix = \
                np.array([[dt*dt/2,      0,      0],
                          [      0,dt*dt/2,      0],
                          [      0,      0,dt*dt/2],
                          [     dt,      0,      0],
                          [      0,     dt,      0],
                          [      0,      0,     dt]])
            
            self.input_matrix = \
                gpuarray.to_gpu(np.float32(np.array([0.5*dt*dt, dt])))

            ## g
            gravity_matrix = \
                np.array([[              0],
                          [              0],
                          [gravity*dt*dt/2],
                          [              0],
                          [              0],
                          [     gravity*dt]])
                
            self.gravity_matrix = \
                gpuarray.to_gpu(np.float32(np.array([0.5*gravity*dt*dt, gravity*dt])))

        ## drag or something exist...
        else:
            pass

        ## desired state: x_des
        self.x_des = gpuarray.to_gpu(np.float32(x_des))

        ## initial state: x_0
        self.x_0 = gpuarray.to_gpu(np.float32(x_0))

        self.dt = np.float32(dt)

        self.step = step

        ## weight
        self.rho = 1/3

    def run(self):
        self.get_gradient(self.gram_G,
                          self.u,
                          self.G_C,          
                          self.iteration,
                          self.gradient,
                          block=(self.TPB,1,1),
                          grid=(3*self.step,1,1))

    def optimal_size(self, n):
        thread_per_block = int(math.sqrt(n/2))

        iteration = int(n / thread_per_block) + 1

        return thread_per_block, np.int32(iteration)

    def define_problem(self):

        ## define matrices
        self.define_matrix_for_object_function()

        ## define matrices' value
        self.define_object_function_at_kernel()

    def define_matrix_for_object_function(self):
        self.rho_matrix = \
            gpuarray.to_gpu(np.float32(math.sqrt(self.rho) * np.identity(3*self.step)))
 
        ## solution!!!
        self.u = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,1))))

        ## G, gram_G, Q
        self.G = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,6)).reshape(6*3*self.step)))
        self.gram_G = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,3*self.step)).reshape(3*3*self.step*self.step)))
        self.Q = gpuarray.to_gpu(np.float32(np.zeros((6,1))))
        self.C = gpuarray.to_gpu(np.float32(np.zeros((6,1))))
        self.G_C = gpuarray.to_gpu(np.float32(np.zeros((150,1))))
        self.gradient = gpuarray.to_gpu(np.float32(np.zeros((150,1))))

        ## TPB: thread_per_block, BPG: block_per_grid
        self.TPB, self.iteration = self.optimal_size(3*self.step)

    def define_object_function_at_kernel(self):
        self.ker_function()

        self.get_G_matrix(self.input_matrix, self.dt, self.G, block=(6,1,1), grid=(self.step,1,1))
        self.get_Q_matrix(self.gravity_matrix, self.dt, self.Q, block=(self.step,1,1), grid=(2,1,1))
        self.get_G_gram_matrix(self.G, self.rho_matrix, self.gram_G, block=(3,1,1), grid=(self.step,self.step,1))
        self.get_G_C_matrix(self.G, self.x_des, self.x_0, self.Q, self.C, self.G_C, block=(3,1,1), grid=(self.step,1,1))

    def ker_function(self):
        ## We'll gonna do 150 x 150 @ 150 x 1
        ## block=(thread_per_block,1,1), grid=(3*self.step,1,1)
        get_gradient_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bx (blockIdx.x)
        #define bs (blockDim.x)
        #define gs (gridDim.x)

        __global__ void get_gradient(float* matrix, float* vector1, float* vector2, int iteration, float* result) {

            __shared__ float result_jerk[1000];

            result_jerk[tx] = 0.0;

            for (int i = 0; i < iteration; i++) {
                int index1 = i + tx * iteration;
                int index2 = index1 + bx * 150;

                if (index1 < gs) {
                    result_jerk[tx] += matrix[index2] * vector1[index1];
                }
                else {
                    result_jerk[1000-tx] = 0;
                }
            }

            __syncthreads();

            if (tx == 0) {
                for (int j = 0; j < bs; j++) {
                    result[bx] += result_jerk[j];
                }

                result[bx] -= vector2[bx];
            }
            else {
                result_jerk[1000-tx] = 0;
            }

            __syncthreads();
        }
        """
        get_gradient_ker = SourceModule(get_gradient_ker_function)

        ## block=(6,1,1), grid=(self.step,1,1)
        get_G_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)
        #define step (gridDim.x)

        __global__ void get_G_matrix(float* input_matrix, float dt, float* G) {
            // 6: DOF, 18: DOF*axis
            int index = tx + (tx%3) * 6 + bx * 18;

            if (tx < 3) {
                float value;
                value = input_matrix[0] + (step - bx - 1) * input_matrix[1];

                G[index] = value;
            }
            else {
                G[index] = dt;
            }

            __syncthreads();
        }
        """
        get_G_matrix_ker = SourceModule(get_G_matrix_ker_function)

        ## block=(self.step,1,1), grid=(2,1,1)
        get_Q_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)
        #define step (blockDim.x)

        __global__ void get_Q_matrix(float* gravity, float dt, float* Q) {
            
            __shared__ float value[50];
            
            if (bx == 0) {
                value[tx] = gravity[0] + (tx * dt) * gravity[1];
            }
            else {
                value[tx] = gravity[1];
            }

            __syncthreads();

            if (bx == 0) {
                if(tx == 0) {
                    for (int i = 0; i < step; i++) {
                        Q[2] += value[i];
                    }
                }
            }
            else {
                if(tx == 0) {
                    for (int i = 0; i < step; i++) {
                        Q[5] += value[i];
                    }
                }
            }

            __syncthreads();
        }
        """
        get_Q_matrix_ker = SourceModule(get_Q_matrix_ker_function)

        ## block=(3,1,1), grid=(self.step,self.step,1)
        get_G_gram_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define by (blockIdx.y)
        #define tx (threadIdx.x)
        #define step (gridDim.x)

        __global__ void get_G_gram_matrix(float* G, float* rho_matrix, float* gram_G) {
            // 9: axis, 151: axis*step+1, 450: axis*axis*step
            int index1 = tx * 151 + bx * 3 + by * 450;

            // 7: DOF+1, 18: DOF*axis
            int index2 = tx * 7 + bx * 18;
                
            float value;
            value = G[index2] * G[index2] + G[index2+3] * G[index2+3];

            gram_G[index1] = value; 

            __syncthreads();

            gram_G[index1] += rho_matrix[index1]*rho_matrix[index1];

            __syncthreads();
        }
        """
        get_G_gram_matrix_ker = SourceModule(get_G_gram_matrix_ker_function)

        ## block=(3,1,1), grid=(self.step,1,1)
        get_G_C_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)

        __global__ void get_G_C_matrix(float* G, float* x_des, float* x_0, float* Q, float* C, float* G_C) {
            // C first in each block
            __shared__ float C_jerk[6];

            C_jerk[tx] = x_des[tx] - Q[tx] - x_0[tx];
            C_jerk[tx+3] = x_des[tx+3] - Q[tx+3] - x_0[tx+3];

            __syncthreads();

            C[tx] = C_jerk[tx];
            C[tx+3] = C_jerk[tx+3];

            __syncthreads();

            // G_C Next
            int index1 = tx * 7 + bx * 18;
            int index2 = tx + bx * 3;

            float value;
            value = G[index1] * C_jerk[tx] + G[index1+3] * C_jerk[tx+3];

            __syncthreads();

            G_C[index2] = value;

            __syncthreads();
        }
        """
        get_G_C_matrix_ker = SourceModule(get_G_C_matrix_ker_function)

        self.get_G_matrix = get_G_matrix_ker.get_function("get_G_matrix")
        self.get_Q_matrix = get_Q_matrix_ker.get_function("get_Q_matrix")
        self.get_G_gram_matrix = get_G_gram_matrix_ker.get_function("get_G_gram_matrix")
        self.get_G_C_matrix = get_G_C_matrix_ker.get_function("get_G_C_matrix")
        self.get_gradient = get_gradient_ker.get_function("get_gradient")

In [None]:
class OptimizerForGuidance:
    def __init__(self, length, learning_rate):
        self.length = length
        self.learning_rate = np.float32(learning_rate)
        self.kernel_function()

    def run(self, theta, gradient):
        ## theta, gradient: gpuarray type variable
        self.basic_optimizer(theta,
                             gradient,
                             self.learning_rate,
                             block=(self.length,1,1),
                             grid=(1,1,1))

    def kernel_function(self):
        ## block=(length,1,1), grid=(1,1,1)
        basic_optimizer_ker_function = \
        """
        #define x (threadIdx.x)

        __global__ void basic_optimizer(float* theta, float* gradient, float learning_rate) {
            theta[x] -= gradient[x] * learning_rate;

            __syncthreads();
        }
        """
        basic_optimizer_ker = SourceModule(basic_optimizer_ker_function)

        self.basic_optimizer = basic_optimizer_ker.get_function("basic_optimizer")

## Constraint

In [None]:
class ConstraintsForInput:
    def __init__(self, problem, upper_boundary, downer_boundary):
        ## ex> MEC(minimum energy control)
        self.problem = problem

        self.upper_boundary = np.float32(upper_boundary)
        self.downer_boundary = np.float32(downer_boundary)

        self.kernel_function()

    def projection(self):
        self.project_function(self.problem.u,
                              self.upper_boundary,
                              self.downer_boundary,
                              block=(3,1,1),
                              grid=(self.problem.step,1,1))

    def kernel_function(self):
        ## block=(3,1,1), grid=(problem.step,1,1)
        projection_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)

        __device__ float square_root(float value) {
            float s = 0;
            float t = 0;

            s = value / 2;

            for (;s != t;) {
                t = s;
                s = ((value/t) + t) / 2;
            }

            return s;
        }

        __device__ float get_norm(float x, float y, float z) {
            float value;
            float norm;

            value = x * x + y * y + z * z;
            norm = square_root(value);

            return norm;    
        }

        __global__ void projection(float* theta, float upper_boundary, float downer_boundary) {
            __shared__ float u[3];
            __shared__ float norm[1];
            float value;

            int index = tx + bx * 3;

            u[tx] = theta[index];

            __syncthreads();

            if (tx == 0) {
                norm[0] = get_norm(u[0], u[1], u[2]);
            } 

            __syncthreads();

            if ((norm[0] > downer_boundary) && (norm[0] < upper_boundary)) {
                value = u[tx];
            }
            else {
                value = u[tx] * upper_boundary / norm[0];
            }

            __syncthreads();

            theta[index] = value;
        }
        """
        projection_ker = SourceModule(projection_ker_function)

        self.project_function = projection_ker.get_function("projection")

## Redefine Solver

In [None]:
class MinimumEnergyControlSolver:
    def __init__(self, x_des, x_0, upper_boundary, downer_boundary, max_iteration=100):
        ## max_iteration
        self.max_iteration = max_iteration

        ## initialize MEC(minimum energy control)
        self.MEC = MinimumEnergyControl(x_des, x_0)

        ## initialize optimizer
        learning_rate = 1e-4

        self.optimizer = OptimizerForGuidance(3*self.MEC.step, learning_rate)

        ## constraint
        self.upper_boundary = upper_boundary
        self.downer_boundary = downer_boundary

        self.constraint = ConstraintsForInput(self.MEC, self.upper_boundary, self.downer_boundary)

        ## evaluate
        self.error_vector = gpuarray.to_gpu(np.float32(np.zeros((3*self.MEC.step+6,1))))
        self.error = gpuarray.to_gpu(np.float32(np.zeros((1,self.max_iteration)))) 

        ## TPB = 5, iteration = 10
        self.TPB, self.iteration = self.MEC.optimal_size(self.MEC.step)

        self.kernel_function()

    def solve(self):
        ## define problem: fit matrices for left step
        self.MEC.define_problem()

        for i in range(100):
            ## get_gradient
            self.MEC.run()

            ## optimize
            self.optimizer.run(self.MEC.u, self.MEC.gradient)

            ## constraint
            self.constraint.projection()

            ## evaluate
            self.evaluate(i)


    def evaluate(self, current_iter):
        
        ## evaluate learning
        self.get_error_vector(self.MEC.G,
                              self.MEC.rho_matrix,
                              self.MEC.u,
                              self.MEC.C,
                              self.iteration, 
                              self.error_vector, 
                              block=(self.TPB,1,1),
                              grid=(156,1,1))
        
        self.get_error(self.error_vector,
                       self.error,
                       np.int32(current_iter),
                       block=(156,1,1),
                       grid=(1,1,1))
        
    def kernel_function(self):
        ## We'll gonna do 156 x 150 @ 150 x 1
        ## block=(TPB,1,1), grid=(156,1,1)
        get_error_vector_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bx (blockIdx.x)
        #define bs (blockDim.x)

        __global__ void get_error_vector(float* G, float* rho_matrix, float* u, float* C, int iteration, float* error_vector) {

            if (bx < 6) {
                
                __shared__ float value[100];

                value[tx] = 0.0;

                __syncthreads();

                for (int i = 0; i < iteration; i++) {
                    int index1 = bx % 3;
                    int index2 = i * 5 + tx % 5;
                    int index3 = index1 + index2*3;

                    // 7: DOF+1, 90: 5*DOF*axis
                    int index4 = bx + index1*6 + index2*18;

                    value[tx] += G[index4] * u[index3];
                }

                __syncthreads();
                if (tx == 0) {
                    // initialize
                    error_vector[bx] = 0.0;

                    value[50] = 0.0;
                    for (int j = 0; j < bs; j++) {
                        value[50] += value[j];
                    }
                 
                    error_vector[bx] = value[50] - C[bx];
                }
                __syncthreads();
            }
            else {
                if (tx == 0) {
                    // initialize
                    error_vector[bx] = 0.0;

                    int index1 = bx - 6;
                    int index2 = index1 * 151;

                    error_vector[bx] = rho_matrix[index2] * u[index1];
                }

                __syncthreads();
            }
        }
        """

        ## block=(156,1,1), grid=(1,1,1)
        get_error_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bs (blockDim.x)

        __device__ float square_root(float value) {
            float s = 0;
            float t = 0;

            s = value / 2;

            for (;s != t;) {
                t = s;
                s = ((value/t) + t) / 2;
            }

            return s;
        }

        __device__ float get_norm(float* vector, int length) {
            float value = 0.0;
            float norm;

            for (int i = 0; i < length; i++) {
                value += vector[i] * vector[i];
            } 

            norm = square_root(value);

            return norm;    
        }


        __global__ void get_error(float* error_vector, float* error, int current_iter) {

            __shared__ float value[1000];

            value[tx] = error_vector[tx];

            __syncthreads();

            if (tx == 0) {
                int length = bs;

                error[current_iter] = get_norm(value, length);
            }
            else {

                value[1000-tx] = 0.0;
            }
            
            __syncthreads();
        }
        """
        get_error_vector_ker = SourceModule(get_error_vector_ker_function)
        get_error_ker = SourceModule(get_error_ker_function)

        self.get_error_vector = get_error_vector_ker.get_function("get_error_vector")
        self.get_error = get_error_ker.get_function("get_error")

In [None]:
## upper boundary: 5.8, downer boundary: 0.0

## destination
x_des = np.array([0,0,0,0,0,0])

## initial point
x_0 = np.array([100,0,-1500,-10,0,80])

## constraints
upper_boundary = 5.8
downer_boundary = 0.0

MECS = MinimumEnergyControlSolver(x_des, x_0, upper_boundary, downer_boundary)

In [None]:
MECS.solve()

MECS.MEC.u[:12]

array([[-0.3841809 ],
       [ 0.        ],
       [ 5.787262  ],
       [-0.38417292],
       [ 0.        ],
       [ 5.787263  ],
       [-0.38416386],
       [ 0.        ],
       [ 5.787264  ],
       [-0.38415447],
       [ 0.        ],
       [ 5.787265  ]], dtype=float32)

# Test flexibility
## Fail...
May be memory ... 빵!

Few kernel functions design to calculate matrix which has size 150.... 

To make full flexibility, I need to make all function can deal with changable step...

In [None]:
## upper boundary: 5.8, downer boundary: 0.0

## destination
x_des = np.array([0,0,0,0,0,0])

## initial point
x_0 = np.array([100,0,-1500,-10,0,80])

## constraints
upper_boundary = 5.8
downer_boundary = 0.0

MECS = MinimumEnergyControlSolver(x_des, x_0, upper_boundary, downer_boundary)

In [None]:
class MinimumEnergyControlSolver:
    def __init__(self, x_des, x_0, upper_boundary, downer_boundary, max_iteration=100):
        ## max_iteration
        self.max_iteration = max_iteration

        ## initialize MEC(minimum energy control)
        self.MEC = MinimumEnergyControl(x_des, x_0)

        ## initialize optimizer
        learning_rate = 1e-4

        self.optimizer = OptimizerForGuidance(3*self.MEC.step, learning_rate)

        ## constraint
        self.upper_boundary = upper_boundary
        self.downer_boundary = downer_boundary

        self.constraint = ConstraintsForInput(self.MEC, self.upper_boundary, self.downer_boundary)

        ## evaluate
        self.error_vector = gpuarray.to_gpu(np.float32(np.zeros((3*self.MEC.step+6,1))))
        self.error = gpuarray.to_gpu(np.float32(np.zeros((1,self.max_iteration)))) 

        ## TPB = 5, iteration = 10
        self.TPB, self.iteration = self.MEC.optimal_size(self.MEC.step)

        self.kernel_function()

    def solve(self):
        ## define problem: fit matrices for left step
        self.MEC.define_problem()

        for i in range(100):
            ## get_gradient
            self.MEC.run()

            ## optimize
            self.optimizer.run(self.MEC.u, self.MEC.gradient)

            ## constraint
            self.constraint.projection()

            ## evaluate
            self.evaluate(i)


    def evaluate(self, current_iter):
        ## set size
        block_size = 3 * self.MEC.step + 6
        grid_size  = 3 * self.MEC.step + 6
        
        ## evaluate learning
        self.get_error_vector(self.MEC.G,
                              self.MEC.rho_matrix,
                              self.MEC.u,
                              self.MEC.C,
                              self.iteration, 
                              self.error_vector, 
                              block=(self.TPB,1,1),
                              grid=(grid_size,1,1))
        
        self.get_error(self.error_vector,
                       self.error,
                       np.int32(current_iter),
                       block=(block_size,1,1),
                       grid=(1,1,1))
        
    def kernel_function(self):
        ## We'll gonna do 156 x 150 @ 150 x 1
        ## block=(TPB,1,1), grid=(156,1,1)
        get_error_vector_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bx (blockIdx.x)
        #define bs (blockDim.x)

        __global__ void get_error_vector(float* G, float* rho_matrix, float* u, float* C, int iteration, float* error_vector) {

            if (bx < 6) {
                
                __shared__ float value[100];

                value[tx] = 0.0;

                __syncthreads();

                for (int i = 0; i < iteration; i++) {
                    int index1 = bx % 3;
                    int index2 = i * 5 + tx % 5;
                    int index3 = index1 + index2*3;

                    // 7: DOF+1, 90: 5*DOF*axis
                    int index4 = bx + index1*6 + index2*18;

                    value[tx] += G[index4] * u[index3];
                }

                __syncthreads();
                if (tx == 0) {
                    // initialize
                    error_vector[bx] = 0.0;

                    value[50] = 0.0;
                    for (int j = 0; j < bs; j++) {
                        value[50] += value[j];
                    }
                 
                    error_vector[bx] = value[50] - C[bx];
                }
                __syncthreads();
            }
            else {
                if (tx == 0) {
                    // initialize
                    error_vector[bx] = 0.0;

                    int index1 = bx - 6;
                    int index2 = index1 * 151;

                    error_vector[bx] = rho_matrix[index2] * u[index1];
                }

                __syncthreads();
            }
        }
        """

        ## block=(156,1,1), grid=(1,1,1)
        get_error_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bs (blockDim.x)

        __device__ float square_root(float value) {
            float s = 0;
            float t = 0;

            s = value / 2;

            for (;s != t;) {
                t = s;
                s = ((value/t) + t) / 2;
            }

            return s;
        }

        __device__ float get_norm(float* vector, int length) {
            float value = 0.0;
            float norm;

            for (int i = 0; i < length; i++) {
                value += vector[i] * vector[i];
            } 

            norm = square_root(value);

            return norm;    
        }


        __global__ void get_error(float* error_vector, float* error, int current_iter) {

            __shared__ float value[1000];

            value[tx] = error_vector[tx];

            __syncthreads();

            if (tx == 0) {
                int length = bs;

                error[current_iter] = get_norm(value, length);
            }
            else {

                value[1000-tx] = 0.0;
            }
            
            __syncthreads();
        }
        """
        get_error_vector_ker = SourceModule(get_error_vector_ker_function)
        get_error_ker = SourceModule(get_error_ker_function)

        self.get_error_vector = get_error_vector_ker.get_function("get_error_vector")
        self.get_error = get_error_ker.get_function("get_error")

In [None]:
## upper boundary: 5.8, downer boundary: 0.0

## destination
x_des = np.array([0,0,0,0,0,0])

## initial point
x_0 = np.array([100,0,-1500,-10,0,80])

## constraints
upper_boundary = 5.8
downer_boundary = 0.0

MECS = MinimumEnergyControlSolver(x_des, x_0, upper_boundary, downer_boundary)

In [None]:
for i in range(50):
    MECS.solve()

    MECS.MEC.u[:12].get()

    # MECS.MEC.step -= 1

# Fuck!

1. MEC: not flexible
2. optimizer: flexible
3. constaint: flexible
4. MECS: not flexible

## First of all... fix MEC..

Hopefully, I just fix kernel function indexing... shit...

Only I need to do is just make kernel functions deal with MEC.step...

In [None]:
class MinimumEnergyControl:
    def __init__(self, x_des, x_0, step=50, dt=0.05, damping=False):

        ## gravity, criterion: moon
        gravity = -1.62     # N/kg

        ## no drag or something disturb movement
        if not damping:
            ## A
            self.state_transition_matrix = \
                np.array([[ 1, 0, 0,dt, 0, 0],
                          [ 0, 1, 0, 0,dt, 0],
                          [ 0, 0, 1, 0, 0,dt],
                          [ 0, 0, 0, 1, 0, 0],
                          [ 0, 0, 0, 0, 1, 0],
                          [ 0, 0, 0, 0, 0, 1]])

            ## B
            input_matrix = \
                np.array([[dt*dt/2,      0,      0],
                          [      0,dt*dt/2,      0],
                          [      0,      0,dt*dt/2],
                          [     dt,      0,      0],
                          [      0,     dt,      0],
                          [      0,      0,     dt]])
            
            self.input_matrix = \
                gpuarray.to_gpu(np.float32(np.array([0.5*dt*dt, dt])))

            ## g
            gravity_matrix = \
                np.array([[              0],
                          [              0],
                          [gravity*dt*dt/2],
                          [              0],
                          [              0],
                          [     gravity*dt]])
                
            self.gravity_matrix = \
                gpuarray.to_gpu(np.float32(np.array([0.5*gravity*dt*dt, gravity*dt])))

        ## drag or something exist...
        else:
            pass

        ## desired state: x_des
        self.x_des = gpuarray.to_gpu(np.float32(x_des))

        ## initial state: x_0
        self.x_0 = gpuarray.to_gpu(np.float32(x_0))

        self.dt = np.float32(dt)

        self.step = step

        ## weight
        self.rho = 1/3

    def run(self):
        self.get_gradient(self.gram_G,
                          self.u,
                          self.G_C,          
                          self.iteration,
                          self.gradient,
                          np.int32(self.step),
                          block=(self.TPB,1,1),
                          grid=(3*self.step,1,1))

    def optimal_size(self, n):
        thread_per_block = int(math.sqrt(n/2))

        iteration = int(n / thread_per_block) + 1

        return thread_per_block, np.int32(iteration)

    def define_problem(self):

        ## define matrices
        self.define_matrix_for_object_function()

        ## define matrices' value
        self.define_object_function_at_kernel()

    def define_matrix_for_object_function(self):
        self.rho_matrix = \
            gpuarray.to_gpu(np.float32(math.sqrt(self.rho) * np.identity(3*self.step)))
 
        ## solution!!!
        self.u = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,1))))

        ## G, gram_G, Q
        self.G = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,6)).reshape(6*3*self.step)))
        self.gram_G = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,3*self.step)).reshape(3*3*self.step*self.step)))
        self.Q = gpuarray.to_gpu(np.float32(np.zeros((6,1))))
        self.C = gpuarray.to_gpu(np.float32(np.zeros((6,1))))
        self.G_C = gpuarray.to_gpu(np.float32(np.zeros((150,1))))
        self.gradient = gpuarray.to_gpu(np.float32(np.zeros((150,1))))

        ## TPB: thread_per_block, BPG: block_per_grid
        self.TPB, self.iteration = self.optimal_size(3*self.step)

    def define_object_function_at_kernel(self):
        self.ker_function()

        self.get_G_matrix(self.input_matrix, self.dt, self.G, block=(6,1,1), grid=(self.step,1,1))
        self.get_Q_matrix(self.gravity_matrix, self.dt, self.Q, block=(self.step,1,1), grid=(2,1,1))
        self.get_G_gram_matrix(self.G, self.rho_matrix, self.gram_G, np.int32(self.step), block=(3,1,1), grid=(self.step,self.step,1))
        self.get_G_C_matrix(self.G, self.x_des, self.x_0, self.Q, self.C, self.G_C, block=(3,1,1), grid=(self.step,1,1))

    def ker_function(self):
        ## We'll gonna do 150 x 150 @ 150 x 1
        ## block=(thread_per_block,1,1), grid=(3*self.step,1,1)
        get_gradient_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bx (blockIdx.x)
        #define bs (blockDim.x)
        #define gs (gridDim.x)

        __global__ void get_gradient(float* matrix, float* vector1, float* vector2, int iteration, float* result, int step) {

            __shared__ float result_jerk[1000];

            result_jerk[tx] = 0.0;

            for (int i = 0; i < iteration; i++) {
                int index1 = i + tx * iteration;
                int index2 = index1 + bx * 3 * step;

                if (index1 < gs) {
                    result_jerk[tx] += matrix[index2] * vector1[index1];
                }
                else {
                    result_jerk[1000-tx] = 0;
                }
            }

            __syncthreads();

            if (tx == 0) {
                for (int j = 0; j < bs; j++) {
                    result[bx] += result_jerk[j];
                }

                result[bx] -= vector2[bx];
            }
            else {
                result_jerk[1000-tx] = 0;
            }

            __syncthreads();
        }
        """
        get_gradient_ker = SourceModule(get_gradient_ker_function)

        ## block=(6,1,1), grid=(self.step,1,1)
        get_G_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)
        #define step (gridDim.x)

        __global__ void get_G_matrix(float* input_matrix, float dt, float* G) {
            // 6: DOF, 18: DOF*axis
            int index = tx + (tx%3) * 6 + bx * 18;

            if (tx < 3) {
                float value;
                value = input_matrix[0] + (step - bx - 1) * input_matrix[1];

                G[index] = value;
            }
            else {
                G[index] = dt;
            }

            __syncthreads();
        }
        """
        get_G_matrix_ker = SourceModule(get_G_matrix_ker_function)

        ## block=(self.step,1,1), grid=(2,1,1)
        get_Q_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)
        #define step (blockDim.x)

        __global__ void get_Q_matrix(float* gravity, float dt, float* Q) {
            
            __shared__ float value[50];
            
            if (bx == 0) {
                value[tx] = gravity[0] + (tx * dt) * gravity[1];
            }
            else {
                value[tx] = gravity[1];
            }

            __syncthreads();

            if (bx == 0) {
                if(tx == 0) {
                    for (int i = 0; i < step; i++) {
                        Q[2] += value[i];
                    }
                }
            }
            else {
                if(tx == 0) {
                    for (int i = 0; i < step; i++) {
                        Q[5] += value[i];
                    }
                }
            }

            __syncthreads();
        }
        """
        get_Q_matrix_ker = SourceModule(get_Q_matrix_ker_function)

        ## block=(3,1,1), grid=(self.step,self.step,1)
        get_G_gram_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define by (blockIdx.y)
        #define tx (threadIdx.x)
        #define step (gridDim.x)

        __global__ void get_G_gram_matrix(float* G, float* rho_matrix, float* gram_G) {
            // 9: axis, 151: axis*step+1, 450: axis*axis*step
            int index1 = 3 * step + 1;
            int index2 = 3 * 3 * step;
            int index3 = tx * index1 + bx * 3 + by * index2;

            // 7: DOF+1, 18: DOF*axis
            int index4 = tx * 7 + bx * 18;
                
            float value = 0.0;
            value = G[index4] * G[index4] + G[index4+3] * G[index4+3];

            gram_G[index3] = value; 

            __syncthreads();

            gram_G[index3] += rho_matrix[index3]*rho_matrix[index3];

            __syncthreads();
        }
        """
        get_G_gram_matrix_ker = SourceModule(get_G_gram_matrix_ker_function)

        ## block=(3,1,1), grid=(self.step,1,1)
        get_G_C_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)

        __global__ void get_G_C_matrix(float* G, float* x_des, float* x_current, float* Q, float* C, float* G_C) {
            // C first in each block
            __shared__ float C_jerk[6];

            C_jerk[tx] = x_des[tx] - Q[tx] - x_current[tx];
            C_jerk[tx+3] = x_des[tx+3] - Q[tx+3] - x_current[tx+3];

            __syncthreads();

            C[tx] = C_jerk[tx];
            C[tx+3] = C_jerk[tx+3];

            __syncthreads();

            // G_C Next
            int index1 = tx * 7 + bx * 18;
            int index2 = tx + bx * 3;

            float value;
            value = G[index1] * C_jerk[tx] + G[index1+3] * C_jerk[tx+3];

            __syncthreads();

            G_C[index2] = value;

            __syncthreads();
        }
        """
        get_G_C_matrix_ker = SourceModule(get_G_C_matrix_ker_function)

        self.get_G_matrix      = get_G_matrix_ker.get_function("get_G_matrix")
        self.get_Q_matrix      = get_Q_matrix_ker.get_function("get_Q_matrix")
        self.get_G_gram_matrix = get_G_gram_matrix_ker.get_function("get_G_gram_matrix")
        self.get_G_C_matrix    = get_G_C_matrix_ker.get_function("get_G_C_matrix")
        self.get_gradient      = get_gradient_ker.get_function("get_gradient")

## And then... MECS...

In [None]:
class MinimumEnergyControlSolver:
    def __init__(self, x_des, x_0, upper_boundary, downer_boundary, max_iteration=100):
        ## max_iteration
        self.max_iteration = max_iteration

        ## initialize MEC(minimum energy control)
        self.MEC = MinimumEnergyControl(x_des, x_0)

        ## initialize optimizer
        learning_rate = 1e-4

        self.optimizer = OptimizerForGuidance(3*self.MEC.step, learning_rate)

        ## constraint
        self.upper_boundary = upper_boundary
        self.downer_boundary = downer_boundary

        self.constraint = ConstraintsForInput(self.MEC, self.upper_boundary, self.downer_boundary)

        ## evaluate
        self.error_vector = gpuarray.to_gpu(np.float32(np.zeros((3*self.MEC.step+6,1))))
        self.error = gpuarray.to_gpu(np.float32(np.zeros((1,self.max_iteration)))) 

        ## TPB = 5, iteration = 10
        self.TPB, self.iteration = self.MEC.optimal_size(self.MEC.step)

        self.kernel_function()

    def solve(self):
        ## define problem: fit matrices for left step
        self.MEC.define_problem()

        for i in range(100):
            ## get_gradient
            self.MEC.run()

            ## optimize
            self.optimizer.run(self.MEC.u, self.MEC.gradient)

            ## constraint
            self.constraint.projection()

            ## evaluate
            self.evaluate(i)


    def evaluate(self, current_iter):
        ## set size
        block_size = 3 * self.MEC.step + 6
        grid_size  = 3 * self.MEC.step + 6
        
        ## evaluate learning
        self.get_error_vector(self.MEC.G,
                              self.MEC.rho_matrix,
                              self.MEC.u,
                              self.MEC.C,
                              self.iteration, 
                              self.error_vector, 
                              block=(self.TPB,1,1),
                              grid=(grid_size,1,1))
        
        self.get_error(self.error_vector,
                       self.error,
                       np.int32(current_iter),
                       block=(block_size,1,1),
                       grid=(1,1,1))
        
    def kernel_function(self):
        ## We'll gonna do 156 x 150 @ 150 x 1
        ## block=(TPB,1,1), grid=(156,1,1)
        get_error_vector_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bx (blockIdx.x)
        #define bs (blockDim.x)
        #define gs (gridDim.x)

        __global__ void get_error_vector(float* G, float* rho_matrix, float* u, float* C, int iteration, float* error_vector) {

            if (bx < 6) {
                
                __shared__ float value[100];

                value[tx] = 0.0;

                __syncthreads();

                for (int i = 0; i < iteration; i++) {
                    int index1 = bx % 3;
                    int index2 = i * 5 + tx % 5;
                    int index3 = index1 + index2*3;

                    // 7: DOF+1, 90: 5*DOF*axis
                    int index4 = bx + index1*6 + index2*18;

                    value[tx] += G[index4] * u[index3];
                }

                __syncthreads();
                if (tx == 0) {
                    // initialize
                    error_vector[bx] = 0.0;

                    value[50] = 0.0;
                    for (int j = 0; j < bs; j++) {
                        value[50] += value[j];
                    }
                 
                    error_vector[bx] = value[50] - C[bx];
                }
                __syncthreads();
            }
            else {
                if (tx == 0) {
                    // initialize
                    error_vector[bx] = 0.0;

                    int index1 = bx - 6;
                    int index2 = gs - 5;
                    int index3 = index1 * index2;

                    error_vector[bx] = rho_matrix[index3] * u[index1];
                }

                __syncthreads();
            }
        }
        """

        ## block=(156,1,1), grid=(1,1,1)
        get_error_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bs (blockDim.x)

        __device__ float square_root(float value) {
            float s = 0;
            float t = 0;

            s = value / 2;

            for (;s != t;) {
                t = s;
                s = ((value/t) + t) / 2;
            }

            return s;
        }

        __device__ float get_norm(float* vector, int length) {
            float value = 0.0;
            float norm;

            for (int i = 0; i < length; i++) {
                value += vector[i] * vector[i];
            } 

            norm = square_root(value);

            return norm;    
        }


        __global__ void get_error(float* error_vector, float* error, int current_iter) {

            __shared__ float value[1000];

            value[tx] = error_vector[tx];

            __syncthreads();

            if (tx == 0) {
                int length = bs;

                error[current_iter] = get_norm(value, length);
            }
            else {

                value[1000-tx] = 0.0;
            }
            
            __syncthreads();
        }
        """
        get_error_vector_ker = SourceModule(get_error_vector_ker_function)
        get_error_ker = SourceModule(get_error_ker_function)

        self.get_error_vector = get_error_vector_ker.get_function("get_error_vector")
        self.get_error = get_error_ker.get_function("get_error")

In [None]:
## upper boundary: 5.8, downer boundary: 0.0

## destination
x_des = np.array([0,0,0,0,0,0])

## initial point
x_0 = np.array([100,0,-1500,-10,0,80])

## constraints
upper_boundary = 5.8
downer_boundary = 0.0

MECS = MinimumEnergyControlSolver(x_des, x_0, upper_boundary, downer_boundary)

In [None]:
MECS.solve()

MECS.MEC.u[:12]

array([[-0.3841809 ],
       [ 0.        ],
       [ 5.787262  ],
       [-0.38417292],
       [ 0.        ],
       [ 5.787263  ],
       [-0.38416386],
       [ 0.        ],
       [ 5.787264  ],
       [-0.38415447],
       [ 0.        ],
       [ 5.787265  ]], dtype=float32)

In [None]:
MECS.MEC.step -= 1
MECS.MEC.step

49

In [None]:
MECS.solve()

MECS.MEC.u[:12]

array([[-0.3842218 ],
       [ 0.        ],
       [ 5.78726   ],
       [-0.38421336],
       [ 0.        ],
       [ 5.7872605 ],
       [-0.3842037 ],
       [ 0.        ],
       [ 5.7872615 ],
       [-0.38419425],
       [ 0.        ],
       [ 5.7872615 ]], dtype=float32)

### Let's go!

In [None]:
## upper boundary: 5.8, downer boundary: 0.0

## destination
x_des = np.array([0,0,0,0,0,0])

## initial point
x_0 = np.array([100,0,-1500,-10,0,80])

## constraints
upper_boundary = 5.8
downer_boundary = 0.0

MECS = MinimumEnergyControlSolver(x_des, x_0, upper_boundary, downer_boundary)

In [None]:
for i in range(15):
    MECS.solve()

    print(MECS.MEC.u.shape)

(150, 1)
(150, 1)
(150, 1)
(150, 1)
(150, 1)
(150, 1)
(150, 1)
(150, 1)
(150, 1)
(150, 1)
(150, 1)
(150, 1)
(150, 1)
(150, 1)
(150, 1)


In [None]:
## upper boundary: 5.8, downer boundary: 0.0

## destination
x_des = np.array([0,0,0,0,0,0])

## initial point
x_0 = np.array([100,0,-1500,-10,0,80])

## constraints
upper_boundary = 5.8
downer_boundary = 0.0

MECS = MinimumEnergyControlSolver(x_des, x_0, upper_boundary, downer_boundary)

In [None]:
for i in range(MECS.MEC.step-1):
    MECS.solve()

    print(MECS.MEC.u.shape)

    MECS.MEC.step -= 1

(150, 1)
(147, 1)
(144, 1)
(141, 1)
(138, 1)
(135, 1)
(132, 1)
(129, 1)
(126, 1)
(123, 1)
(120, 1)
(117, 1)
(114, 1)
(111, 1)
(108, 1)
(105, 1)
(102, 1)
(99, 1)
(96, 1)
(93, 1)
(90, 1)
(87, 1)
(84, 1)
(81, 1)
(78, 1)
(75, 1)
(72, 1)
(69, 1)
(66, 1)
(63, 1)
(60, 1)
(57, 1)
(54, 1)
(51, 1)
(48, 1)
(45, 1)
(42, 1)
(39, 1)
(36, 1)
(33, 1)
(30, 1)
(27, 1)
(24, 1)
(21, 1)
(18, 1)
(15, 1)
(12, 1)
(9, 1)
(6, 1)


In [None]:
MECS.MEC.G.shape

(36,)

In [None]:
MECS.MEC.G.reshape(6,6).T[:,:]

array([[0.05125, 0.     , 0.     , 0.00125, 0.     , 0.     ],
       [0.     , 0.05125, 0.     , 0.     , 0.00125, 0.     ],
       [0.     , 0.     , 0.05125, 0.     , 0.     , 0.00125],
       [0.05   , 0.     , 0.     , 0.05   , 0.     , 0.     ],
       [0.     , 0.05   , 0.     , 0.     , 0.05   , 0.     ],
       [0.     , 0.     , 0.05   , 0.     , 0.     , 0.05   ]],
      dtype=float32)

something strange...

In [None]:
## upper boundary: 5.8, downer boundary: 0.0

## destination
x_des = np.array([0,0,0,0,0,0])

## initial point
x_0 = np.array([100,0,-1500,-10,0,80])

## constraints
upper_boundary = 5.8
downer_boundary = 0.0

MECS = MinimumEnergyControlSolver(x_des, x_0, upper_boundary, downer_boundary)

In [None]:
MECS.solve()

print(MECS.MEC.G.reshape(150,6).T[:,:9])

MECS.MEC.step -= 1

[[2.45125   0.        0.        2.4012501 0.        0.        2.35125
  0.        0.       ]
 [0.        2.45125   0.        0.        2.4012501 0.        0.
  2.35125   0.       ]
 [0.        0.        2.45125   0.        0.        2.4012501 0.
  0.        2.35125  ]
 [0.05      0.        0.        0.05      0.        0.        0.05
  0.        0.       ]
 [0.        0.05      0.        0.        0.05      0.        0.
  0.05      0.       ]
 [0.        0.        0.05      0.        0.        0.05      0.
  0.        0.05     ]]


In [None]:
2.45125 - 2.40125

0.04999999999999982

잡았다 요놈...

In [None]:
class MinimumEnergyControl:
    def __init__(self, x_des, x_0, step=50, dt=0.05, damping=False):

        ## gravity, criterion: moon
        gravity = -1.62     # N/kg

        ## no drag or something disturb movement
        if not damping:
            ## A
            self.state_transition_matrix = \
                np.array([[ 1, 0, 0,dt, 0, 0],
                          [ 0, 1, 0, 0,dt, 0],
                          [ 0, 0, 1, 0, 0,dt],
                          [ 0, 0, 0, 1, 0, 0],
                          [ 0, 0, 0, 0, 1, 0],
                          [ 0, 0, 0, 0, 0, 1]])

            ## B
            input_matrix = \
                np.array([[dt*dt/2,      0,      0],
                          [      0,dt*dt/2,      0],
                          [      0,      0,dt*dt/2],
                          [     dt,      0,      0],
                          [      0,     dt,      0],
                          [      0,      0,     dt]])
            
            self.input_matrix = \
                gpuarray.to_gpu(np.float32(np.array([0.5*dt*dt, dt])))

            ## g
            gravity_matrix = \
                np.array([[              0],
                          [              0],
                          [gravity*dt*dt/2],
                          [              0],
                          [              0],
                          [     gravity*dt]])
                
            self.gravity_matrix = \
                gpuarray.to_gpu(np.float32(np.array([0.5*gravity*dt*dt, gravity*dt])))

        ## drag or something exist...
        else:
            pass

        ## desired state: x_des
        self.x_des = gpuarray.to_gpu(np.float32(x_des))

        ## initial state: x_0
        self.x_0 = gpuarray.to_gpu(np.float32(x_0))

        self.dt = np.float32(dt)

        self.step = step

        ## weight
        self.rho = 1/3

    def run(self):
        self.get_gradient(self.gram_G,
                          self.u,
                          self.G_C,          
                          self.iteration,
                          self.gradient,
                          np.int32(self.step),
                          block=(self.TPB,1,1),
                          grid=(3*self.step,1,1))

    def optimal_size(self, n):
        thread_per_block = int(math.sqrt(n/2))

        iteration = int(n / thread_per_block) + 1

        return thread_per_block, np.int32(iteration)

    def define_problem(self):

        ## define matrices
        self.define_matrix_for_object_function()

        ## define matrices' value
        self.define_object_function_at_kernel()

    def define_matrix_for_object_function(self):
        self.rho_matrix = \
            gpuarray.to_gpu(np.float32(math.sqrt(self.rho) * np.identity(3*self.step)))
 
        ## solution!!!
        self.u = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,1))))

        ## G, gram_G, Q
        self.G = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,6)).reshape(6*3*self.step)))
        self.gram_G = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,3*self.step)).reshape(3*3*self.step*self.step)))
        self.Q = gpuarray.to_gpu(np.float32(np.zeros((6,1))))
        self.C = gpuarray.to_gpu(np.float32(np.zeros((6,1))))
        self.G_C = gpuarray.to_gpu(np.float32(np.zeros((150,1))))
        self.gradient = gpuarray.to_gpu(np.float32(np.zeros((150,1))))

        ## TPB: thread_per_block, BPG: block_per_grid
        self.TPB, self.iteration = self.optimal_size(3*self.step)

    def define_object_function_at_kernel(self):
        self.ker_function()

        self.get_G_matrix(self.input_matrix, self.dt, self.G, block=(6,1,1), grid=(self.step,1,1))
        self.get_Q_matrix(self.gravity_matrix, self.dt, self.Q, block=(self.step,1,1), grid=(2,1,1))
        self.get_G_gram_matrix(self.G, self.rho_matrix, self.gram_G, np.int32(self.step), block=(3,1,1), grid=(self.step,self.step,1))
        self.get_G_C_matrix(self.G, self.x_des, self.x_0, self.Q, self.C, self.G_C, block=(3,1,1), grid=(self.step,1,1))

    def ker_function(self):
        ## We'll gonna do 150 x 150 @ 150 x 1
        ## block=(thread_per_block,1,1), grid=(3*self.step,1,1)
        get_gradient_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bx (blockIdx.x)
        #define bs (blockDim.x)
        #define gs (gridDim.x)

        __global__ void get_gradient(float* matrix, float* vector1, float* vector2, int iteration, float* result, int step) {

            __shared__ float result_jerk[1000];

            result_jerk[tx] = 0.0;

            for (int i = 0; i < iteration; i++) {
                int index1 = i + tx * iteration;
                int index2 = index1 + bx * 3 * step;

                if (index1 < gs) {
                    result_jerk[tx] += matrix[index2] * vector1[index1];
                }
                else {
                    result_jerk[1000-tx] = 0;
                }
            }

            __syncthreads();

            if (tx == 0) {
                for (int j = 0; j < bs; j++) {
                    result[bx] += result_jerk[j];
                }

                result[bx] -= vector2[bx];
            }
            else {
                result_jerk[1000-tx] = 0;
            }

            __syncthreads();
        }
        """
        get_gradient_ker = SourceModule(get_gradient_ker_function)

        ## block=(6,1,1), grid=(self.step,1,1)
        get_G_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)
        #define step (gridDim.x)

        __global__ void get_G_matrix(float* input_matrix, float dt, float* G) {
            // 6: DOF, 18: DOF*axis
            int index = tx + (tx%3) * 6 + bx * 18;

            if (tx < 3) {
                float value;
                value = input_matrix[0] + (step - bx - 1) * dt * input_matrix[1];

                G[index] = value;
            }
            else {
                G[index] = dt;
            }

            __syncthreads();
        }
        """
        get_G_matrix_ker = SourceModule(get_G_matrix_ker_function)

        ## block=(self.step,1,1), grid=(2,1,1)
        get_Q_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)
        #define step (blockDim.x)

        __global__ void get_Q_matrix(float* gravity, float dt, float* Q) {
            
            __shared__ float value[50];
            
            if (bx == 0) {
                value[tx] = gravity[0] + (tx * dt) * gravity[1];
            }
            else {
                value[tx] = gravity[1];
            }

            __syncthreads();

            if (bx == 0) {
                if(tx == 0) {
                    for (int i = 0; i < step; i++) {
                        Q[2] += value[i];
                    }
                }
            }
            else {
                if(tx == 0) {
                    for (int i = 0; i < step; i++) {
                        Q[5] += value[i];
                    }
                }
            }

            __syncthreads();
        }
        """
        get_Q_matrix_ker = SourceModule(get_Q_matrix_ker_function)

        ## block=(3,1,1), grid=(self.step,self.step,1)
        get_G_gram_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define by (blockIdx.y)
        #define tx (threadIdx.x)
        #define step (gridDim.x)

        __global__ void get_G_gram_matrix(float* G, float* rho_matrix, float* gram_G) {
            // 9: axis, 151: axis*step+1, 450: axis*axis*step
            int index1 = 3 * step + 1;
            int index2 = 3 * 3 * step;
            int index3 = tx * index1 + bx * 3 + by * index2;

            // 7: DOF+1, 18: DOF*axis
            int index4 = tx * 7 + bx * 18;
                
            float value = 0.0;
            value = G[index4] * G[index4] + G[index4+3] * G[index4+3];

            gram_G[index3] = value; 

            __syncthreads();

            gram_G[index3] += rho_matrix[index3]*rho_matrix[index3];

            __syncthreads();
        }
        """
        get_G_gram_matrix_ker = SourceModule(get_G_gram_matrix_ker_function)

        ## block=(3,1,1), grid=(self.step,1,1)
        get_G_C_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)

        __global__ void get_G_C_matrix(float* G, float* x_des, float* x_current, float* Q, float* C, float* G_C) {
            // C first in each block
            __shared__ float C_jerk[6];

            C_jerk[tx] = x_des[tx] - Q[tx] - x_current[tx];
            C_jerk[tx+3] = x_des[tx+3] - Q[tx+3] - x_current[tx+3];

            __syncthreads();

            C[tx] = C_jerk[tx];
            C[tx+3] = C_jerk[tx+3];

            __syncthreads();

            // G_C Next
            int index1 = tx * 7 + bx * 18;
            int index2 = tx + bx * 3;

            float value;
            value = G[index1] * C_jerk[tx] + G[index1+3] * C_jerk[tx+3];

            __syncthreads();

            G_C[index2] = value;

            __syncthreads();
        }
        """
        get_G_C_matrix_ker = SourceModule(get_G_C_matrix_ker_function)

        self.get_G_matrix      = get_G_matrix_ker.get_function("get_G_matrix")
        self.get_Q_matrix      = get_Q_matrix_ker.get_function("get_Q_matrix")
        self.get_G_gram_matrix = get_G_gram_matrix_ker.get_function("get_G_gram_matrix")
        self.get_G_C_matrix    = get_G_C_matrix_ker.get_function("get_G_C_matrix")
        self.get_gradient      = get_gradient_ker.get_function("get_gradient")

In [None]:
## upper boundary: 5.8, downer boundary: 0.0

## destination
x_des = np.array([0,0,0,0,0,0])

## initial point
x_0 = np.array([100,0,-1500,-10,0,80])

## constraints
upper_boundary = 5.8
downer_boundary = 0.0

MECS = MinimumEnergyControlSolver(x_des, x_0, upper_boundary, downer_boundary)

In [None]:
MECS.solve()

print(MECS.MEC.G.reshape(150,6).T[:,:9])

MECS.MEC.step -= 1

[[0.12375    0.         0.         0.12125    0.         0.
  0.11875001 0.         0.        ]
 [0.         0.12375    0.         0.         0.12125    0.
  0.         0.11875001 0.        ]
 [0.         0.         0.12375    0.         0.         0.12125
  0.         0.         0.11875001]
 [0.05       0.         0.         0.05       0.         0.
  0.05       0.         0.        ]
 [0.         0.05       0.         0.         0.05       0.
  0.         0.05       0.        ]
 [0.         0.         0.05       0.         0.         0.05
  0.         0.         0.05      ]]


이거지...

# GO GO!!

In [None]:
class MinimumEnergyControl:
    def __init__(self, x_des, x_0, step=50, dt=0.05, damping=False):

        ## gravity, criterion: moon
        gravity = 1.62     # N/kg

        ## no drag or something disturb movement
        if not damping:
            ## A
            self.state_transition_matrix = \
                np.array([[ 1, 0, 0,dt, 0, 0],
                          [ 0, 1, 0, 0,dt, 0],
                          [ 0, 0, 1, 0, 0,dt],
                          [ 0, 0, 0, 1, 0, 0],
                          [ 0, 0, 0, 0, 1, 0],
                          [ 0, 0, 0, 0, 0, 1]])

            ## B
            input_matrix = \
                np.array([[dt*dt/2,      0,      0],
                          [      0,dt*dt/2,      0],
                          [      0,      0,dt*dt/2],
                          [     dt,      0,      0],
                          [      0,     dt,      0],
                          [      0,      0,     dt]])
            
            self.input_matrix = \
                gpuarray.to_gpu(np.float32(np.array([0.5*dt*dt, dt])))

            ## g
            gravity_matrix = \
                np.array([[              0],
                          [              0],
                          [gravity*dt*dt/2],
                          [              0],
                          [              0],
                          [     gravity*dt]])
                
            self.gravity_matrix = \
                gpuarray.to_gpu(np.float32(np.array([0.5*gravity*dt*dt, gravity*dt])))

        ## drag or something exist...
        else:
            pass

        ## desired state: x_des
        self.x_des = gpuarray.to_gpu(np.float32(x_des))

        ## initial state: x_0
        self.x_0 = gpuarray.to_gpu(np.float32(x_0))

        ## current state
        self.x_current = gpuarray.to_gpu(np.float32(x_0))

        self.dt = np.float32(dt)

        self.step = step

        ## weight
        self.rho = 100

    def run(self):
        self.get_gradient(self.gram_G,
                          self.u,
                          self.G_C,          
                          self.iteration,
                          self.gradient,
                          np.int32(self.step),
                          block=(self.TPB,1,1),
                          grid=(3*self.step,1,1))

    def optimal_size(self, n):
        thread_per_block = int(math.sqrt(n/2))

        iteration = int(n / thread_per_block) + 1

        return thread_per_block, np.int32(iteration)

    def define_problem(self):

        ## define matrices
        self.define_matrix_for_object_function()

        ## define matrices' value
        self.define_object_function_at_kernel()

    def define_matrix_for_object_function(self):
        self.rho_matrix = \
            gpuarray.to_gpu(np.float32(math.sqrt(self.rho) * np.identity(3*self.step)))
 
        ## solution!!!
        self.u = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,1))))

        ## G, gram_G, Q
        self.G = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,6)).reshape(6*3*self.step)))
        self.gram_G = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,3*self.step)).reshape(3*3*self.step*self.step)))
        self.Q = gpuarray.to_gpu(np.float32(np.zeros((6,1))))
        self.C = gpuarray.to_gpu(np.float32(np.zeros((6,1))))
        self.G_C = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,1))))
        self.gradient = gpuarray.to_gpu(np.float32(np.zeros((3*self.step,1))))

        ## TPB: thread_per_block, BPG: block_per_grid
        self.TPB, self.iteration = self.optimal_size(3*self.step)

    def define_object_function_at_kernel(self):
        self.ker_function()

        self.get_G_matrix(self.input_matrix, self.dt, self.G, block=(6,1,1), grid=(self.step,1,1))
        self.get_Q_matrix(self.gravity_matrix, self.dt, self.Q, block=(self.step,1,1), grid=(2,1,1))
        self.get_G_gram_matrix(self.G, self.rho_matrix, self.gram_G, np.int32(self.step), block=(3,1,1), grid=(self.step,self.step,1))
        self.get_G_C_matrix(self.G, self.x_des, self.x_0, self.Q, self.C, self.G_C, block=(3,1,1), grid=(self.step,1,1))

    def ker_function(self):
        ## We'll gonna do 150 x 150 @ 150 x 1
        ## block=(thread_per_block,1,1), grid=(3*self.step,1,1)
        get_gradient_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bx (blockIdx.x)
        #define bs (blockDim.x)
        #define gs (gridDim.x)

        __global__ void get_gradient(float* matrix, float* vector1, float* vector2, int iteration, float* result, int step) {

            __shared__ float result_jerk[1000];

            result_jerk[tx] = 0.0;

            for (int i = 0; i < iteration; i++) {
                int index1 = i + tx * iteration;
                int index2 = index1 + bx * 3 * step;

                if (index1 < gs) {
                    result_jerk[tx] += matrix[index2] * vector1[index1];
                }
                else {
                    result_jerk[1000-tx] = 0;
                }
            }

            __syncthreads();

            if (tx == 0) {
                for (int j = 0; j < bs; j++) {
                    result[bx] += result_jerk[j];
                }

                result[bx] -= vector2[bx];
            }
            else {
                result_jerk[1000-tx] = 0;
            }

            __syncthreads();
        }
        """
        get_gradient_ker = SourceModule(get_gradient_ker_function)

        ## block=(6,1,1), grid=(self.step,1,1)
        get_G_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)
        #define step (gridDim.x)

        __global__ void get_G_matrix(float* input_matrix, float dt, float* G) {
            // 6: DOF, 18: DOF*axis
            int index = tx + (tx%3) * 6 + bx * 18;

            if (tx < 3) {
                float value;
                value = input_matrix[0] + (step - bx - 1) * dt * input_matrix[1];

                G[index] = value;
            }
            else {
                G[index] = dt;
            }

            __syncthreads();
        }
        """
        get_G_matrix_ker = SourceModule(get_G_matrix_ker_function)

        ## block=(self.step,1,1), grid=(2,1,1)
        get_Q_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)
        #define step (blockDim.x)

        __global__ void get_Q_matrix(float* gravity, float dt, float* Q) {
            
            __shared__ float value[50];
            
            if (bx == 0) {
                value[tx] = gravity[0] + (tx * dt) * gravity[1];
            }
            else {
                value[tx] = gravity[1];
            }

            __syncthreads();

            if (bx == 0) {
                if(tx == 0) {
                    for (int i = 0; i < step; i++) {
                        Q[2] += value[i];
                    }
                }
            }
            else {
                if(tx == 0) {
                    for (int i = 0; i < step; i++) {
                        Q[5] += value[i];
                    }
                }
            }

            __syncthreads();
        }
        """
        get_Q_matrix_ker = SourceModule(get_Q_matrix_ker_function)

        ## block=(3,1,1), grid=(self.step,self.step,1)
        get_G_gram_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define by (blockIdx.y)
        #define tx (threadIdx.x)
        #define step (gridDim.x)

        __global__ void get_G_gram_matrix(float* G, float* rho_matrix, float* gram_G) {
            // 9: axis, 151: axis*step+1, 450: axis*axis*step
            int index1 = 3 * step + 1;
            int index2 = 3 * 3 * step;
            int index3 = tx * index1 + bx * 3 + by * index2;

            // 7: DOF+1, 18: DOF*axis
            int index4 = tx * 7 + bx * 18;
                
            float value = 0.0;
            value = G[index4] * G[index4] + G[index4+3] * G[index4+3];

            gram_G[index3] = value; 

            __syncthreads();

            gram_G[index3] += rho_matrix[index3]*rho_matrix[index3];

            __syncthreads();
        }
        """
        get_G_gram_matrix_ker = SourceModule(get_G_gram_matrix_ker_function)

        ## block=(3,1,1), grid=(self.step,1,1)
        get_G_C_matrix_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)

        __global__ void get_G_C_matrix(float* G, float* x_des, float* x_current, float* Q, float* C, float* G_C) {
            // C first in each block
            __shared__ float C_jerk[6];

            C_jerk[tx] = x_des[tx] - Q[tx] - x_current[tx];
            C_jerk[tx+3] = x_des[tx+3] - Q[tx+3] - x_current[tx+3];

            __syncthreads();

            C[tx] = C_jerk[tx];
            C[tx+3] = C_jerk[tx+3];

            __syncthreads();

            // G_C Next
            int index1 = tx * 7 + bx * 18;
            int index2 = tx + bx * 3;

            float value;
            value = G[index1] * C_jerk[tx] + G[index1+3] * C_jerk[tx+3];

            __syncthreads();

            G_C[index2] = value;

            __syncthreads();
        }
        """
        get_G_C_matrix_ker = SourceModule(get_G_C_matrix_ker_function)

        self.get_G_matrix      = get_G_matrix_ker.get_function("get_G_matrix")
        self.get_Q_matrix      = get_Q_matrix_ker.get_function("get_Q_matrix")
        self.get_G_gram_matrix = get_G_gram_matrix_ker.get_function("get_G_gram_matrix")
        self.get_G_C_matrix    = get_G_C_matrix_ker.get_function("get_G_C_matrix")
        self.get_gradient      = get_gradient_ker.get_function("get_gradient")

In [None]:
class OptimizerForGuidance:
    def __init__(self, length, learning_rate):
        self.length = length
        self.learning_rate = np.float32(learning_rate)
        self.kernel_function()

    def run(self, theta, gradient):
        ## theta, gradient: gpuarray type variable
        self.basic_optimizer(theta,
                             gradient,
                             self.learning_rate,
                             block=(self.length,1,1),
                             grid=(1,1,1))

    def kernel_function(self):
        ## block=(length,1,1), grid=(1,1,1)
        basic_optimizer_ker_function = \
        """
        #define x (threadIdx.x)

        __global__ void basic_optimizer(float* theta, float* gradient, float learning_rate) {
            theta[x] -= gradient[x] * learning_rate;

            __syncthreads();
        }
        """
        basic_optimizer_ker = SourceModule(basic_optimizer_ker_function)

        self.basic_optimizer = basic_optimizer_ker.get_function("basic_optimizer")
        

In [None]:
class ConstraintsForInput:
    def __init__(self, problem, upper_boundary, downer_boundary):
        ## ex> MEC(minimum energy control)
        self.problem = problem

        self.upper_boundary = np.float32(upper_boundary)
        self.downer_boundary = np.float32(downer_boundary)

        self.kernel_function()

    def projection(self):
        self.project_function(self.problem.u,
                              self.upper_boundary,
                              self.downer_boundary,
                              block=(3,1,1),
                              grid=(self.problem.step,1,1))

    def kernel_function(self):
        ## block=(3,1,1), grid=(problem.step,1,1)
        projection_ker_function = \
        """
        #define bx (blockIdx.x)
        #define tx (threadIdx.x)

        __device__ float square_root(float value) {
            float s = 0;
            float t = 0;

            s = value / 2;

            for (;s != t;) {
                t = s;
                s = ((value/t) + t) / 2;
            }

            return s;
        }

        __device__ float get_norm(float x, float y, float z) {
            float value;
            float norm;

            value = x * x + y * y + z * z;
            norm = square_root(value);

            return norm;    
        }

        __global__ void projection(float* theta, float upper_boundary, float downer_boundary) {
            __shared__ float u[3];
            __shared__ float norm[1];
            float value;

            int index = tx + bx * 3;

            u[tx] = theta[index];

            __syncthreads();

            if (tx == 0) {
                norm[0] = get_norm(u[0], u[1], u[2]);
            } 

            __syncthreads();

            if ((norm[0] > downer_boundary) && (norm[0] < upper_boundary)) {
                value = u[tx];
            }
            else {
                value = u[tx] * upper_boundary / norm[0];
            }

            __syncthreads();

            theta[index] = value;
        }
        """
        projection_ker = SourceModule(projection_ker_function)

        self.project_function = projection_ker.get_function("projection")


In [None]:
class MinimumEnergyControlSolver:
    def __init__(self, x_des, x_0, upper_boundary, downer_boundary, step=50, max_iteration=100):
        ## max_iteration
        self.max_iteration = max_iteration

        ## initialize MEC(minimum energy control)
        self.MEC = MinimumEnergyControl(x_des, x_0, step=step)

        ## initialize optimizer
        learning_rate = 1e-4

        self.optimizer = OptimizerForGuidance(3*self.MEC.step, learning_rate)

        ## constraint
        self.upper_boundary = upper_boundary
        self.downer_boundary = downer_boundary

        self.constraint = ConstraintsForInput(self.MEC, self.upper_boundary, self.downer_boundary)

        ## evaluate
        self.error_vector = gpuarray.to_gpu(np.float32(np.zeros((3*self.MEC.step+6,1))))
        self.error = gpuarray.to_gpu(np.float32(np.zeros((1,self.max_iteration)))) 

        ## record updated state
        self.state = gpuarray.to_gpu(np.float32(np.zeros((self.MEC.step,6)).reshape(6*self.MEC.step)))
        
        ## TPB = 5, iteration = 10
        self.TPB, self.iteration = self.MEC.optimal_size(self.MEC.step)

        self.kernel_function()

    def solve(self):
        for step in range(self.MEC.step-1):
            ## define problem: fit matrices for left step
            self.MEC.define_problem()

            for i in range(100):
                ## get_gradient
                self.MEC.run()

                ## optimize
                self.optimizer.run(self.MEC.u, self.MEC.gradient)

                ## constraint
                self.constraint.projection()

                ## evaluate
                # self.evaluate(i)

            ## update state
            self.update_state(step)
            
            ## for test
            # print(self.MEC.u.shape)

            ## next step
            self.MEC.step -= 1

    def evaluate(self, current_iter):
        ## set size
        block_size = 3 * self.MEC.step + 6
        grid_size  = 3 * self.MEC.step + 6
        
        ## evaluate learning
        self.get_error_vector(self.MEC.G,
                              self.MEC.rho_matrix,
                              self.MEC.u,
                              self.MEC.C,
                              self.iteration, 
                              self.error_vector, 
                              block=(self.TPB,1,1),
                              grid=(grid_size,1,1))
        
        self.get_error(self.error_vector,
                       self.error,
                       np.int32(current_iter),
                       block=(block_size,1,1),
                       grid=(1,1,1))
        
    def update_state(self, step):

        self.get_next_state(self.MEC.x_current,
                            self.MEC.u,
                            self.MEC.dt,
                            self.MEC.gravity_matrix,
                            self.state,
                            np.int32(step),
                            block=(6,1,1),
                            grid=(1,1,1))
        
    def kernel_function(self):
        ## We'll gonna do 156 x 150 @ 150 x 1
        ## block=(TPB,1,1), grid=(156,1,1)
        get_error_vector_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bx (blockIdx.x)
        #define bs (blockDim.x)
        #define gs (gridDim.x)

        __global__ void get_error_vector(float* G, float* rho_matrix, float* u, float* C, int iteration, float* error_vector) {

            if (bx < 6) {
                
                __shared__ float value[100];

                value[tx] = 0.0;

                __syncthreads();

                for (int i = 0; i < iteration; i++) {
                    int index1 = bx % 3;
                    int index2 = i * 5 + tx % 5;
                    int index3 = index1 + index2*3;

                    // 7: DOF+1, 90: 5*DOF*axis
                    int index4 = bx + index1*6 + index2*18;

                    value[tx] += G[index4] * u[index3];
                }

                __syncthreads();
                if (tx == 0) {
                    // initialize
                    error_vector[bx] = 0.0;

                    value[50] = 0.0;
                    for (int j = 0; j < bs; j++) {
                        value[50] += value[j];
                    }
                 
                    error_vector[bx] = value[50] - C[bx];
                }
                __syncthreads();
            }
            else {
                if (tx == 0) {
                    // initialize
                    error_vector[bx] = 0.0;

                    int index1 = bx - 6;
                    int index2 = gs - 5;
                    int index3 = index1 * index2;

                    error_vector[bx] = rho_matrix[index3] * u[index1];
                }

                __syncthreads();
            }
        }
        """

        ## block=(156,1,1), grid=(1,1,1)
        get_error_ker_function = \
        """
        #define tx (threadIdx.x)
        #define bs (blockDim.x)

        __device__ float square_root(float value) {
            float s = 0;
            float t = 0;

            s = value / 2;

            for (;s != t;) {
                t = s;
                s = ((value/t) + t) / 2;
            }

            return s;
        }

        __device__ float get_norm(float* vector, int length) {
            float value = 0.0;
            float norm;

            for (int i = 0; i < length; i++) {
                value += vector[i] * vector[i];
            } 

            norm = square_root(value);

            return norm;    
        }


        __global__ void get_error(float* error_vector, float* error, int current_iter) {

            __shared__ float value[1000];

            value[tx] = error_vector[tx];

            __syncthreads();

            if (tx == 0) {
                int length = bs;

                error[current_iter] = get_norm(value, length);
            }
            else {

                value[1000-tx] = 0.0;
            }
            
            __syncthreads();
        }
        """
        get_error_vector_ker = SourceModule(get_error_vector_ker_function)
        get_error_ker = SourceModule(get_error_ker_function)

        ## block=(6,1,1), grid=(1,1,1)
        get_next_state_ker_function = \
        """
        #define tx (threadIdx.x)

        __global__ void get_next_state(float* x, float* u, float dt, float* gravity, float* state, int step) {

            __shared__ float momentum[6];
            __shared__ float input[6];

            int index1 = tx + step * 6;

            if (tx < 3) {
                int index2 = tx % 3;

                momentum[tx] = x[tx] + dt * x[tx+3];
                input[tx]    = u[index2];
            }
            else {
                int index2 = tx % 3;

                momentum[tx] = x[tx];
                input[tx]    = u[index2];
            }

            __syncthreads();

            x[tx] = momentum[tx] + input[tx] + gravity[tx];
            state[index1] = x[tx];

            __syncthreads();
        }
        """
        get_next_state_ker = SourceModule(get_next_state_ker_function)

        self.get_error_vector = get_error_vector_ker.get_function("get_error_vector")
        self.get_error        = get_error_ker.get_function("get_error")
        self.get_next_state   = get_next_state_ker.get_function("get_next_state")

# Test

In [None]:
## upper boundary: 5.8, downer boundary: 0.0

## destination
x_des = np.array([0,0,0,0,0,0])

## initial point
x_0 = np.array([100,0,-1500,-10,0,80])

## constraints
upper_boundary = 5.8
downer_boundary = 0.0

MECS = MinimumEnergyControlSolver(x_des, x_0, upper_boundary, downer_boundary, step=50)

In [None]:
MECS.solve()

In [None]:
MECS.MEC.x_current

array([   61.97376  ,     3.9690018, -1093.4905   ,   -15.191645 ,
           0.       ,   160.72765  ], dtype=float32)

In [None]:
MECS.state.reshape(50,6).T[2,:]

array([-1492.751 , -1485.405 , -1477.9656, -1470.436 , -1462.8198,
       -1455.1204, -1447.341 , -1439.485 , -1431.5559, -1423.557 ,
       -1415.492 , -1407.3639, -1399.1763, -1390.9325, -1382.6361,
       -1374.2904, -1365.8988, -1357.4646, -1348.9913, -1340.4824,
       -1331.9412, -1323.371 , -1314.7754, -1306.1577, -1297.5214,
       -1288.8696, -1280.206 , -1271.534 , -1262.8569, -1254.1782,
       -1245.5012, -1236.8293, -1228.166 , -1219.5148, -1210.8788,
       -1202.2615, -1193.6664, -1185.0968, -1176.5563, -1168.0481,
       -1159.5757, -1151.1423, -1142.7516, -1134.4067, -1126.1112,
       -1117.8684, -1109.6818, -1101.5547, -1093.4905,     0.    ],
      dtype=float32)

In [None]:
MECS.MEC.u

array([[ 0.00226113],
       [ 0.        ],
       [ 0.02925601],
       [ 0.0067846 ],
       [ 0.        ],
       [-0.03859557]], dtype=float32)

In [None]:
## upper boundary: 5.8, downer boundary: 0.0

## destination
x_des = np.array([0,0,0,0,0,0])

## initial point
x_0 = np.array([100,0,-1500,-10,0,80])

## constraints
upper_boundary = 5.8
downer_boundary = 0.0

MECS = MinimumEnergyControlSolver(x_des, x_0, upper_boundary, downer_boundary, step=50)

In [None]:
MECS.solve()

In [None]:
MECS.state.reshape(50,6).T[:,-2]

array([  40.32205  ,    3.9690018, -514.2627   ,  -23.96316  ,
          0.       ,  392.94318  ], dtype=float32)

In [None]:
## upper boundary: 5.8, downer boundary: 0.0

## destination
x_des = np.array([0,0,0,0,0,0])

## initial point
x_0 = np.array([100,0,-1500,-10,0,80])

## constraints
upper_boundary = 5.8
downer_boundary = 0.0

MECS = MinimumEnergyControlSolver(x_des, x_0, upper_boundary, downer_boundary, step=32)

In [None]:
MECS.solve()

In [None]:
MECS.state.reshape(32,6).T[:,-2]

array([   65.990555 ,     2.5110002, -1071.7192   ,   -20.08457  ,
           0.       ,   249.98914  ], dtype=float32)

In [None]:
MECS.MEC.u

array([[ 0.06132204],
       [ 0.        ],
       [ 0.7939236 ],
       [ 0.18407021],
       [ 0.        ],
       [-1.0472887 ]], dtype=float32)

태초 마을로....