In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import crocoddyl as c
import numdifftools as nd
from sklearn.neural_network import MLPRegressor
# Presumably makes crocoddyl exchange vectors/matrices as numpy arrays rather
# than np.matrix -- TODO confirm against the installed crocoddyl version.
c.switchToNumpyArray()

In [3]:
# Collect (start state, residual) training pairs from unicycle DDP solves.
N_SAMPLES = 1000
T = 30  # number of running models per problem; constant, so set once
# Uniform sampling interval for each of the three start-state components.
X0_BOUNDS = ((-2.1, 2.1), (-2.1, 2.1), (0, 1))
# Dedicated, seeded generator: the dataset is reproducible on Restart & Run All
# and the global np.random state is left untouched.
rng = np.random.RandomState(0)

positions = []
residuals = []
model = c.ActionModelUnicycle()

for _ in range(N_SAMPLES):
    # One draw per component, in order.
    x0 = np.array([rng.uniform(low, high) for low, high in X0_BOUNDS])
    problem = c.ShootingProblem(x0, [model] * T, model)
    ddp = c.SolverDDP(problem)
    ddp.solve()

    # Only the residual of the LAST entry of ddp.datas() is kept (presumably
    # the terminal node's data -- confirm against the crocoddyl version).
    # Indexing [-1] fails loudly on an empty sequence instead of silently
    # re-appending the previous sample's residual, and np.array(...) copies so
    # the stored row cannot alias memory owned by the solver.
    residual = np.array(list(ddp.datas())[-1].r)
    positions.append(x0)
    residuals.append(residual)

# Stack the per-sample rows into 2-D arrays for the regressor.
positions = np.asarray(positions)
residuals = np.asarray(residuals)
del model

# The residuals array has shape (1000, 5). The first three columns are the residuals
# related to the positions; the last two are the residuals associated with u, which
# should ideally be 0.

In [4]:
# Small tanh MLP regressing the collected residuals on the start states.
nn = MLPRegressor(
    # --- architecture ---
    hidden_layer_sizes=(8,8),
    activation='tanh',
    # --- optimiser in use and its stopping / regularisation settings ---
    solver='lbfgs',
    alpha=0.001,
    max_iter=1000,
    tol=0.0001,
    random_state=9,
    verbose=True,
    warm_start=False,
    # --- settings scikit-learn documents as applying to the 'sgd'/'adam'
    #     solvers (or to early stopping) only; kept exactly as given ---
    batch_size='auto',
    learning_rate='constant',
    learning_rate_init=0.01,
    power_t=0.5,
    shuffle=True,
    momentum=0.9,
    nesterovs_momentum=True,
    early_stopping=False,
    validation_fraction=0.01,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
)

# Last expression of the cell: fit() returns the estimator, which is displayed.
nn.fit(positions, residuals)

MLPRegressor(activation='tanh', alpha=0.001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(8, 8), learning_rate='constant',
             learning_rate_init=0.01, max_fun=15000, max_iter=1000,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=9, shuffle=True, solver='lbfgs',
             tol=0.0001, validation_fraction=0.01, verbose=True,
             warm_start=False)

In [6]:
def a2m(a):
    """Convert ``a`` to an ``np.matrix`` and return its transpose.

    A 1-D input therefore comes back as a column matrix of shape (n, 1);
    a 2-D input of shape (r, c) comes back with shape (c, r).
    """
    as_matrix = np.matrix(a)
    return as_matrix.transpose()

def m2a(m):
    """Convert ``m`` to a plain ``np.ndarray`` with all length-1 axes removed.

    An (n, 1) or (1, n) matrix becomes a 1-D array of length n; a 1x1 input
    becomes a 0-d array.
    """
    plain = np.array(m)
    return np.squeeze(plain)

class UnicycleDerived(c.ActionModelAbstract):
    """Action model whose cost is half the squared norm of a learned residual.

    The residual is whatever ``nn.predict`` returns for the state ``x``; the
    control ``u`` never enters the cost. Only ``r``, ``cost``, ``Lx`` and
    ``Lxx`` of the data object are written here -- no next state and no
    control derivatives are set.
    """

    def __init__(self, nn):
        """Initialise the abstract base and keep a reference to the regressor.

        Parameters
        ----------
        nn : object
            Fitted regressor exposing ``predict`` on an array of shape (1, n).
        """
        # Base-class arguments: StateVector(3), 2, 5 (presumably the state,
        # the control dimension and the residual dimension -- confirm against
        # the installed crocoddyl version).
        c.ActionModelAbstract.__init__(self, c.StateVector(3), 2, 5)
        self.net = nn

    def _predict_residual(self, x):
        """Return the raw network prediction for the single state ``x``."""
        # predict() wants one sample per row, hence the (1, -1) reshape.
        return self.net.predict(np.asarray(x).reshape(1, -1))

    def _cost(self, x):
        """Return 0.5 * ||r(x)||^2 as a plain Python float.

        This is the function handed to numdifftools, which expects a
        scalar-valued callable; returning a float (not a 1x1 np.matrix, whose
        ``*`` means matrix product) keeps the finite-difference arithmetic
        elementwise.
        """
        residual = np.asarray(self._predict_residual(x), dtype=float).ravel()
        return 0.5 * float(np.dot(residual, residual))

    def calc(self, data, x, u=None):
        """Write the predicted residual and its cost into ``data``.

        ``u`` is accepted for interface compatibility and is not used.
        """
        prediction = self._predict_residual(x)
        data.r = a2m(prediction)
        # Same formula as _cost, evaluated on the prediction already in hand
        # so the network is queried only once per calc.
        flat = np.asarray(prediction, dtype=float).ravel()
        data.cost = 0.5 * float(np.dot(flat, flat))

    def calcDiff(self, data, x, u=None, recalc=True):
        """Write numerical first and second derivatives of the cost w.r.t. ``x``.

        Parameters
        ----------
        data : action data object receiving ``Lx`` and ``Lxx``.
        x : state at which to differentiate.
        u : control; defaults to ``self.unone`` and is only forwarded to calc.
        recalc : when true, ``calc`` is run first to refresh ``r`` and ``cost``.
        """
        if u is None:
            u = self.unone
        if recalc:
            self.calc(data, x, u)

        point = np.asarray(x, dtype=float).ravel()

        # Finite-difference derivatives of the scalar cost. Each evaluation
        # queries the network many times, so this dominates the solve time.
        gradient = nd.Gradient(self._cost)
        data.Lx = a2m(gradient(point))

        hessian = nd.Hessian(self._cost)
        data.Lxx = a2m(hessian(point))
        
        

In [7]:
from time import perf_counter

In [8]:
# Time 1000 DDP solves that use the learned model as the terminal model.
terminal_model = UnicycleDerived(nn)
model = c.ActionModelUnicycle()
T = 30

start = perf_counter()
for _ in range(1000):
    # One uniform draw per start-state component, in order.
    x0 = np.array([
        np.random.uniform(lower, upper)
        for lower, upper in ((-2.1, 2.1), (-2.1, 2.1), (0, 1))
    ])
    problem = c.ShootingProblem(x0.T, [model] * T, terminal_model)
    ddp = c.SolverDDP(problem)
    ddp.solve([], [], 1000)
end = perf_counter()

In [10]:
# Elapsed time of the timed solve loop: difference of the two perf_counter()
# readings taken around it, in seconds.
print(end - start)
# Result: about 2168 seconds

2168.4204136320004
