In [1]:
%load_ext autoreload
%autoreload 2

In [4]:
import numpy as np
import crocoddyl as c
from sklearn.neural_network import MLPRegressor
c.switchToNumpyArray()

In [5]:
# get the residual data
positions = []
residuals = []
model = c.ActionModelUnicycle()

for _ in range(1000):
    
    x0 = np.array([np.random.uniform(-2.1, 2.1), np.random.uniform(-2.1, 2.1), np.random.uniform(0,1)])
    T = 30
    problem = c.ShootingProblem(x0.T, [ model ] * T, model)
    ddp = c.SolverDDP(problem)
    ddp.solve()

    for d in ddp.datas():
        residual = d.r
    positions.append(x0)
    residuals.append(residual)

positions = np.asarray(positions)
residuals = np.asarray(residuals)

# The residuals are of the shape (1000, 5). The first three columns of the residuals are those that are
# related to positions. The last two are the residuals associated with u. Ideally, it should be 0.

In [17]:
nn = MLPRegressor(
    hidden_layer_sizes=(8,8),  activation='tanh', solver='lbfgs', alpha=0.001, batch_size='auto',
    learning_rate='constant', learning_rate_init=0.01, power_t=0.5, max_iter=1000, shuffle=True,
    random_state=9, tol=0.0001, verbose=True, warm_start=False, momentum=0.9, nesterovs_momentum=True,
    early_stopping=False, validation_fraction=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

nn.fit(positions, residuals)

MLPRegressor(activation='tanh', alpha=0.001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(8, 8), learning_rate='constant',
             learning_rate_init=0.01, max_fun=15000, max_iter=1000,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=9, shuffle=True, solver='lbfgs',
             tol=0.0001, validation_fraction=0.01, verbose=True,
             warm_start=False)

In [33]:
def a2m(a):
    return np.matrix(a).T

def m2a(m):
    return np.array(m).squeeze()

class UnicycleDerived(c.ActionModelAbstract):
    def __init__(self, nn):
        c.ActionModelAbstract.__init__(self, c.StateVector(3), 2, 5)
        self.net = nn

    def calc(self, data, x, u=None):
        prediction = self.net.predict(x.reshape(1, -1))
        #print(prediction)
        data.r = a2m(prediction)
        
        data.cost = .5 * sum(m2a(data.r)**2)
        
        cost = 0.5 * np.dot(data.r.T, data.r)
        #print(data.cost, cost)

    def calcDiff(self, data, x, u=None, recalc=True):
        data.Lx = a2m(m2a(x) * (10**2 * self.state.nx))
        print(x, self.state.nx)

        

In [34]:
terminal_model = UnicycleDerived(nn)
x0 = np.array([np.random.uniform(-2.1, 2.1), np.random.uniform(-2.1, 2.1), np.random.uniform(0,1)])
T = 30
problem = c.ShootingProblem(x0.T, [ model ] * T, terminal_model)
ddp = c.SolverDDP(problem)
ddp.solve()


[0. 0. 0.] 3
[-2.22635988e-12  1.63229746e+00  2.10885124e-13] 3
[-1.44796527e-11 -1.30492067e+00  1.36533001e-10] 3
[-1.13920925e-09  5.39266846e-01 -1.40149514e-09] 3
[ 5.96197968e-08  3.08742788e-01 -3.67504542e-08] 3
[1.22870764e-07 2.02341754e-01 1.86744536e-07] 3
[-1.11125328e-06  2.28613005e-02 -6.12992802e-07] 3
[5.11660781e-06 4.53388625e-02 2.85815241e-06] 3
[-2.47806045e-05  4.72797117e-02 -1.43201804e-05] 3
[1.21431696e-04 4.86649022e-02 7.12691613e-05] 3
[-0.0005947   0.04911451 -0.00035431] 3
[0.00291264 0.04946682 0.00176134] 3
[-0.014268    0.04957818 -0.0087544 ] 3
[-0.0037465   0.04956094 -0.00222274] 3
[0.00730183 0.04962559 0.00441522] 3
[0.00191781 0.04962504 0.00111999] 3
[-0.00373722  0.04965647 -0.00222588] 3
[0.0017735  0.04966593 0.00109766] 3
[-0.00084235  0.04967558 -0.00054008] 3
[0.00412874 0.04971291 0.00268104] 3
[0.00108274 0.04971277 0.00068271] 3
[-0.0005145   0.04971798 -0.00033557] 3
[0.0025222  0.04973782 0.00166523] 3
[0.00159169 0.04973782 0.0010

False