In [1]:
"""
Implementation of Irepa iterations using crocoddyl.

Some terminology:

1: Nominal_crocoddyl: Regular crocoddyl with ActionModelUnicycle running with loss function, as terminal model.
                      This is equivalent to:
                          model = crocoddyl.ActionModelUnicycle()
                          problem = crocoddyl.ShootingProblem(x0.T, [model]*T, model)
                          
2: Terminal_crocoddyl: Crocoddyl after a large number of iterations. This is the model 
                       used to establish convergence. It will correspond to the last irepa run.
                       
3: Running_crocoddyl : Crocoddyl in its ith iteration. If we run irepa for 50 iterations, then all iterations
                       before terminal crocoddyl are considered to be running iterations.

The algorithm is as follows:

    1: Generate dataset from Nominal_crocoddyl. 
    2: Train the neural network on the dataset.
    
    3: Use neural net inside crocoddyl to generate new dataset. This is the 1st Running_crocoddyl
    4: Train the neural net.
    
    Repeat 3, 4 until convergence to Terminal_crocoddyl

Three cases are considered for convergence.

1: similarity to 0              -------------> Similarity of any Running_crocoddyl to Nominal_crocoddyl
2: similarity to asymptote      -------------> Similarity of any Running_crocoddyl to Terminal_crocoddyl
3: similarity to previous iteration  --------> Similarity of nth Running_crocoddyl to (n-1)th Running_crocoddyl

The (dis)similarity measures can be established in two ways.

1: Mean Squared errors 
2: Procrustes dissimilarity

"""


import torch 
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from data_generator import Datagen
from terminal_unicycle import FeedforwardUnicycle, ResidualUnicycle
from feedforward_network import FeedForwardNet
from residual_network import ResidualNet

class Irepa:
    """
    Training for irepa iterations.

    The class is used purely as a namespace: every member is a static method
    and is meant to be called on the class itself, e.g.
    ``Irepa.feed_forward_training()``.
    """

    # 0-based indices of the main loop (``range(runs - 1)``) after which the
    # network is checkpointed. Loop index i produces the (i + 2)th trained
    # network, so these correspond to net5, net10, net17, net30, net40, net50.
    _CHECKPOINT_RUNS = (3, 8, 15, 28, 38, 48)

    @staticmethod
    def _feedforward_net(fc1_dims = 64,
                         fc2_dims = 64):
        """
        Instantiate and return a feedforward neural network with the given dimensions.

        No activation is passed here, so FeedForwardNet uses its own default
        (the original note says nn.Tanh(), because the network needs to be
        double differentiable -- see feedforward_network.py to confirm).

        @params:
            1: fc1_dims      = number of hidden units in the first fully connected layer
            2: fc2_dims      = number of hidden units in the second fully connected layer.

        @returns:
            A FeedForwardNet instance
        """
        return FeedForwardNet(fc1_dims=fc1_dims, fc2_dims=fc2_dims)

    @staticmethod
    def _residual_net(residual_dims = 3,
                     fc1_dims = 64,
                     fc2_dims = 64,
                     activation = nn.Tanh()
                     ):
        """
        Instantiate and return a residual network

        @params:
            1: residual_dims = number of units in the residual layer. Default 3
            2: fc1_dims      = number of hidden units in the first fully connected layer
            3: fc2_dims      = number of hidden units in the second fully connected layer.
            4: activation    = nn.Tanh().
                                if using Gauss Approximation, then other activation functions can be used
                                    e.g nn.ReLU()

        @returns:
            A ResidualNet instance
        """
        return ResidualNet(residual_dims=residual_dims,
                           fc1_dims=fc1_dims,
                           fc2_dims=fc2_dims,
                           activation=activation)

    @staticmethod
    def _irepa_iterations(make_net,
                          terminal_model_cls,
                          checkpoint_dir,
                          runs,
                          ntraj,
                          lr,
                          batch_size,
                          epochs):
        """
        Irepa loop shared by feed_forward_training and residual_net_training.

        @params:
            1: make_net           = zero-argument callable returning the untrained network
            2: terminal_model_cls = callable wrapping a network into a terminal model,
                                    called as terminal_model_cls(net)
            3: checkpoint_dir     = path prefix (with trailing separator) for the
                                    intermediate checkpoints
            4: runs               = total number of trainings (irepa 0 included)
            5: ntraj              = number of trajectories requested from Datagen.data
            6: lr, batch_size, epochs = forwarded unchanged to _training

        @returns:
            The network after the last training
        """
        # The nominal dataset is generated before the network is created, as in
        # the original code, so any shared random state is consumed in the same order.
        xtrain, ytrain = Datagen.data(ntrajectories=ntraj)
        net = make_net()

        # Irepa 0
        net = Irepa._training(net=net,
                              xtrain=xtrain,
                              ytrain=ytrain,
                              lr=lr,
                              batch_size=batch_size,
                              epochs=epochs)

        # NOTE(review): the first network is written to the working directory,
        # not to checkpoint_dir, so both trainers overwrite the same file.
        # Path kept unchanged for backward compatibility.
        torch.save(net, "net1.pth")
        del xtrain, ytrain

        # main loop
        for i in tqdm(range(runs-1)):

            # Generate training data with neural network inside crocoddyl
            terminal_model = terminal_model_cls(net)
            xtrain, ytrain = Datagen.data(terminal_model = terminal_model,
                                          ntrajectories = ntraj)

            # Keyword arguments keep lr / batch_size / epochs from being shifted
            # into the wrong parameter.
            net = Irepa._training(net=net,
                                  xtrain=xtrain,
                                  ytrain=ytrain,
                                  lr=lr,
                                  batch_size=batch_size,
                                  epochs=epochs)

            if i in Irepa._CHECKPOINT_RUNS:
                torch.save(net, checkpoint_dir+'net'+str(i+2)+'.pth')

            del terminal_model, xtrain, ytrain

        return net

    @staticmethod
    def feed_forward_training(fc1_dims=64,
                              fc2_dims=64,
                              runs = 50,
                              ntraj= 100,
                              lr=1e-3,
                              batch_size=128,
                              epochs=1000,
                              ):
        """
        The main irepa training loop for the feedforward network.

        @params:
            1: fc1_dims   = Hidden units in layer 1 of feedforward net
            2: fc2_dims   = Hidden units of layer 2
            3: runs       = 50. Number of trainings
            4: ntraj      = 100, number of trajectories used for training
            5: lr         = learning rate
            6: batch_size = 128
            7: epochs     = 1000, epochs per training

        @returns:
           trained feedforward neural network

        Side effects: saves "net1.pth" and intermediate checkpoints under ./Fnet/
        """
        print("Starting Irepa for Feedforward Network.......\n")

        net = Irepa._irepa_iterations(
            make_net=lambda: Irepa._feedforward_net(fc1_dims=fc1_dims,
                                                    fc2_dims=fc2_dims),
            terminal_model_cls=FeedforwardUnicycle,
            checkpoint_dir='./Fnet/',
            runs=runs,
            ntraj=ntraj,
            lr=lr,
            batch_size=batch_size,
            epochs=epochs)

        print("Done........")
        return net

    @staticmethod
    def residual_net_training(fc1_dims=64,
                              fc2_dims=64,
                              residual_dims = 3,
                              runs = 50,
                              ntraj= 100,
                              lr=1e-3,
                              batch_size=128,
                              epochs=1000,
                              ):
        """
        The main irepa training loop for the residual network.

        @params:
            1: fc1_dims       = Hidden units in layer 1 of the residual net
            2: fc2_dims       = Hidden units of layer 2
            3: residual_dims  = 3 Hidden units of residual layer
            4: runs           = 50. Number of trainings
            5: ntraj          = 100, number of trajectories used for training
            6: lr             = learning rate
            7: batch_size     = 128
            8: epochs         = 1000, epochs per training

        @returns:
           trained residual neural network

        Side effects: saves "net1.pth" and intermediate checkpoints under ./Rnet/
        """
        print("Starting Irepa for Residual Network.......\n")

        net = Irepa._irepa_iterations(
            make_net=lambda: Irepa._residual_net(fc1_dims = fc1_dims,
                                                 fc2_dims = fc2_dims,
                                                 residual_dims = residual_dims),
            terminal_model_cls=ResidualUnicycle,
            checkpoint_dir='./Rnet/',
            runs=runs,
            ntraj=ntraj,
            lr=lr,
            batch_size=batch_size,
            epochs=epochs)

        print("Done........")
        return net

    @staticmethod
    def _training( net, xtrain, ytrain,lr, batch_size, epochs):
        """
        Train `net` on (xtrain, ytrain) with Adam and a summed MSE loss.

        @params:
            1: net            = neural net to be trained (modified in place)
            2: xtrain, ytrain = dataset, as tensors accepted by
                                torch.utils.data.TensorDataset
            3: lr             = learning rate for Adam
            4: batch_size     = mini-batch size of the dataloader
            5: epochs         = number of passes over the dataset

        @returns:
            1: trained neural network (the same object as `net`)
        """
        # Convert to torch dataloader
        dataset = torch.utils.data.TensorDataset(xtrain, ytrain)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size = batch_size)

        criterion = torch.nn.MSELoss(reduction='sum')
        optimizer = torch.optim.Adam(net.parameters(), lr = lr)

        net.float()
        net.train()

        # Honour the requested number of epochs (previously hard-coded to 1000).
        for _ in tqdm(range(epochs)):
            for data, target in dataloader:

                output = net(data)
                loss = criterion(output, target)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        return net

In [2]:
# Run the full Irepa training for the feedforward network with the default hyper-parameters.
Irepa.feed_forward_training()

  0%|          | 2/1000 [00:00<00:55, 18.04it/s]

Starting Irepa for Feedforward Network.......

Sampling x, y, z from: 
 
  x = [-2.1, 2.1] 

  y = [-2.1, 2.1]

  z = [-6.283185307179586, 6.283185307179586]



100%|██████████| 1000/1000 [00:42<00:00, 23.28it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

Sampling x, y, z from: 
 
  x = [-2.1, 2.1] 

  y = [-2.1, 2.1]

  z = [-6.283185307179586, 6.283185307179586]






ArgumentError: Python argument types in
    ShootingProblem.__init__(ShootingProblem, numpy.ndarray, list, FeedForwardNet)
did not match C++ signature:
    __init__(_object* self, Eigen::Matrix<double, -1, 1, 0, -1, 1> initialState, std::vector<boost::shared_ptr<crocoddyl::ActionModelAbstractTpl<double> >, std::allocator<boost::shared_ptr<crocoddyl::ActionModelAbstractTpl<double> > > > runningModels, boost::shared_ptr<crocoddyl::ActionModelAbstractTpl<double> > terminalModel)