In [1]:
from __future__ import print_function
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data

import numpy as np
import pandas as pd

from rocket import Rocket

import random

In [2]:
class Net(nn.Module):
    """Small fully connected controller mapping 5 state inputs to 2 controls.

    Two hidden ReLU layers of width 16 are followed by a tanh output layer,
    so both outputs lie in [-1, 1].
    """

    def __init__(self):
        super(Net, self).__init__()

        d = 16  # hidden-layer width
        self.fc1 = nn.Linear(5, d)
        self.fc2 = nn.Linear(d, d)
        self.fc3 = nn.Linear(d, 2)

        # Weights are drawn uniformly from [-a, a] with
        # a = sqrt(k / (fan_in + fan_out)); k = 12 for the two ReLU layers
        # and k = 6 for the tanh output layer. Biases keep PyTorch's default.
        a = (12/(5+d))**0.5
        nn.init.uniform_(self.fc1.weight, a=-a, b=a)

        a = (12/(d+d))**0.5
        nn.init.uniform_(self.fc2.weight, a=-a, b=a)

        a = (6/(d+2))**0.5
        nn.init.uniform_(self.fc3.weight, a=-a, b=a)

    def forward(self, x):
        """Return the two tanh-squashed outputs for input tensor ``x``.

        ``x`` must have 5 features in its last dimension (``fc1`` is
        ``Linear(5, 16)``); the result has 2 features in its last dimension.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))

        return x

    def forward_simulate(self, x, z, vx, vz, m, dt = 0.1, g = 1.6229, c1 = 44000, c2 = 311*9.81, max_steps = 10000):
        """Roll the closed-loop dynamics forward with explicit Euler steps.

        At every step the network is queried with the current state
        ``[x, z, vx, vz, m]`` and its two outputs ``(u1, u2)`` drive

            dx/dt  = vx
            dz/dt  = vz
            dvx/dt = c1 * (u1 / m) * sin(u2)
            dvz/dt = c1 * (u1 / m) * cos(u2) - g
            dm/dt  = -(c1 / c2) * u1

        NOTE(review): the mass-flow equation was not written in the original
        draft; it assumes ``c2`` is the exhaust velocity (Isp * g0) -- confirm
        against ``rocket.Rocket``.
        NOTE(review): ``u1`` is clamped to [0, 1] to match the throttle range
        used by ``apollo_control``; the raw tanh output can be negative.

        Args:
            x, z: initial horizontal position and altitude.
            vx, vz: initial velocity components.
            m: initial mass.
            dt: integration step.
            g: gravitational acceleration.
            c1: thrust scale applied to ``u1``.
            c2: divisor in the mass-flow term.
            max_steps: hard cap on the number of integration steps, so the
                rollout always terminates.

        Returns:
            list of ``(x, z, vx, vz, m)`` tuples of Python floats, starting
            with the initial state. The rollout stops once ``z <= 0``,
            ``m <= 0`` or ``max_steps`` steps have been taken.
        """
        state = (float(x), float(z), float(vx), float(vz), float(m))
        trajectory = [state]

        # Build network inputs with the same dtype/device as the weights.
        ref = self.fc1.weight

        # Pure simulation: no gradients are needed for the rollout.
        with torch.no_grad():
            for _ in range(max_steps):
                x, z, vx, vz, m = state

                # Stop at touchdown; m <= 0 would also divide by zero below.
                if z <= 0 or m <= 0:
                    break

                # Get controls from the neural net.
                inputs = torch.tensor(state, dtype=ref.dtype, device=ref.device)
                u1, u2 = self.forward(inputs).tolist()
                u1 = max(min(u1, 1.0), 0.0)

                thrust_acc = c1*(u1/m)
                dvx = thrust_acc*float(np.sin(u2))
                dvz = thrust_acc*float(np.cos(u2)) - g
                dm = -(c1/c2)*u1

                # Explicit Euler: positions advance with the *old* velocity.
                state = (x + dt*vx,
                         z + dt*vz,
                         vx + dt*dvx,
                         vz + dt*dvz,
                         m + dt*dm)
                trajectory.append(state)

        return trajectory
        
        
        
        
        

In [3]:
# Instantiate the controller and report its layers and trainable parameter count.
net = Net()
print(net)
trainable_sizes = (w.numel() for w in net.parameters() if w.requires_grad)
print(f'numel: {sum(trainable_sizes)}')

Net(
  (fc1): Linear(in_features=5, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=2, bias=True)
)
numel: 402


In [5]:

def apollo_control(x, z, vx, vz, m, g = 1.6229, c1 = 44000, c2 = 311*9.81):
    """Compute a (throttle, angle) command from the current state.

    A time-to-go ``tgo = 3*z/vz`` is formed and used in a polynomial feedback
    law on position and velocity to obtain the commanded acceleration
    ``(a_x, a_z)``. That acceleration is converted to a throttle ``u1``
    (clamped to [0, 1]) and a direction ``u2 = arctan2(a_x, a_z)``.

    Args:
        x, z: horizontal position and altitude.
        vx, vz: velocity components.
        m: current mass.
        g: gravitational acceleration added to the vertical command.
        c1: thrust scale used to turn acceleration into throttle.
        c2: accepted for signature parity; not used here.

    Returns:
        tuple ``(u1, u2)``.

    Notes:
        ``z == 0`` makes ``tgo`` zero and the gains below divide by zero.
        NOTE(review): for a descending vehicle (z > 0, vz < 0) ``tgo`` is
        negative -- confirm this sign convention is intended.
    """
    # Nudge a near-zero vertical speed away from zero so tgo stays finite.
    if abs(vz) < 0.001:
        vz = vz + 0.001

    tgo = (3 * z) / vz

    # Feedback gains on velocity and position.
    vel_gain = 6 / tgo
    pos_gain = 12 / tgo**2

    a_x = -vel_gain * vx - pos_gain * x
    a_z = -vel_gain * vz - pos_gain * z + g

    # Throttle is the commanded force magnitude relative to c1.
    acc_norm = (a_x**2 + a_z**2)**0.5
    u1 = (1/c1) * m * acc_norm
    u2 = np.arctan2(a_x, a_z)

    # Saturate the throttle to [0, 1]; the angle is left unconstrained.
    if u1 > 1:
        u1 = 1
    elif u1 < 0:
        u1 = 0

    return (u1, u2)

In [None]:
def make_train_step(model, loss_fn, optimizer):
    """Build a function that performs one optimisation step on ``model``.

    Args:
        model: the ``nn.Module`` being trained.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar
            tensor.
        optimizer: optimizer constructed over ``model``'s parameters.

    Returns:
        ``train_step(x, y)``, which runs a forward/backward pass on one batch,
        updates the parameters and returns the batch loss as a Python float.
    """

    def train_step(x, y):
        # Sets model to TRAIN mode
        model.train()
        # Makes predictions
        yhat = model(x)
        # Computes loss (prediction first, target second)
        loss = loss_fn(yhat, y)
        # Computes gradients
        loss.backward()
        # Updates parameters and zeroes gradients for the next call
        optimizer.step()
        optimizer.zero_grad()
        # Returns the loss
        return loss.item()

    # Returns the function that will be called inside the train loop
    return train_step

In [None]:
# Reset any gradients already stored on the network's parameters before training.
net.zero_grad()

In [None]:
# Optimisation setup: SGD with momentum over the network weights, MSE as the loss.
optimizer = optim.SGD(
    net.parameters(),
    lr=0.005,
    momentum=0.9,
)
criterion = nn.MSELoss()

# Bind model, loss and optimizer into a single per-batch update function.
train_step = make_train_step(model=net, loss_fn=criterion, optimizer=optimizer)

# Per-batch loss histories, appended to by the training loop.
losses, val_losses = [], []

n_epochs = 10

In [None]:
print("Started Training")
for epoch in range(n_epochs):
    # Per-epoch accumulators: summed batch losses and how many batches were seen.
    episode_loss_train = 0
    episode_loss_val   = 0
    n_train_batches = 0
    n_val_batches = 0

    for batch in train_dl:

        state = batch['s']
        control = batch['c']

        # train_step switches the model to train mode itself.
        loss = train_step(state, control)

        losses.append(loss)
        episode_loss_train += loss
        n_train_batches += 1

    # Evaluation mode only needs to be set once per validation pass.
    net.eval()
    with torch.no_grad():
        for batch in validate_dl:

            state = batch['s']
            control = batch['c']

            c_hat = net(state)

            val_loss = criterion(control, c_hat)

            val_losses.append(val_loss.item())

            episode_loss_val += val_loss.item()
            n_val_batches += 1

    # Running means cover every batch so far; epoch means use only this epoch's
    # batch counts. max(..., 1) avoids a ZeroDivisionError on an empty loader.
    running_train = sum(losses)/max(len(losses), 1)
    running_val = sum(val_losses)/max(len(val_losses), 1)
    epoch_train = episode_loss_train/max(n_train_batches, 1)
    epoch_val = episode_loss_val/max(n_val_batches, 1)

    print(f"""Epoch: {epoch}, Loss: {running_train:.6f}, Val_loss: {running_val:.6f}, Epoch Loss: {epoch_train:.6f}, Epoch Val Loss: {epoch_val:.6f}""")

print('')

print("********Finished Training*********")