In [1]:
from time import time
import json
import numpy as np
import os
from stable_baselines3 import TD3 as sb3TD3
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from IPython.display import clear_output
import matplotlib.pyplot as plt
import copy
from hyperopt import fmin, tpe, hp
import pickle
# Simple stopwatch for timing arbitrary code, in (float) seconds.
# Keeps track of individual lap times along with the final elapsed time.
class Stopwatch:
    """Wall-clock stopwatch measuring elapsed time in float seconds.

    The clock starts when the instance is created.

    Attributes set by the methods:
        start_time: timestamp taken at construction.
        last_time: timestamp of construction or of the most recent lap.
        laps: list of lap durations, in the order they were recorded.
        stop_time / delta_time: set by ``stop()``.
    """

    def __init__(self):
        now = time()
        self.start_time = now
        self.last_time = now
        self.laps = []

    def lap(self):
        """Record and return the time elapsed since the previous lap (or start)."""
        now = time()
        elapsed = now - self.last_time
        self.last_time = now
        self.laps.append(elapsed)
        return elapsed

    def stop(self):
        """Record the stop timestamp and return the total time since construction."""
        self.stop_time = time()
        self.delta_time = self.stop_time - self.start_time
        return self.delta_time

In [2]:
# --- Pretrained actor network -------------------------------------------------
# MLP: input_dim -> nLayers hidden layers of nNodes units (ReLU) -> output_dim,
# followed by Tanh. The layout must match the saved state dict loaded below.
nLayers = 3
nNodes = 2**5

# Optimiser / early-stopping settings. They are not used in this cell
# (presumably leftovers from the pre-training notebook); the names are kept so
# that anything else relying on them keeps working.
learning_rate = 10**(int(-1*3))#params['learning_rate']))
weight_decay = 10**(int(-1*6))
eps = 1e-8#10**(int(-1*int(params['eps'])))
patience = 10#int(params['patience'])

activation_fn = nn.ReLU
with_bias = True
input_dim = 212
net_arch = [nNodes for _ in range(nLayers)]
output_dim = 3

# Consecutive (in, out) pairs of layer_sizes define the hidden layers; with
# nLayers == 0 the list holds only input_dim and the network collapses to a
# single Linear(input_dim, output_dim).
layer_sizes = [input_dim] + net_arch
modules = []
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
    modules.append(nn.Linear(n_in, n_out, bias=with_bias))
    modules.append(activation_fn())
modules.append(nn.Linear(layer_sizes[-1], output_dim, bias=with_bias))
modules.append(nn.Tanh())

# Use the GPU when there is one, otherwise fall back to the CPU instead of
# crashing on a hard-coded .cuda() call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
actor = nn.Sequential(*modules).to(device)

# map_location lets a checkpoint saved on another device load on this one.
actor_weights = torch.load('actor_pretrained_weights_vert.pt', map_location=device)
print(actor)
actor.load_state_dict(actor_weights)

Sequential(
  (0): Linear(in_features=212, out_features=32, bias=True)
  (1): ReLU()
  (2): Linear(in_features=32, out_features=32, bias=True)
  (3): ReLU()
  (4): Linear(in_features=32, out_features=32, bias=True)
  (5): ReLU()
  (6): Linear(in_features=32, out_features=3, bias=True)
  (7): Tanh()
)


<All keys matched successfully>

In [3]:
# --- Pretrained critic network ------------------------------------------------
# MLP: input_dim -> nLayers hidden layers of nNodes units (ReLU) -> output_dim,
# with no output activation. The layout must match the saved state dict loaded
# below.
nLayers = 3
nNodes = 2**5

# Optimiser / early-stopping settings. They are not used in this cell
# (presumably leftovers from the pre-training notebook); the names are kept so
# that anything else relying on them keeps working.
learning_rate = 10**(int(-1*3))#params['learning_rate']))
weight_decay = 10**(int(-1*6))
eps = 1e-8#10**(int(-1*int(params['eps'])))
patience = 10#int(params['patience'])

activation_fn = nn.ReLU
with_bias = True
input_dim = 215
net_arch = [nNodes for _ in range(nLayers)]
output_dim = 1

# Consecutive (in, out) pairs of layer_sizes define the hidden layers; with
# nLayers == 0 the list holds only input_dim and the network collapses to a
# single Linear(input_dim, output_dim).
layer_sizes = [input_dim] + net_arch
modules = []
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
    modules.append(nn.Linear(n_in, n_out, bias=with_bias))
    modules.append(activation_fn())
modules.append(nn.Linear(layer_sizes[-1], output_dim, bias=with_bias))

# Use the GPU when there is one, otherwise fall back to the CPU instead of
# crashing on a hard-coded .cuda() call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
critic = nn.Sequential(*modules).to(device)

# map_location lets a checkpoint saved on another device load on this one.
critic_weights = torch.load('critic_pretrained_weights_vert.pt', map_location=device)
print(critic)
critic.load_state_dict(critic_weights)

Sequential(
  (0): Linear(in_features=215, out_features=32, bias=True)
  (1): ReLU()
  (2): Linear(in_features=32, out_features=32, bias=True)
  (3): ReLU()
  (4): Linear(in_features=32, out_features=32, bias=True)
  (5): ReLU()
  (6): Linear(in_features=32, out_features=1, bias=True)
)


<All keys matched successfully>

In [5]:
# --- Build the actor / critic datasets from the recorded episodes --------------
# Produces, per split ('train' / 'val' / 'test'):
#   Xs  : observations
#   Ys  : actions clipped to [-1, 1]
#   XYs : observation concatenated with the clipped action (critic input)
#   Qs  : discounted return-to-go for each step (critic target)
data_dir = 'data_alpha/' # vertical, 25x25 forward and belly depths
files = os.listdir(data_dir)

# Observation vectors keyed by observation name, merged from every file whose
# name contains 'observations'.
observations = {}
for fname in files:
    if 'observations' in fname:
        fpath = os.path.join(data_dir, fname)
        # SECURITY: allow_pickle=True can execute arbitrary code while loading;
        # only point this at trusted data files.
        observation_set = np.load(fpath, allow_pickle=True)
        observations.update(observation_set)

SPLITS = ('train', 'val', 'test')
Xs = {split: [] for split in SPLITS}
Ys = {split: [] for split in SPLITS}
XYs = {split: [] for split in SPLITS}
Qs = {split: [] for split in SPLITS}

gamma = 0.99        # discount factor for the return-to-go
goal_radius = 4     # distance to the goal at which the episode counts as solved
goal_reward = 100   # reward (and terminal Q value) for reaching the goal

nSteps = []
untagged_episodes = []  # episodes whose file name carries no split tag
for fname in files:
    if 'states' not in fname:
        continue
    fpath = os.path.join(data_dir, fname)
    with open(fpath, 'r') as state_file:  # close the handle deterministically
        jdict = json.load(state_file)
    for episode in jdict:
        gfile = jdict[episode]['step_0']['fname']
        if 'part2' in gfile or 'part3' in gfile or 'horizontal' in gfile:
            continue

        # Pick the split from the file name. The order keeps the original
        # precedence when several tags match (test over val over train).
        if 'test' in gfile:
            batch = 'test'
        elif 'val' in gfile:
            batch = 'val'
        elif 'train' in gfile:
            batch = 'train'
        else:
            # Previously such an episode silently went into whatever split the
            # preceding episode used (or raised NameError if it came first).
            untagged_episodes.append((fname, episode))
            continue

        nSteps.append(len(jdict[episode])-1)
        start = np.array(jdict[episode]['step_0']['drone_position'], dtype=float)
        goal = np.array(jdict[episode]['step_0']['goal_position'], dtype=float)
        last_distance = np.linalg.norm(goal-start)

        rs = []  # per-step rewards of this episode
        for step in jdict[episode]:
            if step == 'step_0':
                continue
            state = jdict[episode][step]
            observation = list(observations[state['observation_name']])
            action = list(np.clip(state['rl_output'], -1, 1))
            Xs[batch].append(observation)
            Ys[batch].append(action)
            XYs[batch].append(observation + action)

            drone = np.array(state['drone_position'], dtype=float)
            this_distance = np.linalg.norm(goal-drone)
            delta_distance = last_distance - this_distance
            last_distance = this_distance

            # Reward: goal_reward on reaching the goal; otherwise a -1 step
            # cost plus a small bonus/penalty for progress towards the goal.
            reached_goal = this_distance <= goal_radius
            if reached_goal:
                r = goal_reward
            else:
                r = .1*np.tanh(delta_distance) - 1
            rs.append(r)
            if reached_goal:
                break

        if not rs:
            # No transitions were recorded for this episode, so there is
            # nothing to label. Appending a Q value here would shift Qs out of
            # step with XYs.
            continue

        # Discounted return-to-go, built backwards from the last step.
        # NOTE(review): the final step's Q is fixed to goal_reward even when
        # the episode ended without reaching the goal; confirm the data only
        # contains successful episodes.
        qs = [goal_reward]
        for r in reversed(rs[:-1]):
            qs.append(r + gamma * qs[-1])
        qs.reverse()
        Qs[batch].extend(qs)

if untagged_episodes:
    print(f"Skipped {len(untagged_episodes)} episode(s) with no train/val/test "
          f"tag in their file name, e.g. {untagged_episodes[0]}")

# Convert every per-split list into a numpy array.
for dataset in (Xs, Ys, XYs, Qs):
    for batch in dataset:
        dataset[batch] = np.array(dataset[batch])

In [6]:
class DatasetActor(torch.utils.data.Dataset):
    """In-memory dataset of (observation, action) pairs for the actor.

    Both arrays are copied, converted to float32 tensors and stored on
    ``device`` up front, so indexing does no host/device transfer.

    Args:
        X: array of observations, one entry per sample.
        y: array of actions, one entry per sample; must be as long as ``X``.
        device: torch device to hold the tensors. Defaults to CUDA when it is
            available and to the CPU otherwise.

    Raises:
        ValueError: if ``X`` and ``y`` have different lengths.
    """

    def __init__(self, X, y, device=None):
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        # .copy() keeps the tensors independent of the caller's arrays.
        self.X = torch.as_tensor(X.copy(), device=device).float()
        self.y = torch.as_tensor(y.copy(), device=device).float()

    def __len__(self):
        return len(self.y)

    def __getitem__(self, index):
        return self.X[index], self.y[index]
    
# One (observation, action) dataset per split.
training_set_actor, validation_set_actor, testing_set_actor = (
    DatasetActor(Xs[split], Ys[split]) for split in ('train', 'val', 'test')
)

batch_size = 32

# Only the training loader shuffles; validation and test keep a fixed order.
training_loader_actor = torch.utils.data.DataLoader(
    training_set_actor, batch_size=batch_size, shuffle=True
)
params = dict(batch_size=batch_size, shuffle=False)
validation_loader_actor = torch.utils.data.DataLoader(validation_set_actor, **params)
testing_loader_actor = torch.utils.data.DataLoader(testing_set_actor, **params)

In [7]:
class DatasetCritic(torch.utils.data.Dataset):
    """In-memory dataset of (observation+action, Q value) pairs for the critic.

    Both arrays are copied, converted to float32 tensors and stored on
    ``device`` up front, so indexing does no host/device transfer.

    Args:
        Xy: array of critic inputs (observation concatenated with action),
            one entry per sample.
        q: array of target Q values, one entry per sample; must be as long
            as ``Xy``.
        device: torch device to hold the tensors. Defaults to CUDA when it is
            available and to the CPU otherwise.

    Raises:
        ValueError: if ``Xy`` and ``q`` have different lengths.
    """

    def __init__(self, Xy, q, device=None):
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if len(Xy) != len(q):
            raise ValueError(
                f"Xy and q must have the same length, got {len(Xy)} and {len(q)}"
            )
        # .copy() keeps the tensors independent of the caller's arrays.
        self.Xy = torch.as_tensor(Xy.copy(), device=device).float()
        self.q = torch.as_tensor(q.copy(), device=device).float()

    def __len__(self):
        return len(self.q)

    def __getitem__(self, index):
        return self.Xy[index], self.q[index]
    
# One (observation+action, Q value) dataset per split.
training_set_critic, validation_set_critic, testing_set_critic = (
    DatasetCritic(XYs[split], Qs[split]) for split in ('train', 'val', 'test')
)

batch_size = 32

# Only the training loader shuffles; validation and test keep a fixed order.
training_loader_critic = torch.utils.data.DataLoader(
    training_set_critic, batch_size=batch_size, shuffle=True
)
params = dict(batch_size=batch_size, shuffle=False)
validation_loader_critic = torch.utils.data.DataLoader(validation_set_critic, **params)
testing_loader_critic = torch.utils.data.DataLoader(testing_set_critic, **params)

In [8]:
# Take the first batch from each test loader as a small, fixed probe input for
# the sanity checks below (the test loaders are built with shuffle=False).
x, y = next(iter(testing_loader_actor))
xy, q = next(iter(testing_loader_critic))

In [9]:
# Load the SB3 TD3 model that will receive the pretrained weights.
# The device is chosen once and handed to load(), which places the whole
# policy on it, so the separate .cuda() calls on actor and critic are no
# longer needed and a machine without a GPU falls back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
sb3model = sb3TD3.load('model_in_vert.zip', device=device)
print(sb3model.actor)
print(sb3model.critic)

Actor(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mu): Sequential(
    (0): Linear(in_features=212, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=32, bias=True)
    (5): ReLU()
    (6): Linear(in_features=32, out_features=3, bias=True)
    (7): Tanh()
  )
)
ContinuousCritic(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (qf0): Sequential(
    (0): Linear(in_features=215, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=32, bias=True)
    (5): ReLU()
    (6): Linear(in_features=32, out_features=1, bias=True)
  )
  (qf1): Sequential(
    (0): Linear(in_features=215, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=3

In [10]:
# Actor output for the first two test observations, using the weights that
# came with the loaded SB3 model (the pretrained weights are not copied in yet).
sb3model.actor(x[:2])

tensor([[ 0.1346,  0.1769, -0.0628],
        [ 0.1363,  0.1771, -0.0683]], device='cuda:0', grad_fn=<TanhBackward0>)

In [11]:
# Show only the first parameter tensor of the SB3 actor as a fingerprint of
# its current weights.
param = next(iter(sb3model.actor.parameters()))
print(param)

Parameter containing:
tensor([[-0.0393, -0.0311,  0.0631,  ..., -0.0161,  0.0468,  0.0004],
        [ 0.0546,  0.0410, -0.0172,  ..., -0.0003,  0.0072, -0.0159],
        [ 0.0151, -0.0262,  0.0030,  ...,  0.0595,  0.0543, -0.0517],
        ...,
        [-0.0566, -0.0611, -0.0204,  ...,  0.0665,  0.0220,  0.0391],
        [-0.0367,  0.0551, -0.0341,  ...,  0.0238, -0.0165, -0.0522],
        [ 0.0417, -0.0006, -0.0013,  ...,  0.0217, -0.0591, -0.0211]],
       device='cuda:0', requires_grad=True)


In [12]:
# Copy the pretrained actor weights into the online and target actors.
# The weights are loaded in place instead of replacing the `mu` module:
# the parameter objects stay the same, so an optimizer that was already built
# over them keeps pointing at the live weights, and load_state_dict raises if
# the two architectures do not match.
pretrained_actor_state = actor.state_dict()
for actor_net in (sb3model.actor, sb3model.actor_target):
    actor_net.mu.load_state_dict(pretrained_actor_state)

In [13]:
# Actor output for the same two test observations, now that the pretrained
# actor weights are in place.
sb3model.actor(x[:2])

tensor([[ 0.9470,  0.0678, -0.0264],
        [ 0.9124,  0.0496, -0.0215]], device='cuda:0', grad_fn=<TanhBackward0>)

In [14]:
# First parameter tensor of the SB3 actor after the weight transfer; it should
# equal the first parameter of the pretrained `actor`.
param = next(iter(sb3model.actor.parameters()))
print(param)

Parameter containing:
tensor([[ 3.1955e-01,  5.1998e-02,  1.4502e-01,  ..., -1.1753e-03,
          1.4321e-02, -2.2154e-02],
        [ 3.7388e-02,  2.9288e-01,  3.4823e-01,  ..., -7.2124e-02,
          8.0031e-03, -6.9549e-02],
        [ 1.0586e+00, -9.8386e-01, -7.7886e-01,  ...,  4.1957e-02,
         -7.5198e-02, -3.1768e-02],
        ...,
        [-1.9157e-01,  6.1842e-01, -3.8841e-01,  ...,  5.6180e-02,
          1.9713e-02, -1.7462e-02],
        [-1.9312e-02, -4.9004e-02,  9.4241e-03,  ...,  8.4671e-03,
         -4.4216e-02, -4.6297e-02],
        [ 2.3181e-01,  1.2013e+00, -5.1635e-01,  ..., -4.4961e-02,
          8.9984e-02, -6.8418e-02]], device='cuda:0', requires_grad=True)


In [15]:
# First parameter tensor of the pretrained actor, for comparison with the
# SB3 actor's first parameter.
param = next(iter(actor.parameters()))
print(param)

Parameter containing:
tensor([[ 3.1955e-01,  5.1998e-02,  1.4502e-01,  ..., -1.1753e-03,
          1.4321e-02, -2.2154e-02],
        [ 3.7388e-02,  2.9288e-01,  3.4823e-01,  ..., -7.2124e-02,
          8.0031e-03, -6.9549e-02],
        [ 1.0586e+00, -9.8386e-01, -7.7886e-01,  ...,  4.1957e-02,
         -7.5198e-02, -3.1768e-02],
        ...,
        [-1.9157e-01,  6.1842e-01, -3.8841e-01,  ...,  5.6180e-02,
          1.9713e-02, -1.7462e-02],
        [-1.9312e-02, -4.9004e-02,  9.4241e-03,  ...,  8.4671e-03,
         -4.4216e-02, -4.6297e-02],
        [ 2.3181e-01,  1.2013e+00, -5.1635e-01,  ..., -4.4961e-02,
          8.9984e-02, -6.8418e-02]], device='cuda:0', requires_grad=True)


In [16]:
# Q estimates for the first two test (observation, action) pairs, using the
# weights that came with the loaded SB3 model. The critic returns one tensor
# per Q-network.
sb3model.critic(x[:2], y[:2])

(tensor([[0.0210],
         [0.0246]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[0.1040],
         [0.1057]], device='cuda:0', grad_fn=<AddmmBackward0>))

In [17]:
# Show only the first parameter tensor of the SB3 critic as a fingerprint of
# its current weights.
param = next(iter(sb3model.critic.parameters()))
print(param)

Parameter containing:
tensor([[ 0.0677, -0.0571, -0.0417,  ...,  0.0357,  0.0276, -0.0557],
        [ 0.0381, -0.0370,  0.0434,  ...,  0.0393, -0.0378,  0.0665],
        [-0.0577, -0.0059, -0.0248,  ..., -0.0058,  0.0107, -0.0413],
        ...,
        [-0.0411, -0.0607,  0.0116,  ..., -0.0578,  0.0355, -0.0592],
        [ 0.0656, -0.0486, -0.0257,  ..., -0.0416, -0.0397,  0.0562],
        [ 0.0471,  0.0189,  0.0533,  ...,  0.0140,  0.0263,  0.0499]],
       device='cuda:0', requires_grad=True)


In [18]:
# Copy the pretrained critic weights into every Q-network of the online and
# target critics, so all of them start from the same pretrained weights.
# The weights are loaded in place instead of replacing the modules:
#   * the former `for module in ...: del module` loops only unbound the loop
#     variable and removed nothing, so they are dropped;
#   * the parameter objects stay the same, so an optimizer that was already
#     built over them keeps pointing at the live weights;
#   * load_state_dict raises if the architectures do not match.
pretrained_critic_state = critic.state_dict()
for critic_net in (sb3model.critic, sb3model.critic_target):
    for q_net in critic_net.q_networks:
        q_net.load_state_dict(pretrained_critic_state)

In [19]:
# Q estimates for the same two test pairs after the weight transfer. Every
# Q-network now holds the same pretrained weights, so the returned tensors
# should be identical.
sb3model.critic(x[:2], y[:2])

(tensor([[92.7972],
         [92.5767]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[92.7972],
         [92.5767]], device='cuda:0', grad_fn=<AddmmBackward0>))

In [20]:
# First parameter tensor of the SB3 critic after the weight transfer; it
# should equal the first parameter of the pretrained `critic`.
param = next(iter(sb3model.critic.parameters()))
print(param)

Parameter containing:
tensor([[-0.0234, -0.0487, -0.0440,  ...,  0.0581,  0.0471,  0.0280],
        [-0.0632, -0.0573,  0.0300,  ...,  0.0252, -0.0303,  0.0332],
        [ 0.0676,  0.0391,  0.0522,  ...,  0.0023, -0.0139, -0.0637],
        ...,
        [-0.0788,  0.0281,  0.0034,  ..., -0.0360, -0.0607,  0.0577],
        [ 0.0094, -0.0140,  0.0473,  ..., -0.0294, -0.0517,  0.0499],
        [-0.0184,  0.0007,  0.0687,  ..., -0.0132,  0.0398, -0.0273]],
       device='cuda:0', requires_grad=True)


In [21]:
# First parameter tensor of the pretrained critic, for comparison with the
# SB3 critic's first parameter.
param = next(iter(critic.parameters()))
print(param)

Parameter containing:
tensor([[-0.0234, -0.0487, -0.0440,  ...,  0.0581,  0.0471,  0.0280],
        [-0.0632, -0.0573,  0.0300,  ...,  0.0252, -0.0303,  0.0332],
        [ 0.0676,  0.0391,  0.0522,  ...,  0.0023, -0.0139, -0.0637],
        ...,
        [-0.0788,  0.0281,  0.0034,  ..., -0.0360, -0.0607,  0.0577],
        [ 0.0094, -0.0140,  0.0473,  ..., -0.0294, -0.0517,  0.0499],
        [-0.0184,  0.0007,  0.0687,  ..., -0.0132,  0.0398, -0.0273]],
       device='cuda:0', requires_grad=True)


In [22]:
# Write the SB3 model, now carrying the pretrained actor/critic weights, to a
# new archive; the input file 'model_in_vert.zip' is left untouched.
sb3model.save('model_out_vert.zip')