In [1]:
import torch

from stable_baselines3.ppo import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecVideoRecorder

from cppo import CPPO_Policy
from env import SlidingAntEnv

from env import SlidingAntEnv
from utils import WeightLogger, AgesLogger, SlidingEval
from sb3_logger import configure_logger, WandbOutputFormat

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from stable_baselines3.common.evaluation import evaluate_policy

In [3]:
import multiprocessing as mp
import numpy as np

In [40]:
input_queue = mp.Queue()
output_queue = mp.Queue()
done = mp.Value('b', False)

In [41]:
def eval_loop(policy_cls, settings, n_eval_episodes, deterministic, max_steps, input_queue, output_queue, done_flag):
    env  = make_vec_env(SlidingAntEnv, 1, env_kwargs={'change_steps':np.inf, 'max_steps':max_steps})
    env.reset()
    model = PPO(policy_cls, env, **settings)
    while not done_flag.value:
        timestep, sd, friction = input_queue.get()
        env.env_method('set_friction', friction)
        for name, tensor in sd.items():
            sd[name] = tensor.cuda()
        model.policy.load_state_dict(sd)
        episode_rewards, episode_lengths = evaluate_policy(
            model,
            env,
            n_eval_episodes=n_eval_episodes,
            deterministic=deterministic,
            return_episode_rewards=True
        )
        output_queue.put((float(np.mean(episode_rewards)), np.mean(episode_lengths), timestep))
    env.close()

In [42]:
sd = ppo.policy.state_dict()

In [43]:
for name, tensor in sd.items():
    sd[name] = tensor.cpu()

In [None]:
mp.Process(target=eval_loop, args=)

In [48]:
{'policy_kwargs': ppo.policy_kwargs}

{'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}],
  'activation_fn': torch.nn.modules.activation.Tanh,
  'optimizer_kwargs': {'betas': [0.9, 0.999]}}}

In [44]:
input_queue.put((ppo.num_timesteps, sd, ppo.env.get_attr('friction', 0)[0]))

In [45]:
eval_loop(learner_class, settings, 1, True, 2000, input_queue, output_queue, done)

In [46]:
while not output_queue.empty():
    print(output_queue.get_nowait())

(10.19812, 21.0, 12288)


In [108]:
ppo.

In [6]:
env = make_vec_env(SlidingAntEnv, 8, seed=42, env_kwargs={'change_steps':10000, 'max_steps':2000, 'seed':42}) #, vec_env_cls=SubprocVecEnv

In [7]:
learner_class = CPPO_Policy #'MlpPolicy' #

In [8]:
settings = {
    'n_steps': 4096//8,
    'n_epochs': 10,
    'batch_size': 128,
    'gae_lambda': 0.95,
    'gamma': 0.99,
    'clip_range': 0.2,
    'learning_rate': 1e-4,
    'tensorboard_log': './speed/tensorboard',
    'device': 'cuda',
    'policy_kwargs': {
      'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}],
      'activation_fn': torch.nn.Tanh,
      'optimizer_kwargs': {
        'betas': [0.9, 0.999]
      }
    }
}

In [9]:
ppo = PPO(learner_class, env, seed=42, **settings)

In [10]:
callbacks = [WeightLogger(), AgesLogger('./speed/ages')] #, SlidingEval(max_steps=2000, n_eval_episodes=1, deterministic=True)

In [11]:
%%time
ppo.learn(total_timesteps=10000, callback=callbacks, tb_log_name='test')

CPU times: total: 32.6 s
Wall time: 34.4 s


<stable_baselines3.ppo.ppo.PPO at 0x2a811814e50>

In [7]:
import cProfile

In [8]:
cProfile.run("ppo.learn(total_timesteps=10000, callback=callbacks, tb_log_name='test')", './speed/cppo_stats')

In [237]:
env.close()

In [10]:
import pstats
from pstats import SortKey

In [11]:
ppo_stats = pstats.Stats('./speed/ppo_stats')
cppo_stats = pstats.Stats('./speed/cppo_stats')

In [14]:
ppo_stats.print_stats('collect_rollouts'), cppo_stats.print_stats('collect_rollouts')

Sat Nov 26 09:48:46 2022    ./speed/ppo_stats

         5969307 function calls (5692247 primitive calls) in 41.532 seconds

   Random listing order was used
   List reduced from 693 to 1 due to restriction <'collect_rollouts'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.169    0.169   22.793   22.793 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py:132(collect_rollouts)


Sat Nov 26 09:56:35 2022    ./speed/cppo_stats

         7615004 function calls (7248344 primitive calls) in 64.962 seconds

   Random listing order was used
   List reduced from 756 to 1 due to restriction <'collect_rollouts'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.170    0.170   23.214   23.214 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py:132(collect_rollouts)




(<pstats.Stats at 0x21b072be640>, <pstats.Stats at 0x21b33d43eb0>)

In [15]:
ppo_stats.print_stats('step'), cppo_stats.print_stats('step')

Sat Nov 26 09:48:46 2022    ./speed/ppo_stats

         5969307 function calls (5692247 primitive calls) in 41.532 seconds

   Random listing order was used
   List reduced from 693 to 16 due to restriction <'step'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     4136    0.021    0.000   11.392    0.003 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\stable_baselines3\common\vec_env\base_vec_env.py:154(step)
       40    0.000    0.000    0.000    0.000 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py:38(step_async)
       40    0.000    0.000    0.049    0.001 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py:41(step_wait)
     2560    0.095    0.000    3.815    0.001 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\torch\optim\adam.py:105(step)
16384/4096    0.028    0.000    0.036    0.000 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\st

(<pstats.Stats at 0x21b072be640>, <pstats.Stats at 0x21b33d43eb0>)

In [9]:
from copy import deepcopy

In [10]:
buff = deepcopy(ppo.rollout_buffer)

In [11]:
class DummyLogger():
    def record(self, *args, **kwargs):
        pass

In [72]:
import torch
from torch import Tensor
from typing import List, Optional
from torch.optim import Adam


class CAdam(Adam):
    r"""
    Barebone Adam adaptation with 'step' parameter for each weight in a parameter Tensor, instead of one number for the whole Tensor
    """

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.

        Args:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        self._cuda_graph_capture_health_check()

        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad = []
            grads = []
            exp_avgs = []
            exp_avg_sqs = []
            max_exp_avg_sqs = []
            state_steps = []
            beta1, beta2 = group['betas']

            for p in group['params']:
                if p.grad is not None:
                    params_with_grad.append(p)
                    if p.grad.is_sparse:
                        raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
                    grads.append(p.grad)

                    state = self.state[p]
                    # Lazy state initialization
                    if len(state) == 0:
                        ### CHANGED ###
                        #state['step'] = torch.zeros((1,), dtype=torch.float, device=p.device) \
                        #    if self.defaults['capturable'] else torch.tensor(0.)
                        state['step'] = torch.zeros_like(p, memory_format=torch.preserve_format, device=p.device)
                        ###############
                        
                        
                        # Exponential moving average of gradient values
                        state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format, device=p.device)
                        # Exponential moving average of squared gradient values
                        state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format, device=p.device)
                        if group['amsgrad']:
                            # Maintains max of all exp. moving avg. of sq. grad. values
                            state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)

                    exp_avgs.append(state['exp_avg'])
                    exp_avg_sqs.append(state['exp_avg_sq'])

                    if group['amsgrad']:
                        max_exp_avg_sqs.append(state['max_exp_avg_sq'])

                    state_steps.append(state['step'])

            cadam(params_with_grad,
                 grads,
                 exp_avgs,
                 exp_avg_sqs,
                 max_exp_avg_sqs,
                 state_steps,
                 amsgrad=group['amsgrad'],
                 beta1=beta1,
                 beta2=beta2,
                 lr=group['lr'],
                 weight_decay=group['weight_decay'],
                 eps=group['eps'],
                 maximize=group['maximize'],
                 foreach=group['foreach'],
                 capturable=group['capturable'])

        return loss


def cadam(params: List[Tensor],
         grads: List[Tensor],
         exp_avgs: List[Tensor],
         exp_avg_sqs: List[Tensor],
         max_exp_avg_sqs: List[Tensor],
         state_steps: List[Tensor],
         # kwonly args with defaults are not supported by functions compiled with torchscript issue #70627
         # setting this as kwarg for now as functional API is compiled by torch/distributed/optim
         foreach: bool = None,
         capturable: bool = False,
         *,
         amsgrad: bool,
         beta1: float,
         beta2: float,
         lr: float,
         weight_decay: float,
         eps: float,
         maximize: bool):
    r"""Functional API that performs Adam algorithm computation.
    See :class:`~torch.optim.Adam` for details.
    """

    if not all([isinstance(t, torch.Tensor) for t in state_steps]):
        raise RuntimeError("API has changed, `state_steps` argument must contain a list of singleton tensors")

    if foreach is None:
        # Placeholder for more complex foreach logic to be added when value is not set
        foreach = False

    if foreach and torch.jit.is_scripting():
        raise RuntimeError('torch.jit.script not supported with foreach optimizers')

    if foreach and not torch.jit.is_scripting():
        ### CHANGED ###
        #func = _multi_tensor_adam
        raise NotImplementedError()
        ###############
    else:
        func = _single_tensor_cadam

    func(params,
         grads,
         exp_avgs,
         exp_avg_sqs,
         max_exp_avg_sqs,
         state_steps,
         amsgrad=amsgrad,
         beta1=beta1,
         beta2=beta2,
         lr=lr,
         weight_decay=weight_decay,
         eps=eps,
         maximize=maximize,
         capturable=capturable)

@torch.jit.script
def _cadam_calcs(param, grad, exp_avg, exp_avg_sq, step, beta1: float, beta2: float, lr: float, eps: float, weight_decay: float):
    step.add_(1)
    if weight_decay != 0:
        grad = grad.add(param, alpha=weight_decay)
        
    exp_avg = exp_avg.mul(beta1).add(grad, alpha=1 - beta1)
    exp_avg_sq = exp_avg_sq.mul(beta2).addcmul(grad, grad.conj(), value=1 - beta2)
    bias_correction1 = 1 - beta1 ** step
    bias_correction2 = 1 - beta2 ** step
    step_size = lr / bias_correction1
    bias_correction2_sqrt = bias_correction2.sqrt()
    denom = (exp_avg_sq.sqrt() / bias_correction2_sqrt).add(eps)
    return (exp_avg / denom).mul(step_size)

def _single_tensor_cadam(params: List[Tensor],
                        grads: List[Tensor],
                        exp_avgs: List[Tensor],
                        exp_avg_sqs: List[Tensor],
                        max_exp_avg_sqs: List[Tensor],
                        state_steps: List[Tensor],
                        *,
                        amsgrad: bool,
                        beta1: float,
                        beta2: float,
                        lr: float,
                        weight_decay: float,
                        eps: float,
                        maximize: bool,
                        capturable: bool):
    
    for i, param in enumerate(params):

        grad = grads[i] if not maximize else -grads[i]
        exp_avg = exp_avgs[i]
        exp_avg_sq = exp_avg_sqs[i]
        step_t = state_steps[i]

        if capturable:
            assert param.is_cuda and step_t.is_cuda, "If capturable=True, params and state_steps must be CUDA tensors."
            
        param.sub_(_cadam_calcs(param, grad, exp_avg, exp_avg_sq, step_t, beta1, beta2, lr, eps, weight_decay))

In [232]:
from typing import Dict, Callable, Tuple
import torch
import math
from stable_baselines3.common.utils import get_device

@torch.jit.script
def _hook_calcs(cbp_vals: Dict[str, torch.Tensor], out: torch.Tensor, eta: torch.Tensor):
    # NOTE Seems CBP is only described for sequential input with gradient updates at each step.
    #      Since PPO is based on batched environment data, changes have to be made
    #      I will therefore work with means over the baches
    cbp_vals['age'].add_(1)
    cbp_vals['h'] = out.mean(0).detach_()
    cbp_vals['fhat'] = cbp_vals['f'] / (1 - eta**cbp_vals['age'])
    cbp_vals['f'].mul_(eta).add_((1-eta)*cbp_vals['h'])
    
@torch.jit.ignore
def sample_weights(size: Tuple[int, int], device: torch.device):
    sample = torch.empty(size, device=device)
    torch.nn.init.kaiming_uniform_(sample, a=math.sqrt(5))
    return sample
    
@torch.jit.script
def _step_calcs(cbp_vals: Dict[str, torch.Tensor],
                pre_state: Dict[str, torch.Tensor],
                post_state: Dict[str, torch.Tensor],
                pre_linear: torch.nn.Parameter,
                post_linear: torch.nn.Parameter,
                eta: float, m: int, rho: float, eps: float,
                #sample_weights: torch.jit.ScriptFunction # callables not supported jet
               ):
    pre_w = pre_linear.abs().sum(1).detach_().add_(eps) # avoid division by zero
    post_w = post_linear.abs().sum(0).detach_()
    
    y = (cbp_vals['h'] - cbp_vals['fhat']).abs_().mul_(post_w).div_(pre_w)
    cbp_vals['u'].mul_(eta).add_((1-eta)*y)
    
    uhat = cbp_vals['u'] / (1 - eta**cbp_vals['age'])
    
    eligible = cbp_vals['age'] > m
    if eligible.any() and torch.rand(1) < len(uhat)*rho:  # use n_l* rho as a probability of replacing a single feature
        ascending = uhat.argsort()
        r = ascending[eligible[ascending]]   # sort eligible indices according to their utility
        #r = r[:math.ceil(uhat.shape[0]*self.rho)]  # choose top k worst performing features    # using ceil because otherwise nothing ever gets reset int(256*10**-4)=0
        r = r[[0]]  # choose the worst feature
        
        pre_linear.index_copy_(0, r, sample_weights((len(r), pre_linear.shape[1]), device=pre_linear.device))
        post_linear.index_fill_(1, r, 0.)
        
        cbp_vals['u'].index_fill_(0, r, 0.)
        cbp_vals['f'].index_fill_(0, r, 0.)
        cbp_vals['age'].index_fill_(0, r, 0)
        
        ### Adam resets
        pre_state['step'].index_fill_(0, r, 0)
        pre_state['exp_avg'].index_fill_(0, r, 0.)
        pre_state['exp_avg_sq'].index_fill_(0, r, 0.)
        
        post_state['step'].index_fill_(1, r, 0)
        post_state['exp_avg'].index_fill_(1, r, 0.)
        post_state['exp_avg_sq'].index_fill_(1, r, 0.)

class CBP(CAdam):
    '''
    Open questions:
        How should batches be dealth with?
            For now I calculate the mean over the batch and handle that like in the sequential case
        How many features are actually replaced every iteration? Their n_l and rho don't seem to work, as 256 * 10**-4 < 1. Is this supposed to be a probability?
            # For now math.ceil is used, so every iteration 1 usit is replaced. This doesn't make sense, since when n_l < m then the features are just replaced in order as they mature.
            Changed to using n_l * rho as a probability of replacing the worst performing feature
    '''
    def __init__(self,
                 params,                 # all parameters to be optimized by Adam
                 linear_layers,          # List[List[Linear]], a list of linearities for each separate network (policy, value, ...), in the order they are executed
                 activation_layers,      # List[List[Activation]], a list of activation layers for each separate network (policy, value, ...), in the order they are executed. Forward hooks are added to these
                 output_linears,         # List[Linear], a list of each network's last Linear layer
                 eta=0.99,               # running average discount factor
                 m=int(5e3),             # maturity threshold, only features with age > m are elligible to be replaced
                 rho=10**-4,             # replacement rate, controls how frequently features are replaced                                                    # TODO: change description
                 sample_weights=None,    # functiion, take size and device as input and return a tensor of the given size with newly initialized weights
                 eps=1e-8,               # small additive value to avoid division by zero
                 device = 'auto',
                 **kwargs):
        super(CBP, self).__init__(params, **kwargs)
        self.linear_layers = linear_layers
        self.activation_layers = activation_layers
        self.cbp_vals = {}
        self.output_linears = output_linears
        self.eta = eta
        self.m = m
        self.rho = rho
        
        self.dev = get_device(device)
        
        assert len(self.linear_layers) == len(self.activation_layers)
        for linears, activations in zip(self.linear_layers, self.activation_layers):
            self._add_hooks(linears, activations)
        
        if sample_weights is None:
            def sample_weights(size, device):
                sample = torch.empty(size, device=device)
                torch.nn.init.kaiming_uniform_(sample, a=math.sqrt(5))
                return sample
        self.sample_weights = sample_weights
        self.eps = eps
    
    @torch.no_grad()
    def step(self):
        super(CBP, self).step()
        for linears, output_linear in zip(self.linear_layers, self.output_linears): # cycle through models
            for current_linear, next_linear in zip(linears, linears[1:] + [output_linear]): # cycle through layers
                cbp_vals = self.cbp_vals[current_linear]
                pre_state = self.state[current_linear.weight]
                post_state = self.state[next_linear.weight]
                
                _step_calcs(cbp_vals, pre_state, post_state, current_linear.weight, next_linear.weight, self.eta, self.m, self.rho, self.eps) #self.sample_weights)
        
    def _hook_gen(self, linear_layer):
        num_units = linear_layer.weight.shape[0]
        self.cbp_vals[linear_layer] = {
            'age':  torch.zeros(num_units, dtype=int, device=self.dev), 
            'h':    torch.zeros(num_units, device=self.dev),
            'f':    torch.zeros(num_units, device=self.dev),
            'fhat': torch.zeros(num_units, device=self.dev),
            'u':    torch.zeros(num_units, device=self.dev)
        }
        
        def hook(mod, inp, out):
            if mod.training:
                cbp_vals = self.cbp_vals[linear_layer]
                with torch.no_grad():
                    _hook_calcs(cbp_vals, out, self.eta)
        return hook
    
    def _add_hooks(self, linears, activations):
        assert len(linears) == len(activations
                                  )
        for lin, act in zip(linears, activations):
            act.register_forward_hook(self._hook_gen(lin))

In [233]:
from stable_baselines3.ppo.policies import ActorCriticPolicy

class CPPO_Policy(ActorCriticPolicy):
    def __init__(
        self,
        observation_space,
        action_space,
        lr_schedule,
        **kwargs
    ):
        super(CPPO_Policy, self).__init__(observation_space, action_space, lr_schedule, **(kwargs|{'optimizer_kwargs':{}})) # remove optimizer_kwargs, as __init__ initializes Adam with them, which throws errors
        self.optimizer_kwargs = kwargs['optimizer_kwargs']
        assert len(self.mlp_extractor.shared_net) == 0, 'no shared layers between policy and value function allowed' # not used in the paper, might try to implement it later
        
        policy_linears, policy_activations = self._handle_sequential(self.mlp_extractor.policy_net)
        value_linears, value_activations = self._handle_sequential(self.mlp_extractor.value_net)
        
        self.optimizer = CBP(self.parameters(),
                             linear_layers=[policy_linears, value_linears],
                             activation_layers=[policy_activations, value_activations],
                             output_linears=[self.action_net, self.value_net],
                             lr=lr_schedule(1),
                             **self.optimizer_kwargs)
    
    def _handle_sequential(self, sequential):
        linears = []
        activations = []
        for i, layer in enumerate(sequential):
            if i%2 == 0: # Linear Layer
                linears.append(layer)
            else:        # Activation Layer
                activations.append(layer)
        return linears, activations

In [81]:
from cbp import CAdam as oCAdam

In [84]:
data = torch.rand(200,200,requires_grad=True).cuda()
target = torch.rand(200,200).cuda()

In [85]:
param = torch.nn.Parameter(data)
loss = (data - target).sum()

opt = Adam([param])
opt.zero_grad()
loss.backward()
opt.step()
orig_res = param.data.clone()

In [86]:
param = torch.nn.Parameter(data)
loss = (data - target).sum()

opt = oCAdam([param])
opt.zero_grad()
loss.backward()
opt.step()
ocadam_res = param.data.clone()

In [87]:
param = torch.nn.Parameter(data)
loss = (data - target).sum()

opt = CAdam([param])
opt.zero_grad()
loss.backward()
opt.step()
cadam_res = param.data.clone()

In [93]:
param = torch.nn.Parameter(data)
loss = (data - target).sum()

opt = Adam([param])
opt.zero_grad()
loss.backward()

In [94]:
%%timeit
opt.step()

28 µs ± 1.12 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [95]:
param = torch.nn.Parameter(data)
loss = (data - target).sum()

opt = oCAdam([param])
opt.zero_grad()
loss.backward()

In [96]:
%%timeit
opt.step()

28 µs ± 2.12 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [97]:
param = torch.nn.Parameter(data)
loss = (data - target).sum()

opt = CAdam([param])
opt.zero_grad()
loss.backward()

In [98]:
%%timeit
opt.step()

26.4 µs ± 677 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [88]:
torch.allclose(orig_res, ocadam_res)

True

In [89]:
torch.allclose(orig_res, cadam_res)

True

In [49]:
beta = 0.9

In [50]:
%%timeit
1 - beta**3

262 ns ± 98.7 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [51]:
step = torch.full((50,50), 3).cuda()

In [52]:
%%timeit
1 - beta**step

123 µs ± 3.93 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [53]:
t_beta = torch.tensor(beta, device='cuda')

In [59]:
%%timeit
1 - t_beta**step

27.8 µs ± 482 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [61]:
prev = t_beta**(step-1)

In [62]:
%%timeit
1 - t_beta * prev

26.2 µs ± 631 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [101]:
one = torch.tensor(1, device='cuda')

In [106]:
%%timeit
one - t_beta**step

24.8 µs ± 127 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [69]:
@torch.jit.script
def calcs(param, grad, exp_avg, exp_avg_sq, step, beta1: float, beta2: float, lr: float, eps: float, weight_decay: float):
    step.add_(1)
    if weight_decay != 0:
        grad = grad.add(param, alpha=weight_decay)
        
    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
    exp_avg_sq.mul_(beta2).addcmul_(grad, grad.conj(), value=1 - beta2)
    bias_correction1 = (beta1 ** step).neg_().add_(1)
    bias_correction2 = (beta2 ** step).neg_().add_(1)
    step_size = lr / bias_correction1
    bias_correction2_sqrt = bias_correction2.sqrt_()
    denom = (exp_avg_sq.sqrt() / bias_correction2_sqrt).add_(eps)
    return (exp_avg / denom).mul_(step_size)

In [68]:
data = torch.rand(100,100, device='cuda')
param = torch.nn.Parameter(data, requires_grad=False)
grad = torch.rand_like(data, device='cuda')
exp_avg = torch.rand_like(data, device='cuda')
exp_avg_sq = torch.rand_like(data, device='cuda')
step = torch.full_like(data, 3, device='cuda')
beta1 = 0.9
beta2 = 0.999
lr = 0.0001
eps = 1e-8
weight_decay = 0

In [73]:
%%timeit
_cadam_calcs(param, grad, exp_avg, exp_avg_sq, step, beta1, beta2, lr, eps, weight_decay)

44.9 µs ± 629 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [70]:
%%timeit
calcs(param, grad, exp_avg, exp_avg_sq, step, beta1, beta2, lr, eps, weight_decay)

134 µs ± 1.79 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


207 µs ± 84.3 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [74]:
torch.allclose(_cadam_calcs(param, grad, exp_avg, exp_avg_sq, step, beta1, beta2, lr, eps, weight_decay), calcs(param, grad, exp_avg, exp_avg_sq, step, beta1, beta2, lr, eps, weight_decay))

True

In [234]:
ppo = PPO(CPPO_Policy, env, seed=42, **settings)
ppo.set_logger(DummyLogger())

In [235]:
ppo.rollout_buffer = buff

In [236]:
%%time
ppo.train()

CPU times: total: 23.9 s
Wall time: 23.9 s


CPU times: total: 23.6 s
Wall time: 23.6 s


In [377]:
(39-23.6)/39

0.39487179487179486

CPU times: total: 26.4 s
Wall time: 26.4 s


In [310]:
+hook jit script

CPU times: total: 26.7 s
Wall time: 26.9 s


In [304]:
(39-25)/39

0.358974358974359

In [246]:
jit cadam

CPU times: total: 27.5 s
Wall time: 27.6 s


In [247]:
(39-27)/39

0.3076923076923077

In [87]:
%%timeit
with torch.no_grad():
    pr = ppo.policy.optimizer.linear_layers[0][0].weight.abs().sum(1).detach_().add_(1e-4)

50.8 µs ± 721 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [92]:
pr = ppo.policy.optimizer.linear_layers[0][0].weight.abs().sum(1).detach_().add_(1e-4)

In [86]:
%%timeit
with torch.no_grad():
    po = ppo.policy.optimizer.linear_layers[0][1].weight.abs().sum(0).detach_()

36.7 µs ± 1.43 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [93]:
po = ppo.policy.optimizer.linear_layers[0][1].weight.abs().sum(0).detach_()

In [89]:
cbp_vals = ppo.policy.optimizer.cbp_vals[ppo.policy.optimizer.linear_layers[0][0]]
cbp_u = deepcopy(cbp_vals['u'])

In [90]:
cbp_vals['u'] = cbp_u

In [94]:
%%timeit
with torch.no_grad():
    y = (cbp_vals['h'] - cbp_vals['fhat']).abs_().mul_(po).div_(pr)

33.8 µs ± 1.54 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [95]:
y = (cbp_vals['h'] - cbp_vals['fhat']).abs_().mul_(po).div_(pr)

In [96]:
%%timeit cbp_vals['u'] = cbp_u
with torch.no_grad():
    cbp_vals['u'].mul_(1e-4).add_((1-1e-4)*y)

33.5 µs ± 274 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [97]:
150 * 4

600

In [65]:
ppo

CPU times: total: 16.7 s
Wall time: 16.8 s


In [61]:
cppo start

CPU times: total: 39 s
Wall time: 39.2 s


In [50]:
cProfile.run('', './speed/cppo_train_stats')

In [51]:
cppo_stats = pstats.Stats('./speed/cppo_train_stats')

In [98]:
cppo_stats.print_stats('step')

Sat Nov 26 10:20:37 2022    ./speed/cppo_train_stats

         5066660 function calls (4813187 primitive calls) in 42.098 seconds

   Random listing order was used
   List reduced from 194 to 2 due to restriction <'step'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     2560    2.064    0.001   23.391    0.009 E:\JKU Bachelor\Practical Work - Continual Backprop\Continual-Backprop\cbp.py:51(step)
     2560    0.100    0.000   17.671    0.007 E:\JKU Bachelor\Practical Work - Continual Backprop\Continual-Backprop\cadam.py:12(step)




<pstats.Stats at 0x21bae6216a0>

In [99]:
ppo_stats.print_stats('step')

Sat Nov 26 09:48:46 2022    ./speed/ppo_stats

         5969307 function calls (5692247 primitive calls) in 41.532 seconds

   Random listing order was used
   List reduced from 693 to 16 due to restriction <'step'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     4136    0.021    0.000   11.392    0.003 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\stable_baselines3\common\vec_env\base_vec_env.py:154(step)
       40    0.000    0.000    0.000    0.000 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py:38(step_async)
       40    0.000    0.000    0.049    0.001 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py:41(step_wait)
     2560    0.095    0.000    3.815    0.001 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\torch\optim\adam.py:105(step)
16384/4096    0.028    0.000    0.036    0.000 C:\Users\chris\anaconda3\envs\cbp\lib\site-packages\st

<pstats.Stats at 0x21b072be640>

In [40]:
import torch

In [41]:
alpha = torch.tensor(0.5)

In [48]:
def add(a):
    return torch.zeros(5, 5).add_(torch.ones(5, 5), alpha=a)

In [49]:
add(alpha)

tensor([[0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000]])

In [58]:
@torch.jit.script
def add(a):
    return torch.zeros(5, 5).add_(torch.ones(5, 5), alpha=a)

In [59]:
add(alpha)

RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
  File "C:\Users\chris\AppData\Local\Temp\ipykernel_14680\1274348814.py", line 3, in add
@torch.jit.script
def add(a):
    return torch.zeros(5, 5).add_(torch.ones(5, 5), alpha=a)
           ~~~~~~~~~~~~~~~~~~~~~ <--- HERE
RuntimeError: Cannot input a tensor of dimension other than 0 as a scalar argument


In [54]:
@torch.jit.script
def add(a):
    return torch.zeros(5, 5).add(torch.ones(5, 5), alpha=a)

In [55]:
add(alpha)

tensor([[0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000]])

In [52]:
@torch.jit.script
def add(a: float):
    return torch.zeros(5, 5).add_(torch.ones(5, 5), alpha=a)

In [53]:
add(alpha)

tensor([[0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.5000, 0.5000]])