In [1]:
import torch

from zeroshotrl.rl_agents.ppo.ppo_end_to_end_relu_stack_align import Agent, FeatureExtractor, Policy

In [2]:
# random values for the policy
enc = FeatureExtractor(use_relative=False,
        pretrained=False, anchors_alpha=0.99, anchors_alpha_min=0.01, anchors_alpha_max=0.999, device="cpu")
data = torch.randn(1, 4, 3, 84, 84)
hid = enc(data)
num_actions = 4
pol = Policy(num_actions=num_actions, stack_n = 4)

In [3]:
agent = Agent(enc, pol, translation=None, num_envs=1, num_stack=4)

In [4]:
# Check if any parameter in the model requires gradients
def check_requires_grad(model):
    return any(param.requires_grad for param in model.parameters())

# Print whether the policy has requires_grad=True
print("Policy requires grad:", check_requires_grad(pol))

# Print whether the feature extractor has requires_grad=True
print("Feature extractor requires grad:", check_requires_grad(enc))

# Print whether the agent has requires_grad=True
print("Agent requires grad:", check_requires_grad(agent))

# Print whether encoder is in training mode
print("Encoder is in training mode:", enc.training)

# Print whether policy is in training mode
print("Policy is in training mode:", pol.training)

# Print whether agent is in training mode
print("Agent is in training mode:", agent.training)

Policy requires grad: True
Feature extractor requires grad: True
Agent requires grad: True
Encoder is in training mode: True
Policy is in training mode: True
Agent is in training mode: True


In [5]:
# set enc and pol to eval mode and not require gradients
enc.train()
pol.train()
for param in enc.parameters():
    param.requires_grad = False
for param in pol.parameters():
    param.requires_grad = True

# Check if any parameter in the model requires gradients
def check_requires_grad(model):
    return any(param.requires_grad for param in model.parameters())

# Print whether the policy has requires_grad=True
print("Policy requires grad:", check_requires_grad(pol))

# Print whether the feature extractor has requires_grad=True
print("Feature extractor requires grad:", check_requires_grad(enc))

# Print whether the agent has requires_grad=True
print("Agent requires grad:", check_requires_grad(agent))

# Print whether encoder is in training mode
print("Encoder is in training mode:", enc.training)

# Print whether policy is in training mode
print("Policy is in training mode:", pol.training)

# Print whether agent is in training mode
print("Agent is in training mode:", agent.training)

Policy requires grad: True
Feature extractor requires grad: False
Agent requires grad: True
Encoder is in training mode: True
Policy is in training mode: True
Agent is in training mode: True


In [10]:
import copy

# Assuming agent, encoder1, and policy2 are already instantiated

# Create deep copies of the modules
eval_enc = copy.deepcopy(enc)
eval_pol = copy.deepcopy(pol)

agent_eval = Agent(eval_enc, eval_pol, translation=None, num_envs=1, num_stack=4)
agent_eval.eval()

# Print whether the policy has requires_grad=True
print("Policy requires grad:", check_requires_grad(agent_eval.eval_enc))

# Print whether the feature extractor has requires_grad=True
print("Feature extractor requires grad:", check_requires_grad(agent_eval.eval_enc))

# Print whether the agent has requires_grad=True
print("Agent requires grad:", check_requires_grad(agent_eval.agent))

# Print whether encoder is in training mode
print("Encoder is in training mode:", agent_eval.eval_enc.training)

# Print whether policy is in training mode
print("Policy is in training mode:", agent_eval.eval_pol.training)

# Print whether agent is in training mode
print("Agent is in training mode:", agent_eval.training)

AttributeError: 'Agent' object has no attribute 'eval_enc'

In [7]:
# Check if any parameter in the model requires gradients
def check_requires_grad(model):
    return any(param.requires_grad for param in model.parameters())

# Print whether the policy has requires_grad=True
print("Policy requires grad:", check_requires_grad(pol))

# Print whether the feature extractor has requires_grad=True
print("Feature extractor requires grad:", check_requires_grad(enc))

# Print whether the agent has requires_grad=True
print("Agent requires grad:", check_requires_grad(agent))

# Print whether encoder is in training mode
print("Encoder is in training mode:", enc.training)

# Print whether policy is in training mode
print("Policy is in training mode:", pol.training)

# Print whether agent is in training mode
print("Agent is in training mode:", agent.training)

# Print whether agent is in training mode
print("Eval Agent is in training mode:", agent_eval.training)

Policy requires grad: True
Feature extractor requires grad: False
Agent requires grad: True
Encoder is in training mode: True
Policy is in training mode: True
Agent is in training mode: True
Eval Agent is in training mode: False


In [8]:
agent_eval.load_state_dict(agent.state_dict())


# Print whether the policy has requires_grad=True
print("Policy requires grad:", check_requires_grad(pol))

# Print whether the feature extractor has requires_grad=True
print("Feature extractor requires grad:", check_requires_grad(enc))

# Print whether the agent has requires_grad=True
print("Agent requires grad:", check_requires_grad(agent))

# Print whether encoder is in training mode
print("Encoder is in training mode:", enc.training)

# Print whether policy is in training mode
print("Policy is in training mode:", pol.training)

# Print whether agent is in training mode
print("Agent is in training mode:", agent.training)

# Print whether agent is in training mode
print("Eval Agent is in training mode:", agent_eval.training)

Policy requires grad: True
Feature extractor requires grad: False
Agent requires grad: True
Encoder is in training mode: True
Policy is in training mode: True
Agent is in training mode: True
Eval Agent is in training mode: False
