### Step into correct dir

In [1]:
# Switch the kernel's working directory to jku.wad; the relative file names used by
# later cells (checkpoints, ONNX files, seed.rnd) are resolved from there.
# NOTE: %cd is relative, so run this cell only once per kernel session.
%cd jku.wad

C:\Users\Benjamin\Documents\GitHub\DRL_DOOM\jku.wad


### Imports

In [2]:
from typing import Dict, Sequence, Tuple
import os
import torch
from collections import deque, OrderedDict
import collections
from copy import deepcopy
import random
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
from matplotlib import pyplot as plt
from PIL import Image
from datetime import datetime

from gym import Env
from torch import nn
from einops import rearrange

from doom_arena import VizdoomMPEnv
from doom_arena.reward import VizDoomReward
from doom_arena.render import render_episode
from IPython.display import HTML
from vizdoom import ScreenFormat
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [3]:
class LecturerReward(VizDoomReward):
    """Shaped reward computed from per-step changes of VizDoom game variables."""

    def __init__(self, num_players: int):
        super().__init__(num_players)

    def __call__(
        self,
        vizdoom_reward: float,
        game_var: Dict[str, float],
        game_var_old: Dict[str, float],
        player_id: int,
    ) -> Tuple[float, float, float]:
        """
        Return the three reward components for one step.

        Components, in the order they are returned:
        *  hits landed:  +2    per new HITCOUNT
        *  hits taken:   -0.1  per new HITS_TAKEN
        *  frags:        +100  per new FRAGCOUNT

        ``vizdoom_reward`` and ``player_id`` are accepted for interface
        compatibility but are not used.
        """
        # Step counter; the attribute itself is expected to come from the base class.
        self._step += 1

        def gained(key: str) -> float:
            # Change of one game variable since the previous step.
            return game_var[key] - game_var_old[key]

        hit_bonus = 2.0 * gained("HITCOUNT")
        hit_taken_penalty = -0.1 * gained("HITS_TAKEN")
        frag_bonus = 100.0 * gained("FRAGCOUNT")

        return hit_bonus, hit_taken_penalty, frag_bonus

## Environment Config

In [4]:
# Colour mode switch: False (current value) keeps colour frames, True selects grayscale.
USE_GRAYSCALE = False # flip to True for grayscale

# Player configuration. It is passed to the environment constructor, read by the
# evaluation helpers (make_env / Agent) and stored as JSON metadata by onnx_dump,
# so every value must stay JSON-serialisable.
PLAYER_CONFIG = {
    # Anything other than "POLICY" makes Agent.select_action take the argmax of the model output.
    "algo_type": "QVALUE",
    "n_stack_frames": 1,
    "extra_state": ["depth"],
    "hud": "none",
    "crosshair": True,
    # Integer screen-format code: 8 for grayscale, 0 otherwise.
    # NOTE(review): presumably vizdoom.ScreenFormat.GRAY8 / CRCGCB — confirm against the ViZDoom enum.
    "screen_format": 8 if USE_GRAYSCALE else 0
}

In [5]:
# Environment training parameters.
# Values that also live in PLAYER_CONFIG are read from it, so the two definitions
# cannot drift apart.
N_STACK_FRAMES = PLAYER_CONFIG["n_stack_frames"]
NUM_BOTS = 4
# NOTE(review): the environment cells pass episode_timeout=2000 literally and never
# read this constant — confirm which value is intended.
EPISODE_TIMEOUT = 1000

SCREEN_CHANNELS = 1 if USE_GRAYSCALE else 3
EXTRA_STATE = PLAYER_CONFIG["extra_state"]
HUD = PLAYER_CONFIG["hud"]
CROSSHAIR = PLAYER_CONFIG["crosshair"]
ACTION_SPACE = 7

# Model hyperparameters.
GAMMA = 0.997
EPISODES = 2000
BATCH_SIZE = 64
REPLAY_BUFFER_SIZE = 30000
LEARNING_RATE = 3e-4
# Epsilon-greedy exploration schedule: start value, floor, and decay factor.
EPSILON_START = 1.0
EPSILON_END = 0.1
EPSILON_DECAY = 0.999
N_EPOCHS = 1

In [6]:
# Compute device for the networks: CUDA when available, otherwise CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [7]:
# Floating-point type used for the networks.
DTYPE = torch.float32

# Integer seed for the training environment.
ENV_SEED = 1337

# Single-player environment with bots on the "ROOM" map.
env = VizdoomMPEnv(
    num_players=1,
    num_bots=NUM_BOTS,
    bot_skill=0,
    doom_map="ROOM",
    # NOTE(review): EPISODE_TIMEOUT (1000) is defined in the config cell but 2000 is
    # used here — confirm which value is intended.
    episode_timeout=2000,
    screen_format=PLAYER_CONFIG["screen_format"],
    n_stack_frames=PLAYER_CONFIG["n_stack_frames"],
    extra_state=EXTRA_STATE,
    hud=PLAYER_CONFIG["hud"],
    crosshair=PLAYER_CONFIG["crosshair"],
    # Previously the whole PLAYER_CONFIG dict was passed as the seed.
    seed=ENV_SEED,
)

Host 52287
Player 52287


## Trying to restore the good model and save

### Utility to load onnx and save with new config

In [73]:
# Load an exported ONNX file and convert it back into a torch module via onnx2pytorch.
# NOTE(review): the file loaded here is written by the migration cell further down
# (NEW_ONNX_PATH), so this cell fails on a fresh Restart & Run All until that cell has run.
#model_time = curr_time
import onnx
import onnx2pytorch as o2t
# NOTE(review): model_time is not used by the active model_path below, which hard-codes
# a different timestamp ("..._40").
model_time = "2025_06_14 11_07_42"
#model_path = f"model_{model_time}.onnx"
model_path = "migrated_model_for_eval_2025_06_14 11_07_40.onnx"
#model_path = "restored_model_2025_06_14 11_07_40.onnx"

# Parse the ONNX protobuf from disk.
loaded_model = onnx.load(model_path)

# Rebuild a torch module from the ONNX graph; loaded_torch_model is not referenced
# again in the cells visible here.
loaded_torch_model = o2t.ConvertModel(loaded_model)

In [8]:
# ================================================================
# DQN — design your network here
# ================================================================

class DQN(nn.Module):
    """
    Convolutional Deep-Q Network.

    forward(frame)      # frame: (B, C, H, W)  →  Q-values: (B, action_space)

    Architecture
    ------------
    ``encoder``: three Conv2d + ReLU stages (8x8/4, 4x4/2, 3x3/1).
    ``head``:    Flatten → Linear(hidden) → ReLU → Linear(action_space).

    Parameter names (``encoder.N.*`` / ``head.N.*``) are unchanged, so state dicts
    saved from earlier versions of this class still load.

    Parameters
    ----------
    input_dim    : number of input channels C.
    action_space : number of discrete actions (size of the output).
    hidden       : width of the hidden Linear layer.
    input_hw     : optional (H, W) of the input frames. When given, the flattened
                   encoder size is derived from it. When None, the legacy size
                   64 * 12 * 12 is used, which fits e.g. 128x128 frames.
    """

    def __init__(
        self,
        input_dim: int,
        action_space: int,
        hidden: int = 128,
        input_hw: Sequence[int] = None,
    ):
        super().__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(input_dim, 32, 8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, 4, stride=2),       nn.ReLU(),
            nn.Conv2d(64, 64, 3, stride=1),       nn.ReLU(),
        )

        if input_hw is None:
            # Legacy default kept for backward compatibility with existing checkpoints.
            flat_dim = 64 * 12 * 12
        else:
            # Push a dummy frame through the encoder to measure its output size.
            height, width = input_hw
            with torch.no_grad():
                flat_dim = self.encoder(torch.zeros(1, input_dim, height, width)).numel()

        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, action_space),
        )

        self._init_weights()

    def _init_weights(self):
        """Kaiming-normal init for conv/linear weights, zeros for their biases."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, frame: torch.Tensor) -> torch.Tensor:
        """Map a batch of frames (B, C, H, W) to Q-values (B, action_space)."""
        x = self.encoder(frame)
        x = self.head(x)
        return x


In [9]:
# ================================================================
# Initialise your networks and training utilities
# ================================================================
# Set to True to warm-start the Q-network from the checkpoint below.
load = False

# Main Q-network. The channel count is read from the environment's observation space.
in_channels = env.observation_space.shape[0]
model = DQN(
    input_dim    = in_channels,
    action_space = env.action_space.n,
    hidden       = 64,
).to(device, dtype=DTYPE)
if load:
    # map_location lets a checkpoint saved on another device (e.g. a GPU run)
    # be loaded on whatever device this session uses.
    model.load_state_dict(
        torch.load("save_curr_model_2025_06_13 23_34_15.pt", map_location=device)
    )

# Target network: starts as a hard copy of the main network.
model_tgt  = deepcopy(model).to(device)

# Only the main network's parameters are optimised.
optimizer  = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# NOTE(review): the learning-rate decay factor is taken from GAMMA. If GAMMA is also
# used as the RL discount factor in the training loop (not visible here), give the
# scheduler its own constant so the two can be tuned independently.
scheduler  = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=GAMMA)

# Bounded FIFO replay buffer: the oldest entries are dropped once maxlen is reached.
replay_buffer = collections.deque(maxlen=REPLAY_BUFFER_SIZE)


In [108]:
# ---------------------------------------------------------------
# 🧭 model save
# ---------------------------------------------------------------

import onnx
import json


def onnx_dump(env, model, config, filename: str):
    """
    Export ``model`` to an ONNX file and attach ``config`` as JSON metadata.

    Parameters
    ----------
    env      : environment; ``env.reset()[0]`` supplies the example input for the export.
    model    : torch module to export. A CPU copy is exported, so the caller's
               model keeps its device.
    config   : JSON-serialisable object stored under the metadata key "config".
    filename : path of the ONNX file to write.

    Side effects: resets ``env`` and writes ``filename`` (export, then a re-save
    with the metadata entry added).

    NOTE: this notebook defines ``onnx_dump`` in two cells; whichever cell ran last
    is the definition in effect.
    """
    # Dummy state: first element of the reset result with a leading batch axis.
    init_state = env.reset()[0].unsqueeze(0).cpu()

    # nn.Module.cpu() moves a module in place, so export a copy instead of
    # silently relocating the caller's (possibly still training) model.
    export_model = deepcopy(model).cpu()

    # Export to ONNX with a dynamic batch dimension on input and output.
    torch.onnx.export(
        export_model,
        args=init_state,
        f=filename,
        export_params=True,
        opset_version=11,
        do_constant_folding=True,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
    )

    # Re-open the exported file to add the config as a metadata property.
    onnx_model = onnx.load(filename)

    meta = onnx_model.metadata_props.add()
    meta.key = "config"
    meta.value = json.dumps(config)

    onnx.save(onnx_model, filename)

In [10]:
# Timestamp that identifies the checkpoint to restore.
model_time = "2025_06_14 11_07_40"

# Rebuild the network with the same constructor arguments as the training model;
# the layer shapes must match the checkpoint for load_state_dict to succeed.
restored_model = DQN(
    input_dim    = in_channels,
    action_space = env.action_space.n,
    hidden       = 64,
).to(device, dtype=DTYPE)

# map_location lets a checkpoint saved on another device (e.g. a GPU run) load here.
restored_model.load_state_dict(
    torch.load(f"save_curr_model_{model_time}.pt", map_location=device)
)


<All keys matched successfully>

In [68]:
# Export the restored network to ONNX, with PLAYER_CONFIG embedded as metadata.
onnx_dump(env, restored_model, PLAYER_CONFIG, filename=f"restored_model_{model_time}.onnx")

In [39]:
class DQN_NEW(nn.Module):
    """
    Deep-Q Network with fixes for ONNX compatibility.

    Same layer stack as a Sequential-based DQN, but every layer is a named
    attribute (``conv1`` … ``fc2``) and flattening happens in ``forward`` via
    ``torch.flatten``.

    forward(frame)      # frame: (B, C, H, W)  →  Q-values: (B, action_space)

    Parameters
    ----------
    input_dim    : number of input channels C.
    action_space : number of discrete actions (size of the output).
    hidden       : width of the hidden Linear layer.
    input_hw     : optional (H, W) of the input frames. When given, the input size
                   of ``fc1`` is derived from it. When None, the legacy size
                   64 * 12 * 12 is used, which fits e.g. 128x128 frames.
    """
    def __init__(
        self,
        input_dim: int,
        action_space: int,
        hidden: int = 128,
        input_hw: Sequence[int] = None,
    ):
        super().__init__()
        self.conv1 = nn.Conv2d(input_dim, 32, 8, stride=4)
        self.ac1 = nn.ReLU()
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.ac2 = nn.ReLU()
        self.conv3 = nn.Conv2d(64, 64, 3, stride=1)
        self.ac3 = nn.ReLU()

        if input_hw is None:
            # Legacy default kept so existing checkpoints still fit fc1.
            flat_dim = 64 * 12 * 12
        else:
            # Push a dummy frame through the conv stack to measure its output size.
            height, width = input_hw
            with torch.no_grad():
                flat_dim = self._features(torch.zeros(1, input_dim, height, width)).numel()

        self.fc1 = nn.Linear(flat_dim, hidden)
        self.ac4 = nn.ReLU()
        self.fc2 = nn.Linear(hidden, action_space)

        self._init_weights()

    def _init_weights(self):
        """Kaiming-normal init for conv/linear weights, zeros for their biases."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def _features(self, frame: torch.Tensor) -> torch.Tensor:
        """Run the three conv + ReLU stages."""
        x = self.ac1(self.conv1(frame))
        x = self.ac2(self.conv2(x))
        x = self.ac3(self.conv3(x))
        return x

    def forward(self, frame: torch.Tensor) -> torch.Tensor:
        """Map a batch of frames (B, C, H, W) to Q-values (B, action_space)."""
        x = self._features(frame)
        x = torch.flatten(x, 1)  # keep the batch axis, flatten the rest
        x = self.ac4(self.fc1(x))
        x = self.fc2(x)
        return x

### Convert the trained model to the named-layer architecture (`DQN_NEW`), keeping all weights and biases

In [70]:
import torch
import torch.nn as nn
from collections import OrderedDict
import onnx
import json
# Make sure your utility functions like onnx_dump and get_timestamp are available
# and the environment 'env' is initialized.

# Timestamp that identifies the checkpoint to migrate.
model_time = "2025_06_14 11_07_40"

# --- PATHS TO YOUR FILES ---
OLD_STATE_DICT_PATH = f"save_curr_model_{model_time}.pt"        # existing .pt checkpoint (input)
NEW_ONNX_PATH = f"migrated_model_for_eval_{model_time}.onnx"    # ONNX file to write (output)

# 1. Instantiate the NEW, corrected model structure. It stays on the CPU and its
#    weights are filled in by the next cell.
# NOTE: this rebinds the notebook-level name `model`, replacing the network that
# was assigned to it earlier.
model = DQN_NEW(
    input_dim    = in_channels,
    action_space = env.action_space.n,
    hidden=64
)

In [71]:

# 2. Load the weights from your old trained model.
#    map_location="cpu": the new model lives on the CPU, and the checkpoint may
#    have been saved from a GPU run.
old_state_dict = torch.load(OLD_STATE_DICT_PATH, map_location="cpu")

# 3. Define the mapping from old Sequential names to new named layers
#    Old model names were like 'encoder.0', 'head.1', etc.
#    New model names are like 'conv1', 'fc1', etc.
key_mapping = {
    'encoder.0.weight': 'conv1.weight',
    'encoder.0.bias':   'conv1.bias',
    'encoder.2.weight': 'conv2.weight',  # Skip 'encoder.1' (ReLU)
    'encoder.2.bias':   'conv2.bias',
    'encoder.4.weight': 'conv3.weight',  # Skip 'encoder.3' (ReLU)
    'encoder.4.bias':   'conv3.bias',
    'head.1.weight':    'fc1.weight',    # Skip 'head.0' (Flatten)
    'head.1.bias':      'fc1.bias',
    'head.3.weight':    'fc2.weight',    # Skip 'head.2' (ReLU)
    'head.3.bias':      'fc2.bias',
}

# 4. Create a new state dictionary with the correct keys
new_state_dict = OrderedDict()
for old_key, value in old_state_dict.items():
    new_key = key_mapping.get(old_key)
    if new_key is None:
        # Safety check: unexpected keys are reported and left out.
        print(f"Warning: Key '{old_key}' not found in mapping. Skipping.")
        continue
    new_state_dict[new_key] = value

# 5. Load the newly constructed state_dict into the model
try:
    model.load_state_dict(new_state_dict)
    print("Successfully loaded weights into the new model structure!")
except RuntimeError as e:
    print("Error loading state dict. This might be due to a mismatch in layer shapes.")
    print("This can happen if your old model was trained with the incorrect linear layer size (e.g., 64*12*12).")
    print(f"Error details: {e}")
    # Stop here: continuing would export a model whose weights were never loaded.
    raise

# 6. Set the model to evaluation mode
model.eval()

# 7. Export the newly loaded and corrected model to ONNX for the evaluation script
#    Make sure the 'env' and 'PLAYER_CONFIG' variables are defined as in your training script.
print(f"Exporting corrected model to {NEW_ONNX_PATH}...")
onnx_dump(env, model, PLAYER_CONFIG, filename=NEW_ONNX_PATH)
print("Export complete. You can now use this ONNX file for evaluation.")

Successfully loaded weights into the new model structure!
Exporting corrected model to migrated_model_for_eval_2025_06_14 11_07_40.onnx...
Export complete. You can now use this ONNX file for evaluation.


In [66]:
def onnx_dump(env, model, config, filename: str):
    """
    Export ``model`` to an ONNX file (opset 20) and attach ``config`` as JSON metadata.

    This variant relies on the exporter's defaults for parameter export, input and
    output names, and axes (no dynamic batch axis is declared).

    Parameters
    ----------
    env      : environment; ``env.reset()[0]`` supplies the example input for the export.
    model    : torch module to export. A CPU copy is exported, so the caller's
               model keeps its device.
    config   : JSON-serialisable object stored under the metadata key "config".
    filename : path of the ONNX file to write.

    Side effects: resets ``env`` and writes ``filename`` (export, then a re-save
    with the metadata entry added).

    NOTE: this notebook defines ``onnx_dump`` in two cells; whichever cell ran last
    is the definition in effect.
    """
    # Dummy state: first element of the reset result with a leading batch axis.
    init_state = env.reset()[0].unsqueeze(0).cpu()

    # nn.Module.cpu() moves a module in place, so export a copy instead of
    # silently relocating the caller's model.
    export_model = deepcopy(model).cpu()

    # Export to ONNX
    torch.onnx.export(
        export_model,
        args=init_state,
        f=filename,
        opset_version=20,
    )

    # Re-open the exported file to add the config as a metadata property.
    onnx_model = onnx.load(filename)

    meta = onnx_model.metadata_props.add()
    meta.key = "config"
    meta.value = json.dumps(config)

    onnx.save(onnx_model, filename)

In [53]:
# Take the first element of a fresh reset and add a leading batch axis (the same
# expression onnx_dump uses for its dummy input).
# NOTE(review): init_state is not referenced by any other cell visible here — possibly a debugging leftover.
init_state = env.reset()[0].unsqueeze(0)

In [61]:
import enum
from torch.distributions import Categorical



################################ eval notebook start ###################################
class ObsBuffer(enum.Enum):
    """Extra observation buffers; make_env maps the strings in config["extra_state"] onto these members."""
    LABELS = "labels"
    DEPTH = "depth"
    AUTOMAP = "automap"


# Tensor dtype and device used by the evaluation code.
DTYPE = torch.float32
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Seed random number generators
torch.backends.cudnn.deterministic = True
if os.path.exists("seed.rnd"):
    # The first line of seed.rnd holds the integer seed.
    with open("seed.rnd", "r") as f:
        seed = int(f.readline().strip())
else:
    seed = 1337

# Seed every generator in both cases; previously the fallback seed was chosen
# but never applied, so runs without seed.rnd were not reproducible.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)


def make_env(seed, config) -> VizdoomMPEnv:
    """
    Build the single-player evaluation environment described by ``config``.

    ``config["extra_state"]`` may be None or an iterable of buffer names. Names
    that match an ``ObsBuffer`` value are converted to the enum member; all other
    names are dropped. The remaining settings are read from ``config`` unchanged
    and ``seed`` is forwarded to the environment.
    """
    requested = config["extra_state"]
    if requested is None:
        extra_state = None
    else:
        known_values = {member.value for member in ObsBuffer}
        extra_state = [ObsBuffer(name) for name in requested if name in known_values]

    return VizdoomMPEnv(
        num_players=1,
        num_bots=4,
        bot_skill=0,
        doom_map="ROOM",
        episode_timeout=2000,
        screen_format=config["screen_format"],
        n_stack_frames=config["n_stack_frames"],
        extra_state=extra_state,
        hud=config["hud"],
        crosshair=config["crosshair"],
        seed=seed,
    )


class Agent:
    """
    Thin wrapper that turns a network's output into a single action.

    Parameters
    ----------
    model  : callable network; may return logits or a tuple whose first item is logits.
    config : player configuration; ``config["algo_type"]`` selects sampling
             ("POLICY" or key absent) or greedy argmax (anything else).
    device : device the input frames are moved to before the forward pass.
    dtype  : dtype the input frames are cast to; defaults to the notebook-level DTYPE.
    """

    def __init__(self, model, config, device, dtype=None):
        self.model = model
        self.config = config
        self.device = device
        self.dtype = DTYPE if dtype is None else dtype

    def select_action(self, frames):
        """Return the action for one observation (batch axis is added here)."""
        # Use the device given to the constructor rather than the global DEVICE,
        # so the agent works when the two differ.
        frames = frames.unsqueeze(0).to(self.device, dtype=self.dtype)
        logits = self.model(frames)
        if isinstance(logits, tuple):
            # Two-output models: keep the first output, ignore the second.
            logits, _ = logits
        if "algo_type" not in self.config or self.config["algo_type"] == "POLICY":
            act = Categorical(logits=logits).sample()
        else:
            act = logits.argmax(-1)
        return act.cpu().numpy()[0]


def run_episode(agent: Agent, seed: int = 1337):
    """
    Play one episode with ``agent`` in a fresh environment and return the score.

    The score is the sum of ``rwd[0]`` over all steps. The environment is always
    closed, even if the episode is interrupted or a step raises.
    """
    env = make_env(seed, config=agent.config)
    try:
        obs = env.reset()
        score = 0
        done = False
        while not done:
            # obs[0]: first entry of the observation returned by reset/step.
            with torch.no_grad():
                action = agent.select_action(obs[0])
            obs, rwd, done, _ = env.step(action)
            score += rwd[0]
    finally:
        # Close the environment on every exit path so an interrupted run
        # does not leave it open.
        env.close()
    return score


# Run evaluation
config = PLAYER_CONFIG
n_episodes = 10
# NOTE: rebinds the notebook-level name `model` to the restored network.
model = restored_model
model.eval()
model = model.to(DEVICE, dtype=DTYPE)
agent = Agent(model, config, DEVICE)

# Evaluate model
scores = []
for i in range(n_episodes):
    # Draw a fresh seed per episode without overwriting the configured `seed`.
    episode_seed = seed
    if seed is not None:
        episode_seed = np.random.randint(int(1e7))
    scores.append(run_episode(agent, seed=episode_seed))

# Print result
print("\n\n\n")
print(np.mean(scores))

Host 51863
Player 51863


KeyboardInterrupt: 