# In [None]:
import nasim
import numpy as np
import random
import torch
import itertools
import pandas as pd

from nasim.agents.dqn_agent import DQNAgent

def set_global_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    The same ``seed`` is handed, in order, to ``random.seed``,
    ``np.random.seed``, ``torch.manual_seed``, ``torch.cuda.manual_seed``
    and ``torch.cuda.manual_seed_all``. Afterwards the cuDNN backend flags
    are set to ``deterministic=True`` and ``benchmark=False``.

    Args:
        seed: Value passed unchanged to every seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Fix the cuDNN flags so repeated runs use the same backend settings.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Exhaustive grid search over DQN hyperparameters on a single NASim scenario.
# For every combination in `param_grid` a fresh environment and agent are
# created, the agent is trained, one evaluation episode is run, and the
# outcome is recorded as one row of the results CSV.

# Scenario file loaded by NASim for every run.
scenario_path = "/home/kali/NetworkAttackSimulator/Experiments/medium_with_honeypot.yaml"

# Output CSV. It is rewritten after every run so that results of completed
# runs survive a crash or manual interrupt of this long-running search.
results_path = "dqn_tuning_results.csv"

# Each key is forwarded as a keyword argument to DQNAgent. The full Cartesian
# product is evaluated: 3 * 2 * 4 * 3 * 2 * 3 * 2 = 864 combinations.
param_grid = {
    "lr":        [0.001, 0.0005, 0.01],
    "gamma":     [0.95, 0.99],
    "hidden_sizes": [[64,64], [128,128],[256,256], [512,512]],
    "batch_size": [32, 64,128],
    "training_steps": [20000, 50000],
    "final_epsilon": [0.05, 0.01,0.1],
    "exploration_steps": [10000, 20000]
}

param_names = list(param_grid.keys())
param_combos = list(itertools.product(*[param_grid[k] for k in param_names]))
total_runs = len(param_combos)

# One dict per completed run: the hyperparameters plus the evaluation outcome.
results = []

for i, combo in enumerate(param_combos):
    params = dict(zip(param_names, combo))
    print(f"\n======== Test {i+1}/{total_runs}: {params} ========")

    # Every run gets its own seed, used for both the global RNGs and the agent.
    run_seed = 42 + i
    set_global_seed(run_seed)

    # A fresh environment per run so no state carries over between combinations.
    env = nasim.load(
        scenario_path,
        fully_obs=True,
        flat_actions=True,
        flat_obs=True,
        render_mode=None
    )

    agent = DQNAgent(
        env,
        verbose=False,
        seed=run_seed,
        **params
    )
    agent.train()

    # A single evaluation episode; its three return values are unpacked below
    # as the episode reward, the number of steps, and the goal-reached flag.
    eval_ret, eval_steps, eval_goal = agent.run_eval_episode(env, render=False, eval_epsilon=0.01, render_mode=None)
    print(f"Reward: {eval_ret} | Steps: {eval_steps} | Goal reached: {eval_goal}")

    result_row = params.copy()
    result_row["reward"] = eval_ret
    result_row["steps"] = eval_steps
    result_row["goal"] = eval_goal
    results.append(result_row)

    # Checkpoint: persist everything collected so far. The file is tiny
    # compared with the training time, so rewriting it each run is cheap.
    pd.DataFrame(results).to_csv(results_path, index=False)

    # Optionally save the trained model:
    # agent.save(f"dqn_tuning_run{i}.pt")

# Final write; also produces the (empty) CSV if the grid had no combinations.
df = pd.DataFrame(results)
df.to_csv(results_path, index=False)
print("\nTuning completato. Risultati salvati in dqn_tuning_results.csv")
