In this notebook you can train an expert through Behavioural Cloning for both [FrankaKitchen](https://robotics.farama.org/envs/franka_kitchen/franka_kitchen/) and [reacher](https://gymnasium.farama.org/environments/mujoco/reacher/) environments. You should set:
### Training Hyperparameters
- `seed`: For reproducibility of training runs
- `batch_size`: Number of (state, action) samples per batch
- `lr`: Learning rate for the optimizer
- `num_epochs`: Number of training epochs

### Environment
- `env_mode`: Select in which environment to train the Expert (`"kitchen"` or `"reacher"`)
- `reacher_dataset_type`: Quality of teacher demonstrations to use for Behavioural Cloning if Reacher is the selected environment (`"expert"` or `"medium"`)
- `filter`: If True, Reacher episodes with average reward below -0.1 will be removed



In [None]:
#@title Parameters and Import

import shutil
shutil.rmtree('DAgger4Robotics', ignore_errors=True)
!pip install gymnasium-robotics -q #Needed for Simulator
!pip install pyvirtualdisplay imageio -q
!git clone "https://github.com/cybernetic-m/DAgger4Robotics.git"
!pip install "minari[all]" -q

import minari # needed for dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch
import numpy as np
import matplotlib.pyplot as plt

from DAgger4Robotics.model.NetworkInterface import NetworkInterface
from DAgger4Robotics.dataset.myDatasetClass import myDatasetClass
from DAgger4Robotics.training.train import train
from DAgger4Robotics.test.test import test
from DAgger4Robotics.utils.preprocess_dataset import preprocess_dataset
from DAgger4Robotics.simulator.Simulator import Simulator


# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed every random number generator so that training runs can be repeated
seed=42 #@param {type:"integer"}
np.random.seed(seed)                   # NumPy generator
torch.manual_seed(seed)                # PyTorch generator (CPU)
torch.cuda.manual_seed(seed)           # PyTorch generator (current GPU)
torch.cuda.manual_seed_all(seed)       # PyTorch generators (every GPU)

# cuDNN: disable the benchmark-driven algorithm selection and request deterministic kernels
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Training hyperparameters
batch_size = 64 # @param {"type":"integer"}
lr = 1e-3  #@param {type:"number"}    # Trial: 1e-3, 1e-2, 1e-4, 1e-1
num_epochs = 50 #@param {type:"integer"}

# Environment in which the expert is trained
env_mode = 'kitchen' #@param ["kitchen","reacher"]

# Reject unknown environments first, then pick the (state, action) sizes of the selected one
if env_mode not in ('reacher', 'kitchen'):
  raise ValueError('Invalid environment name. Choose between ["reacher","kitchen"]')
state_dim, action_dim = (10, 2) if env_mode == 'reacher' else (20, 9)

# Franka-Kitchen dataset variant; only roughly the first 50-60 steps of each episode
# (the microwave task) are meant to be used
kitchen_dataset_type= "complete"

# Quality of the Reacher teacher demonstrations used for behavioural cloning: "medium" or "expert" policy
reacher_dataset_type = "expert" #@param ["expert","medium"]

# When enabled, Reacher episodes whose mean reward is lower than -0.1 are discarded
filter = False #@param {type:"boolean"}

### Dataset loading and preprocessing
- Downloads the appropriate dataset (with optional filtering for Reacher)
- Splits it into train, validation, and test sets with a 0.7/0.2/0.1 ratio
- Loads the splits using PyTorch `DataLoader`.

In [None]:
# Download (if needed) and load the demonstrations of the selected environment
if env_mode == 'kitchen':
  dataset = minari.load_dataset(f'D4RL/kitchen/{kitchen_dataset_type}-v2',download=True)
elif env_mode == 'reacher':
  dataset = minari.load_dataset(f'mujoco/reacher/{reacher_dataset_type}-v0',download=True)
else:
  # Fail loudly instead of silently reusing a `dataset` left over from a previous run
  raise ValueError('Invalid environment name. Choose between ["reacher","kitchen"]')

dataset.set_seed(seed)  # Set a seed for reproducibility
print("Dataset loaded successfully!")
print(f'Total Episodes: {dataset.total_episodes}')

# The reward filter only exists for Reacher: `split_source` always points at the dataset
# that must be split, so enabling `filter` with Kitchen can no longer hit an undefined name.
split_source = dataset
if env_mode == 'reacher' and filter:
  # Keep only episodes with mean reward greater than -0.1 (the mean reward is approximately between [-0.2, 0])
  filtered_dataset = dataset.filter_episodes(lambda episode: episode.rewards.mean() > -0.1)
  print(f'Total Episodes filtered dataset: {filtered_dataset.total_episodes}')
  split_source = filtered_dataset

# Split into training, validation and test sets with ratios 0.7 / 0.2 / 0.1.
# The test split takes the remainder: rounding the three sizes independently can make them
# sum to more than the number of episodes (e.g. 8 -> 6 + 2 + 1), which split_dataset rejects,
# or to fewer, which would silently drop an episode.
total_episodes = split_source.total_episodes
train_size = round(0.7*total_episodes)
val_size = round(0.2*total_episodes)
test_size = max(total_episodes - train_size - val_size, 0)
dataset_split = minari.split_dataset(split_source, sizes=[train_size, val_size, test_size], seed=seed)

# Taking training, validation and test splits
training_dataset = dataset_split[0]
validation_dataset = dataset_split[1]
test_dataset = dataset_split[2]
print(f"Training episodes: {len(training_dataset)}")
print(f"Validation episodes: {len(validation_dataset)}")
print(f"Test episodes: {len(test_dataset)}")

# Kitchen episodes go through the project's preprocessing step before being wrapped
if env_mode=='kitchen':
  training_dataset = preprocess_dataset(training_dataset)
  validation_dataset = preprocess_dataset(validation_dataset)
  test_dataset = preprocess_dataset(test_dataset)


# Wrap every split into the project dataset class and expose it through a DataLoader
train_dataset_class= myDatasetClass(training_dataset, env_mode)
print(f"Training number of (state,action) pairs: {len(train_dataset_class)}")
train_loader=DataLoader(dataset=train_dataset_class, batch_size=batch_size, shuffle=True)
test_dataset_class= myDatasetClass(test_dataset, env_mode)
print(f"Test number of (state,action) pairs: {len(test_dataset_class)}")
test_loader=DataLoader(dataset=test_dataset_class, batch_size=batch_size, shuffle=True)
validation_dataset_class= myDatasetClass(validation_dataset, env_mode)
print(f"Validation number of (state,action) pairs: {len(validation_dataset_class)}")
val_loader=DataLoader(dataset=validation_dataset_class, batch_size=batch_size, shuffle=True)



### Train process
- Select the type of network to train
- Run the train loop
- The model with best loss on validation set will be saved

In [10]:
# Architecture of the network used for the teacher/expert: 'simple' or 'deep'.
# The value is handed to NetworkInterface as `net_type` by the cells below.
expert_type='deep' #@param ["simple","deep"]

In [None]:
# Instantiate the selected architecture, sized on the state/action dimensions of the environment
net_wrapper = NetworkInterface(
    net_type=expert_type,
    input_dim=state_dim,
    output_dim=action_dim,
)
net_wrapper.summary()

# Move the network to the target device before the optimizer collects its parameters
model = net_wrapper.get_model()
model = model.to(device)

# Mean squared error loss, optimized with Adam at the configured learning rate
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Run the training loop on the training split, using the validation split for evaluation
train(
    model = model,
    train_loader = train_loader,
    val_loader = val_loader,
    loss_fn = loss_fn,
    optimizer = optimizer,
    num_epochs = num_epochs,
    device = device
)

### Test process
- Select the newly trained network
- Evaluate its performance on the test set

In [None]:
# Checkpoint to evaluate (presumably the file written by the training cell — TODO confirm where train() saves it)
model_path = '/content/expert_policy.pt'

# Same loss used during training, so the test value is comparable
loss_fn = nn.MSELoss()

# Rebuild a fresh network with the same architecture, then restore the saved weights into it
net_wrapper = NetworkInterface(
    net_type=expert_type,
    input_dim=state_dim,
    output_dim=action_dim,
)
model = net_wrapper.get_model().to(device)
model.load_state_dict(torch.load(model_path,map_location=device))

# Evaluate the restored teacher/expert on the held-out test split
test(
    model = model,
    loss_fn = loss_fn,
    test_dataloader = test_loader,
    device = device
)

### Simulation
Use this section to run rollouts in the environment using the newly trained network. You can customize the simulation with the following parameters:
- `render`: If True, displays the rollouts on screen. **Requires a GPU**
- `framerate_per_episode` : Controls how frequently frames are rendered. Only frames where `(frame_idx % framerate_per_episode == 0)` are shown
- `video_saving`: If True, saves the videos of the episodes in the `./new_videos` folder. In the **Kitchen** environment, this significantly increases computation time
- `n_episodes`: Number of episodes to simulate
- `robot_noise`: Magnitude of noise added to the robot’s proprioceptive variables (only for the **Kitchen** environment)

After all rollouts have finished, a `mean_rewards.json` file is saved. It contains:
- The mean rewards for each episode
- `mean_of_means`: The overall mean reward across all episodes

### Using a Saved model

If you want to simulate one of our pretrained models, you can load it manually by changing:
- `env_type`: Specifies which environment the model is intended for (`"reacher"` or `"kitchen"`)
- `path_to_model`: Full path to the model file, e.g. one of the models provided in the `DAgger4Robotics` folder
- `net_type`: The architecture type of the loaded model (`"simple"` or `"deep"`)

In [14]:
#@title Parameters of the experiments { run: "auto" }
# Rollout settings (see the markdown cell above for the meaning of each field)
render = True #@param {type:"boolean"}
framerate_per_episode=5  #@param {type:"integer"}
video_saving = False #@param {type:"boolean"}
n_episodes = 1 #@param {type:"integer"}
robot_noise=0.1 #@param {type:"number"}
# Environment the simulated model is intended for; defaults to the one selected for training
env_type = env_mode

# Checkpoint and architecture of the policy to simulate; defaults to the expert trained above.
# Edit these (together with env_type) to simulate a pretrained model instead.
path_to_model='/content/expert_policy.pt'
net_type=expert_type



In [None]:
# Roll out the selected policy in the simulator.
# `env_type` is passed (rather than `env_mode`) so that the environment can be overridden
# together with `path_to_model` and `net_type` when simulating a saved model; by default
# `env_type` equals `env_mode`, so the freshly trained expert is simulated as before.
sim = Simulator(
        env_mode=env_type,
        net_type=net_type,
        path_to_model=path_to_model,
        n_episodes=n_episodes,
        render=render,
        framerate_per_episode=framerate_per_episode,
        video_saving=video_saving,
        robot_noise=robot_noise, #Only useful for Franka-Kitchen
        device=device
    )
sim.run()