# Install dependencies

In [None]:
# --ignore-requires-python makes pip skip the package's declared Python-version check.
# NOTE(review): presumably needed because this runtime's Python is outside the range
# that mlagents 1.0.0 declares — confirm before changing the pin.
!pip install --ignore-requires-python mlagents==1.0.0

In [None]:
!pip install stable-baselines3

In [None]:
!pip show stable-baselines3

In [None]:
!pip install shimmy>=0.2.1

In [None]:
!pip install gdown

## Download Unity environment

Here you need to download your Unity environment build, either from a publicly readable archive on Google Drive or by uploading the files directly to Kaggle. Alternatively, you can use my link below until I delete these files :)

In [None]:
# Download the environment archive from Google Drive by its file ID.
# NOTE(review): the unzip cell expects /kaggle/working/spyder_walker_linux.zip —
# confirm the downloaded file lands there under that name.
!gdown "17k-hP53qKr2MIOr3y9BOUn_02cr_F_k6"

In [None]:
# Extract the archive into /kaggle/working; -o overwrites existing files without
# prompting, so the cell can be re-run without blocking on a confirmation.
!unzip -o /kaggle/working/spyder_walker_linux.zip -d /kaggle/working

## Check mlagents Gym environment

In [None]:
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper

In [None]:
import numpy as np

In [None]:
env_path = "/kaggle/working/spyder_walker_linux/AI Spyder.x86_64"
!chmod -R 755 "/kaggle/working/spyder_walker_linux/AI Spyder.x86_64"

In [None]:
def get_env():
    """Start the Unity build at ``env_path`` without graphics and wrap it for Gym.

    Returns:
        A ``UnityToGymWrapper`` around a freshly started ``UnityEnvironment``.
        The caller owns the environment and must call ``close()`` on it.

    Raises:
        Any exception raised by ``UnityEnvironment`` or ``UnityToGymWrapper``.
        If wrapping fails, the already-started Unity environment is closed
        before the exception propagates.
    """
    unity_env = UnityEnvironment(env_path, no_graphics=True)
    try:
        return UnityToGymWrapper(unity_env)
    except Exception:
        # The Unity environment is already running at this point; close it so
        # a failed wrap does not leave it alive and block the next get_env().
        unity_env.close()
        raise

In [None]:
# Smoke test: start the environment, print its spaces and take a few steps.
env = get_env()
try:
    print("Action space: ", env.action_space)
    print("State space: ", env.observation_space)
    env.reset()
    # Size the probe action from the action space instead of hard-coding 12,
    # so the check keeps working if the environment's action count changes.
    probe_action = np.ones(env.action_space.shape)
    for step_idx in range(5):
        state, reward, done, _ = env.step(probe_action)
        print(reward, " ", done)
finally:
    # Always shut the environment down, even if a step raises, so the next
    # get_env() call does not collide with a still-running instance.
    env.close()

## Learning with StableBaselines3

In [None]:
from stable_baselines3 import SAC
from stable_baselines3.common.logger import configure

In [None]:
# Train SAC on the Unity environment and save the resulting model.
env = get_env()
try:
    # Only the "csv" output format is configured, so training metrics are
    # written under tmp_path rather than printed.
    tmp_path = "/kaggle/working/"
    logger = configure(tmp_path, ["csv"])

    model = SAC("MlpPolicy", env, verbose=1, gamma=0.85)
    model.set_logger(logger)
    model.learn(total_timesteps=200000, log_interval=1)
    model.save("sac_model_gamma_85.zip")
finally:
    # Close the environment even when training is interrupted or fails, so
    # the Unity process is not left running.
    env.close()

## Results visualization

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the training metrics CSV. The relative path relies on the working
# directory being the folder the csv logger was configured with
# ("/kaggle/working/") — NOTE(review): confirm when running elsewhere.
data = pd.read_csv("progress.csv")

In [None]:
# Series for the learning curve: the logged timestep counter (x) and the
# logged mean episode reward (y).
t, r = data["time/total_timesteps"], data["rollout/ep_rew_mean"]

In [None]:
# Draw the learning curve on the current axes (created on demand by pyplot).
axes = plt.gca()
axes.plot(t, r, label="SAC")
axes.set_xlabel("step", fontsize=12)
axes.set_ylabel("mean reward", fontsize=12)
axes.legend()

## Convert to ONNX

In [None]:
!pip install onnxruntime

In [None]:
from typing import Tuple, Any
import torch as th
from torch.nn import Parameter
import onnxruntime as ort

In [None]:
# Number of continuous actions reported by the exported ONNX model's
# "continuous_action_output_shape" output. It matches the 12-element action
# used in the environment check earlier in this notebook.
CONTINUOUS_ACTIONS_SIZE = 12

In [None]:
class OnnxablePolicy(th.nn.Module):
    """Wrap an actor network so its ONNX export also emits constant metadata.

    Besides the deterministic action, ``forward`` returns three constant
    tensors: the continuous action count, a version number and a memory size.
    NOTE(review): presumably these are the extra outputs the Unity-side model
    loader expects — confirm against the ML-Agents model format.

    Args:
        actor: Callable module invoked as ``actor(observation, deterministic=True)``.
        continuous_actions_size: Number of continuous actions to report. When
            omitted, the module-level ``CONTINUOUS_ACTIONS_SIZE`` is used, which
            keeps existing ``OnnxablePolicy(actor)`` calls working unchanged.
    """

    def __init__(self, actor: th.nn.Module, continuous_actions_size: "int | None" = None):
        super().__init__()
        if continuous_actions_size is None:
            # Backward-compatible default: fall back to the notebook constant.
            continuous_actions_size = CONTINUOUS_ACTIONS_SIZE
        self.actor = actor
        # Stored as non-trainable parameters so they are exported as constants
        # of the graph and returned from forward() unchanged.
        self.version_number = Parameter(th.Tensor([3]), requires_grad=False)
        self.memory_size = Parameter(th.Tensor([0]), requires_grad=False)
        self.continuous_action_output_shape = Parameter(
            th.Tensor([continuous_actions_size]), requires_grad=False
        )

    def forward(self, observation: th.Tensor) -> tuple[Any, Parameter, Parameter, Parameter]:
        """Return ``(action, continuous_action_output_shape, version_number, memory_size)``.

        The order must match the ``output_names`` given to ``th.onnx.export``.
        """
        action = self.actor(observation, deterministic=True)
        return action, self.continuous_action_output_shape, self.version_number, self.memory_size


def convert_to_onnx(model_path, output_path):
    """Export a saved SAC actor to ONNX and print a side-by-side sanity check.

    Loads the SAC model from ``model_path`` on CPU, wraps its actor in
    ``OnnxablePolicy`` and exports it to ``output_path`` with a single input
    named ``obs_0``. Afterwards an all-zeros observation is run through both
    the ONNX Runtime session and the original torch actor, and both results
    are printed so they can be compared by eye.

    Args:
        model_path: Path of the saved SAC model archive.
        output_path: Destination path of the ``.onnx`` file.
    """
    sac_model = SAC.load(model_path, device="cpu")
    obs_shape = sac_model.observation_space.shape
    wrapped_actor = OnnxablePolicy(sac_model.policy.actor)

    # Output names follow the order of the tuple returned by OnnxablePolicy.forward.
    exported_outputs = [
        "continuous_actions",
        "continuous_action_output_shape",
        "version_number",
        "memory_size",
    ]
    th.onnx.export(
        wrapped_actor,
        th.randn(1, *obs_shape),  # dummy batch of one observation for tracing
        output_path,
        opset_version=17,
        input_names=["obs_0"],
        output_names=exported_outputs,
    )

    # Run the same zero observation through ONNX Runtime ...
    zero_obs = np.zeros((1, *obs_shape), dtype=np.float32)
    session = ort.InferenceSession(output_path)
    onnx_outputs = session.run(None, {"obs_0": zero_obs})
    print(onnx_outputs[0])

    # ... and through the torch actor, for comparison.
    with th.no_grad():
        torch_action = sac_model.actor(th.as_tensor(zero_obs), deterministic=True)
        print(torch_action)

In [None]:
convert_to_onnx("/kaggle/working/sac_model_gamma_85.zip", "/kaggle/working/sac_model_gamma_85.onnx")