# Day 1

## MLAgent + Non-LM

### 튜토리얼 소개

#### 튜토리얼의 목적

### 기본 환경 세팅 - github 참조

```bash
# Create and activate the conda environment. The environment-name flag is
# `--name` (short form `-n`); the single-dash `-name` spelling is not a valid
# way to name the environment `rl_tune`.
conda create --name rl_tune python=3.9.18
conda activate rl_tune
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
pip install mlagents==0.30
pip install protobuf==3.20
pip install gymnasium==0.29.1
pip install mujoco==3.1.0
pip install jupyter
pip install transformers==4.34.1
```

### 모듈 불러오기
- 간단하게 불러온 함수의 기능에 대해 설명


In [None]:
from __future__ import print_function

from utils.setting.env_settings import analyze_env
from utils.init import set_seed
import torch

# Call the project's seeding helper with its default arguments
# (presumably fixes the RNG seeds for reproducibility -- confirm in utils.init).
set_seed()

# Number of GPUs to use -- set this to match your own machine.
ngpu = 2

# Use the first CUDA device only when CUDA is available and ngpu is positive;
# otherwise fall back to the CPU.
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### 학습 환경 설정
- 예제로 사용할 MLAgent 환경이 뭔지, 특징이 뭔지(state, action size)
- mlagent 링크 첨부

In [None]:
# analyze_env returns a pair for the named environment, unpacked here as the
# environment configuration and the RL parameter object used by later cells.
env_config, rl_params = analyze_env(env_name="HalfCheetah-v4")

### 네트워크 모델 클래스 선언

여기서 각 네트워크의 특성에 대해 언급 
- 왜 actor, critic에는 ResMLP를 쓰고
- rev_env에는 MLP를 쓰는지.

In [None]:
from torch import nn

class ResBlock(nn.Module):
    """Residual block built from two same-width linear layers.

    Computes ``dropout(relu(linear2(relu(linear1(x))) + x))``.

    Args:
        hidden_dim: Input and output feature size of both linear layers.
        dropout: Probability passed to ``nn.Dropout``.
    """

    def __init__(self, hidden_dim, dropout):
        super().__init__()
        self.linear1 = nn.Linear(hidden_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply the block to ``x`` and return a tensor of the same shape."""
        hidden = self.relu(self.linear1(x))
        # Skip connection: the block input is added back before the final
        # activation and dropout.
        summed = self.linear2(hidden) + x
        return self.dropout(self.relu(summed))

class ResMLP(nn.Module):
    """Sequential stack of ``num_layer`` ``ResBlock`` modules of width ``d_model``.

    Args:
        num_layer: Number of residual blocks in the stack.
        d_model: Feature size handed to every block.
        dropout: Dropout probability handed to every block.
    """

    def __init__(self, num_layer, d_model, dropout = 0.0):
        super().__init__()
        blocks = [ResBlock(d_model, dropout=dropout) for _ in range(num_layer)]
        self.layers = nn.Sequential(*blocks)

    def forward(self, x, mask = None):
        """Run ``x`` through every block in order.

        ``mask`` is accepted but not used by this module.
        """
        return self.layers(x)
    
class MLP(nn.Module):
    """Plain feed-forward stack of ``num_layer`` Linear -> ReLU -> Dropout groups.

    Every linear layer maps ``d_model`` features to ``d_model`` features, and
    the last group also ends with ReLU and Dropout.

    Args:
        num_layer: Number of Linear/ReLU/Dropout groups.
        d_model: Feature size of every layer; converted with ``int()``.
        dropout: Probability passed to each ``nn.Dropout``.
    """

    def create_deep_modules(self, layers_size, dropout = 0.0):
        """Build an ``nn.Sequential`` from consecutive pairs of ``layers_size``.

        Each adjacent pair ``(in_size, out_size)`` contributes a Linear layer
        followed by ReLU and Dropout. Fewer than two sizes yields an empty
        ``nn.Sequential``.
        """
        deep_modules = []
        for in_size, out_size in zip(layers_size[:-1], layers_size[1:]):
            deep_modules += [
                nn.Linear(in_size, out_size),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
        return nn.Sequential(*deep_modules)

    def __init__(self, num_layer, d_model, dropout = 0.0):
        super().__init__()
        # Cast once and reuse for every width, including the first one, so a
        # float-valued d_model (e.g. 64.0) never reaches nn.Linear as a
        # non-integer in_features.
        width = int(d_model)
        self.deep = self.create_deep_modules([width] * (num_layer + 1), dropout)

    def forward(self, x, mask = None):
        """Run ``x`` through the stack; ``mask`` is accepted but not used."""
        return self.deep(x)

### 훈련 설정

In [None]:
from rl_tune import RLTune

# Select the network class for each role. The classes themselves (not
# instances) are stored on rl_params: ResMLP for the actor and critic, MLP for
# the rev_env network.
rl_params.network.actor_network = ResMLP
rl_params.network.critic_network = ResMLP
rl_params.network.rev_env_network = MLP

# RLTune is used as a context manager around the training run; training starts
# from scratch because resume_training is False.
with RLTune(
    env_config,
    rl_params,
    device,
    use_graphics=False,
    use_print=True,
) as rl_tune:
    rl_tune.train(resume_training=False)
