### 1. Import dependencies

In [None]:
!pip install tensorflow==2.7.0
!pip install gym
!pip install keras
!pip install keras-rl2

In [None]:
from BinPackingEnvironment1D import BinPacking

### 2. Create environment

In [None]:
# Instantiate the bin-packing environment used by the baseline run below.
env = BinPacking(
    num_bins=10,
    capacity=20,
    min_item_size=1,
)

In [None]:
# Draw one random action to inspect what the action space produces.
env.action_space.sample()

In [None]:
# Draw one random observation to inspect what the observation space produces.
env.observation_space.sample()

### 3. Run baseline test (No ML)

In [None]:
# Random-policy baseline: play a few episodes with uniformly sampled actions
# and report the accumulated reward plus the environment's log counters.
MAX_STEPS = 1000
episodes = 10

for episode in range(1, episodes + 1):
    # The initial observation is not needed: actions are sampled blindly.
    env.reset()
    score = 0

    # Bound the episode length in case the environment never reports `done`.
    for _step in range(MAX_STEPS):
        random_action = env.action_space.sample()
        _next_state, reward, done, _info = env.step(random_action)
        score += reward
        if done:
            break

    print('Episode:{} Score:{}'.format(episode, score))
    print(env.logs)
    # Zero the counters by hand so the next episode's log starts from scratch.
    env.logs = {'placed': 0, 'misplaced': 0, 'discarded': 0}

### 4. Train an RL Model

In [None]:
import os
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
# Directory that receives the TensorBoard logs written during training.
# Create it up front (no-op when it already exists) so later cells cannot
# fail on a missing path.
log_path = os.path.join('Training', 'Logs')
os.makedirs(log_path, exist_ok=True)

In [None]:
# Re-instantiate the environment with the same settings so the model is
# trained on a new instance rather than the one used for the baseline run.
env = BinPacking(
    num_bins=10,
    capacity=20,
    min_item_size=1,
)

In [None]:
# Show the environment's log dict before training starts.
print(env.logs)

In [None]:
# Build a PPO agent with the 'MlpPolicy' policy on the environment above;
# `log_path` is handed to PPO as its TensorBoard log location.
model = PPO(
    'MlpPolicy',
    env,
    verbose=1,
    tensorboard_log=log_path,
)

In [None]:
# Train the agent for 20,000 environment timesteps.
model.learn(total_timesteps=20_000)

### 5. Save model

In [None]:
#PPO_Path = os.path.join('Training', 'Saved Models', 'Constant_PPO_Model_Discard_Penalty')

In [None]:
#model.save(PPO_Path)

### 6. Load model

In [None]:
#model = PPO.load(PPO_Path, env=env)

### 7. Test model

In [None]:
# Evaluate the trained model: play a few episodes using the model's predicted
# actions and report the accumulated reward plus the environment's log counters.
MAX_STEPS = 1000
episodes = 10

for episode in range(1, episodes + 1):
    obs = env.reset()
    score = 0

    # Bound the episode length in case the environment never reports `done`.
    for _step in range(MAX_STEPS):
        # predict() returns a pair; only the action is used here.
        action, _unused = model.predict(obs)
        obs, reward, done, _info = env.step(action)
        score += reward
        if done:
            break

    print('Episode:{} Score:{}'.format(episode, score))
    print(env.logs)
    # Zero the counters by hand so the next episode's log starts from scratch.
    env.logs = {'placed': 0, 'misplaced': 0, 'discarded': 0}