In [1]:
# Imports
# Gymnasium imports
import gymnasium as gym 
from gymnasium import Env
from gymnasium.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete 

import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout

# Import helpers
import numpy as np
import pandas as pd
import random
import os
import json
import matplotlib.pyplot as plt
import seaborn as sns

from collections import deque

# Import stable baselines
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

from Environment.env_v7 import *
from Functions.visualization_functions import *

In [None]:
# Configuration of the network.
# The JSON file is read as raw text and handed to the environment unchanged.
# The encoding is pinned to UTF-8 (the JSON default) so decoding does not
# depend on the platform's locale settings.
with open('./Config/network_config_v0.json', encoding='utf-8') as config_file:
    network_config = config_file.read()

In [None]:
# Length of the episode (number of steps).
EP_LENGTH = 52

# Keyword arguments for the environment, collected in one place so they can be
# inspected or reused later.
# NOTE(review): the meaning of each setting is defined by SS_Mngmt_Env
# (star-imported above); confirm there before tuning.
env_kwargs = {
    "network_config": network_config,
    "EP_LENGTH": EP_LENGTH,
    "render_mode": "human",
    "stockout_cost": 1000,
    "order_cost": 5,
    "item_cost": 0.1,
    "stock_cost": 0.5,
    "item_prize": 20,
    "order_quantities": [0, 15, 50],
    "demand_mean": 10,
    "demand_std": 2,
    "demand_noise": 0,
    "demand_noise_std": 2,
}

env = SS_Mngmt_Env(**env_kwargs)

In [None]:
# Smoke test: drive the environment with randomly sampled actions and render
# every step.
episodes = 1
for episode in range(1, episodes + 1):
    # Gymnasium's reset() returns an (observation, info) pair.
    state, info = env.reset()
    done = False
    score = 0

    while not done:
        action = env.action_space.sample()
        # Gymnasium's step() returns
        # (observation, reward, terminated, truncated, info).
        n_state, reward, terminated, truncated, info = env.step(action)
        # An episode is over when it terminates OR is truncated (e.g. by a
        # step limit); checking only `terminated` can loop forever.
        done = terminated or truncated
        score += reward

        env.render()

    # Report the accumulated reward so the rollout leaves a visible result.
    print(f"Episode {episode}: score = {score}")

In [None]:
# Output locations, both rooted in the same top-level directory.
# `log_path` is handed to PPO as its TensorBoard log directory; `model_path`
# is the location reserved for the trained PPO model.
TRAINING_DIR = 'Training'

log_path = os.path.join(TRAINING_DIR, 'Test', 'Logs')
model_path = os.path.join(TRAINING_DIR, 'Models', 'Test', 'PPO')

In [None]:
def _make_env():
    """Factory for DummyVecEnv: hands back the environment built above."""
    return env


# DummyVecEnv takes a list of zero-argument factories; a single one is used here.
dummy_env = DummyVecEnv([_make_env])

# PPO settings, kept together so they are easy to tune.
ppo_kwargs = {
    "learning_rate": 0.0007,
    "gamma": 0.99,
    "verbose": 0,
    "tensorboard_log": log_path,
}

model = PPO("MlpPolicy", dummy_env, **ppo_kwargs)

In [None]:
# Train the agent, then persist it: without the save, the result of this long
# run is lost as soon as the kernel restarts (model_path was otherwise unused).
TOTAL_TIMESTEPS = 500_000

model.learn(total_timesteps=TOTAL_TIMESTEPS)

# Make sure the target directory exists before writing the model archive.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)

In [None]:


# Evaluate the trained policy for a single rendered episode, asking for
# per-episode results instead of aggregate statistics.
eval_result = evaluate_policy(
    model,
    dummy_env,
    n_eval_episodes=1,
    render=True,
    return_episode_rewards=True,
)
# Last expression of the cell, so the notebook displays the evaluation result.
eval_result

In [None]:
# Load a previously exported CSV into a DataFrame for plotting.
# NOTE(review): presumably this file was written by an earlier environment
# run — confirm where it is produced.
file = "./Data/2024-11-06_21_last_environment_data.csv"
df = pd.read_csv(filepath_or_buffer=file)

In [None]:
# Visualize the loaded DataFrame with the project's plotting helper.
# NOTE(review): plot_data comes from one of the star imports above, presumably
# Functions.visualization_functions — confirm.
plot_data(df)