In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from rl.agents import DQNAgent
import gym
from gym import spaces
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from rl.agents import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor

In [2]:
# Load the aggregated state/MDC spreadsheet into a DataFrame.
data = pd.read_excel("../Data/State_MDC_Aggregation_2021.xlsx", engine='openpyxl')

# Integer-encode every categorical column into a sibling '<name>_encoded' column.
# The fitted encoders are kept so codes can be mapped back to labels later.
categorical_cols = ['state', 'MDC', 'Fachabteilung', 'Sekundär']
label_encoders = {}
for column in categorical_cols:
    encoder = LabelEncoder()
    # Values are cast to str before fitting, so every entry is encoded as text.
    column_as_text = data[column].astype(str)
    data[column + '_encoded'] = encoder.fit_transform(column_as_text)
    label_encoders[column] = encoder

# Standardize the numeric 'total_patients' column into 'total_patients_normalized'.
scaler = StandardScaler()
patient_counts = data[['total_patients']]
data['total_patients_normalized'] = scaler.fit_transform(patient_counts)

In [3]:
class ReshapeProcessor(Processor):
    """Processor that drops the singleton axis 1 from each state batch."""

    def process_state_batch(self, state_batch):
        """Return ``state_batch`` with axis 1 removed.

        Turns a (batch, 1, 2) batch into (batch, 2). Axis 1 must have length 1,
        otherwise NumPy raises a ValueError.
        """
        # presumably axis 1 is the memory window (window_length=1) - confirm
        batch = np.asanyarray(state_batch)
        return batch.squeeze(axis=1)
    
class HospitalEnv(gym.Env):
    """One-step environment that asks the agent to guess a row's MDC code.

    Every episode draws one random row of ``data``. The observation is that
    row's ``total_patients_normalized`` and ``state_encoded`` values; the action
    is an integer in ``[0, 27)`` that is compared with the row's ``MDC_encoded``
    value. The episode ends after a single step.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``total_patients_normalized``,
        ``state_encoded`` and ``MDC_encoded``.
    """

    # Columns that make up the observation vector, in this order.
    FEATURE_COLS = ('total_patients_normalized', 'state_encoded')

    def __init__(self, data):
        super(HospitalEnv, self).__init__()
        self.data = data
        self.state_size = len(self.FEATURE_COLS)  # patients, state
        self.action_space = spaces.Discrete(27)  # 27 MDCs
        # The features are a z-scored count (can be negative) and an integer
        # label code (can exceed 1), so a [0, 1] box would misdescribe them.
        # Declare the space as unbounded instead.
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.state_size,),
            dtype=np.float32
        )
        # No case is active until reset() has been called.
        self.current_patient = None

    def _require_patient(self):
        """Return the active case, or raise if reset() has not been called yet."""
        if self.current_patient is None:
            raise RuntimeError("HospitalEnv.reset() must be called before use")
        return self.current_patient

    def _get_state(self):
        """Return the active case's features as a float32 vector of shape (2,)."""
        row = self._require_patient()[list(self.FEATURE_COLS)]
        # Cast so the observation dtype matches the declared observation_space.
        return row.to_numpy(dtype=np.float32).flatten()

    def reset(self):
        """Draw a new random case from ``data`` and return its observation."""
        self.current_patient = self.data.sample(1)
        return self._get_state()

    def step(self, action):
        """Score ``action`` against the active case and end the episode.

        Returns ``(observation, reward, done, info)`` where ``reward`` is +1 if
        ``action`` equals the case's ``MDC_encoded`` value and -1 otherwise,
        ``done`` is always True and ``info`` is an empty dict.
        """
        correct_mdc = self._require_patient()['MDC_encoded'].values[0]
        reward = 1 if action == correct_mdc else -1
        done = True  # One-step episode
        # Return the observation of the case that was just scored. Drawing the
        # next case is left to the caller's reset(), so the scored case stays
        # available on self.current_patient after the step.
        return self._get_state(), reward, done, {}



In [4]:
# Create the environment first so the network and agent sizes are read from it
# rather than repeating the action count and observation shape as literals.
env = HospitalEnv(data)
nb_actions = int(env.action_space.n)
obs_shape = env.observation_space.shape

policy = EpsGreedyQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)

# Small fully connected Q-network: one linear output per action (MDC).
model = Sequential([
    Dense(24, activation='relu', input_shape=obs_shape),
    Dense(24, activation='relu'),
    Dense(nb_actions, activation='linear')
])

# ReshapeProcessor removes the extra axis from each state batch so it matches
# the network's input shape.
agent = DQNAgent(
    model=model,
    policy=policy,
    memory=memory,
    nb_actions=nb_actions,
    processor=ReshapeProcessor()
)

agent.compile(optimizer='adam')

# Train the agent; keep the returned history object for later inspection.
history = agent.fit(env, nb_steps=10000, visualize=False, verbose=1)

Training for 10000 steps ...
Interval 1 (0 steps performed)
   40/10000 [..............................] - ETA: 40s - reward: -0.9500

  updates=self.state_updates,


 1001/10000 [==>...........................] - ETA: 32s - reward: -0.9101

  updates=self.state_updates,


 1234/10000 [==>...........................] - ETA: 54s - reward: -0.9157done, took 7.731 seconds


<keras.callbacks.History at 0x1c64bc1ffa0>

In [None]:
agent.save_weights('dqn_hospital_weights.h5f', overwrite=True)

# Test the agent
# NOTE(review): keras-rl's fit() leaves agent.training set to True, and in that
# mode forward() chooses actions with the exploration policy (epsilon-greedy).
# Switch to test mode so the recommendation is the greedy argmax of the Q-values.
agent.training = False
test_obs = env.reset()
action = agent.forward(test_obs)
print(f"Recommended Department: {label_encoders['MDC'].inverse_transform([action])}")

State shape: (2,)
State values: [-0.27857806 14.        ]




In [None]:
# Sanity check: build a fresh environment and look at one observation.
env = HospitalEnv(data)
state = env.reset()
# The shape is expected to be (2,).
for caption, shown in (("State shape:", state.shape), ("State values:", state)):
    print(caption, shown)