In [40]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from rl.agents import DQNAgent
import gym
from gym import spaces
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from rl.agents import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from keras.callbacks import TensorBoard
# from rl.callbacks import Callback
import os
import time

In [2]:
years_files = {
    # 2021: "../Data/merged_2021.xlsx",
    2022: "../Data/merged_2022.xlsx",
    2023: "../Data/merged_2023.xlsx"
}
dfs = []

for year, file_path in years_files.items():
    df = pd.read_excel(file_path, engine='openpyxl')
    df['Year'] = year
    dfs.append(df)
    
data = pd.concat(dfs, ignore_index=True)
pop_data = pd.read_excel('../Data/state_level_population_projection_wide_xgboost.xlsx')
pop_data_melted = pop_data.melt(
    id_vars=['Variant', 'Variant Description', 'Year', 'Age'],
    value_vars=['Baden-Württemberg', 'Bayern', 'Berlin', 'Brandenburg', 'Bremen',
               'Hamburg', 'Hessen', 'Mecklenburg-Vorpommern', 'Niedersachsen',
               'Nordrhein-Westfalen', 'Rheinland-Pfalz', 'Saarland', 'Sachsen',
               'Sachsen-Anhalt', 'Schleswig-Holstein', 'Thüringen'],
    var_name='state',
    value_name='population'
)
merged_data_population = pd.merge(
    data,
    pop_data_melted,
    left_on=['state', 'Year'],
    right_on=['state', 'Year'],
    how='left'
)

In [3]:
label_encoders = {}
categorical_cols = ['state', 'MDC', 'Age']
for col in categorical_cols:
    le = LabelEncoder()
    merged_data_population[col + '_encoded'] = le.fit_transform(merged_data_population[col].astype(str))
    label_encoders[col] = le  # Save encoders for later use
# Normalize numerical columns (e.g., 'total_patients')
scaler = StandardScaler()
numerical_cols = merged_data_population.columns.difference(['state', 'MDC','Variant','Variant Description','Fachabteilung', 'Sekundär','state_encoded','MDC_encoded','Land_x','Land_y'])
numerical_cols = [str(col) for col in numerical_cols]
merged_data_population[numerical_cols] = scaler.fit_transform(merged_data_population[numerical_cols])

  merged_data_population[col + '_encoded'] = le.fit_transform(merged_data_population[col].astype(str))
  merged_data_population[col + '_encoded'] = le.fit_transform(merged_data_population[col].astype(str))
  merged_data_population[col + '_encoded'] = le.fit_transform(merged_data_population[col].astype(str))


In [4]:
merged_data_population[merged_data_population.select_dtypes(include='number').columns] = merged_data_population.select_dtypes(include='number').fillna(0)
merged_data_population[merged_data_population.select_dtypes(include='object').columns] = merged_data_population.select_dtypes(include='object').fillna("unknown")

In [5]:
state_columns = []
state_columns.extend(numerical_cols)
state_columns.extend(['state_encoded','total_patients'])
len(state_columns)

200

In [32]:
from mdc_mapping import mdc_to_fachabteilung
class ReshapeProcessor(Processor):
    def process_state_batch(self, state_batch):
        # Remove the extra dimension: (batch, 1, 2) → (batch, 2)
        return np.squeeze(state_batch, axis=1)
    
class HospitalEnv(gym.Env):
    def __init__(self, data):
        super(HospitalEnv, self).__init__()
        self.data = data
        self.num_beds = 6
        self.mdc_to_fachabteilung = mdc_to_fachabteilung
        self.state_size = 200  # patients, state
        self.observation_space = spaces.Box(low=0, high=1, shape=(self.state_size,), dtype=np.float32)
        self.mdc_list = list(mdc_to_fachabteilung.keys())
        self.action_space = spaces.Discrete(len(self.mdc_list) * self.num_beds)  # n_depts * beds allowed to re allocate
        self.mdc_to_index = {mdc: idx for idx, mdc in enumerate(self.mdc_list)}
        self.index_to_mdc = {idx: mdc for mdc, idx in self.mdc_to_index.items()}
        self.dept_columns = [col for col in data.columns if col.startswith(('0'))]
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(len(state_columns),), dtype=np.float32
        )
        self.label_encoders = label_encoders
        
        
    def decode_action(self, action):
        mdc = action // self.num_beds
        beds = action % self.num_beds
        return mdc, beds
    
    def _add_beds(self):
        """Adds 10 beds to the most overcrowded department."""
        utilization = self.current_patient[self.dept_columns] / self.current_patient[self.dept_columns].max()
        worst_dept = self.dept_columns[np.argmax(utilization)]
        self.current_patient[worst_dept] += 10
        self.current_patient['INSG_x'] += 10
        
    def _get_state(self):
        # Extract ONLY the two features we need
        # print(self.current_patient[numerical_cols])
        out = self.current_patient[state_columns].values.flatten()
        
        return out
    
    def _calculate_reward(self, old_state,action):

        mdc_index, _ = self.decode_action(action)
        mdc_label = self.label_encoders["MDC"].inverse_transform([mdc_index])[0]
        mapped_depts = mdc_to_fachabteilung.get(mdc_label, {})
        relevant_depts = mapped_depts.get("Fachabteilung", []) + mapped_depts.get("Sekundär", [])
        
        current_patients = self.current_patient['total_patients'].values[0]
        bed_values = self.current_patient[self.dept_columns].values.flatten()
        
        bed_ratios = bed_values / (current_patients + 1e-6)
        # Penalty for overcrowding (patients > beds)
        overcrowding_penalty = np.sum(np.where(bed_ratios < 1, 1 - bed_ratios, 0))

        balance_score = (np.mean(bed_ratios)) / (np.std(bed_ratios) + 1e-6)
        balance_reward = np.clip(balance_score, 0, 10) # Avoid division by zero
        # print(balance_reward)
        
        target_depts = set(relevant_depts)
        present_depts = set(self.dept_columns)
        matching_depts = target_depts & present_depts
        is_diagnostic_correct = False
        if matching_depts:
            is_diagnostic_correct = True
            
        # correct_mdc = self.current_patient['MDC_encoded'].values[0]
        diagnostic_reward = 1 if is_diagnostic_correct else -5
        total_reward = (
        0.7 * balance_reward +    
        0.3 * diagnostic_reward -           
        overcrowding_penalty      # Penalty (unweighted, absolute)
        ) 
        return float(total_reward)
    
    def reset(self):
        # Randomly select a patient case
        self.current_patient = self.data.sample(1)
        return self._get_state()
    
    def _reallocate_beds(self, mdc_index, beds):
        mdc_label = self.label_encoders["MDC"].inverse_transform([mdc_index])[0]
        mapping = mdc_to_fachabteilung.get(mdc_label, {})
        fach = mapping.get("Fachabteilung", [])
        sek = mapping.get("Sekundär", [])
        target_depts = list(set(fach + sek))
        present_depts = [d for d in target_depts if d in self.dept_columns]

        if len(present_depts) < 2 or beds == 0:
            return  

        util = self.current_patient[present_depts].values.flatten()
        from_dept = present_depts[np.argmax(util)]
        to_dept = present_depts[np.argmin(util)]

        self.current_patient[from_dept] = max(0, self.current_patient[from_dept] - beds)
        self.current_patient[to_dept] += beds



    
    def step(self, action):
        old_state = self._get_state()
        mdc_index, beds_to_move = self.decode_action(action)
        self._reallocate_beds(mdc_index, beds_to_move)
        reward = self._calculate_reward(old_state, action)
        done = True  
        return self.reset(), reward, done, {}
    

In [41]:
log_dir = "logs/fit/" + time.strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(
    log_dir=log_dir,
    histogram_freq=0,
    write_graph=False,
)

In [None]:
from logger import log_metrics

In [45]:
from rl.callbacks import Callback
import tensorflow as tf
class CustomTensorBoardCallback(Callback):
    def __init__(self, log_dir="./logs/"):
        self.writer = tf.summary.create_file_writer(log_dir)
        self.step = 0

    def on_step_end(self, step, logs={}):
        self.step += 1
        with self.writer.as_default():
            if 'reward' in logs:
                tf.summary.scalar('reward', logs['reward'], step=self.step)
            if 'loss' in logs:
                tf.summary.scalar('loss', logs['loss'], step=self.step)
            self.writer.flush()


In [None]:
import warnings
mdc_list = list(mdc_to_fachabteilung.keys())
policy = EpsGreedyQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
model = Sequential([
    Dense(1024, activation='relu', input_shape=(200,)),
    Dense(512, activation='relu'),
    Dense(len(mdc_list) * 6, activation='linear')  #Number of distinct departments * number_beds
])
        
# Add the processor to your agent
agent = DQNAgent(
    model=model,
    policy=policy,
    memory=memory,
    nb_steps_warmup=1000,
    nb_actions=len(mdc_list) * 6,
    batch_size=64,
    enable_double_dqn=True,
    processor=ReshapeProcessor()  # Fixes shape issues
)

agent.compile(optimizer='adam')

# Train the agent
env = HospitalEnv(merged_data_population)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    agent.fit(env, nb_steps=10000, visualize=False, callbacks=[CustomTensorBoardCallback()])

Training for 10000 steps ...
Interval 1 (0 steps performed)
 1443/10000 [===>..........................] - ETA: 1:04:16 - reward: -81.3280

In [None]:
env = HospitalEnv(data)
state = env.reset()
print("State shape:", state.shape)  # Should be (2,)
print("State values:", state)

In [None]:
scores = agent.test(env, nb_episodes=100, visualize=False)
print("Average reward:", np.mean(scores.history['episode_reward']))

In [46]:
test_obs = env.reset()
action = agent.forward(test_obs)
print(f"Recommended Department: {label_encoders['MDC'].inverse_transform([action])}")

Recommended Department: ['-1 – Fehler-DRGs und sonstige DRGs']


In [22]:
print(action)

0


In [44]:
!tensorboard --logdir=./logs

^C
