In [73]:
import pandas as pd
import numpy as np
import torch
from sklearn.metrics.pairwise import euclidean_distances
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import datetime

# ---------------------------
# 0. 랜덤 시드 고정
# ---------------------------
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.use_deterministic_algorithms(True)

# ---------------------------
# 1. 데이터 로드 및 전처리
# ---------------------------
employees = pd.read_csv("./data/rotation_employees.csv")
branches = pd.read_csv("./data/rotation_branches.csv")

eligible_employees = employees[employees['years_at_branch'] > 3].reset_index(drop=True)
non_eligible_employees = employees[employees['years_at_branch'] <= 3].copy()
non_eligible_employees['new_branch'] = non_eligible_employees['current_branch']
non_eligible_employees['reassigned'] = False

branch_names = branches['branch_name'].tolist()
ranks = sorted(employees['rank'].unique().tolist())  # 직급 정렬

def calculate_distance_matrix(emp_df, branch_df):
    home_coords = emp_df[['home_x', 'home_y']].values
    branch_coords = branch_df[['branch_x', 'branch_y']].values
    return euclidean_distances(home_coords, branch_coords)

distance_matrix = calculate_distance_matrix(eligible_employees, branches)

# ---------------------------
# 2. 강화학습 환경 정의
# ---------------------------
class RotationEnv(gym.Env):
    def __init__(self, emp_df, branch_df, distance_matrix, ranks):
        super(RotationEnv, self).__init__()
        self.emp_df = emp_df
        self.branch_df = branch_df
        self.distance_matrix = distance_matrix
        self.ranks = ranks

        self.n_emp = len(emp_df)
        self.n_branches = len(branch_df)
        self.max_branch_capacity = 12
        self.min_branch_capacity = 8

        self.branch_counts = np.zeros(self.n_branches, dtype=int)
        self.branch_rank_counts = {rank: np.zeros(self.n_branches, dtype=int) for rank in ranks}
        self.current_idx = 0

        self.action_space = spaces.Discrete(self.n_branches)
        self.observation_space = spaces.Box(
            low=0, high=1,
            shape=(2 + self.n_branches + len(ranks) * self.n_branches,),
            dtype=np.float32
        )

    def seed(self, seed=None):
        np.random.seed(seed)
        return [seed]

    def reset(self):
        self.current_idx = 0
        self.branch_counts[:] = 0
        self.branch_rank_counts = {rank: np.zeros(self.n_branches, dtype=int) for rank in self.ranks}
        return self._get_obs()

    def _get_obs(self):
        emp = self.emp_df.iloc[self.current_idx]
        home_xy = np.array([emp['home_x'] / 100.0, emp['home_y'] / 100.0])

        capacity_ratio = self.branch_counts / self.max_branch_capacity
        rank_ratios = []
        for rank in self.ranks:
            ratio = self.branch_rank_counts[rank] / (self.max_branch_capacity / len(self.ranks))
            rank_ratios.extend(ratio)

        return np.concatenate([home_xy, capacity_ratio, rank_ratios], axis=0).astype(np.float32)

    def _calculate_reward(self, action, emp, rank):
        distance = self.distance_matrix[self.current_idx][action] / 100
        projected_count = self.branch_counts[action] + 1

        over_penalty = max(0, projected_count - self.max_branch_capacity)
        under_penalty = max(0, self.min_branch_capacity - projected_count)

        ideal_rank_count = projected_count / len(self.ranks)
        projected_rank_count = self.branch_rank_counts[rank][action] + 1
        rank_deviation = abs(projected_rank_count - ideal_rank_count)

        projected_rank_counts = {
            r: (self.branch_rank_counts[r][action] + (1 if r == rank else 0)) for r in self.ranks
        }
        missing_rank_penalty = sum(1 for r in self.ranks if projected_rank_counts[r] == 0)

        reward = - 1.0 * distance \
                 - 3.0 * over_penalty \
                 - 3.0 * under_penalty \
                 - 3.0 * rank_deviation \
                 - 3.0 * missing_rank_penalty

        return reward

    def step(self, action):
        emp = self.emp_df.iloc[self.current_idx]
        rank = emp['rank']
        reward = self._calculate_reward(action, emp, rank)

        self.branch_counts[action] += 1
        self.branch_rank_counts[rank][action] += 1
        self.current_idx += 1

        done = self.current_idx >= self.n_emp
        obs = self._get_obs() if not done else np.zeros(self.observation_space.shape, dtype=np.float32)
        return obs, reward, done, {}

# ---------------------------
# 3. PPO 학습 수행
# ---------------------------
env = RotationEnv(eligible_employees, branches, distance_matrix, ranks)
env.seed(SEED)

policy_kwargs = dict(
    activation_fn=torch.nn.SiLU,
    net_arch=[dict(pi=[256, 128], vf=[32])]
    # net_arch=[dict(pi=[128, 128], vf=[128, 128])]
)

model = PPO("MlpPolicy", env, verbose=1, seed=SEED,
            policy_kwargs=policy_kwargs,
            ent_coef=0.1,
            learning_rate=0.0003,
            clip_range=0.1,
            vf_coef = 0.3,
            tensorboard_log="./logs/rotation_tensorboard")

model.learn(total_timesteps=200000, log_interval=10)

# ---------------------------
# 4. 예측 및 저장
# ---------------------------
env_eval = RotationEnv(eligible_employees, branches, distance_matrix, ranks)
assignments = []
obs = env_eval.reset()
done = False

while not done:
    action, _ = model.predict(obs)
    assignments.append(int(action))
    obs, _, done, _ = env_eval.step(action)

eligible_employees['new_branch'] = [branch_names[a] for a in assignments]
eligible_employees['reassigned'] = True

final_df = pd.concat([eligible_employees, non_eligible_employees], ignore_index=True)
final_df.sort_values(by='employee_id', inplace=True)

filename = f"./data/rotation_assignments_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
final_df.to_csv(filename, index=False)

print(f"✅ 최종 발령표 저장 완료: {filename}")

#지점별 인원배분 점검
branch_rank_counts = final_df.groupby(['new_branch', 'rank']).size().unstack(fill_value=0)
branch_rank_counts

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./logs/rotation_tensorboard\PPO_52




-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 30            |
|    ep_rew_mean          | -689          |
| time/                   |               |
|    fps                  | 1026          |
|    iterations           | 10            |
|    time_elapsed         | 19            |
|    total_timesteps      | 20480         |
| train/                  |               |
|    approx_kl            | 2.8593466e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | -2.3          |
|    explained_variance   | -0.618        |
|    learning_rate        | 0.0003        |
|    loss                 | 1.04e+04      |
|    n_updates            | 90            |
|    policy_gradient_loss | -0.000287     |
|    value_loss           | 4.58e+04      |
-------------------------------------------
-----------------------------------------
| rollout/                |       

rank,과장,대리,사원
new_branch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Branch_0,8,9,8
Branch_1,2,2,0
Branch_2,2,2,3
Branch_3,4,3,3
Branch_4,2,3,2
Branch_5,3,3,0
Branch_6,2,3,2
Branch_7,2,2,2
Branch_8,8,5,6
Branch_9,4,2,3


In [63]:
branch_rank_counts = final_df.groupby(['new_branch', 'rank']).size().unstack(fill_value=0)
branch_rank_counts

rank,과장,대리,사원
new_branch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Branch_0,5,3,3
Branch_1,3,5,0
Branch_2,2,4,4
Branch_3,4,3,3
Branch_4,3,3,5
Branch_5,3,3,1
Branch_6,3,4,2
Branch_7,4,2,3
Branch_8,5,2,4
Branch_9,5,5,4
