<a href="https://colab.research.google.com/github/fatasfaps/duelingddqn-for-sepsis/blob/main/constructing_MDP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.cm as cm
from matplotlib.lines import Line2D
from matplotlib.colors import Normalize
from sklearn.model_selection import train_test_split
import os
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [None]:
# Mount Google Drive into the Colab filesystem so that the
# '/content/drive/...' paths used by later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Project directory on the mounted Drive.
# NOTE(review): not referenced by any cell visible in this part of the
# notebook — presumably used further down for saved artifacts; confirm.
data_dir = '/content/drive/MyDrive/sepsis-mdp'

In [None]:
# Action-space configuration: each of the two treatments is discretized into
# N_BINS levels, labelled 0 .. N_BINS-1.
N_BINS = 5
LABELS = np.arange(N_BINS)

# Names of the raw dose columns that the actions are derived from.
max_vaso, iv_fluids = 'max_dose_vaso', 'input_total'

# Load the sepsis cohort and work on a copy so the raw frame stays untouched.
sepsis_cohort = pd.read_csv('/content/drive/MyDrive/mimic3/SEPSIS-COHORT-FINAL-DF.csv')
discretized_action = sepsis_cohort.copy()

In [None]:
# ----------------------------------------------------------------------
## VASOPRESSOR QUARTILE EDGES
# ----------------------------------------------------------------------
# Quartiles are taken over strictly positive doses only, so that "no
# vasopressor" (dose <= 0) can be its own action level.
vaso_positive = discretized_action.loc[discretized_action[max_vaso] > 0, max_vaso]

# Upper edges at 25 %, 50 %, 75 % and 100 % of the positive doses.
q_vaso_threshold = vaso_positive.quantile(np.linspace(0.25, 1.0, N_BINS - 1))

# Bin edges: a lower bound just below the column minimum, then 0, then the
# four positive-dose quantiles.
vaso_min = discretized_action[max_vaso].min()
vaso_bins = [vaso_min - 0.0001, 0]
vaso_bins.extend(q_vaso_threshold.values)

print("--- Rentang Kuartil Max Dose Vasopressor ---")
print(f"Kuartil (dari dosis > 0): \n{q_vaso_threshold}")
print("\nBatas Aksi (Bins) Vaso (Total 5 Bins):")
print(f"Aksi 0 (No Vaso): ({vaso_min}, 0.0]")
# Actions 1-4 span consecutive pairs of edges starting at index 1.
for aksi, tag in enumerate(("Q1", "Q2", "Q3", "Q4/Max"), start=1):
    print(f"Aksi {aksi}: ({vaso_bins[aksi]:.4f}, {vaso_bins[aksi + 1]:.4f}] ({tag})")

VASO_BINS_FINAL = vaso_bins

--- Rentang Kuartil Max Dose Vasopressor ---
Kuartil (dari dosis > 0): 
0.25      0.080
0.50      0.200
0.75      0.450
1.00    175.325
Name: max_dose_vaso, dtype: float64

Batas Aksi (Bins) Vaso (Total 5 Bins):
Aksi 0 (No Vaso): (0.0, 0.0]
Aksi 1: (0.0000, 0.0800] (Q1)
Aksi 2: (0.0800, 0.2000] (Q2)
Aksi 3: (0.2000, 0.4500] (Q3)
Aksi 4: (0.4500, 175.3250] (Q4/Max)


In [None]:
# ----------------------------------------------------------------------
## IV FLUID QUARTILE EDGES (raw positive volumes, same layout as vasopressor)
# ----------------------------------------------------------------------
# Work on a derived series in which negative volumes are replaced by zero;
# the DataFrame column itself is not modified here.
raw_fluid = discretized_action[iv_fluids]
fluid_data_imputed = raw_fluid.mask(raw_fluid < 0, 0.0)

# Minimum after imputation, used only in the printout below.
fluid_min_imputed = fluid_data_imputed.min()

# Quartiles are taken over strictly positive volumes.
fluid_positive = fluid_data_imputed.loc[fluid_data_imputed > 0]
q_fluid_quartile_q1_q4 = fluid_positive.quantile(np.linspace(0.25, 1.0, N_BINS - 1))

# Third quartile and the 100 % quantile (maximum of the positive volumes).
batas_kuartil_tertinggi_q3 = q_fluid_quartile_q1_q4.iloc[-2]
batas_kuartil_tertinggi_q4_max_raw = q_fluid_quartile_q1_q4.iloc[-1]

# Technical lower bound so that exact zeros land in the first bin.
fluid_min_teknis = -0.01

# Edges: lower bound, 0, Q1, Q2, Q3, then the imputed maximum plus a small
# margin so the largest observation is inside the last bin.
positive_edges = list(q_fluid_quartile_q1_q4.values)
fluid_bins = [
    fluid_min_teknis,
    0.0,
    *positive_edges[:3],
    fluid_data_imputed.max() + 0.01,
]

print("\n--- Rentang Kuartil Input Total Cairan (Data Mentah) ---")
print(f"Kuartil (dari dosis > 0): \n{q_fluid_quartile_q1_q4}")
print("\nBatas Aksi (Bins) Cairan (Total 5 Bins):")
print(f"Aksi 0 (No iv_fluids): ({fluid_min_imputed:.2f}, {fluid_bins[1]:.2f}]")
for aksi, tag in enumerate(("Q1", "Q2", "Q3"), start=1):
    print(f"Aksi {aksi}: ({fluid_bins[aksi]:.4f}, {fluid_bins[aksi + 1]:.4f}] ({tag})")
# The printed upper bound of action 4 is the 100 % quantile, not the padded edge.
print(f"Aksi 4: ({fluid_bins[4]:.4f}, {batas_kuartil_tertinggi_q4_max_raw:.4f}] (Q4/Max)")

FLUID_BINS_FINAL = fluid_bins


--- Rentang Kuartil Input Total Cairan (Data Mentah) ---
Kuartil (dari dosis > 0): 
0.25     1112.733
0.50     2956.749
0.75     6009.850
1.00    84879.000
Name: input_total, dtype: float64

Batas Aksi (Bins) Cairan (Total 5 Bins):
Aksi 0 (No iv_fluids): (0.00, 0.00]
Aksi 1: (0.0000, 1112.7330] (Q1)
Aksi 2: (1112.7330, 2956.7490] (Q2)
Aksi 3: (2956.7490, 6009.8500] (Q3)
Aksi 4: (6009.8500, 84879.0000] (Q4/Max)


In [None]:
# ----------------------------------------------------------------------
# CORRECTIVE STEP: treat missing doses as "no treatment" (0.0)
# ----------------------------------------------------------------------
nan_count_vaso = discretized_action[max_vaso].isna().sum()
nan_count_fluid = discretized_action[iv_fluids].isna().sum()

if nan_count_vaso > 0 or nan_count_fluid > 0:
    # Assign the filled column back instead of calling
    # `df[col].fillna(..., inplace=True)`: the in-place form operates on an
    # intermediate object and does not update the frame under pandas
    # Copy-on-Write (and raises a FutureWarning on pandas 2.x).
    discretized_action[max_vaso] = discretized_action[max_vaso].fillna(0.0)
    discretized_action[iv_fluids] = discretized_action[iv_fluids].fillna(0.0)
    print(f"{nan_count_vaso} NaN di Vasopressor dan {nan_count_fluid} NaN di Cairan IV diimputasi menjadi 0.0.")

# ----------------------------------------------------------------------
# ADJUST THE BIN EDGES
# ----------------------------------------------------------------------
EPSILON = 0.0001

# Column maxima after NaN imputation.
Vaso_Max_Absolut = discretized_action[max_vaso].max()
Fluid_Max_Absolut = discretized_action[iv_fluids].max()

# Keep every edge except the last one, then append (max + EPSILON) so the
# largest observation is guaranteed to fall inside the top bin.
VASO_BINS_BASE = VASO_BINS_FINAL[:-1]
FLUID_BINS_BASE = FLUID_BINS_FINAL[:-1]
VASO_BINS_FIXED = VASO_BINS_BASE + [Vaso_Max_Absolut + EPSILON]
FLUID_BINS_FIXED = FLUID_BINS_BASE + [Fluid_Max_Absolut + EPSILON]

# ----------------------------------------------------------------------
# ACTION DISCRETIZATION
# ----------------------------------------------------------------------
# Level used for any value that still falls outside the bin edges.
FALLBACK_ACTION = int(LABELS[-1])

# --- 1. Discrete vasopressor level (0-4) ---
action_vaso_cut = pd.cut(
    discretized_action[max_vaso],
    bins=VASO_BINS_FIXED,
    labels=LABELS,
    right=True,
    include_lowest=True
)
discretized_action['action_vaso_discrete'] = (
    action_vaso_cut.astype(float).fillna(FALLBACK_ACTION).astype(np.int64)
)

# --- 2. Discrete IV-fluid level (0-4) ---
# The fluid bin edges were derived from a series in which negative volumes
# had been replaced by zero. Apply the same rule to the values being binned:
# otherwise a negative volume lies below the lowest edge, comes back from
# pd.cut as NaN and would be mapped to the HIGHEST level by the fallback.
fluid_for_binning = discretized_action[iv_fluids].clip(lower=0.0)
action_fluid_cut = pd.cut(
    fluid_for_binning,
    bins=FLUID_BINS_FIXED,
    labels=LABELS,
    right=True,
    include_lowest=True
)
discretized_action['action_fluid_discrete'] = (
    action_fluid_cut.astype(float).fillna(FALLBACK_ACTION).astype(np.int64)
)

# Make the fallback visible instead of silent: with the edges built above
# no value is expected to be left unbinned.
unbinned_vaso = int(action_vaso_cut.isna().sum())
unbinned_fluid = int(action_fluid_cut.isna().sum())
if unbinned_vaso or unbinned_fluid:
    print(
        f"PERINGATAN: {unbinned_vaso} nilai vasopressor dan {unbinned_fluid} nilai cairan "
        f"berada di luar batas bins dan dipetakan ke Aksi {FALLBACK_ACTION}."
    )

# --- 3. Joint action (0-24): fluid level is the high digit, vaso the low digit ---
discretized_action['action_t'] = (
    discretized_action['action_fluid_discrete'] * N_BINS
    + discretized_action['action_vaso_discrete']
)

print(f"Diskretisasi Aksi Selesai. Total {discretized_action['action_t'].nunique()} Aksi unik (0-24) dibuat.")

# Distribution check of the joint action.
print("\n--- Distribusi Aksi Final (action_t) ---")
print(discretized_action['action_t'].value_counts().sort_index().head(25))

Diskretisasi Aksi Selesai. Total 25 Aksi unik (0-24) dibuat.

--- Distribusi Aksi Final (action_t) ---
action_t
0     146203
1        372
2        160
3        173
4         68
5      48923
6       1942
7       1595
8       1244
9        682
10     47745
11      2044
12      1710
13      1629
14      1254
15     45841
16      2267
17      2143
18      2401
19      1731
20     42218
21      2668
22      3107
23      3213
24      3230
Name: count, dtype: int64


In [None]:
# Show the last 30 rows of the discrete action columns.
action_columns = ['action_vaso_discrete', 'action_fluid_discrete', 'action_t']
print(discretized_action[action_columns].tail(30))

# To persist the discretized frame (kept disabled, as before):
# discretized_action.to_csv('discretized_action_data.csv', index=False)

        action_vaso_discrete  action_fluid_discrete  action_t
364533                     0                      3        15
364534                     0                      3        15
364535                     0                      3        15
364536                     0                      3        15
364537                     0                      3        15
364538                     0                      3        15
364539                     0                      3        15
364540                     0                      3        15
364541                     0                      3        15
364542                     0                      3        15
364543                     0                      3        15
364544                     0                      3        15
364545                     0                      0         0
364546                     0                      0         0
364547                     0                      0         0
364548  

In [None]:
# ----------------------------------------------------------------------
# PRINT THE ACTION LEGEND
# ----------------------------------------------------------------------
# Human-readable name of each per-treatment level (0-4).
ACTION_DEFINITIONS = dict(enumerate(("No", "Q1", "Q2", "Q3", "High/Outlier")))

print("\n--- Definisi Aksi Final ---")
for action_t in range(N_BINS * N_BINS):
    # action_t = a_fluid * N_BINS + a_vaso, so divmod recovers both levels.
    a_fluid, a_vaso = divmod(action_t, N_BINS)

    fluid_desc = ACTION_DEFINITIONS.get(a_fluid, "Invalid")
    vaso_desc = ACTION_DEFINITIONS.get(a_vaso, "Invalid")

    legend_line = (
        f"Aksi {action_t:02d}: A_fluid ({a_fluid}) = {fluid_desc} IV Fluids, "
        f"A_vaso ({a_vaso}) = {vaso_desc} Vasopressor"
    )
    print(legend_line)


--- Definisi Aksi Final ---
Aksi 00: A_fluid (0) = No IV Fluids, A_vaso (0) = No Vasopressor
Aksi 01: A_fluid (0) = No IV Fluids, A_vaso (1) = Q1 Vasopressor
Aksi 02: A_fluid (0) = No IV Fluids, A_vaso (2) = Q2 Vasopressor
Aksi 03: A_fluid (0) = No IV Fluids, A_vaso (3) = Q3 Vasopressor
Aksi 04: A_fluid (0) = No IV Fluids, A_vaso (4) = High/Outlier Vasopressor
Aksi 05: A_fluid (1) = Q1 IV Fluids, A_vaso (0) = No Vasopressor
Aksi 06: A_fluid (1) = Q1 IV Fluids, A_vaso (1) = Q1 Vasopressor
Aksi 07: A_fluid (1) = Q1 IV Fluids, A_vaso (2) = Q2 Vasopressor
Aksi 08: A_fluid (1) = Q1 IV Fluids, A_vaso (3) = Q3 Vasopressor
Aksi 09: A_fluid (1) = Q1 IV Fluids, A_vaso (4) = High/Outlier Vasopressor
Aksi 10: A_fluid (2) = Q2 IV Fluids, A_vaso (0) = No Vasopressor
Aksi 11: A_fluid (2) = Q2 IV Fluids, A_vaso (1) = Q1 Vasopressor
Aksi 12: A_fluid (2) = Q2 IV Fluids, A_vaso (2) = Q2 Vasopressor
Aksi 13: A_fluid (2) = Q2 IV Fluids, A_vaso (3) = Q3 Vasopressor
Aksi 14: A_fluid (2) = Q2 IV Fluids, A_va

In [None]:
# List every column currently in the frame, including the three action
# columns added by the discretization step.
print(discretized_action.columns)

Index(['icustay_id', 'bloc', 'morta_hosp', 'mortality_90d', 'is_readmit',
       'on_mechvent', 'max_dose_vaso', 'SOFA', 'SIRS', 'RR', 'Temp_C', 'SpO2',
       'HR', 'MeanBP', 'GCS', 'Lactate', 'Arterial_pH', 'paO2', 'paCO2',
       'FiO2', 'PaO2_FiO2', 'Sodium', 'Potassium', 'Chloride', 'HCO3', 'BUN',
       'Creatinine', 'Total_bili', 'Albumin', 'Platelets_count', 'WBC_Count',
       'HCT', 'Glucose', 'input_total', 'uo_total', 'SysBP', 'DiaBP',
       'Shock_Index', 'Arterial_BE', 'Ionized Calcium', 'SGOT(AST)',
       'SGPT(ALT)', 'PT', 'PTT', 'INR', 'Hb', 'Magnesium', 'Calcium',
       'cumulated_balance', 'Weight_kg', 'elixhauser', 'age', 'gender_numeric',
       'action_vaso_discrete', 'action_fluid_discrete', 'action_t'],
      dtype='object')


In [None]:
# ----------------------------------------------------------------------
# 1. DATA PREPARATION (deltas: value at t+1 minus value at t)
# ----------------------------------------------------------------------
# Order by stay and time block so that shift(-1) within a stay yields the
# following time block.
discretized_action.sort_values(by=['icustay_id', 'bloc'], inplace=True)

# Next-step SOFA and lactate within each ICU stay (NaN on the last row).
discretized_action['SOFA_t_plus_1'] = discretized_action.groupby('icustay_id')['SOFA'].shift(-1)
discretized_action['Lactate_t_plus_1'] = discretized_action.groupby('icustay_id')['Lactate'].shift(-1)

discretized_action['Delta_SOFA'] = discretized_action['SOFA_t_plus_1'] - discretized_action['SOFA']
discretized_action['Delta_Lactate'] = discretized_action['Lactate_t_plus_1'] - discretized_action['Lactate']

# ----------------------------------------------------------------------
# 2. INTERMEDIATE REWARD R_t
# ----------------------------------------------------------------------
C0 = -0.1   # penalty when SOFA is unchanged and still above zero
C1 = -2.5   # weight on the SOFA change
C2 = -3.0   # weight on tanh of the lactate change

# --- R0: SOFA stagnation penalty ---
discretized_action['R0_stability'] = np.where(
    (discretized_action['SOFA_t_plus_1'].notna()) &
    (discretized_action['SOFA_t_plus_1'] == discretized_action['SOFA']) &
    (discretized_action['SOFA'] > 0),
    C0,
    0.0
)

# --- R1: SOFA change (C1 is negative, so a rising SOFA is penalized) ---
discretized_action['R1_SOFA_change'] = C1 * discretized_action['Delta_SOFA']

# --- R2: lactate change, squashed by tanh (C2 is negative as well) ---
discretized_action['R2_Lactate_change'] = C2 * np.tanh(discretized_action['Delta_Lactate'])

# --- Intermediate reward; NaN wherever a delta is NaN (always on the last
# row of a stay, and also wherever SOFA/lactate themselves are missing) ---
discretized_action['reward_t'] = (
    discretized_action['R0_stability'] +
    discretized_action['R1_SOFA_change'] +
    discretized_action['R2_Lactate_change']
)
print("Perhitungan Reward Intermediate R_t selesai.")

# ----------------------------------------------------------------------
# 3. TERMINAL ROWS: replace the reward by the outcome-based terminal reward
# ----------------------------------------------------------------------
SURVIVAL_COL = 'morta_hosp'
TERMINAL_REWARD_SURVIVED = 15.0
TERMINAL_REWARD_DIED = -15.0

# A terminal transition is the LAST row of a stay. Selecting on
# "reward is NaN" instead would also hand the terminal reward to any
# mid-episode row whose SOFA or lactate happens to be missing.
is_terminal = discretized_action.groupby('icustay_id').cumcount(ascending=False).eq(0)
rows_imputed = int(is_terminal.sum())

survived = discretized_action[SURVIVAL_COL] == 0
died = discretized_action[SURVIVAL_COL] == 1
discretized_action.loc[is_terminal & survived, 'reward_t'] = TERMINAL_REWARD_SURVIVED
discretized_action.loc[is_terminal & died, 'reward_t'] = TERMINAL_REWARD_DIED

# Anything still NaN is either a non-terminal row with missing inputs or a
# terminal row without a 0/1 outcome; report it rather than hide it.
rows_reward_missing = int(discretized_action['reward_t'].isna().sum())
if rows_reward_missing:
    print(f"PERINGATAN: {rows_reward_missing} baris masih memiliki reward NaN (SOFA/Laktat/outcome hilang).")

# ----------------------------------------------------------------------
# 4. ROW CHECK
# ----------------------------------------------------------------------
rows_remaining = len(discretized_action)
terminal_magnitude = abs(TERMINAL_REWARD_DIED)

# The messages report the actual terminal reward magnitude (previously a
# hard-coded "+/- 1" that did not match the constants above).
print(f"{rows_imputed} baris terminal (NaN Reward) telah diimputasi dengan +/- {terminal_magnitude:g}.")
print(f"Total transisi yang digunakan untuk pelatihan: {rows_remaining}")
print(f"Nilai R_t rata-rata (termasuk terminal +/- {terminal_magnitude:g}): {discretized_action['reward_t'].mean():.4f}")

Perhitungan Reward Intermediate R_t selesai.
18830 baris terminal (NaN Reward) telah diimputasi dengan +/- 1.
Total transisi yang digunakan untuk pelatihan: 364563
Nilai R_t rata-rata (termasuk terminal +/- 1): 0.7185


In [None]:
# Show the last 30 rows with all columns, including the reward columns
# added by the previous cell.
print(discretized_action.tail(30))

        icustay_id  bloc  morta_hosp  mortality_90d  is_readmit  on_mechvent  \
364533      299992     9         0.0            0.0           0            1   
364534      299992    10         0.0            0.0           0            1   
364535      299992    11         0.0            0.0           0            1   
364536      299992    12         0.0            0.0           0            1   
364537      299992    13         0.0            0.0           0            1   
364538      299992    14         0.0            0.0           0            1   
364539      299992    15         0.0            0.0           0            1   
364540      299992    16         0.0            0.0           0            1   
364541      299992    17         0.0            0.0           0            1   
364542      299992    18         0.0            0.0           0            1   
364543      299992    19         0.0            0.0           0            1   
364544      299992    20         0.0    

In [None]:
# Number of rows (transitions) and of distinct ICU stays (episodes).
jumlah_transisi_final = len(discretized_action)
jumlah_pasien_final = discretized_action['icustay_id'].nunique()

print(f"Jumlah Transisi (Baris) Final: {jumlah_transisi_final}")
print(f"Jumlah Pasien (Episode) Final: {jumlah_pasien_final}")

Jumlah Transisi (Baris) Final: 364563
Jumlah Pasien (Episode) Final: 18830


In [None]:
# Expects discretized_action to already carry the 'reward_t' column with the
# terminal rewards filled in.
SURVIVAL_COL = 'morta_hosp'

# Mean of reward_t over all rows of each outcome group.
average_total_reward = (
    discretized_action
    .groupby(SURVIVAL_COL)['reward_t']
    .mean()
    .reset_index()
)

# Replace the 0/1 outcome codes by readable labels for the printout.
outcome_labels = {0: 'Survivor', 1: 'Non-Survivor'}
average_total_reward[SURVIVAL_COL] = average_total_reward[SURVIVAL_COL].replace(outcome_labels)

banner = "========================================================="
print(banner)
print("Rata-rata Total Reward (Rt) Berdasarkan Status Survival")
print(banner)
for status, avg_reward in zip(average_total_reward[SURVIVAL_COL], average_total_reward['reward_t']):
    print(f"Status Pasien: {status}")
    print(f"Rata-rata Total Reward (Rt): {avg_reward:.4f}")
    print("-" * 45)

overall_mean_reward = discretized_action['reward_t'].mean()
print(f"Rata-rata Reward KESELURUHAN DATA: {overall_mean_reward:.4f}")
print(banner)

Rata-rata Total Reward (Rt) Berdasarkan Status Survival
Status Pasien: Survivor
Rata-rata Total Reward (Rt): 0.9491
---------------------------------------------
Status Pasien: Non-Survivor
Rata-rata Total Reward (Rt): -0.6850
---------------------------------------------
Rata-rata Reward KESELURUHAN DATA: 0.7185


In [None]:
# Expects discretized_action to hold every transition with its reward.

# 0. Drop rows that are identical in every column.
rows_before = len(discretized_action)
print(f"Total baris sebelum hapus duplikat: {rows_before}")

discretized_action_CLEAN = discretized_action.drop_duplicates()
rows_after = len(discretized_action_CLEAN)

print(f"Total baris setelah hapus duplikat: {rows_after}")
print(f"Jumlah transisi duplikat yang dihapus: {rows_before - rows_after}")

# Continue with the de-duplicated frame under the usual name.
discretized_action = discretized_action_CLEAN

# 1. Split by ICU stay, not by row, so that no stay is shared between sets.
all_icustay_ids = discretized_action['icustay_id'].unique()

# 2. First cut: 80 % train, 20 % held out.
train_ids, temp_ids = train_test_split(
    all_icustay_ids,
    test_size=0.2,
    random_state=42
)

# 3. Second cut: halve the held-out 20 % into validation and test (10 % each).
val_ids, test_ids = train_test_split(
    temp_ids,
    test_size=0.5,
    random_state=42
)

# 4. Report the resulting patient shares.
total_count = len(all_icustay_ids)
print("--- Pembagian Pasien ---")
print(f"Total Pasien: {total_count}")
for set_label, id_subset in (("Data Latih", train_ids), ("Data Validasi", val_ids), ("Data Uji", test_ids)):
    print(f"{set_label}: {len(id_subset)} ({len(id_subset)/total_count:.2f})")

# 5. Select the transitions belonging to each set of stays.
stay_of_row = discretized_action['icustay_id']
train = discretized_action.loc[stay_of_row.isin(train_ids)].copy()
val = discretized_action.loc[stay_of_row.isin(val_ids)].copy()
test = discretized_action.loc[stay_of_row.isin(test_ids)].copy()

print("\n--- Pembagian Transisi ---")
for set_label, subset in (("Train", train), ("Val", val), ("Test", test)):
    print(f"Transisi {set_label}: {len(subset)}")

Total baris sebelum hapus duplikat: 364563
Total baris setelah hapus duplikat: 364563
Jumlah transisi duplikat yang dihapus: 0
--- Pembagian Pasien ---
Total Pasien: 18830
Data Latih: 15064 (0.80)
Data Validasi: 1883 (0.10)
Data Uji: 1883 (0.10)

--- Pembagian Transisi ---
Transisi Train: 291620
Transisi Val: 36598
Transisi Test: 36345


In [None]:
# ======================================================================
# FEATURE GROUPS BY ASSUMED DISTRIBUTION (following Raghu et al., 2017)
# ======================================================================

# 0/1 indicators: only shifted by -0.5.
BINER_COLS = ['gender_numeric', 'on_mechvent', 'is_readmit']

# Treated as roughly normal: plain z-score.
NORMAL_COLS = [
    'age', 'Weight_kg', 'GCS', 'HR', 'SysBP', 'DiaBP', 'MeanBP',
    'RR', 'Temp_C', 'FiO2', 'Potassium', 'Sodium', 'Chloride',
    'Glucose', 'Magnesium', 'Calcium', 'Hb', 'WBC_Count', 'Platelets_count',
    'PTT', 'PT', 'Arterial_pH', 'paO2', 'paCO2', 'Arterial_BE', 'HCO3',
    'Lactate', 'PaO2_FiO2', 'SOFA', 'SIRS', 'Shock_Index',
    'cumulated_balance', 'elixhauser', 'Albumin', 'HCT',
    'Ionized Calcium'
]

# Treated as log-normal: log(1 + x) first, then z-score.
LOG_NORMAL_COLS = [
    'SpO2', 'BUN', 'Creatinine', 'SGOT(AST)', 'SGPT(ALT)', 'Total_bili', 'INR',
    'uo_total', 'input_total', 'max_dose_vaso'
]

ALL_FEATURE_COLS = BINER_COLS + NORMAL_COLS + LOG_NORMAL_COLS
print(f"Total Fitur Klinis yang akan dinormalisasi: {len(ALL_FEATURE_COLS)}")

# Normalize copies; the un-normalized splits stay available.
train_normalized = train.copy()
val_normalized = val.copy()
test_normalized = test.copy()
normalized_splits = [train_normalized, val_normalized, test_normalized]

# ----------------------------------------------------------------------
# 1.a. BINARY FEATURES: shift {0, 1} to {-0.5, +0.5}
# ----------------------------------------------------------------------
for df in normalized_splits:
    for col in BINER_COLS:
        if col not in df.columns:
            continue
        df[col] = df[col] - 0.5

# ----------------------------------------------------------------------
# 1.b. NORMAL FEATURES: z-score with statistics from the train split only
# ----------------------------------------------------------------------
scaler_normal = StandardScaler()
normal_features_found = [name for name in NORMAL_COLS if name in train.columns]

if normal_features_found:
    scaler_normal.fit(train_normalized[normal_features_found])
    for df in normalized_splits:
        df[normal_features_found] = scaler_normal.transform(df[normal_features_found])

# ----------------------------------------------------------------------
# 1.c. LOG-NORMAL FEATURES: log(1 + x), then z-score fitted on train only
# ----------------------------------------------------------------------
scaler_log = StandardScaler()
log_normal_features_found = [name for name in LOG_NORMAL_COLS if name in train.columns]

if log_normal_features_found:
    log_cols = [name + '_log' for name in log_normal_features_found]

    def _log1p_features(frame):
        """Return log(1 + x) of the log-normal columns of `frame`, with
        negatives clipped to 0 and columns renamed to their '_log' names."""
        logged = np.log1p(frame[log_normal_features_found].clip(lower=0))
        logged.columns = log_cols
        return logged

    # Fit on the log-transformed train split only.
    scaler_log.fit(_log1p_features(train_normalized))

    # Transform every split and write the result over the original columns.
    for df in normalized_splits:
        df[log_normal_features_found] = scaler_log.transform(_log1p_features(df))

print("Normalisasi Fitur State pada Train (Fit), Val, dan Test (Transform) selesai.")

Total Fitur Klinis yang akan dinormalisasi: 49
Normalisasi Fitur State pada Train (Fit), Val, dan Test (Transform) selesai.


In [None]:
# Columns shown in the previews: two normalized state features, the reward
# and action, plus the stay/time keys.
display_cols = ['SOFA', 'Lactate', 'reward_t', 'action_t']
preview_cols = display_cols + ['icustay_id', 'bloc']

normalized_by_label = (
    ("Train", train_normalized),
    ("Validasi", val_normalized),
    ("Uji", test_normalized),
)

print("--- Pemeriksaan Jumlah Transisi ---")
for label, frame in normalized_by_label:
    print(f"Transisi {label}: {len(frame):,}")

# First 10 rows of each split.
for label, frame in normalized_by_label:
    print(f"\n--- Contoh Transisi {label} (Normalisasi) ---")
    print(frame[preview_cols].head(10))

--- Pemeriksaan Jumlah Transisi ---
Transisi Train: 291,620
Transisi Validasi: 36,598
Transisi Uji: 36,345

--- Contoh Transisi Train (Normalisasi) ---
        SOFA  Lactate  reward_t  action_t  icustay_id  bloc
20 -0.476977 -0.61226      -0.1         0      200003     1
21 -0.476977 -0.61226      -0.1         0      200003     2
22 -0.476977 -0.61226      -0.1         0      200003     3
23 -0.476977 -0.61226      -0.1         0      200003     4
24 -0.476977 -0.61226      -0.1         0      200003     5
25 -0.476977 -0.61226      -0.1         0      200003     6
26 -0.476977 -0.61226      -0.1         0      200003     7
27 -0.476977 -0.61226      -0.1         0      200003     8
28 -0.476977 -0.61226      -0.1         0      200003     9
29 -0.476977 -0.61226      -0.1         0      200003    10

--- Contoh Transisi Validasi (Normalisasi) ---
       SOFA  Lactate  reward_t  action_t  icustay_id  bloc
0 -0.476977 -0.61226      -0.1         0      200001     1
1 -0.476977 -0.61226  

In [None]:
# Columns that make up the state vector. The order defines the layout of
# every state array built from this list, so it must not be changed.
STATE_FEATURES = [
    'age', 'Weight_kg', 'GCS', 'HR', 'SysBP',
    'DiaBP', 'MeanBP', 'RR', 'Temp_C', 'FiO2',
    'Potassium', 'Sodium', 'Chloride', 'is_readmit', 'Glucose',
    'Magnesium', 'Calcium', 'Hb', 'WBC_Count', 'Platelets_count',
    'PTT', 'PT', 'Arterial_pH', 'paO2', 'paCO2',
    'Arterial_BE', 'HCO3', 'Lactate', 'PaO2_FiO2', 'SOFA',
    'SIRS', 'Shock_Index', 'Total_bili', 'INR', 'cumulated_balance',
    'elixhauser', 'Albumin', 'HCT', 'uo_total', 'Ionized Calcium',
    'gender_numeric', 'on_mechvent', 'input_total', 'SpO2', 'BUN',
    'Creatinine', 'SGOT(AST)', 'SGPT(ALT)', 'max_dose_vaso',
]

# Joint discrete action (0-24).
ACTION_COL = 'action_t'
# Per-transition reward.
REWARD_COL = 'reward_t'
# In-hospital mortality outcome column.
TERMINAL_COL = 'morta_hosp'

In [None]:
# Uses STATE_FEATURES, ACTION_COL and REWARD_COL defined in an earlier cell.

df_buffer_train = train_normalized.copy()

# ---------------------------------------------------------------------------------
# 2.1. NEXT STATE (s_t+1) AND DONE FLAG
# ---------------------------------------------------------------------------------
# Order by stay and time block so shift(-1) within a stay is the next block.
df_buffer_train.sort_values(by=['icustay_id', 'bloc'], inplace=True)

# A row is terminal when it has no next-step SOFA. This must be read BEFORE
# the '<feature>_t_plus_1' columns are rebuilt below, because the existing
# 'SOFA_t_plus_1' column is replaced there.
is_terminal_t = df_buffer_train['SOFA_t_plus_1'].isna()
df_buffer_train['done'] = is_terminal_t.astype(int)

# Next-step value of every (normalized) state feature, computed in a single
# grouped shift; missing next states (terminal rows) are set to 0.
state_t_plus_1_features = [f'{f}_t_plus_1' for f in STATE_FEATURES]
next_state_frame = (
    df_buffer_train.groupby('icustay_id')[STATE_FEATURES].shift(-1).fillna(0.0)
)
next_state_frame.columns = state_t_plus_1_features

# Some '<feature>_t_plus_1' columns already exist in the frame (e.g.
# 'SOFA_t_plus_1'); drop them first so the concat does not create
# duplicate column names.
df_buffer_train = pd.concat(
    [df_buffer_train.drop(columns=state_t_plus_1_features, errors='ignore'), next_state_frame],
    axis=1,
)

# ---------------------------------------------------------------------------------
# 2.2. REPLAY BUFFER
# ---------------------------------------------------------------------------------
df_valid_transitions_train = df_buffer_train.dropna(subset=[REWARD_COL]).copy()

# Extract whole columns at once instead of iterating with iterrows():
# iterrows() is very slow at this size and does not preserve dtypes across
# a row, so the type of the action/done entries depended on other columns.
states_t = df_valid_transitions_train[STATE_FEATURES].to_numpy(dtype=np.float32)
states_t_plus_1 = df_valid_transitions_train[state_t_plus_1_features].to_numpy(dtype=np.float32)
actions = df_valid_transitions_train[ACTION_COL].tolist()
rewards = df_valid_transitions_train[REWARD_COL].tolist()
dones = df_valid_transitions_train['done'].tolist()

# One (s_t, a_t, r_t, s_t+1, done) tuple per transition. The state entries
# are row views into the two float32 matrices above.
train_replay_buffer = list(zip(states_t, actions, rewards, states_t_plus_1, dones))

print(f"Replay Buffer (Train set) berhasil dibuat dengan {len(train_replay_buffer)} transisi.")
print("Contoh transisi pertama:")
print(train_replay_buffer[0])

Replay Buffer (Train set) berhasil dibuat dengan 291620 transisi.
Contoh transisi pertama:
(array([-1.0552425 ,  0.41940925,  0.73408675,  0.63343686,  2.0226371 ,
        0.09836235,  1.5263258 , -0.9022007 , -0.97370785,  0.84936523,
        0.87478054,  0.6527905 , -2.8015325 , -0.5       , -0.56807387,
        0.55481297, -0.43569317,  0.60048145,  0.6732273 ,  0.502724  ,
       -0.28537005, -0.9156012 , -1.4919697 ,  0.60813826,  1.3910626 ,
       -0.0371716 , -1.0361718 , -0.61225957, -0.1019327 , -0.4769771 ,
        1.0207901 , -0.7966858 , -0.56118613, -0.30165884, -0.16676067,
       -1.9019794 ,  0.70613277,  1.1857983 , -1.1779331 , -2.288429  ,
       -0.5       ,  0.5       , -1.1675899 ,  0.03933048,  0.380334  ,
        0.81809944, -1.1970271 , -0.944683  , -0.20760158], dtype=float32), 0, -0.1, array([-1.0552425 ,  0.41940925,  0.73408675,  0.63343686,  2.0226371 ,
        0.09836235,  1.5263258 , -0.9022007 , -0.97370785,  0.84936523,
        0.87478054,  0.6527905 

In [None]:
# Uses STATE_FEATURES, ACTION_COL and REWARD_COL defined in an earlier cell.

df_buffer_val = val_normalized.copy()

# ---------------------------------------------------------------------------------
# 3.1. NEXT STATE (s_t+1) AND DONE FLAG
# ---------------------------------------------------------------------------------
# Order by stay and time block so shift(-1) within a stay is the next block.
df_buffer_val.sort_values(by=['icustay_id', 'bloc'], inplace=True)

# A row is terminal when it has no next-step SOFA. This must be read BEFORE
# the '<feature>_t_plus_1' columns are rebuilt below, because the existing
# 'SOFA_t_plus_1' column is replaced there.
is_terminal_t = df_buffer_val['SOFA_t_plus_1'].isna()
df_buffer_val['done'] = is_terminal_t.astype(int)

# Next-step value of every (normalized) state feature, computed in a single
# grouped shift; missing next states (terminal rows) are set to 0.
state_t_plus_1_features = [f'{f}_t_plus_1' for f in STATE_FEATURES]
next_state_frame = (
    df_buffer_val.groupby('icustay_id')[STATE_FEATURES].shift(-1).fillna(0.0)
)
next_state_frame.columns = state_t_plus_1_features

# Some '<feature>_t_plus_1' columns already exist in the frame (e.g.
# 'SOFA_t_plus_1'); drop them first so the concat does not create
# duplicate column names.
df_buffer_val = pd.concat(
    [df_buffer_val.drop(columns=state_t_plus_1_features, errors='ignore'), next_state_frame],
    axis=1,
)

# ---------------------------------------------------------------------------------
# 3.2. REPLAY BUFFER
# ---------------------------------------------------------------------------------
df_valid_transitions_val = df_buffer_val.dropna(subset=[REWARD_COL]).copy()

# Extract whole columns at once instead of iterating with iterrows():
# iterrows() is slow and does not preserve dtypes across a row, so the type
# of the action/done entries depended on other columns.
states_t = df_valid_transitions_val[STATE_FEATURES].to_numpy(dtype=np.float32)
states_t_plus_1 = df_valid_transitions_val[state_t_plus_1_features].to_numpy(dtype=np.float32)
actions = df_valid_transitions_val[ACTION_COL].tolist()
rewards = df_valid_transitions_val[REWARD_COL].tolist()
dones = df_valid_transitions_val['done'].tolist()

# One (s_t, a_t, r_t, s_t+1, done) tuple per transition. The state entries
# are row views into the two float32 matrices above.
val_replay_buffer = list(zip(states_t, actions, rewards, states_t_plus_1, dones))

print(f"Replay Buffer (Val set) berhasil dibuat dengan {len(val_replay_buffer)} transisi.")
print("Contoh transisi pertama:")
print(val_replay_buffer[0])

Replay Buffer (Val set) berhasil dibuat dengan 36598 transisi.
Contoh transisi pertama:
(array([-0.25695565,  0.41940925,  0.73408675,  0.63343686,  2.0226371 ,
        0.09836235,  1.5263258 , -0.9022007 , -0.97370785,  0.84936523,
        0.87478054,  0.6527905 , -2.8015325 ,  0.5       , -0.56807387,
        0.55481297, -0.43569317,  0.60048145,  0.6732273 ,  0.502724  ,
       -0.28537005, -0.9156012 , -1.4919697 ,  0.60813826,  1.3910626 ,
       -0.0371716 , -1.0361718 , -0.61225957, -0.1019327 , -0.4769771 ,
        1.0207901 , -0.7966858 , -0.56118613, -0.30165884, -0.16676067,
        1.3597721 ,  0.70613277,  1.1857983 , -1.1779331 , -2.288429  ,
        0.5       , -0.5       , -1.1675899 ,  0.03933048,  0.380334  ,
        0.81809944, -1.1970271 , -0.944683  , -0.20760158], dtype=float32), 0, -0.1, array([-0.25695565,  0.41940925,  0.73408675,  0.63343686,  2.0226371 ,
        0.09836235,  1.5263258 , -0.9022007 , -0.97370785,  0.84936523,
        0.87478054,  0.6527905 , -

In [None]:
# Build the test replay buffer: a list of (s_t, a_t, r_t, s_{t+1}, done) tuples.
# Relies on names defined in earlier cells: test_normalized, STATE_FEATURES,
# ACTION_COL and REWARD_COL.

# ---------------------------------------------------------------------------------
# 3.1. NEXT STATE (s_t+1) AND DONE FLAG
# ---------------------------------------------------------------------------------
# sort_values returns a new frame, so test_normalized is left untouched. Rows are
# ordered by stay and time block so the per-stay shift below picks the next block.
df_buffer_test = test_normalized.sort_values(by=['icustay_id', 'bloc'])

# A row is terminal when its SOFA_t_plus_1 value is missing. This has to be read
# before the loop below, which overwrites that column if 'SOFA' is a state feature.
is_terminal_t = df_buffer_test['SOFA_t_plus_1'].isna()
df_buffer_test['done'] = is_terminal_t.astype(int)

# Next-state columns: shift the (already normalised) features one row up within
# each stay. The last row of a stay has no successor, so its NaNs become 0.0.
state_t_plus_1_features = [f'{f}_t_plus_1' for f in STATE_FEATURES]
for feature in STATE_FEATURES:
    df_buffer_test[f'{feature}_t_plus_1'] = (
        df_buffer_test.groupby('icustay_id')[feature].shift(-1).fillna(0.0)
    )

# ---------------------------------------------------------------------------------
# 3.2. REPLAY BUFFER CONSTRUCTION
# ---------------------------------------------------------------------------------
# Transitions without a reward are not usable and are dropped.
df_valid_transitions_test = df_buffer_test.dropna(subset=[REWARD_COL]).copy()

# Extract whole columns at once instead of iterating with iterrows(): one float32
# matrix per state block, then zip the rows together with the scalar columns.
states_test = df_valid_transitions_test[STATE_FEATURES].to_numpy(dtype=np.float32)
next_states_test = df_valid_transitions_test[state_t_plus_1_features].to_numpy(dtype=np.float32)

# .tolist() yields native Python scalars for action / reward / done.
test_replay_buffer = list(zip(
    states_test,
    df_valid_transitions_test[ACTION_COL].tolist(),
    df_valid_transitions_test[REWARD_COL].tolist(),
    next_states_test,
    df_valid_transitions_test['done'].tolist(),
))

# The message previously said "Val set" (copy-paste slip) although this is the test buffer.
print(f"Replay Buffer (Test set) berhasil dibuat dengan {len(test_replay_buffer)} transisi.")
# Indexing [0] on an empty buffer would raise IndexError, so only show a sample if one exists.
if test_replay_buffer:
    print("Contoh transisi pertama:")
    print(test_replay_buffer[0])

Replay Buffer (Val set) berhasil dibuat dengan 36345 transisi.
Contoh transisi pertama:
(array([-2.529003  ,  0.41940925,  0.73408675,  0.63343686,  2.0226371 ,
        0.09836235,  1.5263258 , -0.9022007 , -0.97370785,  0.84936523,
        0.87478054,  0.6527905 , -2.8015325 , -0.5       , -0.56807387,
        0.55481297, -0.43569317,  0.60048145,  0.6732273 ,  0.502724  ,
       -0.28537005, -0.9156012 , -1.4919697 ,  0.60813826,  1.3910626 ,
       -0.0371716 , -1.0361718 , -0.61225957, -0.1019327 , -0.4769771 ,
        1.0207901 , -0.7966858 , -0.56118613, -0.30165884, -0.849193  ,
       -1.436015  ,  0.70613277,  1.1857983 ,  1.1690521 , -2.288429  ,
       -0.5       ,  0.5       ,  0.99181944,  0.03933048,  0.380334  ,
        0.81809944, -1.1970271 , -0.944683  , -0.20760158], dtype=float32), 15, 7.5, array([-2.529003  ,  0.41940925,  0.73408675,  0.63343686,  2.0226371 ,
        0.09836235,  1.5263258 , -0.9022007 , -0.97370785,  0.84936523,
        0.87478054,  0.6527905 , -

In [None]:
# Export every replay buffer to data_dir as five .npy files per split:
# <split>_states, <split>_actions, <split>_rewards, <split>_next_states, <split>_dones.

# Create the output directory only when it is missing, and report which case applied.
if os.path.exists(data_dir):
    print(f"Folder penyimpanan NumPy sudah ada: {data_dir}")
else:
    os.makedirs(data_dir)
    print(f"Folder penyimpanan NumPy dibuat: {data_dir}")

# Buffers to export, keyed by the prefix used in the output file names.
buffers_map = {
    "train": train_replay_buffer,
    "val": val_replay_buffer,
    "test": test_replay_buffer
}

# dtype of each transition component, in tuple order:
# (state, action, reward, next_state, done)
component_dtypes = (np.float32, np.int32, np.float32, np.float32, np.int32)

for name, buffer_obj in buffers_map.items():
    print(f"\n--- Memproses {name.upper()} Buffer ({len(buffer_obj):,} transisi) ---")

    # 1. Split the list of transition tuples into one NumPy array per component.
    #    All five arrays are built before anything is written to disk.
    states, actions, rewards, next_states, dones = [
        np.array([transition[position] for transition in buffer_obj], dtype=dtype)
        for position, dtype in enumerate(component_dtypes)
    ]

    # 2. Write each component to its own <name>_<suffix>.npy file.
    named_components = (
        ("states", states),
        ("actions", actions),
        ("rewards", rewards),
        ("next_states", next_states),
        ("dones", dones),
    )
    for suffix, component in named_components:
        np.save(os.path.join(data_dir, f'{name}_{suffix}.npy'), component)

    print(f"Komponen {name.upper()} Buffer berhasil disimpan sebagai .npy")

Folder penyimpanan NumPy sudah ada: /content/drive/MyDrive/sepsis-mdp

--- Memproses TRAIN Buffer (291,620 transisi) ---
Komponen TRAIN Buffer berhasil disimpan sebagai .npy

--- Memproses VAL Buffer (36,598 transisi) ---
Komponen VAL Buffer berhasil disimpan sebagai .npy

--- Memproses TEST Buffer (36,345 transisi) ---
Komponen TEST Buffer berhasil disimpan sebagai .npy


In [None]:
# Sanity check of the exported training arrays: reload them from data_dir and
# print their shape, dtype and a small sample.

# --- 1. Load the NumPy arrays ---
train_states, train_actions = (
    np.load(os.path.join(data_dir, file_name))
    for file_name in ('train_states.npy', 'train_actions.npy')
)

# Summary of the state matrix (rows = transitions, columns = state features).
print("\n".join([
    "--- INFORMASI ARRAY NUMPY TRAIN STATES ---",
    f"Dimensi (Shape): {train_states.shape}",
    f"Tipe Data (Dtype): {train_states.dtype}",
    f"Total Elemen: {train_states.size:,}",
    f"Jumlah Transisi (Baris): {train_states.shape[0]:,}",
    f"Jumlah Fitur State (Kolom): {train_states.shape[1]}",
]))

# --- 2. Print selected rows / columns (sampling) ---

# First five transitions with every feature column.
first_five_states = train_states[:5, :]
print("\n--- 5 Transisi Pertama (State) ---", first_five_states, sep="\n")

# Same five rows restricted to columns 45 onward, i.e. indices 45-48 when the
# array has 49 columns. The header below labels index 48 as the vasopressor
# column -- NOTE(review): confirm against the order of STATE_FEATURES.
print(
    "\n--- Kolom Vasopressor (Indeks 48) dan Kolom Terdahulu ---",
    first_five_states[:, 45:],
    sep="\n",
)

# --- 3. Print the action array (discrete action indices) ---
print("\n--- 10 Actions Pertama ---", train_actions[:10], sep="\n")

--- INFORMASI ARRAY NUMPY TRAIN STATES ---
Dimensi (Shape): (291620, 49)
Tipe Data (Dtype): float32
Total Elemen: 14,289,380
Jumlah Transisi (Baris): 291,620
Jumlah Fitur State (Kolom): 49

--- 5 Transisi Pertama (State) ---
[[-1.0552425   0.41940925  0.73408675  0.63343686  2.0226371   0.09836235
   1.5263258  -0.9022007  -0.97370785  0.84936523  0.87478054  0.6527905
  -2.8015325  -0.5        -0.56807387  0.55481297 -0.43569317  0.60048145
   0.6732273   0.502724   -0.28537005 -0.9156012  -1.4919697   0.60813826
   1.3910626  -0.0371716  -1.0361718  -0.61225957 -0.1019327  -0.4769771
   1.0207901  -0.7966858  -0.56118613 -0.30165884 -0.16676067 -1.9019794
   0.70613277  1.1857983  -1.1779331  -2.288429   -0.5         0.5
  -1.1675899   0.03933048  0.380334    0.81809944 -1.1970271  -0.944683
  -0.20760158]
 [-1.0552425   0.41940925  0.73408675  0.63343686  2.0226371   0.09836235
   1.5263258  -0.9022007  -0.97370785  0.84936523  0.87478054  0.6527905
  -2.8015325  -0.5        -0.5680