In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import joblib

# Reward thresholds. These are fixed literals taken from the reference values
# noted by the author; nothing in this cell derives them from the data.
SLA_DELAY_P4 = 10.0          # ms   (standard URLLC healthcare)
TARGET_THROUGHPUT_P2 = 15.0  # Mbps (eMBB target)

# State feature configuration: columns that make up the state vector, grouped by port.
# The order here is the order of the values inside each exported state.
state_features = [
    # Port 1
    'rx_mbps_p1',
    'util_rx_pct_p1',
    'policing_rate_kbps_p1',
    # Port 2
    'rx_mbps_p2',
    'util_rx_pct_p2',
    'policing_rate_kbps_p2',
    # Port 4 (the only port that also contributes its delay)
    'rx_mbps_p4',
    'util_rx_pct_p4',
    'delay_ms_p4',
    'policing_rate_kbps_p4',
]

# Load the augmented dataset.
df = pd.read_csv('synthetic_15k_complete_final.csv')

print(f"Dataset dimuat: {len(df)} baris. Memulai kalkulasi Reward & Penalty...")

Dataset dimuat: 15000 baris. Memulai kalkulasi Reward & Penalty...


In [2]:
def sigmoid_penalty(val, threshold, k=1.5):
    """Smooth penalty in [0, 1] that rises as ``val`` approaches/exceeds ``threshold``.

    Computes the logistic function ``1 / (1 + exp(-k * (val - threshold)))``:
    0.5 exactly at the threshold, tending to 1 (maximum penalty) above it and
    to 0 below it.

    Args:
        val: Measured value; a scalar or anything NumPy ufuncs accept
            (array, pandas Series).
        threshold: Value at which the penalty equals 0.5.
        k: Steepness of the transition around the threshold.

    Returns:
        The penalty, with the same shape as ``val`` (NumPy scalar for scalars).
    """
    z = k * (val - threshold)
    # Stable form of the logistic: 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z))).
    # np.logaddexp evaluates log(exp(0) + exp(-z)) without ever forming
    # exp(-z) directly, so values far below the threshold no longer overflow.
    return np.exp(-np.logaddexp(0.0, -z))

def calculate_sdh_reward(row):
    """Priority-aware reward for one sample, as a weighted sum of three terms.

    Args:
        row: Record indexable by column name. Reads 'delay_ms_p4',
            'rx_mbps_p2', 'drop_p1', 'drop_p2' and 'drop_p4'.

    Returns:
        ``0.4 * throughput_score - 0.6 * latency_penalty - 0.2 * drop_penalty``.
    """
    # URLLC (port 4, high priority): sigmoid penalty so the value already
    # grows while the delay is still approaching the SLA limit.
    latency_penalty = sigmoid_penalty(row['delay_ms_p4'], threshold=SLA_DELAY_P4)

    # eMBB (port 2): throughput as a fraction of the target, capped to [0, 1].
    throughput_score = np.clip(row['rx_mbps_p2'] / TARGET_THROUGHPUT_P2, 0, 1)

    # Global reliability: drops summed over ports 1, 2 and 4, scaled by 50
    # and capped to [0, 1].
    dropped = row['drop_p1'] + row['drop_p2'] + row['drop_p4']
    drop_penalty = np.clip(dropped / 50.0, 0, 1)

    # Weighted sum; the port-4 latency term carries the largest weight (0.6)
    # and both penalties enter with a negative sign.
    gain = 0.4 * throughput_score
    latency_cost = 0.6 * latency_penalty
    drop_cost = 0.2 * drop_penalty
    return gain - latency_cost - drop_cost

In [3]:
# 1. Normalise the state features (z-score). The fitted scaler is saved at the
#    end of this cell so the same transform can be reused later.
scaler = StandardScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df[state_features]), columns=state_features)

# Relative rate change (as a fraction) inside which the discrete action is "maintain".
ACTION_DEADBAND = 0.05

# Column order of the exported transition table; passing it explicitly keeps
# the CSV header intact even when no transition could be built (< 2 rows).
TRANSITION_COLUMNS = ['state', 'action_continuous', 'action_discrete', 'reward', 'next_state']


def relative_rate_change(rate_now, rate_next):
    """Return the change from ``rate_now`` to ``rate_next`` as a fraction of ``rate_now``.

    A zero ``rate_now`` has no defined relative change; plain division would
    produce inf/NaN and leak it into the training data. In that case the sign
    of the change is returned instead (+1.0, 0.0 or -1.0). This keeps the value
    finite and maps to the same discrete action the unguarded division gave
    (0 -> 0 stays "maintain", 0 -> positive stays "increase").
    NOTE(review): +/-1.0 is a chosen stand-in for "unbounded change" - confirm
    it suits the continuous-action agent.
    """
    delta = rate_next - rate_now
    if rate_now == 0:
        return float(np.sign(delta))
    return float(delta / rate_now)


# Pull the per-row inputs out once instead of calling .iloc twice per iteration.
scaled_states = df_scaled.to_numpy()
rates_p4 = df['policing_rate_kbps_p4'].to_numpy(dtype=float)

# 2. Build (state, action, reward, next_state) tuples from consecutive rows.
drl_data = []
for i in range(len(df) - 1):
    # Action: change of the port-4 policing rate between row i and row i+1.
    action_cont = relative_rate_change(rates_p4[i], rates_p4[i + 1])

    # Discrete action mapping (for DQN): 1 = increase, 2 = decrease, 0 = maintain.
    if action_cont > ACTION_DEADBAND:
        action_disc = 1
    elif action_cont < -ACTION_DEADBAND:
        action_disc = 2
    else:
        action_disc = 0

    # Reward is evaluated on the destination row (i+1).
    reward = calculate_sdh_reward(df.iloc[i + 1])

    drl_data.append({
        'state': scaled_states[i].tolist(),
        'action_continuous': action_cont,
        'action_discrete': action_disc,
        'reward': reward,
        'next_state': scaled_states[i + 1].tolist()
    })

# 3. Save the ready-to-use dataset and the fitted scaler.
# Note: the list-valued state columns are written to CSV as their string form.
df_drl_ready = pd.DataFrame(drl_data, columns=TRANSITION_COLUMNS)
df_drl_ready.to_csv('drl_preprocessed_final.csv', index=False)
joblib.dump(scaler, 'master_scaler.pkl')

print("Preprocessing Selesai! File 'drl_preprocessed_final.csv' siap untuk training.")

Preprocessing Selesai! File 'drl_preprocessed_final.csv' siap untuk training.
