In [1]:
# Mount Google Drive so the dataset directory below is reachable from Colab.
from google.colab import drive
drive.mount('/content/drive')

# 1. Libraries and path setup
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler  # NOTE(review): not used in the cells visible here

# Google Drive path (adjust as needed)
BASE_PATH = "/content/drive/MyDrive/ML_Dataset"

# 2. Load the feature files
# Monitored set: three feature groups plus one label per sample.
X_burst_mon = np.load(f"{BASE_PATH}/features_burst_mon.npy")
X_cum_mon   = np.load(f"{BASE_PATH}/features_cum_mon.npy")
X_inout_mon = np.load(f"{BASE_PATH}/features_inout_mon.npy")
y_mon       = np.load(f"{BASE_PATH}/labels_mon.npy")

# Unmonitored set: the same three feature groups, no label file.
X_burst_unmon = np.load(f"{BASE_PATH}/features_burst_unmon.npy")
X_cum_unmon   = np.load(f"{BASE_PATH}/features_cum_unmon.npy")
X_inout_unmon = np.load(f"{BASE_PATH}/features_inout_unmon.npy")

# Fail fast on misaligned inputs. Later cells concatenate the feature groups
# column-wise and stack monitored on top of unmonitored, so row counts must
# agree within each set and feature widths must agree across the two sets.
_mon_parts = {"burst": X_burst_mon, "cum": X_cum_mon, "inout": X_inout_mon}
_unmon_parts = {"burst": X_burst_unmon, "cum": X_cum_unmon, "inout": X_inout_unmon}

_mon_rows = {arr.shape[0] for arr in _mon_parts.values()} | {y_mon.shape[0]}
if len(_mon_rows) != 1:
    raise ValueError(
        "Monitored arrays have inconsistent row counts: "
        f"{ {name: arr.shape for name, arr in _mon_parts.items()} }, labels {y_mon.shape}"
    )

_unmon_rows = {arr.shape[0] for arr in _unmon_parts.values()}
if len(_unmon_rows) != 1:
    raise ValueError(
        "Unmonitored arrays have inconsistent row counts: "
        f"{ {name: arr.shape for name, arr in _unmon_parts.items()} }"
    )

for _name in _mon_parts:
    if _mon_parts[_name].shape[1:] != _unmon_parts[_name].shape[1:]:
        raise ValueError(
            f"Feature width mismatch for '{_name}': monitored "
            f"{_mon_parts[_name].shape} vs unmonitored {_unmon_parts[_name].shape}"
        )

print("Loaded all feature files successfully!\n")
print("Monitored shapes:", X_burst_mon.shape, X_cum_mon.shape, X_inout_mon.shape)
print("Unmonitored shapes:", X_burst_unmon.shape, X_cum_unmon.shape, X_inout_unmon.shape)

Mounted at /content/drive
Loaded all feature files successfully!

Monitored shapes: (19000, 11) (19000, 10) (19000, 4)
Unmonitored shapes: (3000, 11) (3000, 10) (3000, 4)


In [2]:
# =========================================
# ✅ Closed-world preprocessing
# =========================================
# Closed-world setting: only monitored samples are used. The three monitored
# feature groups are joined column-wise, split 75/25 with stratification on
# the labels, and the four resulting arrays are written under BASE_PATH.

# Column-wise merge of the monitored feature groups
closed_world_parts = (X_burst_mon, X_cum_mon, X_inout_mon)
X_mon = np.concatenate(closed_world_parts, axis=1)
print("✅ Monitored feature shape:", X_mon.shape)

# Stratified train/test split with a fixed seed for reproducibility
X_train_cw, X_test_cw, y_train_cw, y_test_cw = train_test_split(
    X_mon,
    y_mon,
    test_size=0.25,
    random_state=42,
    stratify=y_mon,
)

# Persist each split as <BASE_PATH>/<name>.npy, in the same order as before
for split_name, split_array in (
    ("X_train_cw", X_train_cw),
    ("X_test_cw", X_test_cw),
    ("y_train_cw", y_train_cw),
    ("y_test_cw", y_test_cw),
):
    np.save(f"{BASE_PATH}/{split_name}.npy", split_array)

print("✅ Closed-world dataset saved successfully!")
print(f"Train shape: {X_train_cw.shape}, Test shape: {X_test_cw.shape}")


✅ Monitored feature shape: (19000, 25)
✅ Closed-world dataset saved successfully!
Train shape: (14250, 25), Test shape: (4750, 25)


In [3]:
# =========================================
# ✅ Open-world Binary preprocessing
# =========================================
# Builds a two-class dataset (monitored vs. unmonitored), splits it 75/25 with
# stratification on the binary label, and saves the four arrays under BASE_PATH.


# Merge features
X_mon    = np.concatenate([X_burst_mon, X_cum_mon, X_inout_mon], axis=1)
X_unmon  = np.concatenate([X_burst_unmon, X_cum_unmon, X_inout_unmon], axis=1)

# Label assignment: monitored=1, unmonitored=-1
# The binary labels get their own *_bin names. Rebinding the global `y_mon`
# here would replace the labels loaded from labels_mon.npy with all ones and
# silently corrupt every later cell that reads `y_mon`.
y_mon_bin    = np.ones(len(X_mon))
y_unmon_bin  = -1 * np.ones(len(X_unmon))

# Combine all
X_all = np.vstack([X_mon, X_unmon])
y_all = np.concatenate([y_mon_bin, y_unmon_bin])

print(f"Combined shape: {X_all.shape}, Label shape: {y_all.shape}")

# Split
X_train_bin, X_test_bin, y_train_bin, y_test_bin = train_test_split(
    X_all, y_all, test_size=0.25, stratify=y_all, random_state=42
)

# Save
np.save(f"{BASE_PATH}/X_train_bin.npy", X_train_bin)
np.save(f"{BASE_PATH}/X_test_bin.npy",  X_test_bin)
np.save(f"{BASE_PATH}/y_train_bin.npy", y_train_bin)
np.save(f"{BASE_PATH}/y_test_bin.npy",  y_test_bin)

print("✅ Open-world Binary dataset saved successfully!")
print(f"Train shape: {X_train_bin.shape}, Test shape: {X_test_bin.shape}")


Combined shape: (22000, 25), Label shape: (22000,)
✅ Open-world Binary dataset saved successfully!
Train shape: (16500, 25), Test shape: (5500, 25)


In [4]:
# =========================================
# ✅ Open-world Multi-class preprocessing
# =========================================
# Builds a dataset where monitored samples keep their own class label and all
# unmonitored samples share the label -1, splits it 75/25 with stratification,
# and saves the four arrays under BASE_PATH.


# Merge features
X_mon    = np.concatenate([X_burst_mon, X_cum_mon, X_inout_mon], axis=1)
X_unmon  = np.concatenate([X_burst_unmon, X_cum_unmon, X_inout_unmon], axis=1)

# Label assignment: monitored keep their class ids, unmonitored -1
# (the original note says the monitored ids are 0–94 — TODO confirm).
# The monitored labels are reloaded from disk so this cell does not depend on
# whatever `y_mon` currently holds in the kernel; another cell may have
# rebound it to binary labels, which would turn this into a two-class dataset.
y_mon_mc = np.load(f"{BASE_PATH}/labels_mon.npy")
if len(y_mon_mc) != len(X_mon):
    raise ValueError(
        f"labels_mon.npy has {len(y_mon_mc)} labels but there are "
        f"{len(X_mon)} monitored samples"
    )
if np.any(y_mon_mc == -1):
    # -1 is reserved for the unmonitored class below; a collision would merge classes.
    raise ValueError("Monitored labels already contain -1, which is reserved for unmonitored")
y_unmon = -1 * np.ones(len(X_unmon))

# Combine all
X_all = np.vstack([X_mon, X_unmon])
y_all = np.concatenate([y_mon_mc, y_unmon])

print(f"Combined shape: {X_all.shape}, Label shape: {y_all.shape}")

# Split
X_train_mc, X_test_mc, y_train_mc, y_test_mc = train_test_split(
    X_all, y_all, test_size=0.25, stratify=y_all, random_state=42
)

# Save
np.save(f"{BASE_PATH}/X_train_mc.npy", X_train_mc)
np.save(f"{BASE_PATH}/X_test_mc.npy",  X_test_mc)
np.save(f"{BASE_PATH}/y_train_mc.npy", y_train_mc)
np.save(f"{BASE_PATH}/y_test_mc.npy",  y_test_mc)

print("✅ Open-world Multi-class dataset saved successfully!")
print(f"Train shape: {X_train_mc.shape}, Test shape: {X_test_mc.shape}")


Combined shape: (22000, 25), Label shape: (22000,)
✅ Open-world Multi-class dataset saved successfully!
Train shape: (16500, 25), Test shape: (5500, 25)
