In [None]:
# Contrastive Learning + Zero-Day Detection for IoMT IDS (Memperbaiki SPOOFING)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Input
from pathlib import Path
from collections import Counter
import seaborn as sns

In [None]:
# === 1. Load Combined Dataset ===
# `df` is expected to exist already, built by an earlier stage of the notebook.
# Fail early if the two columns this cell relies on are missing.
if 'class' not in df.columns or 'attack' not in df.columns:
    raise ValueError("Dataset harus memiliki kolom 'class' dan 'attack' sebelum menjalankan skrip ini.")

# Normalise the text columns (cast to str, trim whitespace, upper-case) so the
# comparisons below do not depend on casing or stray spaces.
# NOTE: this overwrites the two columns of `df` in place.
df['attack'] = df['attack'].astype(str).str.strip().str.upper()
df['class'] = df['class'].astype(str).str.strip().str.upper()

# === Separate the BENIGN and SPOOFING rows ===
benign_df = df[df['class'] == 'BENIGN'].copy()
spoofing_df = df[df['attack'] == 'SPOOFING'].copy()

# Draw a subset of the other (non-spoofing) attacks to contrast against benign.
# The sample size is capped at the number of available rows: asking `sample`
# for more rows than exist (without replacement) raises ValueError.
other_attacks_mask = (df['class'] == 'ATTACK') & (df['attack'] != 'SPOOFING')
n_known_attacks = min(30000, int(other_attacks_mask.sum()))
known_attacks_df = df[other_attacks_mask].sample(n=n_known_attacks, random_state=42)

# === Build the contrastive labels: 0 = benign, 1 = known attack ===
benign_df['label'] = 0
known_attacks_df['label'] = 1

contrast_df = pd.concat([benign_df, known_attacks_df], ignore_index=True)

In [None]:
# === 1. Load Combined Dataset ===
# NOTE(review): this cell rebuilds the same frames as the cell above it;
# consider keeping only one of the two.
df['attack'] = df['attack'].astype(str).str.strip().str.upper()

# Normalised view of `class` (cast to str, trimmed, upper-cased), matching the
# normalisation applied to `attack`. `df['class']` itself is not modified here.
class_normalised = df['class'].astype(str).str.strip().str.upper()

# === Separate the BENIGN and SPOOFING rows ===
benign_df = df[class_normalised == 'BENIGN'].copy()
spoofing_df = df[df['attack'] == 'SPOOFING'].copy()

# Draw a subset of the other (non-spoofing) attacks to contrast against benign.
# The sample size is capped at the number of available rows: asking `sample`
# for more rows than exist (without replacement) raises ValueError.
other_attacks_mask = (class_normalised == 'ATTACK') & (df['attack'] != 'SPOOFING')
n_known_attacks = min(30000, int(other_attacks_mask.sum()))
known_attacks_df = df[other_attacks_mask].sample(n=n_known_attacks, random_state=42)

# === Build the contrastive labels: 0 = benign, 1 = known attack ===
benign_df['label'] = 0
known_attacks_df['label'] = 1

contrast_df = pd.concat([benign_df, known_attacks_df], ignore_index=True)

In [None]:
# === Preprocessing ===
# Use every numeric column as a feature EXCEPT the target. `label` is an
# integer column of `contrast_df`, so `select_dtypes` would otherwise pick it up:
# the model would be trained on its own target, and `feature_cols` would name a
# column that frames without a `label` column (e.g. `spoofing_df`) cannot provide.
X = contrast_df.select_dtypes(include='number').drop(columns=['label'], errors='ignore')
feature_cols = X.columns  # Remember which features are used

y = contrast_df['label']

# Split before scaling so the scaler statistics come from the training rows only
# (fitting on all rows would leak validation statistics into training).
X_train_raw, X_val_raw, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)

# Scaled copy of the full feature matrix, kept under its original name.
X_scaled = scaler.transform(X)


In [None]:
# === Train the encoder used for the embeddings ===
# Network: two Dense(relu) + Dropout(0.3) stages (128 then 64 units), a 32-unit
# relu layer named 'embedding', and a single sigmoid output unit on top.
inputs = Input(shape=(X_train.shape[1],))

x = inputs
for hidden_units in (128, 64):
    x = Dense(hidden_units, activation='relu')(x)
    x = Dropout(0.3)(x)

embedding = Dense(32, activation='relu', name='embedding')(x)
out = Dense(1, activation='sigmoid')(embedding)

# Trained as a binary classifier on the 0/1 contrast labels.
encoder_model = Model(inputs=inputs, outputs=out)
encoder_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
encoder_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=64,
    verbose=1,
)

In [None]:
# === Take the embedding from the intermediate layer ===
# Sub-model that shares the trained encoder's input and stops at the layer
# named 'embedding'.
embedding_model = Model(
    inputs=encoder_model.input,
    outputs=encoder_model.get_layer('embedding').output,
)


def _select_and_scale(frame):
    """Return ``(features, scaled_features)`` for ``frame``.

    ``features`` is a copy of the ``feature_cols`` columns of ``frame``;
    ``scaled_features`` is that copy transformed by the already-fitted ``scaler``.
    """
    features = frame[feature_cols].copy()
    return features, scaler.transform(features)


# === Embeddings for the spoofing and benign rows ===
X_spoof, X_spoof_scaled = _select_and_scale(spoofing_df)
embedding_spoof = embedding_model.predict(X_spoof_scaled)

X_benign, X_benign_scaled = _select_and_scale(benign_df)
embedding_benign = embedding_model.predict(X_benign_scaled)