In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam

# Load data
df = pd.read_csv("dataset.csv")
X = df.drop(columns=["Unnamed: 0", "label"])
y = df["label"]

# Normalize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Use only class 0 to train autoencoder
X_train_ae = X_scaled[y == 0]

# Build autoencoder
input_dim = X_train_ae.shape[1]
input_layer = Input(shape=(input_dim,))
encoded = Dense(16, activation='relu')(input_layer)
encoded = Dense(8, activation='relu')(encoded)
decoded = Dense(16, activation='relu')(encoded)
output_layer = Dense(input_dim, activation='linear')(decoded)

autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(optimizer=Adam(1e-3), loss='mse')

# Train autoencoder
autoencoder.fit(X_train_ae, X_train_ae, epochs=20, batch_size=128, shuffle=True, verbose=0)

# Reconstruct all data and get errors
X_reconstructed = autoencoder.predict(X_scaled)
mse = np.mean(np.square(X_scaled - X_reconstructed), axis=1)

# Set threshold for anomaly detection
threshold = np.percentile(mse[y == 0], 99)  # top 1% of majority class errors
y_pred_bin = (mse > threshold).astype(int)  # 1 = anomaly
y_true_bin = (y != 0).astype(int)  # 1 = actual anomaly

# Evaluation
print(classification_report(y_true_bin, y_pred_bin))


[1m13364/13364[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 4ms/step
              precision    recall  f1-score   support

           0       0.79      0.99      0.88    335847
           1       0.27      0.01      0.03     91778

    accuracy                           0.78    427625
   macro avg       0.53      0.50      0.45    427625
weighted avg       0.67      0.78      0.69    427625



HYPERPARAMETR TUNING NEEDED
