In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Location of the CSV to analyse -- replace the placeholder before running.
data_file = 'add_your_dataset_here'
# Load the whole file into a DataFrame; later cells read everything from `data`.
data = pd.read_csv(filepath_or_buffer=data_file)

In [None]:
# Print a concise summary of the loaded frame (columns, non-null counts, dtypes).
data.info()

In [None]:
# Report the dataset dimensions: a header line, then the row and column counts.
print(
    "\nShape of the dataset:",
    f"Rows: {data.shape[0]}, Columns: {data.shape[1]}",
    sep="\n",
)

In [None]:
#print(data.describe())

In [None]:
# Positional split: every column except the last is a feature (X); the last
# column is the label (y).
# NOTE(review): later cells address the label as 'Label_binary' -- presumably
# that is the last column; confirm against the dataset.
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

In [None]:
# Replace the raw labels in `y` with integer class codes. The fitted encoder is
# kept in `label_encoder` so the code -> label mapping (`classes_`) stays available.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [None]:
# Class balance: number of rows per distinct value of the 'Label_binary' column.
print(data['Label_binary'].value_counts())

In [None]:
# print(label_encoder.classes_)

In [None]:
# Standardise every feature column; `X` is overwritten with the scaled matrix.
# NOTE(review): the scaler is fitted on all rows *before* the train/test split
# in the following cell, so test-set statistics leak into training. Consider
# splitting first and fitting on the training rows only.
scaler = StandardScaler()
X = scaler.fit(X).transform(X)

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Supervised baseline: logistic regression fitted on the training split.
# `fit` returns the estimator itself, so chaining keeps the same object, and the
# assignment leaves the cell without output (as the trailing `;` did before).
logistic_model = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

In [None]:
# Score the baseline on the held-out rows and report accuracy as a percentage.
y_pred = logistic_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Keep only the rows whose 'Label_binary' value is 'Benign'; the autoencoder
# below is trained on these rows alone.
benign_data = data.loc[data['Label_binary'].eq('Benign')]
# Same positional feature selection as for X: every column except the last.
X_benign = benign_data.iloc[:, :-1].values

In [None]:
# Preview the first rows of the loaded frame.
data.head()

In [None]:
# Class balance again (same output as the earlier value_counts cell; `data` has
# not been modified in between).
print(data['Label_binary'].value_counts())

In [None]:
# Re-fit the shared `scaler` on the benign rows only and standardise them.
# NOTE(review): this replaces the statistics fitted earlier on the full feature
# matrix; from here on `scaler` describes the benign subset.
X_benign_scaled = scaler.fit(X_benign).transform(X_benign)

In [None]:
# Symmetric dense autoencoder:
#   input_dim -> 64 -> 32 -> 16 (bottleneck) -> 32 -> 64 -> input_dim
input_dim = X_benign_scaled.shape[1]
input_layer = Input(shape=(input_dim,))

# Encoder: progressively compress the feature vector.
encoder = Dense(64, activation='relu')(input_layer)
encoder = Dense(32, activation='relu')(encoder)
latent_space = Dense(16, activation='relu')(encoder)

# Decoder: mirror of the encoder.
decoder = Dense(32, activation='relu')(latent_space)
decoder = Dense(64, activation='relu')(decoder)

# Linear output. The reconstruction targets are StandardScaler outputs, which
# are unbounded and negative for many values; a sigmoid is limited to (0, 1)
# and could never reproduce them, inflating the reconstruction error of every
# sample regardless of whether it is anomalous.
output_layer = Dense(input_dim, activation='linear')(decoder)



In [None]:
# Wrap the layer graph into a trainable model and configure it to minimise the
# mean squared reconstruction error with Adam.
autoencoder = Model(
    inputs=input_layer,
    outputs=output_layer,
)
autoencoder.compile(loss='mse', optimizer='adam')



In [None]:
# early_stopping = EarlyStopping(
#     monitor='val_loss',
#     patience=10,
#     restore_best_weights=True
# )

In [None]:
# Train the autoencoder to reproduce its own input (input and target are the
# same benign matrix); 20% of the samples are held out for validation.
# Early stopping is not enabled: callbacks=[early_stopping] stays commented out.
autoencoder.fit(
    x=X_benign_scaled,
    y=X_benign_scaled,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
    verbose=1,
)

In [None]:

# Score every sample with the autoencoder.
#
# The features must be scaled exactly once, with the scaler that was re-fitted
# on the benign rows (the same transform the autoencoder was trained on). `X`
# was already standardised in an earlier cell, so transforming it again would
# scale the data twice; start from the raw feature columns of `data` instead
# (same positional selection that produced X: all columns except the last).
X_scaled = scaler.transform(data.iloc[:, :-1].values)

reconstructed = autoencoder.predict(X_scaled)
# Per-sample mean squared reconstruction error across features.
reconstruction_error = np.mean(np.square(X_scaled - reconstructed), axis=1)

# Flag as anomalous anything whose error exceeds the 99th percentile of the
# errors on benign samples.
# NOTE(review): `y == 0` assumes the label encoder mapped 'Benign' to 0
# (LabelEncoder orders classes by sorted value) -- confirm via label_encoder.classes_.
threshold = np.percentile(reconstruction_error[y == 0], 99)
y_pred_autoencoder = (reconstruction_error > threshold).astype(int)


In [None]:
# Per-class precision / recall / F1 of the reconstruction-error detector,
# compared against the encoded labels of the full dataset.
print("\nAutoencoder-Based Anomaly Detection:")
print(
    classification_report(
        y_true=y,
        y_pred=y_pred_autoencoder,
        target_names=['Benign', 'Malicious'],
    )
)
