In [8]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn
import imblearn
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split, cross_validate
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn import metrics
import warnings

warnings.filterwarnings('ignore')

# Load Data
train = pd.read_csv("C:/Users/ishan/OneDrive/Desktop/proj_component/networkintrusion.csv")
test = pd.read_csv("C:/Users/ishan/OneDrive/Desktop/proj_component/networkintrusion.csv")

# Data Exploration
print(train.head(4))
print("Training data has {} rows & {} columns".format(train.shape[0], train.shape[1]))
print(test.head(4))
print("Testing data has {} rows & {} columns".format(test.shape[0], test.shape[1]))

# Drop 'num_outbound_cmds' as it's redundant
train.drop(['num_outbound_cmds'], axis=1, inplace=True)
test.drop(['num_outbound_cmds'], axis=1, inplace=True)

# Check class distribution
print(train['class'].value_counts())
print(test['class'].value_counts())

# Scaling numerical attributes
scaler = StandardScaler()
num_cols = train.select_dtypes(include=['float64', 'int64']).columns

sc_train = scaler.fit_transform(train[num_cols])
sc_test = scaler.transform(test[num_cols])  # Use transform not fit_transform for test data

# Convert scaled data to DataFrame
sc_train_df = pd.DataFrame(sc_train, columns=num_cols)
sc_test_df = pd.DataFrame(sc_test, columns=num_cols)

# Encoding categorical attributes
encoder = LabelEncoder()
cat_train = train.select_dtypes(include=['object']).copy()
cat_test = test.select_dtypes(include=['object']).copy()

train_cat = cat_train.apply(encoder.fit_transform)
test_cat = cat_test.apply(encoder.transform)

# Separate target column
X_train_cat = train_cat.drop(['class'], axis=1)
y_train = train_cat[['class']]
X_test_cat = test_cat

# Combine numerical and categorical data
train_x = pd.concat([sc_train_df, X_train_cat], axis=1)
test_df = pd.concat([sc_test_df, X_test_cat], axis=1)

# Feature Selection using Random Forest
rfc = RandomForestClassifier()
rfc.fit(train_x, y_train.values.ravel())

importances = pd.DataFrame({'feature': train_x.columns, 'importance': rfc.feature_importances_})
importances = importances.sort_values('importance', ascending=False).set_index('feature')

# Plot feature importances
plt.figure(figsize=(12, 6))
importances.plot.bar()
plt.title('Feature Importances')
plt.ylabel('Importance')
plt.xlabel('Feature')
plt.show()

# Recursive Feature Elimination (RFE)
rfe = RFE(rfc, n_features_to_select=15)
rfe = rfe.fit(train_x, y_train)

selected_features = [f for i, f in enumerate(train_x.columns) if rfe.support_[i]]
print("Selected features:", selected_features)

# Update train_x and test_df with selected features
train_x = train_x[selected_features]
test_df = test_df[selected_features]

# Dataset Partition
X_train, X_val, y_train, y_val = train_test_split(train_x, y_train, train_size=0.70, random_state=2)

# Build and Train ANN
def build_classifier():
    classifier = Sequential()
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu', input_dim=len(selected_features)))
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

classifier = KerasClassifier(build_fn=build_classifier, batch_size=10, epochs=10)

# Train the model
history = classifier.fit(X_train, y_train, batch_size=10, epochs=10, validation_data=(X_val, y_val))

# Plot training and validation loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper right')
plt.show()

# Plot training and validation accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='lower right')
plt.show()

# Predictions and Evaluations
y_pred_train = (classifier.predict(X_train) > 0.5)
y_pred_val = (classifier.predict(X_val) > 0.5)
y_pred_test = (classifier.predict(test_df) > 0.5)

print("Training Accuracy:", metrics.accuracy_score(y_train, y_pred_train))
print("Validation Accuracy:", metrics.accuracy_score(y_val, y_pred_val))

# Cross-validation using cross_validate
cv_results = cross_validate(estimator=classifier, X=X_train, y=y_train, cv=10, scoring='accuracy')
print("Cross-validation scores:", cv_results['test_score'])


ModuleNotFoundError: No module named 'keras.wrappers'