In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Attack classes to evaluate; each name is used to build the CSV file names
attack_types = [
    "Bot",
    "DDoS",
    "DoS GoldenEye",
    "DoS Hulk",
    "DoS Slowhttptest",
    "DoS slowloris",
    "FTP-Patator",
    "PortScan",
    "SSH-Patator",
]

# Name of the benign class, used in the "<attack>_vs_<benign>.csv" file name
benign_type = "BENIGN"

# Accumulates one result dictionary per processed attack type
results = []

def load_data(attack_type):
    """Load the top-three-feature dataset for one attack type.

    Reads ``{attack_type}_importance.csv`` and takes the first three entries
    of its 'Feature' column (presumably the file is already sorted by
    importance -- confirm against whatever produced it), then reads
    ``{attack_type}_vs_{benign_type}.csv``.

    Args:
        attack_type: Attack name used as the prefix of both CSV file names.

    Returns:
        A ``(data, features)`` tuple where ``data`` holds the selected
        feature columns plus the ' Label' column (note the leading space in
        the column name) and ``features`` is the list of selected feature
        names. If either file is missing, a message is printed and
        ``(None, [])`` is returned instead.
    """
    try:
        ranking = pd.read_csv(f"{attack_type}_importance.csv")
        top_features = ranking['Feature'].head(3).tolist()
        frame = pd.read_csv(f"{attack_type}_vs_{benign_type}.csv")
    except FileNotFoundError as missing:
        print(f"File not found: {missing.filename}")
        return None, []

    wanted_columns = [*top_features, ' Label']
    return frame[wanted_columns], top_features

def train_and_predict(X_train, y_train, X_test):
    """Fit each classifier on the training split and predict the test split.

    Args:
        X_train: Training features, passed to every estimator's ``fit``.
        y_train: Training labels, passed to every estimator's ``fit``.
        X_test: Features passed to every fitted estimator's ``predict``.

    Returns:
        A dict mapping each model's display name ('Naive Bayes', 'QDA',
        'MLP') to the predictions that model produced for ``X_test``.
    """
    classifiers = (
        ('Naive Bayes', GaussianNB()),
        ('QDA', QuadraticDiscriminantAnalysis()),
        # random_state is pinned so the MLP is seeded identically on every run
        ('MLP', MLPClassifier(random_state=42, max_iter=1000, learning_rate_init=0.001)),
    )
    # fit() returns the estimator itself, so training and prediction can be chained
    return {
        label: clf.fit(X_train, y_train).predict(X_test)
        for label, clf in classifiers
    }

# Evaluate every attack type whose input files could be loaded
results = []
for attack_type in attack_types:
    selected_data, selected_features = load_data(attack_type)
    if selected_data is None:
        # load_data has already reported the missing file; skip this attack type
        continue

    X = selected_data[selected_features]
    y = selected_data[' Label']

    # Hold out 40% of the rows for testing, with a fixed seed for the split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=42
    )

    # One prediction array per model, keyed by the model's display name
    preds = train_and_predict(X_train, y_train, X_test)

    # One row per attack type, with one accuracy column per model
    result_dict = {'Attack Type': attack_type}
    for model_name, pred in preds.items():
        accuracy = accuracy_score(y_test, pred)
        result_dict[f'{model_name} Accuracy'] = accuracy
    results.append(result_dict)

# Tabulate the collected accuracies and show them
results_df = pd.DataFrame(results)
print(results_df)



File not found: PortScan_importance.csv
File not found: SSH-Patator_importance.csv
        Attack Type  Naive Bayes Accuracy  QDA Accuracy  MLP Accuracy
0               Bot              0.940801      0.940801      0.989353
1              DDoS              0.981494      0.985296      0.998705
2     DoS GoldenEye              0.951020      0.950939      0.954177
3          DoS Hulk              0.985059      0.985059      0.984385
4  DoS Slowhttptest              0.582361      0.587362      0.939536
5     DoS slowloris              0.976423      0.983899      0.977861
6       FTP-Patator              0.999265      0.998950      0.985297
