In [1]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
import matplotlib.pyplot as plt

In [2]:
# Step 1: Load Data
def load_data(filepath):
    """Read the CSV at ``filepath`` and return its contents as a DataFrame.

    Args:
        filepath: Anything ``pandas.read_csv`` accepts (a path string,
            a path-like object, or an open file-like object).

    Returns:
        pandas.DataFrame: The parsed table, with pandas' default parsing options.
    """
    return pd.read_csv(filepath)

In [5]:
# Step 2: Preprocessing
def preprocess_data(data):
    """Turn a raw dataset into scaled numeric features and integer-encoded labels.

    Steps, in order:
      1. Rows whose 'Label' is missing are discarded.
      2. The 'Attack Type' column is removed if present.
      3. The target is separated from the features, and only numeric feature
         columns are kept.
      4. Missing feature values are replaced by the column mean.
      5. Labels are integer-encoded with ``LabelEncoder``.
      6. Features are standardised with ``StandardScaler``.

    The input frame is never modified; all work happens on derived copies.

    Args:
        data (pandas.DataFrame): Raw dataset containing a 'Label' column.

    Returns:
        tuple[pandas.DataFrame, pandas.Series]: ``(features, labels)`` where
        ``features`` holds the scaled numeric columns and ``labels`` is the
        encoded target named 'Label'. Both share the same 0..n-1 index, so
        they stay row-aligned even when rows were dropped.

    Raises:
        KeyError: If ``data`` has no 'Label' column.
    """
    if 'Label' not in data.columns:
        raise KeyError("The 'Label' column is missing from the dataset.")

    # Build a filtered copy instead of using inplace=True, so re-running the
    # cell on the same input frame always gives the same result.
    cleaned = (
        data
        .dropna(subset=['Label'])
        .drop(columns=['Attack Type'], errors='ignore')
    )

    # Separate the target BEFORE the numeric filter: a string-valued 'Label'
    # would otherwise be removed by select_dtypes and the encoding step below
    # would fail with a KeyError.
    raw_labels = cleaned['Label']
    features = cleaned.drop(columns=['Label']).select_dtypes(include=[np.number])

    # Impute missing values with each column's mean.
    features = features.fillna(features.mean())

    # Label encoding for binary classification.
    label_encoder = LabelEncoder()
    labels = pd.Series(label_encoder.fit_transform(raw_labels), name='Label')

    # Scaling.
    # NOTE(review): the scaler is fit on every row passed in; if a train/test
    # split happens downstream, confirm that fitting on the full set is intended.
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    # Both outputs are built from positional arrays, so they carry the same
    # fresh RangeIndex and stay aligned row-for-row.
    return pd.DataFrame(scaled_features, columns=features.columns), labels

In [3]:
def main(filepath=None):
    """Load the dataset and return it.

    Args:
        filepath: Optional location of the CSV file to load. When omitted, the
            notebook's original hard-coded dataset path is used.

    Returns:
        The object returned by ``load_data`` for the chosen path.
    """
    if filepath is None:
        # In a raw string each backslash is literal, so separators are written
        # once; doubling them would put two backslashes into the path.
        filepath = r"C:\Users\HP\Documents\GitHub\intrusion-detection-fewshot-vs-traditional\cleaned_dataset.csv"
    # Return the frame instead of discarding it so callers can use the result.
    return load_data(filepath)

# Entry-point guard: call main() when this code runs as the top-level module.
# A notebook kernel also runs cells with __name__ set to "__main__", so
# executing this cell calls main() as well.
if __name__ == "__main__":
    main()