In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report

# Separação dos Dados em Treino e Teste

In [14]:
df = pd.read_csv('cicids2017_cleaned.csv')

# Separar features e labels
X = df.drop('Attack Type', axis=1)
y = df["Attack Type"]

# Separar Dados para treino
X_normal = X[y == "Normal Traffic"]
y_normal = y[y == "Normal Traffic"]

# Separar Dados para teste
X_misto = X
y_misto = y

# Separar uma parte dos dados para treino (80% treino, 20% teste)
X_train, X_val, y_train, y_val = train_test_split(X_normal, y_normal, test_size=0.2, random_state=42)

In [15]:
#Criar o normalizador
scaler = StandardScaler()

# Ajustar e transformar os dados de treino
X_train_scaled = scaler.fit_transform(X_train)

# Transformar os dados de teste
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_misto)


# Isolation Forest (Diferentes Valores)

In [16]:
y_misto_binary = ["Normal Traffic" if y == "Normal Traffic" else "Attack" for y in y_misto]

# Valores para contamination
contamination_values = [0.01, 0.05, 0.1, 0.15, 0.2]

for contamination in contamination_values:
    print(f"\n=== contamination: {contamination} ===")

    # Criar e treinar o modelo Isolation Forest
    isoForest = IsolationForest(contamination=contamination, random_state=42)
    isoForest.fit(X_train_scaled)

    # Previsão
    y_pred = isoForest.predict(X_test_scaled)
    y_pred_labels = ["Attack" if y == -1 else "Normal Traffic" for y in y_pred]

    # Avaliar
    print(classification_report(y_misto_binary, y_pred_labels, digits=4))




=== contamination: 0.01 ===
                precision    recall  f1-score   support

        Attack     0.8241    0.2301    0.3597    425694
Normal Traffic     0.8635    0.9900    0.9225   2095057

      accuracy                         0.8617   2520751
     macro avg     0.8438    0.6100    0.6411   2520751
  weighted avg     0.8569    0.8617    0.8274   2520751


=== contamination: 0.05 ===
                precision    recall  f1-score   support

        Attack     0.6783    0.5180    0.5874    425694
Normal Traffic     0.9065    0.9501    0.9278   2095057

      accuracy                         0.8771   2520751
     macro avg     0.7924    0.7340    0.7576   2520751
  weighted avg     0.8680    0.8771    0.8703   2520751


=== contamination: 0.1 ===
                precision    recall  f1-score   support

        Attack     0.5479    0.5962    0.5710    425694
Normal Traffic     0.9165    0.9000    0.9082   2095057

      accuracy                         0.8487   2520751
     macro