# Evaluación y crítica al modelo

In [1]:
# importaciones
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_sample_weight

In [2]:
dataset_path = "DB/"

output_files_name = ["X_train.csv", "y_train.csv", "X_test.csv", "y_test.csv"]

In [3]:
X_train = pd.read_csv(f"{dataset_path}{output_files_name[0]}")
y_train = pd.read_csv(f"{dataset_path}{output_files_name[1]}")
X_test  = pd.read_csv(f"{dataset_path}{output_files_name[2]}")
y_test  = pd.read_csv(f"{dataset_path}{output_files_name[3]}")

In [4]:
encoder = OneHotEncoder(sparse_output=False)
rf = RandomForestClassifier(criterion="gini", n_estimators=100,class_weight='balanced')
dt = DecisionTreeClassifier(criterion="gini", class_weight='balanced')
mlp = MLPClassifier(hidden_layer_sizes=(10,), activation='relu', max_iter=500)
knn = KNeighborsClassifier(n_neighbors=3, weights='distance') 

In [5]:
encoder.fit(y_train)

y_train_encoded = encoder.transform(y_train)
y_test_encoded  = encoder.transform(y_test)

In [6]:
X_train_encoded = X_train.to_numpy()
X_test_encoded  = X_test.to_numpy()

In [7]:
rf.fit(X_train_encoded, y_train_encoded)

In [8]:
dt.fit(X_train_encoded, y_train_encoded)

In [9]:
sample_weights = compute_sample_weight(class_weight='balanced', y=y_train_encoded)
mlp.fit(X_train_encoded, y_train_encoded)

In [10]:
knn.fit(X_train_encoded, y_train_encoded)

In [11]:
y_pred_rf = rf.predict(X_test_encoded)
y_pred_dt = dt.predict(X_test_encoded)
y_pred_mlp = mlp.predict(X_test_encoded)
y_pred_knn = knn.predict(X_test_encoded)

[WinError 2] El sistema no puede encontrar el archivo especificado
  File "c:\Users\User\anaconda3\envs\ids_thesis\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\Users\User\anaconda3\envs\ids_thesis\Lib\subprocess.py", line 550, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\User\anaconda3\envs\ids_thesis\Lib\subprocess.py", line 1028, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\User\anaconda3\envs\ids_thesis\Lib\subprocess.py", line 1540, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


In [12]:
# y_pred_rf_labels = np.argmax(y_pred_rf,axis=0)
# y_pred_dt_labels = np.argmax(y_pred_dt,axis=0)
# y_pred_mlp_labels = np.argmax(y_pred_mlp,axis=0)
# y_pred_knn_labels = np.argmax(y_pred_knn,axis=0)

In [13]:
print(f"RandomForestClassifier")
print(classification_report(y_test_encoded, y_pred_rf))

RandomForestClassifier
              precision    recall  f1-score   support

           0       0.96      0.78      0.86     28405
           1       0.40      0.58      0.47        24
           2       0.99      1.00      0.99      1600
           3       0.08      0.99      0.14       129
           4       0.94      0.70      0.80      2888
           5       0.75      0.58      0.66        69
           6       0.63      0.90      0.74        72
           7       0.98      0.51      0.67        99
           8       1.00      0.96      0.98      1986
           9       0.04      0.55      0.08        74
          10       0.06      0.32      0.10        19
          11       0.10      0.50      0.16         8

   micro avg       0.88      0.79      0.84     35373
   macro avg       0.58      0.70      0.55     35373
weighted avg       0.95      0.79      0.86     35373
 samples avg       0.79      0.79      0.79     35373



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
print(f"DecisionTreeClassifier")
print(classification_report(y_test_encoded,y_pred_dt))

DecisionTreeClassifier
              precision    recall  f1-score   support

           0       0.95      0.76      0.85     28405
           1       0.18      0.58      0.27        24
           2       0.94      0.84      0.89      1600
           3       0.07      0.88      0.13       129
           4       0.53      0.69      0.60      2888
           5       0.29      0.57      0.39        69
           6       0.08      0.72      0.15        72
           7       0.92      0.48      0.64        99
           8       0.99      0.94      0.96      1986
           9       0.01      0.55      0.03        74
          10       0.06      0.26      0.09        19
          11       0.04      0.50      0.08         8

   micro avg       0.77      0.77      0.77     35373
   macro avg       0.42      0.65      0.42     35373
weighted avg       0.91      0.77      0.83     35373
 samples avg       0.77      0.77      0.77     35373



In [15]:
print(f"KNeighborsClassifier")
print(classification_report(y_test_encoded, y_pred_knn))

KNeighborsClassifier
              precision    recall  f1-score   support

           0       0.96      0.73      0.83     28405
           1       0.18      0.58      0.28        24
           2       0.95      0.84      0.89      1600
           3       0.04      0.91      0.07       129
           4       0.78      0.71      0.74      2888
           5       0.05      0.67      0.10        69
           6       0.06      0.90      0.11        72
           7       0.39      0.69      0.49        99
           8       0.65      0.99      0.78      1986
           9       0.04      0.55      0.08        74
          10       0.03      0.37      0.05        19
          11       0.05      0.25      0.08         8

   micro avg       0.75      0.75      0.75     35373
   macro avg       0.35      0.68      0.38     35373
weighted avg       0.92      0.75      0.81     35373
 samples avg       0.75      0.75      0.75     35373



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
print(f"MLPClassifier")
print(classification_report(y_test_encoded, y_pred_mlp))

MLPClassifier
              precision    recall  f1-score   support

           0       0.96      0.77      0.86     28405
           1       0.00      0.00      0.00        24
           2       0.90      0.94      0.92      1600
           3       0.24      0.87      0.37       129
           4       0.64      0.73      0.68      2888
           5       0.04      0.59      0.07        69
           6       0.03      0.53      0.06        72
           7       0.69      0.80      0.74        99
           8       0.99      0.96      0.98      1986
           9       0.05      0.51      0.10        74
          10       0.02      0.63      0.05        19
          11       0.00      0.00      0.00         8

   micro avg       0.82      0.79      0.80     35373
   macro avg       0.38      0.61      0.40     35373
weighted avg       0.92      0.79      0.84     35373
 samples avg       0.77      0.79      0.78     35373



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
