# Table 5.11 Comparison of Feature Importances

In [1]:
from common import Identifier
import pandas as pd
import pickle

In [2]:
# Empty container; none of the cells shown in this notebook write to it.
matrices = {}


def _unpickle_model(identifier):
    """Return the object pickled at ../model/<identifier.as_file()>.pkl."""
    with open(f"../model/{identifier.as_file()}.pkl", "rb") as handle:
        return pickle.load(handle)


# The identifier determines the file name of the pickled model.
id_ = Identifier()
id_.title = "netflow_sample"
id_.name = "cart"
id_.mod = "ext"
cart = _unpickle_model(id_)

# Same title and mod; only the model name differs for the second tree.
id_.name = "idtree"
idtree = _unpickle_model(id_)

In [3]:
# Pair each CART feature name with its importance, rank by importance
# (highest first), keep the 12 top-ranked rows and round for display.
cart_features = (
    pd.DataFrame(
        list(zip(cart.feature_names_in_, cart.feature_importances_)),
        columns=["CART", "Value"],
    )
    .sort_values(by="Value", ascending=False, ignore_index=True)
    .head(12)
    .assign(Value=lambda top: top["Value"].round(3))
)
cart_features

Unnamed: 0,CART,Value
0,L7_PROTO,0.121
1,OUT_BYTES,0.106
2,TCP_FLAGS,0.1
3,TCP_WIN_MAX_IN,0.089
4,L4_DST_PORT,0.078
5,IPV4_SRC_ADDR_EX,0.075
6,IN_BYTES,0.055
7,SHORTEST_FLOW_PKT,0.053
8,L4_SRC_PORT,0.05
9,DST_TO_SRC_AVG_THROUGHPUT,0.045


In [4]:
# Pair each IntruDTree feature name with its importance and rank by
# importance, highest first.
#
# The ranking is cut to the top 12 rows, the same cut-off the CART ranking
# uses. Without the cut this frame holds every feature, so the set comparison
# would weigh a top-12 list against the full feature list, and the
# side-by-side pd.concat would pad the CART columns with NaN past row 12.
idtree_features = (
    pd.DataFrame(
        list(zip(idtree.feature_names_in_, idtree.feature_importances_)),
        columns=["IntruDTree", "Value"],
    )
    .sort_values(by="Value", ascending=False, ignore_index=True)
    .head(12)
    # Round after ranking so near-ties are ordered on the exact importances.
    .assign(Value=lambda top: top["Value"].round(3))
)
idtree_features

Unnamed: 0,IntruDTree,Value
0,OUT_BYTES,0.213
1,TCP_WIN_MAX_IN,0.158
2,L7_PROTO,0.145
3,TCP_FLAGS,0.105
4,IN_BYTES,0.105
5,IPV4_SRC_ADDR_EX,0.076
6,SRC_TO_DST_SECOND_BYTES,0.067
7,L4_DST_PORT,0.065
8,MIN_TTL,0.028
9,IPV4_DST_ADDR_EX,0.022


In [5]:
# Feature names that appear in exactly one of the two rankings.
set(idtree_features["IntruDTree"]) ^ set(cart_features["CART"])

{'CLIENT_TCP_FLAGS',
 'DST_TO_SRC_AVG_THROUGHPUT',
 'IPV4_DST_ADDR_EX',
 'L4_SRC_PORT',
 'MAX_TTL',
 'MIN_TTL',
 'NUM_PKTS_UP_TO_128_BYTES',
 'SHORTEST_FLOW_PKT',
 'SRC_TO_DST_AVG_THROUGHPUT',
 'SRC_TO_DST_SECOND_BYTES'}

In [6]:
# Put the two rankings side by side; rows are matched on the index, i.e. on
# rank position. Both inputs carry a "Value" column, so the result has two
# columns with that name.
combined_features = pd.concat((cart_features, idtree_features), axis="columns")
combined_features

Unnamed: 0,CART,Value,IntruDTree,Value.1
0,L7_PROTO,0.121,OUT_BYTES,0.213
1,OUT_BYTES,0.106,TCP_WIN_MAX_IN,0.158
2,TCP_FLAGS,0.1,L7_PROTO,0.145
3,TCP_WIN_MAX_IN,0.089,TCP_FLAGS,0.105
4,L4_DST_PORT,0.078,IN_BYTES,0.105
5,IPV4_SRC_ADDR_EX,0.075,IPV4_SRC_ADDR_EX,0.076
6,IN_BYTES,0.055,SRC_TO_DST_SECOND_BYTES,0.067
7,SHORTEST_FLOW_PKT,0.053,L4_DST_PORT,0.065
8,L4_SRC_PORT,0.05,MIN_TTL,0.028
9,DST_TO_SRC_AVG_THROUGHPUT,0.045,IPV4_DST_ADDR_EX,0.022


In [36]:
# Write the comparison table without the index column, using "&" as the
# field separator (presumably so rows can be pasted into a LaTeX tabular;
# confirm against the consumer of this file).
combined_features.to_csv(
    path_or_buf="../data/tables/feature_importances.csv",
    sep="&",
    index=False,
)