In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import altair as alt
from sklearn.inspection import plot_partial_dependence
from mlxtend.evaluate import feature_importance_permutation

In [None]:
# Location of the KDD Cup 99 CSV, relative to this notebook.
file_url = "../Dataset/KDDCup99.csv"

In [None]:
# Read the raw dataset from the path held in `file_url`.
df = pd.read_csv(filepath_or_buffer=file_url)

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# Detach the target column. `pop` removes 'label' from `df` in place, so
# re-running this cell without reloading `df` raises KeyError.
y = df.pop(item='label')

In [None]:
# One-hot encode the non-numeric feature columns.
# The dummy dtype is pinned explicitly: recent pandas versions default to bool,
# and mixing bool with numeric columns makes `DataFrame.values` fall back to an
# object array. The cells below hand `X_train.values` to mlxtend and LIME, which
# expect a numeric matrix. 'uint8' matches the default of older pandas releases.
df = pd.get_dummies(df, dtype='uint8')

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    y,
    test_size=0.3,
    random_state=1,
)

In [None]:
# Random forest with default hyperparameters and a fixed seed, fitted on the
# training split. The fitted estimator is the cell's displayed value.
rf_model = RandomForestClassifier(random_state=168)
rf_model.fit(X=X_train, y=y_train)

In [None]:
# Predicted labels for the training and test splits, in that order.
train_preds, test_preds = (
    rf_model.predict(features) for features in (X_train, X_test)
)

In [None]:
# Accuracy on each split; printed one per line, training first, then test.
train_acc, test_acc = (
    accuracy_score(y_train, train_preds),
    accuracy_score(y_test, test_preds),
)
print(train_acc, test_acc, sep='\n')

In [None]:
# Permutation importance of every feature, measured on the training split with
# accuracy as the metric and a single round per feature. The second return
# value is discarded.
imp_vals, _ = feature_importance_permutation(
    predict_method=rf_model.predict,
    X=X_train.values,
    y=y_train.values,
    metric='accuracy',
    num_rounds=1,
    seed=2,
)
imp_vals

In [None]:
# Pair each feature name with its permutation-importance score.
perm_varimp_df = pd.DataFrame(
    data={
        'feature': X_train.columns,
        'importance': imp_vals,
    }
)

In [None]:
# Order features from most to least important, then preview the top rows.
perm_varimp_df = perm_varimp_df.sort_values(by='importance', ascending=False)
perm_varimp_df.head()

In [None]:
# Horizontal bar chart of the first 20 rows of `perm_varimp_df`, with the
# feature axis ordered by descending importance.
feature_axis = alt.Y(
    'feature:N',
    sort=alt.SortField(field='importance', order='descending'),
)
alt.Chart(perm_varimp_df[:20]).mark_bar().encode(x='importance', y=feature_axis)

In [None]:
# Partial dependence of the predicted probability of the "normal" class on
# the `src_bytes` feature, computed over the training split.
# NOTE(review): `plot_partial_dependence` was deprecated in scikit-learn 1.0 and
# removed in 1.2 — confirm the pinned scikit-learn version before upgrading.
feature_index = df.columns.get_loc("src_bytes")
plot_partial_dependence(
    rf_model,
    X_train,
    features=[feature_index],
    feature_names=X_train.columns,
    target="normal",
    response_method="predict_proba",
    n_jobs=-1,
)

In [None]:
!pip install lime

In [None]:
from lime.lime_tabular import LimeTabularExplainer

In [None]:
# Class labels in the exact order of the columns returned by
# `rf_model.predict_proba`. Taking them from the fitted model (rather than from
# every label present in `y`) keeps LIME's class names aligned with the
# probabilities even if a rare class is absent from the training split.
class_names = rf_model.classes_.tolist()
class_names

In [None]:
# LIME explainer built from the training matrix. `random_state` is fixed so
# that explanations are repeatable across runs, in line with the seeded split,
# model and permutation-importance cells.
lime_explainer = LimeTabularExplainer(X_train.values,
      feature_names=X_train.columns,
      class_names=class_names,
      mode='classification',
      random_state=1)

In [None]:
# Explain the model's top predicted class for one test row (position 99893),
# using up to 50 features. LIME documents `data_row` as a 1-D numpy array, so
# the row is passed as raw values rather than as a label-indexed pandas Series;
# this avoids relying on integer-position lookups on a string-indexed Series.
exp = lime_explainer.explain_instance(X_test.iloc[99893].values, rf_model.predict_proba, num_features=50, top_labels=1)
exp.show_in_notebook()