In [None]:
# https://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_importances.html
# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html?highlight=feature_importances_#sklearn.ensemble.RandomForestClassifier.feature_importances_

# Feature Importance com Random Forest

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Location of the behavior sample, relative to the notebook's working directory.
BEHAVIOR_SAMPLE_CSV = "../bases/AmostraBehavior.csv"
sample_behavior = pd.read_csv(BEHAVIOR_SAMPLE_CSV)

In [None]:
# Every column from the third one onward is used as a predictor.
labels = sample_behavior.columns[2:]

X = sample_behavior.loc[:, labels]  # covariates
y = sample_behavior["Perf_final"]  # target

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=999,
)

## Aplicando o Random Forest

In [None]:
# Only the seed is set explicitly; every other hyperparameter keeps its default.
forest = RandomForestClassifier(random_state=0)
# Left as the last expression so the notebook still displays the fitted estimator.
forest.fit(X=X_train, y=y_train)

In [None]:
# Inspect the importance the fitted forest assigns to each feature.
# The bare expression on the last line makes the notebook display the values.
importances = forest.feature_importances_
importances

In [None]:
#pd.DataFrame({"variaveis":X_train.columns,"importancia":forest.feature_importances_}).sort_values('importancia',ascending=False)

In [None]:
# The importances are expressed relative to their total, so they add up to 1.
# Use the array's own reduction instead of the builtin sum(), which would
# iterate over the elements one by one in Python.
importances.sum()

`RandomForestClassifier.feature_importances_` (property, from the scikit-learn documentation):
The impurity-based feature importances.

The higher, the more important the feature. The importance of a feature is computed as the (normalized) total reduction of the criterion brought by that feature. It is also known as the Gini importance.

In [None]:
# Key the importances by feature name so every bar gets its column label.
forest_importances = pd.Series(data=importances, index=X_train.columns)

# Bar chart of the importances, in the column order of the training frame.
fig, ax = plt.subplots(figsize=(12, 5))
forest_importances.plot(kind="bar", ax=ax)
ax.set_ylabel("Feature Importance")
fig.tight_layout()

In [None]:
# `feature_importances_` is a property, so read it once and reuse the result
# instead of evaluating it again for the sort and for the bar heights.
feature_scores = forest.feature_importances_
sorted_idx = feature_scores.argsort()  # ascending: least important feature first

fig, ax = plt.subplots(figsize=(15, 7))
bars = ax.bar(X_train.columns[sorted_idx], feature_scores[sorted_idx])

# Axis formatting: hide three spines, soften the bottom one, and keep only
# faint horizontal grid lines.
for side in ("top", "right", "left"):
    ax.spines[side].set_visible(False)
ax.spines["bottom"].set_color("#DDDDDD")
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)  # draw the grid behind the bars
ax.yaxis.grid(True, color="#EEEEEE")
ax.xaxis.grid(False)

# Annotate each bar with its importance, centred just above the bar top.
bar_color = bars[0].get_facecolor()
for bar in bars:
    height = bar.get_height()
    ax.text(
        bar.get_x() + bar.get_width() / 2,
        height + 0.005,  # small vertical gap between the bar and its label
        "{:.2f}".format(height),  # explicit string, always two decimals ("0.10", not "0.1")
        horizontalalignment="center",
        color=bar_color,
        weight="bold",
    )

ax.set_xlabel("Features", labelpad=15, color="#333333")
ax.set_ylabel("Feature Importance", labelpad=15, color="#333333")
plt.xticks(rotation=90)
fig.tight_layout()