### Plotting the top 10 features ranked by Random Forest importance

In [None]:
# Hold out 20% of the rows as a test set; the remaining 80% is used for training.
# The fixed random_state makes the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Number of top-ranked features to show in the chart (adjust as needed)
TOP_N = 10

# Create the random forest classifier (seeded so the importances are reproducible)
clf = RandomForestClassifier(random_state=0, n_estimators=100)

# Train on the training split only, so the held-out test rows do not influence
# the importances (the same data the SelectFromModel step is fitted on)
model = clf.fit(X_train, y_train)

# Pair each importance with its column name and keep the TOP_N largest,
# already sorted from most to least important
top_importances = pd.Series(
    model.feature_importances_, index=X_train.columns
).nlargest(TOP_N)

# Horizontal bar chart; inverting the y-axis puts the most important feature on top
ax = top_importances.plot(kind='barh', figsize=[20, 20])
ax.invert_yaxis()
ax.set_title(f'Top {TOP_N} Features derived by Random Forest', size=15)
ax.set_xlabel('Feature importance')
ax.set_ylabel('Feature')

# Render the figure without echoing a matplotlib object repr as the cell output
plt.show()

### An alternate way to create the graph

In [None]:
# Create the random forest classifier (seeded so the importances are reproducible)
clf = RandomForestClassifier(random_state=0, n_estimators=100)

# Train on the training split only, so the held-out test rows do not influence
# the importances (the same data the SelectFromModel step is fitted on)
model = clf.fit(X_train, y_train)

# One importance value per training column, in column order
importances = model.feature_importances_

# Positions that sort the importances from largest to smallest
indices = np.argsort(importances)[::-1]

# Take the labels from the training columns themselves, so every name is
# guaranteed to line up with the importance it describes
names = X_train.columns[indices].tolist()

# One bar slot per feature
positions = np.arange(len(names))

# Create the figure and its title
fig, ax = plt.subplots(figsize=[20, 20])
ax.set_title("Feature Importance")

# Add bars, longest (most important) first
ax.barh(positions, importances[indices])

# Label each bar on the y-axis with its feature name, most important on top
ax.set_yticks(positions)
ax.set_yticklabels(names)
ax.invert_yaxis()
ax.set_xlabel("Feature importance")

# Show plot
plt.show()

### Let's use SelectFromModel to identify features whose importance is at or above a fixed threshold (0.1 here, instead of the default mean importance)

In [None]:
# Wrap a random forest (100 trees) in SelectFromModel so that features are
# selected automatically from the fitted importances. random_state is fixed,
# matching the forests used for the importance plots, so the selected set is
# the same on every run.
sel = SelectFromModel(
    RandomForestClassifier(n_estimators=100, random_state=0),
    max_features=45,
    threshold=.1,
)
sel.fit(X_train, y_train)

# Boolean mask over the training columns: True marks a selected feature, i.e.
# one whose importance is at least `threshold` (0.1 here, not the mean
# importance), with at most `max_features` features kept.
support_mask = sel.get_support()

# Make a list and count selected features
selected_feat = X_train.columns[support_mask]
print(len(selected_feat))

# Get names of the features
print(selected_feat)