## Random Forest Example

#### Load the Random Forest Package

In [None]:

from sklearn.ensemble import RandomForestClassifier


#### Read in the Data

In [None]:
import pandas as pd

# Red-wine quality data hosted by the UCI repository; fields in this file are
# separated by semicolons rather than commas.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine = pd.read_csv(data_url, sep=';')


#### Identify the Target Feature

In [None]:
# Separate the label column ('quality') from the remaining feature columns.
wine_data = wine.drop(columns='quality')
wine_target = wine['quality']


#### Divide the Data

In [None]:
from sklearn import model_selection

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
test_size = 0.30
seed = 7
splits = model_selection.train_test_split(
    wine_data,
    wine_target,
    test_size=test_size,
    random_state=seed,
)
train_data, test_data, train_target, test_target = splits

#### Fit the Random Forest Model

In [None]:

# Build the forest with the same seed used for the train/test split so that the
# fitted trees (and therefore the feature importances below) are reproducible
# from one run of the notebook to the next.
model = RandomForestClassifier(random_state=seed)
model.fit(train_data, train_target)



#### Apply the Model to the Test Data

In [None]:
# Predict a quality label for every row of the held-out test split using the
# fitted forest.
forest_results = model.predict(test_data)


#### Compute Feature Importance

In [None]:
# One importance score per feature, taken from the fitted forest. The scores are
# positional: entry i belongs to column i of train_data
# (quick look: list(zip(train_data, model.feature_importances_))).
importances = model.feature_importances_



#### Display Feature Importance

In [None]:
import numpy as np

# Column positions ordered from the most to the least important feature.
indices = np.argsort(importances)[::-1]
col_names = list(train_data.columns.values)

print("Feature ranking:")
for rank, ndx in enumerate(indices, start=1):
    name = col_names[ndx]
    # Pad short names out to 20 characters so the scores line up in a column.
    padding = ' ' * (20 - len(name))
    print("%d.\t %s %s %f" % (rank, name, padding, importances[ndx]))


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Sort ascending: barh draws the first row at the bottom, so the most important
# feature ends up as the top bar.
ndx = np.argsort(importances)
value = pd.DataFrame(importances[ndx])

ax = value.plot(kind='barh', figsize=(8, 10), color='#4466ff', zorder=2, width=0.85)

# Despine
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)

# Switch off tick marks but keep the tick labels. These flags must be real
# booleans: the legacy "on"/"off" strings are no longer interpreted by
# matplotlib, and any non-empty string (including "off") is truthy.
ax.tick_params(
    axis="both",
    which="both",
    bottom=False,
    top=False,
    labelbottom=True,
    left=False,
    right=False,
    labelleft=True,
)

# Label each bar with the feature name, in the same sorted order as the bars.
column_names = [col_names[x] for x in ndx]
ax.set_yticklabels(column_names)
plt.show()