## Random Forest Example with Python

#### Load the Random Forest Package

In [None]:

from sklearn.ensemble import RandomForestClassifier


#### Read in the Data

In [None]:
import pandas as pd
# Red-wine quality data set hosted in the UCI machine-learning archive.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"

# The file separates its fields with semicolons rather than commas.
wine = pd.read_csv(data_url, sep=';')

# Show the first few rows as a quick sanity check of the load.
wine.head()

#### Identify the Target Feature

In [None]:
# Separate the predictors from the label: every column except 'quality'
# is an input feature, and 'quality' itself is the value to predict.
wine_data = wine.drop(columns='quality')
wine_target = wine['quality']


#### Divide the Data

In [None]:
from sklearn import model_selection
# Hold out 30% of the rows for testing; the fixed seed makes the
# shuffle (and therefore the split) repeatable from run to run.
seed = 7
test_size = 0.30
(train_data, test_data,
 train_target, test_target) = model_selection.train_test_split(
    wine_data,
    wine_target,
    test_size=test_size,
    random_state=seed,
)

#### Fit the Random Forest Model

In [None]:

# Build the forest with the same `seed` that drives the train/test split so
# the fitted trees (and the feature importances derived from them) are
# reproducible across runs. n_jobs=1 trains as a single job (no parallelism).
clf = RandomForestClassifier(n_jobs=1, random_state=seed)

# Fit on the training rows only; the test rows stay unseen until prediction.
clf.fit(train_data, train_target)



#### Apply the Model to the Test Data

In [None]:
# Predict a quality label for every held-out test row.
forest_results = clf.predict(X=test_data)


#### Compute Feature Importance

In [None]:
# One importance score per training column, in column order.
importances = clf.feature_importances_

# Pair each column name with its score. This is deliberately the last
# expression in the cell: a notebook only displays the value of the final
# expression, so placing it before the assignment would discard the result.
list(zip(train_data.columns, importances))



#### Display Feature Importance

In [None]:
import numpy as np

# Column positions ordered from the highest importance score to the lowest.
indices = np.argsort(importances)[::-1]
col_names = list(train_data.columns.values)

print("Feature ranking:")
for rank, col_idx in enumerate(indices, start=1):
    name = col_names[col_idx]
    # Pad the name out to 20 characters so the scores line up in a column.
    padding = ' ' * (20 - len(name))
    print("%d.\t %s %s (%f)" % (rank, name, padding, importances[col_idx]))


In [None]:
# Display feature importance as a horizontal bar graph
from matplotlib import pyplot as plt

# Reverse of `indices`; not used by this plot, but the name is kept in case
# other cells read it.
r_indices = indices[::-1]
ndx = list(indices)
features = [col_names[i] for i in ndx]

# Plot at most ten features, but never more than actually exist; otherwise
# the bar positions and the bar widths would have different lengths.
N = min(10, len(ndx))

plt.title('Feature Importances')
# Bars follow the order of `indices`, with the first entry drawn at y = 0.
plt.barh(range(N), importances[ndx[:N]], color='b', align='center')
plt.yticks(range(N), features[:N])
plt.xlabel('Relative Importance')