In [2]:
# [Random Forest Classifier Example - Machine Learning](https://chrisalbon.com/machine-learning/random_forest_classifier_example_scikit.html)
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline

# Render axis labels and both sets of tick marks in black on top of
# seaborn's "darkgrid" theme.
custom_style = dict.fromkeys(
    ('axes.labelcolor', 'xtick.color', 'ytick.color'),
    'black',
)
sns.set_style("darkgrid", rc=custom_style)

# Seed NumPy's global RNG so that later draws from np.random are repeatable
# across kernel restarts.
np.random.seed(0)

In [90]:
# Load the iris dataset bundled with scikit-learn.
iris = load_iris()

# Build a dataframe with one column per feature measurement.
df = pd.DataFrame(iris.data, columns=iris.feature_names)

# Add the species name of every row; this is the label we are going to predict.
# The categorical is built directly from iris.target, so its integer codes are
# the positions of each name in iris.target_names.
df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)

# Flag each row as training data when a uniform draw on [0, 1) is <= 0.75,
# i.e. roughly three quarters of the rows.
df['is_train'] = np.random.uniform(0, 1, len(df)) <= .75

# Split into two dataframes: the training rows and the held-out test rows.
train, test = df[df['is_train']], df[~df['is_train']]

# The first four columns of df are the feature columns.
features = df.columns[:4]

# Encode the labels with the categorical's own codes rather than pd.factorize:
# factorize numbers classes by order of first appearance in `train`, which only
# coincides with iris.target_names order when the rows happen to be sorted by
# species. The category codes always index iris.target_names correctly, so
# predictions can be decoded with iris.target_names[...] safely.
y = train['species'].cat.codes.values

# Fit a random forest on the training rows; the fitted estimator is the last
# expression so the notebook displays its repr.
clf = RandomForestClassifier(n_jobs=2, random_state=0)
clf.fit(train[features], y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=2,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [95]:
# Predict an integer class code for every held-out row, then translate each
# code into its English species name by indexing into iris.target_names.
# (clf.predict_proba(test[features]) returns per-class probabilities instead.)
predicted_codes = clf.predict(test[features])
preds = iris.target_names[predicted_codes]

# Show the actual species of the first five test rows, for comparison
# against preds[0:5].
test['species'].head()

1     setosa
4     setosa
6     setosa
8     setosa
14    setosa
Name: species, dtype: category
Categories (3, object): [setosa, versicolor, virginica]