In [18]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Load the data
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

<h3>1. Using a Random Forest Model and checking the Feature Importances</h3>

In [11]:
# Model Building and fitting

rf_clf = RandomForestClassifier(n_estimators=300,
                               max_leaf_nodes=16,
                               oob_score=True, 
                               n_jobs=-1)
rf_clf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=16,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=-1, oob_score=True, random_state=None, verbose=0,
                       warm_start=False)

In [12]:
# Prediction and Evaluation

pred = rf_clf.predict(X_test)
print("oob Score:", rf_clf.oob_score_)
print("Accuracy Score:", accuracy_score(y_test, pred))

oob Score: 0.9333333333333333
Accuracy Score: 0.9333333333333333


In [16]:
# Feature Importances
for name, score in zip(load_iris()["feature_names"], rf_clf.feature_importances_):
    print(name, round(score*100,2))

sepal length (cm) 11.05
sepal width (cm) 2.17
petal length (cm) 42.71
petal width (cm) 44.07


The features petal length and width seem to be strong predictors.

<h3>1. Using the Extra Trees Classifier Model</h3>

In [25]:
et_clf = ExtraTreesClassifier(n_estimators=300,
                              max_leaf_nodes=16, 
                              n_jobs=-1)

et_clf.fit(X_train, y_train)

ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
                     max_depth=None, max_features='auto', max_leaf_nodes=16,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=300, n_jobs=-1,
                     oob_score=False, random_state=None, verbose=0,
                     warm_start=False)

In [26]:
pred = rf_clf.predict(X_test)
print("Accuracy Score:", accuracy_score(y_test, pred))

Accuracy Score: 0.9333333333333333
