In [13]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import joblib

In [2]:
# Load the Iris Dataset
iris = load_iris()

In [3]:
# Split data into testing/training and target/features
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    random_state=1, 
                                                    stratify=iris.target)

In [4]:
# Scale the features

X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [10]:
# Set model parameters

param_grid = {'n_estimators': [200, 500],
              'max_features': ['auto', 'sqrt'],
              'min_samples_split': [2, 5, 10],
              'bootstrap': [True, False]}

rf = GridSearchCV(RandomForestClassifier(), param_grid, verbose=3)

In [11]:
# Fit
rf.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=200 
[CV]  bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=200, score=0.957, total=   0.2s
[CV] bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=200 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s


[CV]  bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=200, score=0.826, total=   0.2s
[CV] bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=200 
[CV]  bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=200, score=1.000, total=   0.2s
[CV] bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=200 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.4s remaining:    0.0s


[CV]  bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=200, score=0.909, total=   0.2s
[CV] bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=200 
[CV]  bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=200, score=0.955, total=   0.2s
[CV] bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=500 
[CV]  bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=500, score=0.957, total=   0.5s
[CV] bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=500 
[CV]  bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=500, score=0.826, total=   0.5s
[CV] bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=500 
[CV]  bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=500, score=1.000, total=   0.4s
[CV] bootstrap=True, max_features=auto, min_samples_split=2, n_estimators=500 
[CV]  bootstrap=True, max_features=auto, min_samples_split=2, n_estima

[CV]  bootstrap=True, max_features=sqrt, min_samples_split=5, n_estimators=500, score=1.000, total=   0.5s
[CV] bootstrap=True, max_features=sqrt, min_samples_split=5, n_estimators=500 
[CV]  bootstrap=True, max_features=sqrt, min_samples_split=5, n_estimators=500, score=0.909, total=   0.5s
[CV] bootstrap=True, max_features=sqrt, min_samples_split=5, n_estimators=500 
[CV]  bootstrap=True, max_features=sqrt, min_samples_split=5, n_estimators=500, score=1.000, total=   0.5s
[CV] bootstrap=True, max_features=sqrt, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=True, max_features=sqrt, min_samples_split=10, n_estimators=200, score=0.957, total=   0.2s
[CV] bootstrap=True, max_features=sqrt, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=True, max_features=sqrt, min_samples_split=10, n_estimators=200, score=0.826, total=   0.2s
[CV] bootstrap=True, max_features=sqrt, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=True, max_features=sqrt, min_samples_split=10, n_

[CV]  bootstrap=False, max_features=sqrt, min_samples_split=2, n_estimators=200, score=0.826, total=   0.2s
[CV] bootstrap=False, max_features=sqrt, min_samples_split=2, n_estimators=200 
[CV]  bootstrap=False, max_features=sqrt, min_samples_split=2, n_estimators=200, score=1.000, total=   0.1s
[CV] bootstrap=False, max_features=sqrt, min_samples_split=2, n_estimators=200 
[CV]  bootstrap=False, max_features=sqrt, min_samples_split=2, n_estimators=200, score=0.909, total=   0.2s
[CV] bootstrap=False, max_features=sqrt, min_samples_split=2, n_estimators=200 
[CV]  bootstrap=False, max_features=sqrt, min_samples_split=2, n_estimators=200, score=0.955, total=   0.1s
[CV] bootstrap=False, max_features=sqrt, min_samples_split=2, n_estimators=500 
[CV]  bootstrap=False, max_features=sqrt, min_samples_split=2, n_estimators=500, score=0.957, total=   0.4s
[CV] bootstrap=False, max_features=sqrt, min_samples_split=2, n_estimators=500 
[CV]  bootstrap=False, max_features=sqrt, min_samples_split=

[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed:   35.8s finished


GridSearchCV(estimator=RandomForestClassifier(),
             param_grid={'bootstrap': [True, False],
                         'max_features': ['auto', 'sqrt'],
                         'min_samples_split': [2, 5, 10],
                         'n_estimators': [200, 500]},
             verbose=3)

In [15]:
# Print classification repot
rf_pred = rf.predict(X_test_scaled)
print(confusion_matrix(y_test, rf_pred))
print(classification_report(y_test, rf_pred))

[[12  0  0]
 [ 0 13  0]
 [ 0  1 12]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.93      1.00      0.96        13
           2       1.00      0.92      0.96        13

    accuracy                           0.97        38
   macro avg       0.98      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38



In [16]:
# Save model
joblib.dump(rf, 'iris_rf.sav')

['iris_rf.sav']