# <b>Random Forests Classifier on Iris Dataset</b>

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn import datasets

In [3]:
# laad the Iris dataset and define feature and target variables
iris_data = datasets.load_iris()

features = iris_data.data
targets = iris_data.target

In [4]:
# split dataset fot training and testing. %80 for training, %20 for testing with test_size=0.2 parameter.
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2)

In [8]:
# create the Random Forests model and fit
model = RandomForestClassifier(n_estimators=1000, max_features="sqrt")
fitted_model = model.fit(X_train, y_train)

In [10]:
# get predictions of fitted model
predictions = fitted_model.predict(X_test)

In [12]:
# show confusion matrix and accuracy score
print(confusion_matrix(y_test, predictions))
print(accuracy_score(y_test, predictions))

[[10  0  0]
 [ 0 10  2]
 [ 0  0  8]]
0.9333333333333333


# <b>Random Forests Classifier on Credit Dataset</b>

In [13]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate

In [14]:
# read the data from csv file
credit_data = pd.read_csv("./datasets/credit_data.csv")

In [15]:
# define features and targets
features = credit_data[["income", "age", "loan"]]
target = credit_data["default"]

In [16]:
# machine learning handle arrays, not DataFrames
X = np.array(features).reshape(-1,3)
y = np.array(target)

In [17]:
# create the Random Forests model and cross validate
model = RandomForestClassifier()
predicted = cross_validate(model, X, y, cv=10)

In [18]:
# show the mean of accuracy
np.mean(predicted["test_score"])

0.9894999999999999

# <b>Random Forests Classifier on Digits Dataset</b>

In [21]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn import datasets

In [29]:
# laad the Digits dataset

digit_data = datasets.load_digits()
print(digit_data["data"][0])
print(digit_data["target"][0])

[ 0.  0.  5. 13.  9.  1.  0.  0.  0.  0. 13. 15. 10. 15.  5.  0.  0.  3.
 15.  2.  0. 11.  8.  0.  0.  4. 12.  0.  0.  8.  8.  0.  0.  5.  8.  0.
  0.  9.  8.  0.  0.  4. 11.  0.  1. 12.  7.  0.  0.  2. 14.  5. 10. 12.
  0.  0.  0.  0.  6. 13. 10.  0.  0.  0.]
0


In [31]:
# define features and target variables
image_features = digit_data["images"].reshape((len(digit_data.images), -1)) #flatten the data
image_target = digit_data["target"]

In [33]:
# split dataset fot training and testing. %80 for training, %20 for testing with test_size=0.2 parameter.
X_train, X_test, y_train, y_test = train_test_split(image_features, image_target, test_size=0.2)

In [35]:
# define the param grid for Grid Search
param_grid = {"n_estimators": [10, 100, 500, 1000],
              "max_depth": [1, 5, 10, 15],
              "min_samples_leaf": [1, 2, 4, 10, 15, 30, 50]}

In [37]:
# create the Random Forests model and run grid search on it
model = RandomForestClassifier(n_jobs=-1, max_features="sqrt")
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=10)
grid_search.fit(X_train, y_train)

In [38]:
# get the optimal parameters for Random Forests model
grid_search.best_params_

{'max_depth': 15, 'min_samples_leaf': 1, 'n_estimators': 100}

In [39]:
# make predictions with trained model
grid_predictions = grid_search.predict(X_test)

In [40]:
# show confusion matrix and accuracy score
print(confusion_matrix(y_test, grid_predictions))
print(accuracy_score(y_test, grid_predictions))

[[32  0  0  0  1  0  0  0  0  0]
 [ 0 33  0  0  0  0  0  0  0  0]
 [ 0  0 40  0  0  0  0  0  0  0]
 [ 0  0  0 45  0  0  0  1  1  0]
 [ 0  0  0  0 40  0  0  2  0  0]
 [ 0  0  0  0  0 32  0  0  0  0]
 [ 0  0  0  0  0  0 37  0  0  0]
 [ 0  0  0  0  0  0  0 36  0  0]
 [ 0  2  0  0  0  0  0  0 35  0]
 [ 0  0  0  2  0  0  0  0  0 21]]
0.975
