In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier


import sklearn.model_selection as model_selection
from utilities2 import visualize_classifier
%matplotlib widget

In [2]:
# Which ensemble to train: 'rf' -> RandomForestClassifier, 'erf' -> ExtraTreesClassifier.
classifier_type = 'erf'

In [3]:
# Load input data
# Comma-separated numeric file: every column except the last is a feature,
# and the last column holds the class label.
input_file = 'data_random_forests.txt'
data = np.loadtxt(input_file, delimiter=',')
X = data[:, :-1]
y = data[:, -1]

In [4]:
# Separate the samples by class label (0, 1, 2); each result is an independent
# array holding only the rows of X whose label matches.
class_0, class_1, class_2 = (np.array(X[y == label]) for label in (0, 1, 2))

In [5]:
# Visualize input data
# Each class is drawn with its own hollow marker shape (square, circle,
# triangle) using the first two feature columns as x and y.
plt.figure()
marker_by_class = ((class_0, 's'), (class_1, 'o'), (class_2, '^'))
for points, marker in marker_by_class:
    plt.scatter(points[:, 0], points[:, 1], s=75, facecolors='white',
                edgecolors='black', linewidth=1, marker=marker)
plt.title('Input data')


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Input data')

In [6]:
# Split data into training and testing datasets
# 25% of the samples are held out for testing; the fixed random_state makes
# the split repeatable across runs.
split_options = {'test_size': 0.25, 'random_state': 5}
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, **split_options)

In [7]:
# Ensemble Learning classifier
# Both ensemble types are built with the same hyper-parameters so that their
# results are directly comparable.
params = {'n_estimators': 100, 'max_depth': 4, 'random_state': 0}

# Explicit mapping from the configuration value to the estimator class.
# An unrecognised value (e.g. a typo such as 'RF') is rejected instead of
# silently training an ExtraTrees model.
classifier_factories = {
    'rf': RandomForestClassifier,
    'erf': ExtraTreesClassifier,
}
if classifier_type not in classifier_factories:
    raise ValueError(
        "Unknown classifier_type %r; expected one of %s"
        % (classifier_type, sorted(classifier_factories)))
classifier = classifier_factories[classifier_type](**params)

# Fit on the training split, then pass the fitted model and the training
# points to the project plotting helper.
classifier.fit(X_train, y_train)
visualize_classifier(classifier, X_train, y_train)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  plt.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray)


In [8]:
# Predict labels for the held-out test split; y_test_pred is reused by the
# test-set classification report further down.
y_test_pred = classifier.predict(X_test)
# Hand the fitted model plus the test points and their true labels to the
# project plotting helper (presumably draws decision regions -- confirm in
# utilities2).
visualize_classifier(classifier, X_test, y_test)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  plt.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray)


In [9]:
# Evaluate classifier performance
# Display names for the three labels, in label order; also used by the
# test-set report.
class_names = ['Class-0', 'Class-1', 'Class-2']
banner = "#" * 40

print("\n" + banner)
print("\nClassifier performance on training dataset\n")
# Score the model on the same data it was fitted on.
y_train_pred = classifier.predict(X_train)
train_report = classification_report(y_train, y_train_pred,
                                     target_names=class_names)
print(train_report)
print(banner + "\n")


########################################

Classifier performance on training dataset

              precision    recall  f1-score   support

     Class-0       0.89      0.83      0.86       221
     Class-1       0.82      0.84      0.83       230
     Class-2       0.83      0.86      0.85       224

    accuracy                           0.85       675
   macro avg       0.85      0.85      0.85       675
weighted avg       0.85      0.85      0.85       675

########################################



In [10]:
# Report precision / recall / F1 on the held-out test split, using the
# predictions already stored in y_test_pred.
banner = "#" * 40
print(banner)
print("\nClassifier performance on test dataset\n")
test_report = classification_report(y_test, y_test_pred,
                                    target_names=class_names)
print(test_report)
print(banner + "\n")

########################################

Classifier performance on test dataset

              precision    recall  f1-score   support

     Class-0       0.92      0.85      0.88        79
     Class-1       0.84      0.84      0.84        70
     Class-2       0.85      0.92      0.89        76

    accuracy                           0.87       225
   macro avg       0.87      0.87      0.87       225
weighted avg       0.87      0.87      0.87       225

########################################



In [11]:
# Compute confidence
# Hand-picked two-feature query points, one per row.
test_datapoints = np.array([
    [5, 5],
    [3, 6],
    [6, 4],
    [7, 2],
    [4, 4],
    [5, 2],
])

In [12]:
# Print the per-class probability estimate and the most likely class for
# every query point.
print("\nConfidence measure:")
# One batched predict_proba call for all points instead of one call per
# point; row i of the result belongs to test_datapoints[i].
all_probabilities = classifier.predict_proba(test_datapoints)
for datapoint, probabilities in zip(test_datapoints, all_probabilities):
    # The index of the largest probability is used directly as the class
    # number; this relies on the labels being 0, 1, 2 so that column order
    # and label value coincide.
    predicted_class = 'Class-' + str(np.argmax(probabilities))
    print('\nDatapoint:', datapoint)
    print('Predicted class:', predicted_class)
    print('With Proba:', probabilities)


Confidence measure:

Datapoint: [5 5]
Predicted class: Class-0
With Proba: [0.48904419 0.28020114 0.23075467]

Datapoint: [3 6]
Predicted class: Class-0
With Proba: [0.66707383 0.12424406 0.20868211]

Datapoint: [6 4]
Predicted class: Class-1
With Proba: [0.25788769 0.49535144 0.24676087]

Datapoint: [7 2]
Predicted class: Class-1
With Proba: [0.10794013 0.6246677  0.26739217]

Datapoint: [4 4]
Predicted class: Class-2
With Proba: [0.33383778 0.21495182 0.45121039]

Datapoint: [5 2]
Predicted class: Class-2
With Proba: [0.18671115 0.28760896 0.52567989]


In [13]:
# Visualize the datapoints
# The query points carry no ground-truth labels, so a list of zeros (one per
# point) is supplied as the label argument of the plotting helper.
placeholder_labels = [0] * len(test_datapoints)
visualize_classifier(classifier, test_datapoints, placeholder_labels)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  plt.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray)
