In [9]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_validate
from sklearn.tree import DecisionTreeClassifier
import sklearn.model_selection as model_selection
from utilities2 import visualize_classifier
%matplotlib widget

In [2]:
# Load input data
input_file = 'data_decision_trees.txt'
data = np.loadtxt(input_file, delimiter=',')
X, y = data[:, :-1], data[:, -1]

In [3]:
# Separate input data into two classes based on labels
class_0 = np.array(X[y==0])
class_1 = np.array(X[y==1])

In [4]:
class_1

array([[ 2.58,  9.88],
       [ 8.3 ,  5.36],
       [ 3.41,  1.46],
       [ 2.18,  8.08],
       [ 7.41,  5.87],
       [ 2.6 ,  1.7 ],
       [ 1.88,  7.33],
       [ 7.93,  6.52],
       [ 3.1 ,  3.05],
       [ 2.02,  8.78],
       [ 8.9 ,  4.67],
       [ 2.53,  2.63],
       [ 3.54,  7.7 ],
       [ 6.44,  4.94],
       [ 3.34,  2.62],
       [ 2.35,  7.94],
       [ 7.86,  4.65],
       [ 4.47,  1.69],
       [ 2.88,  7.87],
       [ 7.21,  5.37],
       [ 2.46,  2.61],
       [ 2.11,  8.14],
       [ 8.68,  5.61],
       [ 3.19,  2.19],
       [ 1.59,  8.38],
       [ 6.98,  4.72],
       [ 2.9 ,  3.02],
       [ 1.08,  7.69],
       [ 8.13,  5.58],
       [ 3.13,  1.68],
       [ 2.85,  6.91],
       [ 8.68,  5.95],
       [ 3.69,  1.27],
       [ 2.46,  8.31],
       [ 7.81,  4.54],
       [ 2.8 ,  1.32],
       [ 2.18,  8.42],
       [ 8.22,  5.77],
       [ 4.04,  1.1 ],
       [ 2.53,  7.92],
       [ 7.31,  5.45],
       [ 3.35,  2.18],
       [ 1.94,  8.62],
       [ 7.

In [5]:
# Visualize input data
plt.figure()
plt.scatter(class_0[:, 0], class_0[:, 1], s=75, facecolors='black', 
        edgecolors='black', linewidth=1, marker='x')
plt.scatter(class_1[:, 0], class_1[:, 1], s=75, facecolors='white', 
        edgecolors='black', linewidth=1, marker='o')
plt.title('Input data')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Input data')

In [6]:
# Split data into training and testing datasets 
X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.25, random_state=5)

In [10]:
# Decision Trees classifier 
params = {'random_state': 0, 'max_depth': 4}
classifier = DecisionTreeClassifier(**params)
classifier.fit(X_train, y_train)
visualize_classifier(classifier, X_train, y_train, 'Training dataset')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  plt.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray)


In [11]:
y_test_pred = classifier.predict(X_test)
visualize_classifier(classifier, X_test, y_test, 'Test dataset')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  plt.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray)


In [12]:
# Evaluate classifier performance
class_names = ['Class-0', 'Class-1']
print("\n" + "#"*40)
print("\nClassifier performance on training dataset\n")
print(classification_report(y_train, classifier.predict(X_train), target_names=class_names))
print("#"*40 + "\n")


########################################

Classifier performance on training dataset

              precision    recall  f1-score   support

     Class-0       0.99      1.00      1.00       137
     Class-1       1.00      0.99      1.00       133

    accuracy                           1.00       270
   macro avg       1.00      1.00      1.00       270
weighted avg       1.00      1.00      1.00       270

########################################



In [13]:
print("#"*40)
print("\nClassifier performance on test dataset\n")
print(classification_report(y_test, y_test_pred, target_names=class_names))
print("#"*40 + "\n")



########################################

Classifier performance on test dataset

              precision    recall  f1-score   support

     Class-0       0.93      1.00      0.97        43
     Class-1       1.00      0.94      0.97        47

    accuracy                           0.97        90
   macro avg       0.97      0.97      0.97        90
weighted avg       0.97      0.97      0.97        90

########################################



In [14]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from utilities import visualize_classifier

input_file = 'data_random_forests.txt'
data = np.loadtxt(input_file, delimiter=',')
X, y = data[:, :-1], data[:, -1]

# Separate input data into three classes based on labels
class_0 = np.array(X[y==0])
class_1 = np.array(X[y==1])
class_2 = np.array(X[y==2])


from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=5)


In [15]:
from sklearn.model_selection import GridSearchCV 

# defining parameter range 
param_grid = {'n_estimators': [10, 100, 200, 300, 400, 500, 750, 1000], 
			'max_depth': [1, 2, 3, 4, 5, 6]} 


from sklearn.ensemble import RandomForestClassifier
grid = GridSearchCV(RandomForestClassifier(), param_grid, refit = True, verbose = 3)

grid.fit(X_train, y_train)

# print best parameter after tuning 
print(grid.best_params_) 

# print how our model looks after hyper-parameter tuning 
print(grid.best_estimator_)

grid_predictions = grid.predict(X_test) 

# print classification report 
print(confusion_matrix(y_test,grid_predictions))
print(classification_report(y_test, grid_predictions))

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV] max_depth=1, n_estimators=10 ....................................
[CV] ........ max_depth=1, n_estimators=10, score=0.815, total=   0.1s
[CV] max_depth=1, n_estimators=10 ....................................
[CV] ........ max_depth=1, n_estimators=10, score=0.830, total=   0.1s
[CV] max_depth=1, n_estimators=10 ....................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV] ........ max_depth=1, n_estimators=10, score=0.748, total=   0.1s
[CV] max_depth=1, n_estimators=10 ....................................
[CV] ........ max_depth=1, n_estimators=10, score=0.741, total=   0.0s
[CV] max_depth=1, n_estimators=10 ....................................
[CV] ........ max_depth=1, n_estimators=10, score=0.822, total=   0.0s
[CV] max_depth=1, n_estimators=100 ...................................
[CV] ....... max_depth=1, n_estimators=100, score=0.830, total=   0.3s
[CV] max_depth=1, n_estimators=100 ...................................
[CV] ....... max_depth=1, n_estimators=100, score=0.837, total=   0.2s
[CV] max_depth=1, n_estimators=100 ...................................
[CV] ....... max_depth=1, n_estimators=100, score=0.793, total=   0.2s
[CV] max_depth=1, n_estimators=100 ...................................
[CV] ....... max_depth=1, n_estimators=100, score=0.763, total=   0.2s
[CV] max_depth=1, n_estimators=100 ...................................
[CV] .

[Parallel(n_jobs=1)]: Done 240 out of 240 | elapsed:  3.8min finished


{'max_depth': 3, 'n_estimators': 500}
RandomForestClassifier(max_depth=3, n_estimators=500)
[[67  5  7]
 [ 5 59  6]
 [ 1  7 68]]
              precision    recall  f1-score   support

         0.0       0.92      0.85      0.88        79
         1.0       0.83      0.84      0.84        70
         2.0       0.84      0.89      0.87        76

    accuracy                           0.86       225
   macro avg       0.86      0.86      0.86       225
weighted avg       0.86      0.86      0.86       225

