# Random Forest Classification

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
dataset = pd.read_csv('image_bins_all.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
X = dataset.iloc[:, 1:].values

In [4]:
print(X)

[[ 5345    12     0 ...     0 15601     1]
 [10935     2     0 ...     0 19707     1]
 [ 5292     2     0 ...     0 15613     1]
 ...
 [ 6462     0     0 ...     0 14944     0]
 [ 6747     0     0 ...     7 13566     0]
 [ 5241     0     0 ...     0 13555     0]]


## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [6]:
print(X_train)

[[ 5334     0     0 ...     0 10580     1]
 [ 6504     0     0 ...     0 14485     0]
 [ 3999     0     0 ...     0 11879     1]
 ...
 [ 4580    29     0 ...     0 14414     1]
 [ 5550     0     0 ...     0  9293     1]
 [ 3700    99     0 ...     0 10119     1]]


In [7]:
print(y_train)

[1 0 1 ... 1 1 1]


In [8]:
print(X_test)

[[ 3948     0     0 ...     0 12508     1]
 [ 5619    18     0 ...     0 11894     1]
 [ 5700     0     0 ...     0 12682     1]
 ...
 [ 4971     0     0 ...     0  9922     1]
 [ 5229     2     0 ...     0 13103     1]
 [ 3771     0     0 ...     0 10426     1]]


In [9]:
print(y_test)

[1 1 1 ... 1 1 1]


## Feature Scaling

In [10]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [11]:
print(X_train)

[[ 0.14193965 -0.18795293 -0.05884865 ... -0.07606341 -0.65642826
   1.00456598]
 [ 0.67038318 -0.18795293 -0.05884865 ... -0.07606341  0.72655228
  -0.99545477]
 [-0.46102795 -0.18795293 -0.05884865 ... -0.07606341 -0.19637916
   1.00456598]
 ...
 [-0.19861284  1.63314085 -0.05884865 ... -0.07606341  0.70140718
   1.00456598]
 [ 0.23949846 -0.18795293 -0.05884865 ... -0.07606341 -1.11222747
   1.00456598]
 [-0.59607463  6.02888445 -0.05884865 ... -0.07606341 -0.81969433
   1.00456598]]


In [12]:
print(X_test)

[[-0.48406267 -0.18795293 -0.05884865 ... -0.07606341  0.02638518
   1.00456598]
 [ 0.27066308  0.94238114 -0.05884865 ... -0.07606341 -0.19106682
   1.00456598]
 [ 0.30724763 -0.18795293 -0.05884865 ... -0.07606341  0.08800838
   1.00456598]
 ...
 [-0.02201334 -0.18795293 -0.05884865 ... -0.07606341 -0.88946313
   1.00456598]
 [ 0.09451524 -0.06236025 -0.05884865 ... -0.07606341  0.23710821
   1.00456598]
 [-0.56400669 -0.18795293 -0.05884865 ... -0.07606341 -0.71096833
   1.00456598]]


## Training the Random Forest Classification model on the Training set

In [13]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)

## Making the Confusion Matrix

In [14]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[2959    0]
 [   0 3041]]


1.0

In [15]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2959
           1       1.00      1.00      1.00      3041

    accuracy                           1.00      6000
   macro avg       1.00      1.00      1.00      6000
weighted avg       1.00      1.00      1.00      6000



In [16]:
from sklearn import svm, metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns
print(confusion_matrix(y_test, y_pred))

print('Accuracy: {0:.3f}'.format(metrics.accuracy_score(y_test, y_pred)))
print('F1 Score: {0:.3f}'.format(metrics.f1_score(y_test, y_pred)))
print('Sensitivity: {0:.3f}'.format(metrics.recall_score(y_test, y_pred)))
print('Precision: {0:.3f}'.format(metrics.precision_score(y_test, y_pred)))
print('Recall: {0:.3f}'.format(metrics.recall_score(y_test, y_pred)))

[[2959    0]
 [   0 3041]]
Accuracy: 1.000
F1 Score: 1.000
Sensitivity: 1.000
Precision: 1.000
Recall: 1.000
