# Random Forest Classification

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
dataset = pd.read_csv('image_bins_stats_rmean.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
X = dataset.iloc[:, 1:].values

In [4]:
print(X)

[[  1.48568756 116.4166667    0.         ...   0.         159.7700147
    1.        ]
 [  0.1911294   91.           0.         ...   0.         149.8398539
    1.        ]
 [  1.218065   115.           0.         ...   0.         154.1894575
    1.        ]
 ...
 [  0.21742495   0.           0.         ...   0.         145.9154176
    0.        ]
 [  0.2261746    0.           0.         ... 114.4285714  144.3177797
    0.        ]
 [  0.22343064   0.           0.         ...   0.         141.1554408
    0.        ]]


## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [6]:
print(X_train)

[[  0.2727784    0.           0.         ...   0.         160.9055766
    1.        ]
 [  0.33733087   0.           0.         ...   0.         177.4334139
    0.        ]
 [  0.89122281   0.           0.         ...   0.         175.5484468
    1.        ]
 ...
 [  1.4010917  110.8965517    0.         ...   0.         149.9181351
    1.        ]
 [  0.27243243   0.           0.         ...   0.         158.9433983
    1.        ]
 [  2.46162162 101.8181818    0.         ...   0.         140.4350232
    1.        ]]


In [8]:
print(y_train)

[1 0 1 ... 1 1 1]


In [7]:
print(X_test)

[[6.87943262e-01 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  1.71546210e+02 1.00000000e+00]
 [1.97722015e-01 1.01444444e+02 0.00000000e+00 ... 0.00000000e+00
  1.50751387e+02 1.00000000e+00]
 [3.01754386e-01 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  1.84260842e+02 1.00000000e+00]
 ...
 [1.82860591e-01 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  1.39794598e+02 1.00000000e+00]
 [2.62574106e-01 1.04500000e+02 0.00000000e+00 ... 0.00000000e+00
  1.46185912e+02 1.00000000e+00]
 [7.41713073e-01 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  1.73090543e+02 1.00000000e+00]]


In [9]:
print(y_test)

[1 1 1 ... 1 1 1]


## Feature Scaling

In [10]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [11]:
print(X_train)

[[-0.43464389 -0.48040164 -0.09754058 ... -0.24405691 -0.08072605
   1.00456598]
 [-0.34474473 -0.48040164 -0.09754058 ... -0.24405691  0.91543395
  -0.99545477]
 [ 0.4266343  -0.48040164 -0.09754058 ... -0.24405691  0.80182388
   1.00456598]
 ...
 [ 1.13670453  2.16232891 -0.09754058 ... -0.24405691 -0.74295726
   1.00456598]
 [-0.43512571 -0.48040164 -0.09754058 ... -0.24405691 -0.19898977
   1.00456598]
 [ 2.61365425  1.94598597 -0.09754058 ... -0.24405691 -1.31452005
   1.00456598]]


In [12]:
print(X_test)

[[ 0.14353652 -0.48040164 -0.09754058 ... -0.24405691  0.56060249
   1.00456598]
 [-0.53917136  1.93707959 -0.09754058 ... -0.24405691 -0.69273578
   1.00456598]
 [-0.39429041 -0.48040164 -0.09754058 ... -0.24405691  1.32693431
   1.00456598]
 ...
 [-0.55986816 -0.48040164 -0.09754058 ... -0.24405691 -1.35311954
   1.00456598]
 [-0.44885493  2.00989529 -0.09754058 ... -0.24405691 -0.96790453
   1.00456598]
 [ 0.21841918 -0.48040164 -0.09754058 ... -0.24405691  0.65368196
   1.00456598]]


## Training the Random Forest Classification model on the Training set

In [13]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)

## Making the Confusion Matrix

In [14]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[2959    0]
 [   0 3041]]


1.0

In [15]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2959
           1       1.00      1.00      1.00      3041

    accuracy                           1.00      6000
   macro avg       1.00      1.00      1.00      6000
weighted avg       1.00      1.00      1.00      6000



In [16]:
from sklearn import svm, metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns
print(confusion_matrix(y_test, y_pred))

print('Accuracy: {0:.3f}'.format(metrics.accuracy_score(y_test, y_pred)))
print('F1 Score: {0:.3f}'.format(metrics.f1_score(y_test, y_pred)))
print('Sensitivity: {0:.3f}'.format(metrics.recall_score(y_test, y_pred)))
print('Precision: {0:.3f}'.format(metrics.precision_score(y_test, y_pred)))
print('Recall: {0:.3f}'.format(metrics.recall_score(y_test, y_pred)))

[[2959    0]
 [   0 3041]]
Accuracy: 1.000
F1 Score: 1.000
Sensitivity: 1.000
Precision: 1.000
Recall: 1.000
