Classification decision tree using the *CART* algorithm, for a binary target variable and numeric input features

In [None]:
import pandas as pa 
import numpy as np 
from collections import Counter

class Node:
    """A node of a binary CART classification tree grown with Gini impurity.

    Each node stores the samples that reached it (``X``/``Y``), the class
    counts, its Gini impurity and majority class (``ylast``). After
    ``growTree`` an internal node also holds the chosen split
    (``bestFeature``/``bestValue``) and its ``left``/``right`` children.

    The impurity computation only counts labels equal to ``0`` and ``1``.
    Samples with ``feature <= bestValue`` go to the left child, all others
    to the right; the same rule is used when searching for a split, when
    partitioning the data and when predicting.
    """

    def __init__(
        self,
        Y: list,
        X: pa.DataFrame,
        minSamplesForStop=None,
        maxDepth=None,
        depth=None,
        nodeType=None,
        rule=None
    ):
        """Create a node from its samples.

        Args:
            Y: Class labels (0/1) of the samples in this node.
            X: Feature columns of the same samples.
            minSamplesForStop: Minimum number of samples a node needs to be
                split further (default 20).
            maxDepth: Maximum depth of the tree (default 5).
            depth: Depth of this node; the root is 0.
            nodeType: Label for the node, 'root' when omitted.
            rule: Text of the rule that led to this node, "" when omitted.
        """
        self.Y = Y
        self.X = X

        # Stopping criteria. Compare against None (not truthiness) so that an
        # explicit 0 is honoured instead of being replaced by the default.
        self.minSamplesForStop = 20 if minSamplesForStop is None else minSamplesForStop
        self.maxDepth = 5 if maxDepth is None else maxDepth

        # Depth of the current node
        self.depth = depth if depth else 0

        # Feature names are taken from the columns of X
        self.features = list(self.X.columns)

        # Type of the node and the rule that produced it
        self.nodeType = nodeType if nodeType else 'root'
        self.rule = rule if rule else ""

        # Class distribution and impurity of the samples in this node
        self.counts = Counter(Y)
        self.giniImpurity = self.getGini()

        # Majority class: classes sorted by ascending count, take the last one
        # (None when the node holds no samples)
        sortedCounts = sorted(self.counts.items(), key=lambda item: item[1])
        self.ylast = sortedCounts[-1][0] if sortedCounts else None

        self.n = len(Y)

        # Children and split are filled in by growTree()
        self.left = None
        self.right = None
        self.bestFeature = None
        self.bestValue = None

    @staticmethod
    def calculateGiniImpurity(y1Count: int, y2Count: int) -> float:
        """Return the Gini impurity ``1 - (p1**2 + p2**2)`` of two class counts.

        ``None`` counts are treated as 0. An empty node (both counts 0) has
        impurity 0.0.
        """
        if y1Count is None:
            y1Count = 0

        if y2Count is None:
            y2Count = 0

        totalCount = y1Count + y2Count

        # No observations at all: define the impurity as zero instead of
        # dividing by zero
        if totalCount == 0:
            return 0.0

        class1 = y1Count / totalCount
        class2 = y2Count / totalCount

        return 1 - (class1 ** 2 + class2 ** 2)

    @staticmethod
    def ma(x: np.ndarray, window: int) -> np.ndarray:
        """Return the moving average of ``x`` over ``window`` consecutive items."""
        return np.convolve(x, np.ones(window), 'valid') / window

    def getGini(self):
        """Return the Gini impurity of this node for the classes 0 and 1."""
        y1_count, y2_count = self.counts.get(0, 0), self.counts.get(1, 0)
        return self.calculateGiniImpurity(y1_count, y2_count)

    def bestSplit(self) -> tuple:
        """Find the split with the largest positive Gini gain.

        Candidate thresholds are the midpoints between consecutive sorted
        unique values of each feature. Rows containing any missing value are
        ignored while searching.

        Returns:
            ``(bestFeature, bestValue)``, or ``(None, None)`` when no split
            yields a gain greater than zero.
        """
        df = self.X.copy()
        df['Y'] = self.Y

        # Dropping missing values does not depend on the feature, so do it once
        clean = df.dropna()
        labels = clean['Y']

        baseGini = self.getGini()
        maxGain = 0
        bestFeature = None
        bestValue = None

        for feature in self.features:
            column = clean[feature]
            uniqueValues = column.sort_values().unique()

            # A midpoint needs two distinct values. With fewer, np.convolve in
            # 'valid' mode either raises (empty input) or returns spurious
            # thresholds (single value), so skip the feature.
            if len(uniqueValues) < 2:
                continue

            for value in self.ma(uniqueValues, 2):
                # Same boundary rule as growTree/predictObservations
                goesLeft = column <= value
                leftCounts = Counter(labels[goesLeft])
                rightCounts = Counter(labels[~goesLeft])

                # Class distribution on each side
                y0Left, y1Left = leftCounts.get(0, 0), leftCounts.get(1, 0)
                y0Right, y1Right = rightCounts.get(0, 0), rightCounts.get(1, 0)

                giniLeft = self.calculateGiniImpurity(y0Left, y1Left)
                giniRight = self.calculateGiniImpurity(y0Right, y1Right)

                totalLeft = y0Left + y1Left
                totalRight = y0Right + y1Right
                total = totalLeft + totalRight

                # Only labels 0 and 1 are counted; nothing to weigh otherwise
                if total == 0:
                    continue

                # Impurity of the split, weighted by the size of each side
                giniWeight = (
                    (totalLeft / total) * giniLeft
                    + (totalRight / total) * giniRight
                )
                giniGain = baseGini - giniWeight

                if giniGain > maxGain:
                    bestFeature = feature
                    bestValue = value
                    maxGain = giniGain

        return (bestFeature, bestValue)

    def growTree(self):
        """Recursively split this node until a stopping criterion is met.

        A node is split only while ``depth < maxDepth``, it holds at least
        ``minSamplesForStop`` samples and ``bestSplit`` finds a split with
        positive gain.
        """
        if self.depth >= self.maxDepth or self.n < self.minSamplesForStop:
            return

        bestFeature, bestValue = self.bestSplit()
        if bestFeature is None:
            return

        # Saving the best split to the current node
        self.bestFeature = bestFeature
        self.bestValue = bestValue

        df = self.X.copy()
        df['Y'] = self.Y

        # Rows whose split feature is missing satisfy neither comparison and
        # are therefore not passed to either child
        left_df = df[df[bestFeature] <= bestValue].copy()
        right_df = df[df[bestFeature] > bestValue].copy()

        self.left = Node(
            left_df['Y'].values.tolist(),
            left_df[self.features],
            depth=self.depth + 1,
            maxDepth=self.maxDepth,
            minSamplesForStop=self.minSamplesForStop,
            nodeType='left_node',
            rule=f"{bestFeature} <= {round(bestValue, 3)}"
        )
        self.left.growTree()

        self.right = Node(
            right_df['Y'].values.tolist(),
            right_df[self.features],
            depth=self.depth + 1,
            maxDepth=self.maxDepth,
            minSamplesForStop=self.minSamplesForStop,
            nodeType='right_node',
            rule=f"{bestFeature} > {round(bestValue, 3)}"
        )
        self.right.growTree()

    def predict(self, X: pa.DataFrame):
        """Return the predicted class for every row of ``X`` as a list.

        Only the columns listed in ``self.features`` are read; extra columns
        in ``X`` are ignored.
        """
        predictions = []

        for _, row in X.iterrows():
            values = {feature: row.loc[feature] for feature in self.features}
            predictions.append(self.predictObservations(values))

        return predictions

    def predictObservations(self, values: dict) -> int:
        """Return the predicted class for one observation.

        Args:
            values: Mapping of feature name to value for the observation.

        Returns:
            The majority class of the leaf reached by following the splits.
        """
        currentNode = self

        # Only nodes that growTree() actually split carry a bestFeature, so
        # this alone identifies internal nodes.
        while currentNode.bestFeature is not None:
            # "<=" matches the partition used while growing the tree
            if values.get(currentNode.bestFeature) <= currentNode.bestValue:
                child = currentNode.left
            else:
                child = currentNode.right

            # Defensive: never loop forever on a node without that child
            if child is None:
                break
            currentNode = child

        return currentNode.ylast

    

In [None]:
class Util:
    """Stateless helpers for label conversion and classification metrics."""

    @staticmethod
    def _safeRatio(numerator, denominator):
        """Return numerator / denominator, or 0.0 when the denominator is 0."""
        return numerator / denominator if denominator else 0.0

    @staticmethod
    def convertToBinaryArr(inputArr, matchWithOne):
        """Map each item to 1 if it equals ``matchWithOne``, otherwise to 0.

        Returns:
            A list of 0/1 integers, one per item of ``inputArr``.
        """
        return [1 if target == matchWithOne else 0 for target in inputArr]

    @staticmethod
    def calculateAccuracy(predictedLabels, targetLabels):
        """Return the percentage (0-100) of predictions equal to their target.

        Labels are compared position by position. Empty input yields 0.0
        rather than a division by zero.
        """
        total = len(predictedLabels)
        if total == 0:
            return 0.0

        count = sum(
            1
            for i, predicted in enumerate(predictedLabels)
            if targetLabels[i] == predicted
        )
        return (count / total) * 100

    @staticmethod
    def getPerformanceMetrics(targetLabels, predictedLabels, tpLable):
        """Compute binary classification metrics.

        Args:
            targetLabels: True labels.
            predictedLabels: Predicted labels, same order as ``targetLabels``.
            tpLable: The label treated as the positive class.

        Returns:
            ``(accuracy, recall, specificity, precision, f1)`` as fractions
            in [0, 1]. A metric whose denominator is zero (for example recall
            when no positive sample exists) is reported as 0.0.
        """
        tp = tn = fp = fn = 0
        for i, predicted in enumerate(predictedLabels):
            target = targetLabels[i]
            isPositive = target == tpLable
            if target == predicted:
                if isPositive:
                    tp += 1
                else:
                    tn += 1
            elif isPositive:
                # A positive sample that was predicted as negative
                fn += 1
            else:
                # A negative sample that was predicted as positive
                fp += 1

        accuracy = Util._safeRatio(tp + tn, tp + fn + fp + tn)
        recall = Util._safeRatio(tp, tp + fn)
        specificity = Util._safeRatio(tn, fp + tn)
        precision = Util._safeRatio(tp, tp + fp)
        f1 = 2 * Util._safeRatio(precision * recall, precision + recall)
        return (accuracy, recall, specificity, precision, f1)

Applying the above algorithm to the Heart dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from warnings import filterwarnings
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score,precision_score,recall_score,f1_score

# Experiment: train the custom CART tree (Node) on the heart data and compare
# its test metrics with scikit-learn's DecisionTreeClassifier.

# Read the sample data from the CSV file
dataFile=pd.read_csv("heart.csv")

# Hold-out split (not cross validation): 70% train / 30% test, fixed seed
train, test = train_test_split(dataFile, test_size=0.3 , random_state=1)


# Separate the target column "a1p2" from the feature columns
X_train = train.drop("a1p2",axis=1)
y_train = train["a1p2"]
X_test = test.drop("a1p2",axis=1)
y_test = test["a1p2"]


# Standardise the features: the scaler is fitted on the training rows only
# and then applied unchanged to the test rows
col_header = X_train.columns
scaler = StandardScaler()
X_train_transform = scaler.fit_transform(X_train)
X_test_transform = scaler.transform(X_test)

# The scaler returns plain arrays, so rebuild DataFrames with the column names
X_train = pd.DataFrame(X_train_transform, columns = col_header)
X_test = pd.DataFrame(X_test_transform, columns = col_header)
# Binarise the target: label 1 becomes 1, every other label becomes 0
trainLabels = Util.convertToBinaryArr(y_train, 1)
testLabels =  Util.convertToBinaryArr(y_test, 1)

# Grow the custom tree: at most 3 levels, nodes need >= 100 samples to split
root = Node(trainLabels, X_train, maxDepth=3, minSamplesForStop=100)  
root.growTree()

# Predict on a copy so the 'predicted' column is not added to X_train itself
xtrainClone = X_train.copy()
xtrainClone['predicted'] = root.predict(xtrainClone)

# Calculate accuracy (in percent) for the training data
trainAccuracy = Util.calculateAccuracy(np.array(xtrainClone['predicted']), trainLabels) 
print('Training Accuracy: %f' % trainAccuracy)

xtestClone = X_test.copy()
xtestClone['predicted'] = root.predict(xtestClone)
# Calculate accuracy (in percent) for the test data
testAccuracy = Util.calculateAccuracy(np.array(xtestClone['predicted']), testLabels) 
print('Testing Accuracy: %f' % testAccuracy)

# Metrics are returned as fractions, with label 1 as the positive class;
# they are scaled to percent when printed
accuracy, recall,specificity, precision, f1 = Util.getPerformanceMetrics(testLabels,np.array(xtestClone['predicted']),1)

print('Performance Metrics of Test Data : Accuracy : %0.2f , Recall : %0.2f ,Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy*100 , recall*100 ,specificity*100 , precision*100 , f1*100 ) )
print('----------------------------------------------------')

# Baseline for comparison: scikit-learn tree with the same criterion and depth
dTree = DecisionTreeClassifier(criterion = 'gini', max_depth = 3, random_state=1)
dTree.fit(X_train, trainLabels)
y_predict = dTree.predict(X_test)

# Confusion-matrix cells, used below to compute specificity = tn / (tn + fp)
tn, fp, fn, tp = confusion_matrix(testLabels,y_predict).ravel()

print('Performance Metrics of Test Data with scikit : Accuracy : %0.2f , Recall : %0.2f ,Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy_score(testLabels, y_predict)*100 , recall_score(testLabels, y_predict)*100 ,(tn / (tn+fp))*100 , precision_score(testLabels, y_predict)*100 , f1_score(testLabels, y_predict)*100 ) )
print('----------------------------------------------------')



Training Accuracy: 78.835979
Testing Accuracy: 70.370370
Performance Metrics of Test Data : Accuracy : 70.37 , Recall : 74.47 ,Specificity: 64.71 , Precision : 74.47, F1 Score: 74.47
----------------------------------------------------
Performance Metrics of Test Data with scikit : Accuracy : 76.54 , Recall : 82.98 ,Specificity: 67.65 , Precision : 78.00, F1 Score: 80.41
----------------------------------------------------


Improve performance on the Heart dataset by changing the train/test split ratio (test size 0.30 → 0.33) and dropping the `thal` feature

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from warnings import filterwarnings
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score,precision_score,recall_score,f1_score

# Experiment variant: same pipeline as the previous heart run, but without
# the 'thal' feature and with a 33% test split.

# Read the sample data from the CSV file
dataFile=pd.read_csv("heart.csv")

# Drop the 'thal' feature to check its effect on the metrics
dataFile = dataFile.drop('thal',axis=1)


# Hold-out split (not cross validation): 67% train / 33% test, fixed seed
train, test = train_test_split(dataFile, test_size=0.33 , random_state=1)


# Separate the target column "a1p2" from the feature columns
X_train = train.drop("a1p2",axis=1)
y_train = train["a1p2"]
X_test = test.drop("a1p2",axis=1)
y_test = test["a1p2"]


# Standardise the features: the scaler is fitted on the training rows only
# and then applied unchanged to the test rows
col_header = X_train.columns
scaler = StandardScaler()
X_train_transform = scaler.fit_transform(X_train)
X_test_transform = scaler.transform(X_test)

# The scaler returns plain arrays, so rebuild DataFrames with the column names
X_train = pd.DataFrame(X_train_transform, columns = col_header)
X_test = pd.DataFrame(X_test_transform, columns = col_header)
# Binarise the target: label 1 becomes 1, every other label becomes 0
trainLabels = Util.convertToBinaryArr(y_train, 1)
testLabels =  Util.convertToBinaryArr(y_test, 1)

# Grow the custom tree: at most 3 levels, nodes need >= 100 samples to split
root = Node(trainLabels, X_train, maxDepth=3, minSamplesForStop=100)  
root.growTree()

# Predict on a copy so the 'predicted' column is not added to X_train itself
xtrainClone = X_train.copy()
xtrainClone['predicted'] = root.predict(xtrainClone)

# Calculate accuracy (in percent) for the training data
trainAccuracy = Util.calculateAccuracy(np.array(xtrainClone['predicted']), trainLabels) 
print('Training Accuracy: %f' % trainAccuracy)

xtestClone = X_test.copy()
xtestClone['predicted'] = root.predict(xtestClone)
# Calculate accuracy (in percent) for the test data
testAccuracy = Util.calculateAccuracy(np.array(xtestClone['predicted']), testLabels) 
print('Testing Accuracy: %f' % testAccuracy)

# Metrics are returned as fractions, with label 1 as the positive class;
# they are scaled to percent when printed
accuracy, recall,specificity, precision, f1 = Util.getPerformanceMetrics(testLabels,np.array(xtestClone['predicted']),1)

print('Performance Metrics of Test Data : Accuracy : %0.2f , Recall : %0.2f ,Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy*100 , recall*100 ,specificity*100 , precision*100 , f1*100 ) )
print('----------------------------------------------------')

# Baseline for comparison: scikit-learn tree with the same criterion and depth
dTree = DecisionTreeClassifier(criterion = 'gini', max_depth = 3, random_state=1)
dTree.fit(X_train, trainLabels)
y_predict = dTree.predict(X_test)

# Confusion-matrix cells, used below to compute specificity = tn / (tn + fp)
tn, fp, fn, tp = confusion_matrix(testLabels,y_predict).ravel()

print('Performance Metrics of Test Data with scikit : Accuracy : %0.2f , Recall : %0.2f ,Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy_score(testLabels, y_predict)*100 , recall_score(testLabels, y_predict)*100 ,(tn / (tn+fp))*100 , precision_score(testLabels, y_predict)*100 , f1_score(testLabels, y_predict)*100 ) )
print('----------------------------------------------------')



Training Accuracy: 75.000000
Testing Accuracy: 75.555556
Performance Metrics of Test Data : Accuracy : 75.56 , Recall : 80.00 ,Specificity: 70.00 , Precision : 76.92, F1 Score: 78.43
----------------------------------------------------
Performance Metrics of Test Data with scikit : Accuracy : 81.11 , Recall : 88.00 ,Specificity: 72.50 , Precision : 80.00, F1 Score: 83.81
----------------------------------------------------


Apply the CART algorithm to the credit card data

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score,precision_score,recall_score,f1_score

# Experiment: train the custom CART tree (Node) on the credit data and compare
# its test metrics with scikit-learn's DecisionTreeClassifier.
# Relies on pd, np, train_test_split, Node and Util from earlier cells.
credit_card_data=pd.read_csv("credit.csv")
for feature in credit_card_data.columns: # Loop through all columns in the dataframe
    if credit_card_data[feature].dtype == 'object': # Only apply for columns holding strings
        credit_card_data[feature] = pd.Categorical(credit_card_data[feature])# Convert the column to categorical dtype
# Per-column mapping from category text to an integer code, applied with
# DataFrame.replace below
replaceCategorical = {
               "checking_balance":     {"< 0 DM": 1, "1 - 200 DM": 2 ,"> 200 DM": 3 ,"unknown":-1},
                "credit_history": {"critical": 1, "poor":2 , "good": 3, "very good": 4,"perfect": 5},
                 "savings_balance": {"< 100 DM": 1, "100 - 500 DM":2 , "500 - 1000 DM": 3, "> 1000 DM": 4,"unknown": -1},
                 "employment_duration":     {"unemployed": 1, "< 1 year": 2 ,"1 - 4 years": 3 ,"4 - 7 years": 4 ,"> 7 years": 5},
                "phone":     {"no": 1, "yes": 2 },
                 "default":     {"no": 0, "yes": 1 } 
                    }
# Remaining categorical columns are one-hot encoded instead
oneHotEncodingCols=["purpose","housing","other_credit","job"]


credit_card_data=credit_card_data.replace(replaceCategorical)
credit_card_data=pd.get_dummies(credit_card_data, columns=oneHotEncodingCols)
# Features are every column except the target "default"; pop() then removes
# the target from credit_card_data and returns it
X = credit_card_data.drop("default" , axis=1)
y = credit_card_data.pop("default")

# Hold-out split: 70% train / 30% test, fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.30, random_state=1)

# NOTE(review): y_train is a pandas Series here (not a list as in the heart
# cells); presumably fine because it shares its index with X_train - confirm
root = Node(y_train, X_train, maxDepth=3, minSamplesForStop=100)  
root.growTree()

# Predict on a copy so the 'predicted' column is not added to X_train itself
train_clone = X_train.copy()
train_clone['predicted'] = root.predict(train_clone)

# Calculate accuracy (in percent) for the training data
trainAccuracy = Util.calculateAccuracy(np.array(train_clone['predicted']), np.array(y_train))
print('Training Accuracy: %f' % trainAccuracy)

# Despite its name, text_clone holds the copy of the test features
text_clone = X_test.copy()
text_clone['predicted'] = root.predict(text_clone)
# Calculate accuracy (in percent) for the test data
testAccuracy = Util.calculateAccuracy(np.array(text_clone['predicted']), np.array(y_test)) 
print('Testing Accuracy: %f' % testAccuracy)

# Metrics are returned as fractions, with label 1 as the positive class;
# they are scaled to percent when printed
accuracy, recall,specificity, precision, f1 = Util.getPerformanceMetrics(np.array(y_test),np.array(text_clone['predicted']),1)

print('Performance Metrics of Test Data of credit card dataset : Accuracy : %0.2f , Recall : %0.2f ,Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy*100 , recall*100 ,specificity*100 , precision*100 , f1*100 ) )
print('----------------------------------------------------')
# Baseline for comparison: scikit-learn tree with the same criterion and depth
dTree = DecisionTreeClassifier(criterion = 'gini', max_depth = 3, random_state=1)
dTree.fit(X_train, y_train)
y_predict = dTree.predict(X_test)

# Confusion-matrix cells, used below to compute specificity = tn / (tn + fp)
tn, fp, fn, tp = confusion_matrix(y_test,y_predict).ravel()

print('Performance Metrics of Test Data with scikit : Accuracy : %0.2f , Recall : %0.2f ,Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy_score(y_test, y_predict)*100 , recall_score(y_test, y_predict)*100 ,tn / (tn+fp)*100 , precision_score(y_test, y_predict)*100 , f1_score(y_test, y_predict)*100 ) )
print('----------------------------------------------------')

Training Accuracy: 74.857143
Testing Accuracy: 75.000000
Performance Metrics of Test Data of credit card dataset : Accuracy : 75.00 , Recall : 46.51 ,Specificity: 86.45 , Precision : 57.97, F1 Score: 51.61
----------------------------------------------------
Performance Metrics of Test Data with scikit : Accuracy : 74.33 , Recall : 46.51 ,Specificity: 85.51 , Precision : 56.34, F1 Score: 50.96
----------------------------------------------------


Apply the CART algorithm to the voice data

In [None]:
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score,precision_score,recall_score,f1_score
from sklearn.ensemble import RandomForestClassifier


# Experiment: train the custom CART tree (Node) on the voice data and compare
# it with scikit-learn's decision tree and several ensemble classifiers.
# Relies on pd, np, StandardScaler, train_test_split, Node and Util from
# earlier cells.
voice_data = pd.read_csv(r"voice.csv",header = 0)
#voice_data.head()

# Count missing values per column (the result is not stored or printed here)
voice_data.isnull().sum()

colname=voice_data.columns

# Label-encode every column (features as well as the target), replacing each
# value by an integer code
# NOTE(review): this also turns numeric feature values into codes - confirm
# that is intended
lable_encoder=preprocessing.LabelEncoder()

for x in colname:
    voice_data[x]=lable_encoder.fit_transform(voice_data[x])

#voice_data.head()

# The last column is the target, all other columns are features
X=voice_data.values[:,:-1]
Y=voice_data.values[:,-1]
Y=Y.astype(int)

# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics influence the scaling - confirm this is acceptable
scaler = StandardScaler()
scaler.fit(X)
X=scaler.transform(X)

# Hold-out split: 70% train / 30% test, fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3,
random_state=10)

# Rebuild DataFrames with the feature names (all columns except the target),
# because Node reads its features from the DataFrame columns
colname = colname[:-1]
X_train = pd.DataFrame(X_train, columns = colname)
X_test = pd.DataFrame(X_test, columns =colname)

# Grow the custom tree: at most 3 levels, nodes need >= 100 samples to split
root = Node(y_train, X_train, maxDepth=3, minSamplesForStop=100)  
root.growTree()

# Predict on a copy so the 'predicted' column is not added to X_train itself
train_clone = X_train.copy()
train_clone['predicted'] = root.predict(train_clone)

# Calculate accuracy (in percent) for the training data
trainAccuracy = Util.calculateAccuracy(np.array(train_clone['predicted']), np.array(y_train))
print('Training Accuracy: %f' % trainAccuracy)


test_clone = X_test.copy()
test_clone['predicted'] = root.predict(test_clone)
# Calculate accuracy (in percent) for the test data
testAccuracy = Util.calculateAccuracy(np.array(test_clone['predicted']), y_test) 
print('Testing Accuracy: %f' % testAccuracy)

# Metrics are returned as fractions, with label 1 as the positive class;
# they are scaled to percent when printed
accuracy, recall,specificity, precision, f1 = Util.getPerformanceMetrics(y_test,np.array(test_clone['predicted']),1)

print('Performance Metrics of Test Data : Accuracy : %0.2f , Recall : %0.2f ,Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy*100 , recall*100 ,specificity*100 , precision*100 , f1*100 ) )

print('----------------------------------------------------')
# Baseline for comparison: scikit-learn tree with the same criterion and depth
dTree = DecisionTreeClassifier(criterion = 'gini', max_depth = 3, random_state=1)
dTree.fit(X_train, y_train)
y_predict = dTree.predict(X_test)

# Confusion-matrix cells, used below to compute specificity = tn / (tn + fp);
# tn, fp, fn, tp are reassigned for every model that follows
tn, fp, fn, tp = confusion_matrix(y_test,y_predict).ravel()

print('Performance Metrics of Test Data with scikit : Accuracy : %0.2f , Recall : %0.2f ,Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy_score(y_test, y_predict)*100 , recall_score(y_test, y_predict)*100 ,(tn / (tn+fp))*100 , precision_score(y_test, y_predict)*100 , f1_score(y_test, y_predict)*100 ) )

print('----------------------------------------------------')
# Random forest with default settings (no fixed seed, so results may vary
# between runs)
randomForest = RandomForestClassifier()
randomForest =randomForest.fit(X_train, y_train)
y_rand = randomForest.predict(X_test)
tn, fp, fn, tp = confusion_matrix(y_test,y_rand).ravel()
print('Performance Metrics of Test Data with scikit for Random Forest: Accuracy : %0.2f , Recall : %0.2f , Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy_score(y_test, y_rand)*100 , recall_score(y_test, y_rand)*100 , (tn / (tn+fp))*100 , precision_score(y_test, y_rand)*100 , f1_score(y_test, y_rand)*100 ) )
print('----------------------------------------------------')
from sklearn.ensemble import BaggingClassifier

# Bagging ensemble built on the depth-3 decision tree defined above
# NOTE(review): newer scikit-learn releases renamed `base_estimator` to
# `estimator` - confirm against the installed version
baggingClassifier1 = BaggingClassifier(base_estimator=dTree)

baggingClassifier1 = baggingClassifier1.fit(X_train, y_train)
y_bagging = baggingClassifier1.predict(X_test)
tn, fp, fn, tp = confusion_matrix(y_test,y_bagging).ravel()
print('Performance Metrics of Test Data with scikit for Bagging Classifier: Accuracy : %0.2f , Recall : %0.2f , Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy_score(y_test, y_bagging)*100 , recall_score(y_test, y_bagging)*100 , (tn / (tn+fp))*100 , precision_score(y_test, y_bagging)*100 , f1_score(y_test, y_bagging)*100 ) )
print('----------------------------------------------------')
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost with default settings
adaBoost = AdaBoostClassifier()
adaBoost = adaBoost.fit(X_train, y_train)
y_adaboost = adaBoost.predict(X_test)
tn, fp, fn, tp = confusion_matrix(y_test,y_adaboost).ravel()
print('Performance Metrics of Test Data with scikit for AdaBoost Classifier: Accuracy : %0.2f , Recall : %0.2f ,  Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy_score(y_test, y_adaboost)*100 , recall_score(y_test, y_adaboost)*100 , (tn / (tn+fp))*100 , precision_score(y_test, y_adaboost)*100 , f1_score(y_test, y_adaboost)*100 ) )
print('----------------------------------------------------')

from sklearn.ensemble import GradientBoostingClassifier
# Gradient boosting with default settings
gradientBoost = GradientBoostingClassifier()
gradientBoost = gradientBoost.fit(X_train, y_train)
y_gradientBoost = gradientBoost.predict(X_test)
tn, fp, fn, tp = confusion_matrix(y_test,y_gradientBoost).ravel()
print('Performance Metrics of Test Data with scikit for GradientBoosting Classifier: Accuracy : %0.2f , Recall : %0.2f , Specificity: %0.2f , Precision : %0.2f, F1 Score: %0.2f' % (accuracy_score(y_test, y_gradientBoost)*100 , recall_score(y_test, y_gradientBoost)*100 , (tn / (tn+fp))*100 ,  precision_score(y_test, y_gradientBoost)*100 , f1_score(y_test, y_gradientBoost)*100 ) )
print('----------------------------------------------------')


Training Accuracy: 96.662156
Testing Accuracy: 96.529968
Performance Metrics of Test Data : Accuracy : 96.53 , Recall : 96.16 ,Specificity: 96.93 , Precision : 97.14, F1 Score: 96.65
----------------------------------------------------
Performance Metrics of Test Data with scikit : Accuracy : 96.85 , Recall : 97.37 ,Specificity: 96.27 , Precision : 96.59, F1 Score: 96.98
----------------------------------------------------
Performance Metrics of Test Data with scikit for Random Forest: Accuracy : 98.21 , Recall : 97.98 , Specificity: 98.46 , Precision : 98.58, F1 Score: 98.28
----------------------------------------------------
Performance Metrics of Test Data with scikit for Bagging Classifier: Accuracy : 97.27 , Recall : 96.97 , Specificity: 97.59 , Precision : 97.76, F1 Score: 97.36
----------------------------------------------------
Performance Metrics of Test Data with scikit for AdaBoost Classifier: Accuracy : 97.48 , Recall : 97.78 ,  Specificity: 97.15 , Precision : 97.38, F1 