In [33]:
import math

class NB:
    """Naive Bayes classifier for column-oriented data with mixed feature types.

    Categorical features are modelled with per-class relative frequencies and
    numeric features with a per-class Gaussian (mean and sample standard
    deviation).  Data sets are passed as a mapping ``column name -> list of
    values``; row ``i`` of the data set is made of the ``i``-th element of
    every list.
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Clear every learned statistic so the model can be trained from scratch."""
        # classes[klass] -> number of training instances of that class
        self.classes = {}
        # counts used for attributes that are not numeric:
        # counts[klass][feature][value] -> number of occurrences
        self.counts = {}
        # totals used for attributes that are numeric:
        # totals[klass][feature] -> sum of the values, used to compute the mean
        self.totals = {}
        # priors[klass] -> P(klass)
        self.priors = {}
        # conditional[klass][feature][value] -> P(value | klass)
        self.conditional = {}
        # means[klass][feature] / ssds[klass][feature] -> Gaussian parameters
        self.means = {}
        self.ssds = {}

    def train(self, data, cat_feature_names, num_feature_names, label_name):
        """Fit the model on ``data``.

        Args:
            data: mapping of column name to list of values (all the same length).
            cat_feature_names: columns to treat as categorical.
            num_feature_names: columns to treat as numeric (Gaussian).
            label_name: column of ``data`` holding the class labels.

        Any previously learned state is discarded first, so calling ``train``
        again cannot mix counts from an earlier call with priors computed from
        the latest data only.
        """
        self._reset()
        labels = data[label_name]
        n_instances = len(labels)

        for i in range(n_instances):
            # now process this instance
            current_class = labels[i]
            self.classes[current_class] = self.classes.get(current_class, 0) + 1
            class_counts = self.counts.setdefault(current_class, {})
            class_totals = self.totals.setdefault(current_class, {})
            # now process each non-numeric attribute of the instance
            for col in cat_feature_names:
                value_counts = class_counts.setdefault(col, {})
                value = data[col][i]
                value_counts[value] = value_counts.get(value, 0) + 1
            # process numeric attributes
            for col in num_feature_names:
                class_totals[col] = class_totals.get(col, 0) + data[col][i]

        # ok done counting. now compute probabilities

        # first prior probabilities P(klass)
        for (klass, count) in self.classes.items():
            self.priors[klass] = count / n_instances

        # now the conditional probabilities P(value | klass) of categorical features
        for (klass, features) in self.counts.items():
            self.conditional[klass] = {}
            for (feature, value_counts) in features.items():
                self.conditional[klass][feature] = {
                    value: count / self.classes[klass]
                    for (value, count) in value_counts.items()
                }

        # now compute the per-class mean of every numeric feature
        for (klass, features) in self.totals.items():
            self.means[klass] = {
                feature: feature_total / self.classes[klass]
                for (feature, feature_total) in features.items()
            }

        # sample standard deviation (n - 1 in the denominator)
        for (klass, features) in self.means.items():
            class_size = self.classes[klass]
            self.ssds[klass] = {}
            for (feature, mean) in features.items():
                if class_size < 2:
                    # A single observation has no sample deviation; dividing by
                    # (n - 1) would raise ZeroDivisionError.  Store 0 and let
                    # pdf() handle the degenerate case.
                    self.ssds[klass][feature] = 0.0
                    continue
                sum_of_square_differences = 0
                for i in range(len(data[feature])):
                    if labels[i] == klass:
                        sum_of_square_differences += (data[feature][i] - mean) ** 2
                self.ssds[klass][feature] = math.sqrt(
                    sum_of_square_differences / (class_size - 1))

    # making prediction for an unseen instance using trained NB model
    def classify(self, instance, cat_feature_names, num_feature_names):
        """Return the most probable class for one instance.

        Args:
            instance: mapping of feature name to value for a single row.
            cat_feature_names: categorical features to use.
            num_feature_names: numeric features to use.

        Returns:
            The class with the highest (unnormalised) posterior.  Candidates are
            compared as ``(probability, class)`` tuples, so ties on probability
            are resolved by comparing the class labels themselves.

        Raises:
            ValueError: if the model has not been trained (no classes to rank).
        """
        results = []
        for (klass, prior) in self.priors.items():
            prob = prior
            for feature in cat_feature_names:
                value = instance[feature]
                if value not in self.conditional[klass][feature]:
                    # we did not find any instances of this attribute value
                    # occurring with this class so prob = 0
                    prob = 0
                else:
                    prob = prob * self.conditional[klass][feature][value]

            for feature in num_feature_names:
                mean = self.means[klass][feature]
                ssd = self.ssds[klass][feature]
                prob = prob * self.pdf(mean, ssd, instance[feature])

            results.append((prob, klass))

        # return the class with the highest probability
        return max(results)[1]

    # Probability Density Function  computing P(x|y)
    def pdf(self, mean, ssd, x):
        """Gaussian density of ``x`` for the given mean and standard deviation.

        When ``ssd`` is 0 (constant feature, or a class seen only once) the
        Gaussian is undefined; the feature is then treated as a point mass:
        1.0 if ``x`` equals the mean, 0.0 otherwise.
        """
        if ssd == 0:
            return 1.0 if x == mean else 0.0
        ePart = math.exp(-(x - mean) ** 2 / (2 * ssd ** 2))
        return (1.0 / (math.sqrt(2 * math.pi) * ssd)) * ePart

    def test(self, test_data, cat_feature_names, num_feature_names):
        """Classify every row of a column-oriented data set.

        Args:
            test_data: mapping of column name to list of values; the number of
                rows is taken from the first column.
            cat_feature_names: categorical features to use.
            num_feature_names: numeric features to use.

        Returns:
            List with one predicted class per row (empty if ``test_data`` has
            no columns).
        """
        feature_names = list(test_data.keys())
        if not feature_names:
            return []

        predictions = []
        length = len(test_data[feature_names[0]])
        # rebuild each row as a {feature: value} dict and classify it
        for i in range(length):
            test_instance = {feature: test_data[feature][i] for feature in feature_names}
            predictions.append(
                self.classify(test_instance, cat_feature_names, num_feature_names))

        return predictions
    

In [31]:
def replace_items(l, o_items, n_item):
    """Return a copy of `l` in which every element found in `o_items` is `n_item`.

    Args:
        l: iterable of values to scan.
        o_items: the value(s) to replace.  Either a collection of values or a
            single string, which is treated as one value to match exactly.
        n_item: replacement value.

    Returns:
        A new list; `l` itself is not modified.
    """
    # `item in "omL"` is a substring test: it would also match "om", "o", ""
    # and raise TypeError for non-string items.  Wrap a bare string so that it
    # is compared as one whole value.
    if isinstance(o_items, str):
        o_items = (o_items,)
    return [n_item if item in o_items else item for item in l]

In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pprint

# breast-cancer-wisconsin dataset

## One Run

In [11]:
# The data file has no header row, so columns are addressed by integer position.
df = pd.read_csv('./breast-cancer-wisconsin.data', header=None)

# Column 10 is used as the class label; every other column is input.
y = df[10]
X = df.drop(columns=10)
# NOTE: no random_state is passed, so the split differs between executions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

# NB consumes column-oriented dicts: {column: [values...]}.
d_test = X_test.to_dict(orient="list")
d_demo = pd.concat([X_train, y_train], axis=1).to_dict(orient='list')

# The label is the last column of the training dict.  Column 0 is left out of
# the features as well (presumably a sample identifier -- confirm).
label_name = list(d_demo)[-1]
feature_names = [name for name in d_demo if name != label_name and name != 0]
# Every remaining feature is treated as categorical.
num_feature_names = []
cat_feature_names = [name for name in feature_names if name not in num_feature_names]

classifier = NB()
classifier.train(d_demo, cat_feature_names, num_feature_names, label_name)

my_pred = classifier.test(d_test, cat_feature_names, num_feature_names)
accuracy_score(y_test, my_pred)

0.9785714285714285

## 50 runs (10 * 5 folds)

In [12]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

df = pd.read_csv('./breast-cancer-wisconsin.data', header=None)

# Column 10 is the class label; folds are stratified on it.
target = df[10]
# NOTE: no random_state is passed, so fold assignment is not reproducible
# between executions.
skf = StratifiedKFold(n_splits=5, shuffle=True)
accs = []
for run in range(10):
    print("================Run {}================".format(run))
    fold_no = 1
    for train_index, test_index in skf.split(df, target):
        # split() yields positional row numbers, so select rows with .iloc
        # (label-based .loc only agrees while the index is the default 0..n-1).
        train = df.iloc[train_index, :]
        test = df.iloc[test_index, :]

        X = train.drop(columns=10)
        y = train[10]
        d_demo = pd.concat([X, y], axis=1).to_dict(orient='list')

        X_test = test.drop(columns=10)
        y_test = test[10]
        d_test = X_test.to_dict(orient="list")

        # The label is the last column of d_demo; column 0 is not used as a feature.
        feature_names = list(d_demo.keys())
        label_name = feature_names[-1]
        feature_names.remove(label_name)
        feature_names.remove(0)
        # All remaining features are treated as categorical.
        num_feature_names = []
        cat_feature_names = [i for i in feature_names if i not in num_feature_names]

        classifier = NB()
        classifier.train(d_demo, cat_feature_names, num_feature_names, label_name)
        my_pred = classifier.test(d_test, cat_feature_names, num_feature_names)

        # Score once and reuse the value for both the log line and the record.
        fold_accuracy = accuracy_score(y_test, my_pred)
        print("fold: ", fold_no, "===>", "accuracy: ", fold_accuracy)
        accs.append(fold_accuracy)
        fold_no += 1

fold:  1 ===> accuracy:  0.9571428571428572
fold:  2 ===> accuracy:  0.9571428571428572
fold:  3 ===> accuracy:  0.9714285714285714
fold:  4 ===> accuracy:  0.9642857142857143
fold:  5 ===> accuracy:  0.9640287769784173
fold:  1 ===> accuracy:  0.9785714285714285
fold:  2 ===> accuracy:  0.9714285714285714
fold:  3 ===> accuracy:  0.9857142857142858
fold:  4 ===> accuracy:  0.9357142857142857
fold:  5 ===> accuracy:  0.9568345323741008
fold:  1 ===> accuracy:  0.9714285714285714
fold:  2 ===> accuracy:  0.9642857142857143
fold:  3 ===> accuracy:  0.9857142857142858
fold:  4 ===> accuracy:  0.9642857142857143
fold:  5 ===> accuracy:  0.9496402877697842
fold:  1 ===> accuracy:  0.9428571428571428
fold:  2 ===> accuracy:  0.9714285714285714
fold:  3 ===> accuracy:  0.9928571428571429
fold:  4 ===> accuracy:  0.9642857142857143
fold:  5 ===> accuracy:  0.9640287769784173
fold:  1 ===> accuracy:  0.9857142857142858
fold:  2 ===> accuracy:  0.9428571428571428
fold:  3 ===> accuracy:  0.96428

In [13]:
# Sanity check: 10 runs x 5 folds should have produced 50 accuracy values.
len(accs)

50

In [14]:
import statistics

# Spread and centre of the per-fold accuracies gathered in `accs`.
accs_stdev = statistics.stdev(accs)
accs_mean = statistics.mean(accs)
print("Standard deviation: ", accs_stdev)
print("Mean: ", accs_mean)

Standard deviation:  0.015832749333826498
Mean:  0.9649527235354574


# car dataset

## One Run

In [19]:
# Load the table (no header row) and discard every row that contains a "?",
# which is how this cell marks a missing value.
df = (
    pd.read_csv('./car.data', header=None)
    .replace("?", np.nan)
    .dropna()
)

# Column 6 is used as the class label; every other column is input.
y = df[6]
X = df.drop(columns=6)
# NOTE: no random_state is passed, so the split differs between executions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

# NB consumes column-oriented dicts: {column: [values...]}.
d_test = X_test.to_dict(orient="list")
d_demo = pd.concat([X_train, y_train], axis=1).to_dict(orient='list')

# The label is the last column of the training dict; all others are features,
# and all of them are treated as categorical.
label_name = list(d_demo)[-1]
feature_names = [name for name in d_demo if name != label_name]
num_feature_names = []
cat_feature_names = [name for name in feature_names if name not in num_feature_names]

classifier = NB()
classifier.train(d_demo, cat_feature_names, num_feature_names, label_name)
my_pred = classifier.test(d_test, cat_feature_names, num_feature_names)
accuracy_score(y_test, my_pred)

0.8353765323992994

## 50 runs (10 * 5 folds)

In [20]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

df = pd.read_csv('./car.data', header=None)
# drop rows with missing values, missing = ?
df = df.replace("?", np.nan)
df = df.dropna()

# Column 6 is the class label.
target = df[6]
# NOTE: no random_state is passed, so fold assignment is not reproducible
# between executions.
skf = KFold(n_splits=5, shuffle=True)
accs = []
for run in range(10):
    print("================Run {}================".format(run))
    fold_no = 1
    for train_index, test_index in skf.split(df, target):
        # split() yields positional row numbers.  After dropna() the index
        # labels may have gaps, so label-based .loc could pick the wrong rows
        # or raise KeyError; .iloc selects by position.
        train = df.iloc[train_index, :]
        test = df.iloc[test_index, :]

        X = train.drop(columns=6)
        y = train[6]
        d_demo = pd.concat([X, y], axis=1).to_dict(orient='list')

        X_test = test.drop(columns=6)
        y_test = test[6]
        d_test = X_test.to_dict(orient="list")

        # The label is the last column of d_demo; all others are categorical features.
        feature_names = list(d_demo.keys())
        label_name = feature_names[-1]
        feature_names.remove(label_name)
        num_feature_names = []
        cat_feature_names = [i for i in feature_names if i not in num_feature_names]

        classifier = NB()
        classifier.train(d_demo, cat_feature_names, num_feature_names, label_name)
        my_pred = classifier.test(d_test, cat_feature_names, num_feature_names)

        # Score once and reuse the value for both the log line and the record.
        fold_accuracy = accuracy_score(y_test, my_pred)
        print("fold: ", fold_no, "===>", "accuracy: ", fold_accuracy)
        accs.append(fold_accuracy)
        fold_no += 1

fold:  1 ===> accuracy:  0.8641618497109826
fold:  2 ===> accuracy:  0.8352601156069365
fold:  3 ===> accuracy:  0.8583815028901735
fold:  4 ===> accuracy:  0.8724637681159421
fold:  5 ===> accuracy:  0.863768115942029
fold:  1 ===> accuracy:  0.8265895953757225
fold:  2 ===> accuracy:  0.8497109826589595
fold:  3 ===> accuracy:  0.8497109826589595
fold:  4 ===> accuracy:  0.8492753623188406
fold:  5 ===> accuracy:  0.8608695652173913
fold:  1 ===> accuracy:  0.8236994219653179
fold:  2 ===> accuracy:  0.861271676300578
fold:  3 ===> accuracy:  0.8786127167630058
fold:  4 ===> accuracy:  0.863768115942029
fold:  5 ===> accuracy:  0.8492753623188406
fold:  1 ===> accuracy:  0.9017341040462428
fold:  2 ===> accuracy:  0.8352601156069365
fold:  3 ===> accuracy:  0.8526011560693642
fold:  4 ===> accuracy:  0.8753623188405797
fold:  5 ===> accuracy:  0.7942028985507247
fold:  1 ===> accuracy:  0.869942196531792
fold:  2 ===> accuracy:  0.8294797687861272
fold:  3 ===> accuracy:  0.867052023

In [21]:
# Sanity check: 10 runs x 5 folds should have produced 50 accuracy values.
len(accs)

50

In [22]:
import statistics

# Spread and centre of the per-fold accuracies gathered in `accs`.
accs_stdev = statistics.stdev(accs)
accs_mean = statistics.mean(accs)
print("Standard deviation: ", accs_stdev)
print("Mean: ", accs_mean)

Standard deviation:  0.023183805279741124
Mean:  0.855788389042473


# mushroom dataset

## One Run

In [23]:
# The data file has no header row, so columns are addressed by integer position.
df = pd.read_csv('./mushroom.data', header=None)

# Column 0 is used as the class label.  Column 11 is dropped from the inputs
# along with it.
y = df[0]
X = df.drop(columns=[0, 11])
# NOTE: no random_state is passed, so the split differs between executions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

# NB consumes column-oriented dicts: {column: [values...]}.
d_test = X_test.to_dict(orient="list")
d_demo = pd.concat([X_train, y_train], axis=1).to_dict(orient='list')

# Concatenating the label after the inputs makes it the last key of d_demo;
# all other columns are categorical features.
label_name = list(d_demo)[-1]
feature_names = [name for name in d_demo if name != label_name]
num_feature_names = []
cat_feature_names = [name for name in feature_names if name not in num_feature_names]

classifier = NB()
classifier.train(d_demo, cat_feature_names, num_feature_names, label_name)
my_pred = classifier.test(d_test, cat_feature_names, num_feature_names)
accuracy_score(y_test, my_pred)

0.9977620290936218

## 50 runs (10 * 5 folds)

In [24]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

df = pd.read_csv('./mushroom.data', header=None)
# Column 0 is the class label.
target = df[0]
# NOTE: no random_state is passed, so fold assignment is not reproducible
# between executions.
skf = KFold(n_splits=5, shuffle=True)
accs = []
for run in range(10):
    print("================Run {}================".format(run))
    fold_no = 1
    for train_index, test_index in skf.split(df, target):
        # split() yields positional row numbers, so select rows with .iloc
        # (label-based .loc only agrees while the index is the default 0..n-1).
        train = df.iloc[train_index, :]
        test = df.iloc[test_index, :]

        # Inputs: everything except the label (0) and column 11.
        X = train.drop(columns=[0, 11])
        y = train[0]
        d_demo = pd.concat([X, y], axis=1).to_dict(orient='list')

        X_test = test.drop(columns=[0, 11])
        y_test = test[0]
        d_test = X_test.to_dict(orient="list")

        # The label was concatenated last, so it is the last key of d_demo;
        # all other columns are categorical features.
        feature_names = list(d_demo.keys())
        label_name = feature_names[-1]
        feature_names.remove(label_name)
        num_feature_names = []
        cat_feature_names = [i for i in feature_names if i not in num_feature_names]

        classifier = NB()
        classifier.train(d_demo, cat_feature_names, num_feature_names, label_name)
        my_pred = classifier.test(d_test, cat_feature_names, num_feature_names)

        # Score once and reuse the value for both the log line and the record.
        fold_accuracy = accuracy_score(y_test, my_pred)
        print("fold: ", fold_no, "===>", "accuracy: ", fold_accuracy)
        accs.append(fold_accuracy)
        fold_no += 1

fold:  1 ===> accuracy:  0.9956923076923077
fold:  2 ===> accuracy:  0.9963076923076923
fold:  3 ===> accuracy:  0.9981538461538462
fold:  4 ===> accuracy:  0.9981538461538462
fold:  5 ===> accuracy:  0.9975369458128078
fold:  1 ===> accuracy:  0.9950769230769231
fold:  2 ===> accuracy:  0.9969230769230769
fold:  3 ===> accuracy:  0.9981538461538462
fold:  4 ===> accuracy:  0.9975384615384615
fold:  5 ===> accuracy:  0.9969211822660099
fold:  1 ===> accuracy:  0.9987692307692307
fold:  2 ===> accuracy:  0.9969230769230769
fold:  3 ===> accuracy:  0.9969230769230769
fold:  4 ===> accuracy:  0.9956923076923077
fold:  5 ===> accuracy:  0.9969211822660099
fold:  1 ===> accuracy:  0.9981538461538462
fold:  2 ===> accuracy:  0.9969230769230769
fold:  3 ===> accuracy:  0.9981538461538462
fold:  4 ===> accuracy:  0.9938461538461538
fold:  5 ===> accuracy:  0.9975369458128078
fold:  1 ===> accuracy:  0.9932307692307693
fold:  2 ===> accuracy:  0.9981538461538462
fold:  3 ===> accuracy:  0.99815

In [25]:
# Sanity check: 10 runs x 5 folds should have produced 50 accuracy values.
len(accs)

50

In [26]:
import statistics

# Spread and centre of the per-fold accuracies gathered in `accs`.
accs_stdev = statistics.stdev(accs)
accs_mean = statistics.mean(accs)
print("Standard deviation: ", accs_stdev)
print("Mean: ", accs_mean)

Standard deviation:  0.0014969148982159944
Mean:  0.9968981129215612


# ecoli dataset

## One Run

In [34]:
df = pd.read_csv('./ecoli.data', header=None)
# organize data into input and output (column 8 is the class label)
X = df.drop(columns=8)
y = df[8]
# NOTE: no random_state is passed, so the split differs between executions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
d_demo = pd.concat([X_train, y_train], axis=1).to_dict(orient='list')
# Collapse label variants into one class each.  The labels to merge are passed
# as explicit lists so the result does not depend on how replace_items treats
# a bare string: 'imS'/'imL' become 'im', and 'om'/'omL' become 'mL'.
d_demo[8] = replace_items(d_demo[8], ['imS', 'imL'], 'im')
d_demo[8] = replace_items(d_demo[8], ['om', 'omL'], 'mL')

d_test = X_test.to_dict(orient="list")
# Apply the same merging to the held-out labels so they match the predictions.
y_test = replace_items(y_test, ['imS', 'imL'], 'im')
y_test = replace_items(y_test, ['om', 'omL'], 'mL')

# The label is the last column of d_demo.
feature_names = list(d_demo.keys())
label_name = feature_names[-1]
feature_names.remove(label_name)
# Columns 1, 2, 5, 6, 7 are modelled as numeric and 3, 4 as categorical;
# column 0 is used by neither list.
num_feature_names = [ feature_names[i] for i in [1,2,5,6,7] ]
cat_feature_names = [ feature_names[i] for i in [3,4] ]


classifier = NB()
classifier.train(d_demo, cat_feature_names, num_feature_names, label_name)
my_pred = classifier.test(d_test, cat_feature_names, num_feature_names)
accuracy_score(y_test, my_pred)

0.9411764705882353

## 50 runs (10 * 5 folds)

In [35]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold

df = pd.read_csv('./ecoli.data', header=None)
# Column 8 is the class label; folds are stratified on the unmerged labels.
target = df[8]
# NOTE: no random_state is passed, so fold assignment is not reproducible
# between executions.
skf = StratifiedKFold(n_splits=5, shuffle=True)
accs = []
for run in range(10):
    print("================Run {}================".format(run))
    fold_no = 1
    for train_index, test_index in skf.split(df, target):
        # split() yields positional row numbers, so select rows with .iloc
        # (label-based .loc only agrees while the index is the default 0..n-1).
        train = df.iloc[train_index, :]
        test = df.iloc[test_index, :]

        X = train.drop(columns=8)
        y = train[8]
        d_demo = pd.concat([X, y], axis=1).to_dict(orient='list')
        # Collapse label variants into one class each.  The labels to merge are
        # passed as explicit lists so the result does not depend on how
        # replace_items treats a bare string: 'imS'/'imL' become 'im', and
        # 'om'/'omL' become 'mL'.
        d_demo[8] = replace_items(d_demo[8], ['imS', 'imL'], 'im')
        d_demo[8] = replace_items(d_demo[8], ['om', 'omL'], 'mL')

        X_test = test.drop(columns=8)
        y_test = test[8]
        d_test = X_test.to_dict(orient="list")
        # Same merging for the held-out labels so they match the predictions.
        y_test = replace_items(y_test, ['imS', 'imL'], 'im')
        y_test = replace_items(y_test, ['om', 'omL'], 'mL')

        # The label is the last column of d_demo.
        feature_names = list(d_demo.keys())
        label_name = feature_names[-1]
        feature_names.remove(label_name)
        # Columns 1, 2, 5, 6, 7 are modelled as numeric and 3, 4 as categorical;
        # column 0 is used by neither list.
        num_feature_names = [ feature_names[i] for i in [1,2,5,6,7] ]
        cat_feature_names = [ feature_names[i] for i in [3,4] ]

        classifier = NB()
        classifier.train(d_demo, cat_feature_names, num_feature_names, label_name)
        my_pred = classifier.test(d_test, cat_feature_names, num_feature_names)

        # Score once and reuse the value for both the log line and the record.
        fold_accuracy = accuracy_score(y_test, my_pred)
        print("fold: ", fold_no, "===>", "accuracy: ", fold_accuracy)
        accs.append(fold_accuracy)
        fold_no += 1

fold:  1 ===> accuracy:  0.9117647058823529
fold:  2 ===> accuracy:  0.8507462686567164
fold:  3 ===> accuracy:  0.8059701492537313
fold:  4 ===> accuracy:  0.8656716417910447
fold:  5 ===> accuracy:  0.8955223880597015
fold:  1 ===> accuracy:  0.8088235294117647
fold:  2 ===> accuracy:  0.8656716417910447
fold:  3 ===> accuracy:  0.8656716417910447
fold:  4 ===> accuracy:  0.9104477611940298
fold:  5 ===> accuracy:  0.8507462686567164
fold:  1 ===> accuracy:  0.8676470588235294
fold:  2 ===> accuracy:  0.8656716417910447
fold:  3 ===> accuracy:  0.9104477611940298
fold:  4 ===> accuracy:  0.8059701492537313
fold:  5 ===> accuracy:  0.8059701492537313
fold:  1 ===> accuracy:  0.8823529411764706




fold:  2 ===> accuracy:  0.9104477611940298
fold:  3 ===> accuracy:  0.8656716417910447
fold:  4 ===> accuracy:  0.835820895522388
fold:  5 ===> accuracy:  0.8656716417910447
fold:  1 ===> accuracy:  0.9705882352941176
fold:  2 ===> accuracy:  0.8507462686567164
fold:  3 ===> accuracy:  0.8059701492537313
fold:  4 ===> accuracy:  0.8208955223880597
fold:  5 ===> accuracy:  0.8507462686567164
fold:  1 ===> accuracy:  0.8235294117647058
fold:  2 ===> accuracy:  0.8656716417910447
fold:  3 ===> accuracy:  0.8656716417910447
fold:  4 ===> accuracy:  0.8059701492537313
fold:  5 ===> accuracy:  0.8805970149253731
fold:  1 ===> accuracy:  0.8970588235294118
fold:  2 ===> accuracy:  0.8656716417910447
fold:  3 ===> accuracy:  0.8507462686567164
fold:  4 ===> accuracy:  0.8507462686567164
fold:  5 ===> accuracy:  0.8656716417910447




fold:  1 ===> accuracy:  0.8676470588235294
fold:  2 ===> accuracy:  0.8805970149253731
fold:  3 ===> accuracy:  0.8805970149253731
fold:  4 ===> accuracy:  0.7761194029850746
fold:  5 ===> accuracy:  0.9552238805970149
fold:  1 ===> accuracy:  0.8235294117647058
fold:  2 ===> accuracy:  0.835820895522388
fold:  3 ===> accuracy:  0.835820895522388
fold:  4 ===> accuracy:  0.835820895522388
fold:  5 ===> accuracy:  0.8656716417910447
fold:  1 ===> accuracy:  0.8970588235294118
fold:  2 ===> accuracy:  0.9104477611940298
fold:  3 ===> accuracy:  0.8805970149253731
fold:  4 ===> accuracy:  0.7761194029850746
fold:  5 ===> accuracy:  0.8656716417910447




In [36]:
# Sanity check: 10 runs x 5 folds should have produced 50 accuracy values.
len(accs)

50

In [37]:
import statistics

# Spread and centre of the per-fold accuracies gathered in `accs`.
accs_stdev = statistics.stdev(accs)
accs_mean = statistics.mean(accs)
print("Standard deviation: ", accs_stdev)
print("Mean: ", accs_mean)

Standard deviation:  0.040201892457361305
Mean:  0.8600746268656716


# letter-recognition dataset

## One Run

In [38]:
# The data file has no header row, so columns are addressed by integer position.
df = pd.read_csv('./letter-recognition.data', header=None)

# Column 0 is used as the class label; every other column is input.
y = df[0]
X = df.drop(columns=0)
# NOTE: no random_state is passed, so the split differs between executions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

# NB consumes column-oriented dicts: {column: [values...]}.
d_test = X_test.to_dict(orient="list")
d_demo = pd.concat([X_train, y_train], axis=1).to_dict(orient='list')

# Concatenating the label after the inputs makes it the last key of d_demo.
label_name = list(d_demo)[-1]
feature_names = [name for name in d_demo if name != label_name]
# Every feature is treated as categorical; cat_feature_names is the same list
# object as feature_names, not a copy.
num_feature_names = []
cat_feature_names = feature_names

classifier = NB()
classifier.train(d_demo, cat_feature_names, num_feature_names, label_name)
my_pred = classifier.test(d_test, cat_feature_names, num_feature_names)
accuracy_score(y_test, my_pred)

0.75375

## 50 runs (10 * 5 folds)

In [39]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

df = pd.read_csv('./letter-recognition.data', header=None)
# Column 0 is the class label.
target = df[0]
# NOTE: no random_state is passed, so fold assignment is not reproducible
# between executions.
skf = KFold(n_splits=5, shuffle=True)
accs = []
for run in range(10):
    print("================Run {}================".format(run))
    fold_no = 1
    for train_index, test_index in skf.split(df, target):
        # split() yields positional row numbers, so select rows with .iloc
        # (label-based .loc only agrees while the index is the default 0..n-1).
        train = df.iloc[train_index, :]
        test = df.iloc[test_index, :]

        X = train.drop(columns=0)
        y = train[0]
        d_demo = pd.concat([X, y], axis=1).to_dict(orient='list')

        X_test = test.drop(columns=0)
        y_test = test[0]
        d_test = X_test.to_dict(orient="list")

        # The label was concatenated last, so it is the last key of d_demo;
        # every other column is treated as a categorical feature.
        feature_names = list(d_demo.keys())
        label_name = feature_names[-1]
        feature_names.remove(label_name)
        num_feature_names = []
        cat_feature_names = feature_names

        classifier = NB()
        classifier.train(d_demo, cat_feature_names, num_feature_names, label_name)
        my_pred = classifier.test(d_test, cat_feature_names, num_feature_names)

        # Score once and reuse the value for both the log line and the record.
        fold_accuracy = accuracy_score(y_test, my_pred)
        print("fold: ", fold_no, "===>", "accuracy: ", fold_accuracy)
        accs.append(fold_accuracy)
        fold_no += 1

fold:  1 ===> accuracy:  0.7465
fold:  2 ===> accuracy:  0.755
fold:  3 ===> accuracy:  0.7375
fold:  4 ===> accuracy:  0.7505
fold:  5 ===> accuracy:  0.74575
fold:  1 ===> accuracy:  0.7505
fold:  2 ===> accuracy:  0.745
fold:  3 ===> accuracy:  0.7455
fold:  4 ===> accuracy:  0.75075
fold:  5 ===> accuracy:  0.7495
fold:  1 ===> accuracy:  0.749
fold:  2 ===> accuracy:  0.75825
fold:  3 ===> accuracy:  0.74175
fold:  4 ===> accuracy:  0.75175
fold:  5 ===> accuracy:  0.73675
fold:  1 ===> accuracy:  0.74725
fold:  2 ===> accuracy:  0.74775
fold:  3 ===> accuracy:  0.75175
fold:  4 ===> accuracy:  0.73975
fold:  5 ===> accuracy:  0.74525
fold:  1 ===> accuracy:  0.7495
fold:  2 ===> accuracy:  0.74675
fold:  3 ===> accuracy:  0.746
fold:  4 ===> accuracy:  0.7515
fold:  5 ===> accuracy:  0.74225
fold:  1 ===> accuracy:  0.733
fold:  2 ===> accuracy:  0.74875
fold:  3 ===> accuracy:  0.75675
fold:  4 ===> accuracy:  0.74725
fold:  5 ===> accuracy:  0.7525
fold:  1 ===> accuracy:  0.76

In [40]:
# Sanity check: 10 runs x 5 folds should have produced 50 accuracy values.
len(accs)

50

In [41]:
import statistics

# Spread and centre of the per-fold accuracies gathered in `accs`.
accs_stdev = statistics.stdev(accs)
accs_mean = statistics.mean(accs)
print("Standard deviation: ", accs_stdev)
print("Mean: ", accs_mean)

Standard deviation:  0.005974221834733976
Mean:  0.74735
