In [1]:
import numpy as np
import pandas as pd

np.random.seed(10)

In [2]:
df_spect = pd.read_csv('SPECT.csv')
df_spect['Class'] = df_spect['Class'].map(lambda x: 1 if x == 'Yes' else 0)
df_spect.head()

Unnamed: 0,Class,Attr_1,Attr_2,Attr_3,Attr_4,Attr_5,Attr_6,Attr_7,Attr_8,Attr_9,...,Attr_13,Attr_14,Attr_15,Attr_16,Attr_17,Attr_18,Attr_19,Attr_20,Attr_21,Attr_22
0,1,0,0,0,1,0,0,0,1,1,...,1,1,0,0,0,0,0,0,0,0
1,1,0,0,1,1,0,0,0,1,1,...,1,1,0,0,0,0,0,0,0,1
2,1,1,0,1,0,1,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,1
4,1,0,0,0,0,0,0,0,1,0,...,1,0,1,1,0,0,0,0,0,0


In [3]:
target = 'Class'
features = df_spect.columns[df_spect.columns != target]
classes = df_spect[target].unique()

In [4]:
def accuracy(actual, predicted):
    return (actual == predicted).mean() * 100


def metrics(y_pred, y_true):
    tp, tn, fp, fn = 0, 0, 0, 0
    for true, pred in zip(y_true, y_pred):
#         pred = pred[0]
#         print(true, pred)
        if true == 0:
            if pred == 0:
                tn += 1
            else:
                fp += 1
        else:
            if pred == 1:
                tp += 1
            else:
                fn += 1
    precision, recall = None, None
    try:
        precision=tp/(tp+fp)
        recall=tp/(tp+fn)
    except:
        print("Divide by zero")    
    return precision,recall

In [5]:
def split_data(dataset, n_folds):
    dataset = dataset.sample(frac=1).reset_index(drop=True)
    dataset_split = list()
    fold_size = int(len(dataset) / n_folds)
#     print(fold_size)
    for i in range(n_folds):
#         print(dataset.loc[:, i * fold_size: (i + 1) * fold_size])
        dataset_split.append(dataset.iloc[i * fold_size: (i + 1) * fold_size])
    return dataset_split

In [6]:
def fit_data(data):
    probs = {}
    probcl = {}
    for x in classes:
        datacl = data[data[target]==x][features]
#         print(len(datacl))
        clsp = {}
        tot = len(datacl)
        for col in datacl.columns:
            colp = {}
            for val,cnt in datacl[col].value_counts().iteritems():
                pr = cnt/tot
                colp[val] = pr
            clsp[col] = colp
        probs[x] = clsp
        probcl[x] = len(datacl) / len(data)
    return probs, probcl

In [7]:
def calculate_probability(x, probcl, probs):
    probab = {}
    for cl in classes:
        pr = probcl[cl]
        for col,val in x.iteritems():
            try:
                pr *= probs[cl][col][val]
            except KeyError:
                pr = 0
        probab[cl] = pr
    return probab

def classify(x, probcl, probs):
    probab = calculate_probability(x, probcl, probs)
    mx = 0
    mxcl = ''
    for cl,pr in probab.items():
        if pr > mx:
            mx = pr
            mxcl = cl
    return mxcl

In [8]:
def evaluate_algorithm(dataset, n_folds,lr=0.1):
    folds = split_data(dataset, n_folds)
    f_acc, f_rec, f_pre = 0., 0., 0.
#     f=1
    for index in range(len(folds)):
        train = pd.concat([folds[i] for i in range(len(folds)) if i is not index])
        test = folds[index]
        probs, probcl = fit_data(train)
        predicted = list()
        for i in test.index:
            predicted.append(classify(test.loc[i, features], probcl, probs))
        predicted = np.array(predicted)
        actual = np.array(test.loc[:, target])
        acc = accuracy(actual, predicted)
#         print(acc, actual, predicted)
        precision, recall = metrics(actual, predicted)
        f_acc += acc
        f_rec += recall
        f_pre += precision
        print("Accuracy :" + str(acc), ", Precision :" + str(precision), ", Recall :" + str(recall))
#         f+=1
    print('Final:')
    print("Accuracy :" + str(f_acc / len(folds)),
          ", Precision :" + str(f_pre / len(folds)),
          ", Recall :" + str(f_rec / len(folds)))

In [13]:
evaluate_algorithm(df_spect, 10)

Accuracy :84.61538461538461 , Precision :0.85 , Recall :0.9444444444444444
Accuracy :92.3076923076923 , Precision :0.9523809523809523 , Recall :0.9523809523809523
Accuracy :76.92307692307693 , Precision :0.75 , Recall :1.0
Accuracy :84.61538461538461 , Precision :0.8571428571428571 , Recall :0.9473684210526315
Accuracy :80.76923076923077 , Precision :0.9090909090909091 , Recall :0.8695652173913043
Accuracy :65.38461538461539 , Precision :0.6842105263157895 , Recall :0.8125
Accuracy :69.23076923076923 , Precision :0.6111111111111112 , Recall :0.9166666666666666
Accuracy :84.61538461538461 , Precision :0.8823529411764706 , Recall :0.8823529411764706
Accuracy :73.07692307692307 , Precision :0.7142857142857143 , Recall :0.9375
Accuracy :88.46153846153845 , Precision :0.8695652173913043 , Recall :1.0
Final:
Accuracy :80.0 , Precision :0.8080140228895107 , Recall :0.926277864311247
