In [1]:
import numpy as np
import matplotlib.pyplot as plt

from data_handler import DataHandler
from log_regression import LogisticalRegression
from calculations import Calculations

TERMINATION_VALUE = 2^-32
ITERATIONS = 10000
LEARNING_RATE = 0.001
dh = DataHandler("spambase.data")
data = dh.parse_data_no_header()
data = dh.shuffle_data(data)
data_train, data_test = dh.split_data(data)

In [2]:
tX, tY = dh.getXY(data_train, -1, -1)
mean, std = dh.zscore_data(tX)
vX, vY = dh.getXY(data_test, -1, -1)
tX_, tPriors, tMeans, tVars = dh.dynamic_split(tX, tY, mean, std)
vX_, vPriors, vMeans, vVars = dh.dynamic_split(vX, vY, mean, std)
tX.shape, tY.shape, vX.shape, vY.shape, tPriors, vPriors

((3067, 57),
 (3067, 1),
 (1533, 57),
 (1533, 1),
 array([0.59667427, 0.40332573]),
 array([0.62426614, 0.37573386]))

In [8]:
def gnb(x_val, x_mean, x_var):
    equation_1 = 0
    equation_2 = 0
    if x_var < 0.0001:
        x_var = 0.0001
    equation_1 = 1/(np.sqrt(2 * np.pi * x_var))    

    e2denom = np.square(2 * x_var)
    e2num = np.square(x_val - x_mean)

    equation_2 = np.exp(-(e2num/e2denom))
        
    prob = equation_1 * equation_2
    return prob

def getPFC(priors, x, Y):
    probs_per_class = []
    total_probability = 0
    for i in range(len(np.unique(Y))):
        prior = priors[i]
        gnb_ = 1
        for j in range(x.shape[0]):
            gnb_ = gnb_ * gnb(x[j], tMeans[i][j], tVars[i][j])
        total_probability += (np.multiply(prior, gnb_))
        probs_per_class.append(np.multiply(prior, gnb_))
    return probs_per_class, total_probability

In [12]:
preds = []
classifiers = np.unique(vY)
for i in range(vX.shape[0]):
    probs, total_probability = getPFC(vPriors, vX[i,:], vY)
    probs = probs/(total_probability+0.0001)
    max_prob_ind = probs.argmax()
    preds.append(classifiers[max_prob_ind])

calc = Calculations(vY, preds)

acc, recall, fmeasure, precision = calc.evaluate()
acc, recall, fmeasure, precision 

(0.8408349641226354, 0.8888888888888888, 0.7398843930635838, 0.807570977917981)