In [1]:
import numpy as np
import matplotlib.pyplot as plt

from data_handler import DataHandler
from log_regression import LogisticalRegression
from calculations import Calculations

# Training hyper-parameters handed to LogisticalRegression below.
# `**` is exponentiation; `^` is bitwise XOR in Python, so `2^-32` evaluated
# to -30 rather than the intended tiny positive tolerance 2**-32 (~2.3e-10).
TERMINATION_VALUE = 2 ** -32
ITERATIONS = 10000
LEARNING_RATE = 0.001
# Project data helper built from the "iris.data" path; later cells call its
# parse_data_multi / shuffle_data / split_data / getXY / zscore_data /
# apply_zscore methods.
dh = DataHandler("iris.data")
# Project logistic-regression helper configured with the three constants
# defined above; later cells call calculate / linear_mod / sigmoid / prediction.
lr = LogisticalRegression(TERMINATION_VALUE, ITERATIONS, LEARNING_RATE)

In [2]:
def filter(tX, tY, classifier, classifier2):
    """Select the samples of two classes and relabel them as a binary target.

    Args:
        tX: 2-D array of features, indexed as ``tX[row, :]``.
        tY: labels aligned with the rows of ``tX``; they are cast to bytes and
            decoded as UTF-8 before being compared.
        classifier: label mapped to the positive target ``1``.
        classifier2: label mapped to the negative target ``0``.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays holding the kept feature rows and
        their 1/0 targets, in the original row order. Rows whose label matches
        neither class are dropped.
    """
    labels = np.char.decode(np.array(tY).astype(np.bytes_), 'UTF-8')
    kept_rows = []
    kept_targets = []
    for row_idx in range(labels.shape[0]):
        label = labels[row_idx]
        # The positive class is tested first, so it wins if both names are equal.
        if label == classifier:
            target = 1
        elif label == classifier2:
            target = 0
        else:
            continue
        kept_targets.append(target)
        kept_rows.append(tX[row_idx, :])

    return np.array(kept_rows), np.array(kept_targets)

In [15]:
# Pull, shuffle, split, and z-score the data, then split it into X / Y.
# X is an N row by D column matrix, where N is the number of observations and
# D is the number of features.
# Y is an N by 1 column vector.
# W is a D by 1 column vector.
dataS = list()  # not read by any visible cell; kept in case a later cell uses it
data = dh.parse_data_multi()
data = dh.shuffle_data(data)

data_train, data_test = dh.split_data(data)

# NOTE(review): 4 and -1 are presumably the feature count and the label
# column index — confirm against DataHandler.getXY.
tX, tY = dh.getXY(data_train, 4, -1)
vX, vY = dh.getXY(data_test, 4, -1)

tX = np.array(tX, dtype=np.float64)
# Convert the validation features themselves. The previous code built vX from
# tX, which silently replaced the validation set with the training set.
vX = np.array(vX, dtype=np.float64)

# The statistics come from the training features only (both arguments are tX)
# and are then applied to both splits.
mean, std = dh.zscore_data(tX, tX)
tX = dh.apply_zscore(mean, std, tX)
vX = dh.apply_zscore(mean, std, vX)

# Initial bias and an all-zero (D, 1) weight vector for training.
b = 0
m, n = tX.shape
w = np.zeros((n, 1))

# Distinct class names found in the training labels, decoded to str.
binary_types = np.char.decode(np.unique(tY).astype(np.bytes_), 'UTF-8')
n_classes = len(binary_types)

# Train one binary classifier per class. Classes are visited from the last
# index down to 0 and each is paired with the class just before it (wrapping
# around). For three classes this is (2 vs 1), (1 vs 0), (0 vs 2) — the same
# pairs and order as the earlier hard-coded `i = 2` / `binary_types[j-i]`
# indexing, which only worked when there were exactly three classes.
results = list()
for j in range(n_classes):
    positive_idx = n_classes - 1 - j
    negative_idx = (positive_idx - 1) % n_classes
    classifier = binary_types[positive_idx]   # Positive Classifier (1)
    classifier2 = binary_types[negative_idx]  # Negative Classifier (0)

    print("Assessing for: %s against %s" % (classifier, classifier2))

    batch_tX, batch_tY = filter(tX, tY, classifier, classifier2)
    batch_vX, batch_vY = filter(vX, vY, classifier, classifier2)
    # Targets as (N, 1) column vectors.
    batch_tY = batch_tY.reshape(batch_tX.shape[0], 1)
    batch_vY = batch_vY.reshape(batch_vX.shape[0], 1)

    # Pass a copy of the initial weights so every classifier starts from the
    # same zeros even if calculate() updates its argument in place
    # (NOTE(review): in-place behavior not visible here — confirm).
    weights, bias, losses = lr.calculate(w.copy(), b, batch_tX, batch_tY, batch_vX, batch_vY)
    wx_b_v = lr.linear_mod(weights, batch_vX, bias)
    P = lr.sigmoid(wx_b_v)

    results.append({
        "identifier": classifier,
        "probability": P,
        "batch": batch_vY
    })

Assessing for: Iris-virginica against Iris-versicolor
Assessing for: Iris-versicolor against Iris-setosa
Assessing for: Iris-setosa against Iris-virginica


In [16]:
# Compare predicted labels against the validation labels.
# NOTE(review): `result` is not assigned in any visible cell (the training
# cell builds `results`, a list of per-pair dicts), so this cell raises
# NameError on Restart & Run All. TODO: derive `result` before this point.
print(result)
print(result.shape)
print(vY)
print(vY.shape)
v_calc = Calculations(vY, result)

# Fraction of validation samples whose prediction equals the true label.
correct = np.sum(vY == result)/vY.shape[0]

# Scale to a percentage so the number agrees with the "%" in the message;
# previously the raw fraction (e.g. 0.93) was printed as "0.93 %".
print("Validation Accuracy: {} %".format(correct * 100))

['Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-setosa'
 'Iris-virginica' 'Iris-setosa' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'
 'Iris-setosa' 'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-setosa'
 'Iris-virginica' 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa'
 'Iris-versicolor' 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa'
 'Iris-setosa' 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa'
 'Iris-virginica' 'Iris-setosa' 'Iris-virginica' 'Iris-setosa'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-setosa' 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa'
 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'
 'Iris-virginica' 'Iris-setosa' 'Iris-setosa' 'Iris-virginica'
 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-versicolor'
 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-setos

In [None]:
# Sweep the decision threshold over 0.0, 0.1, ..., 0.9 and collect precision
# and recall for the training and validation splits.
# NOTE(review): `weights` / `bias` are whatever the last pairwise fit left
# behind, while tX/tY and vX/vY are the full multi-class splits — confirm
# this is the intended classifier/data combination.
precisions = list()
recalls = list()
v_precision = list()
v_recall = list()
for step in range(10):
    threshold = step/10

    tPreds = lr.prediction(weights, bias, tX, threshold)
    vPreds = lr.prediction(weights, bias, vX, threshold)

    t_calc = Calculations(tY, tPreds)
    v_calc = Calculations(vY, vPreds)

    P, TP, TN, FP, FN = t_calc.setup()
    vP, vTP, vTN, vFP, vFN = v_calc.setup()

    precisions.append(t_calc.precision(TP, FP))
    recalls.append(t_calc.recall(TP, FN))

    v_precision.append(v_calc.precision(vFP and vTP, vFP) if False else v_calc.precision(vTP, vFP))
    # Validation recall goes through the validation calculator (was t_calc).
    v_recall.append(v_calc.recall(vTP, vFN))

# Recall on the x-axis and precision on the y-axis, matching the axis labels
# below (the data arguments were previously passed the other way round).
plt.title("Precision-Recall Graph")
plt.plot(recalls, precisions, label="Training Precision-Recall")
plt.plot(v_recall, v_precision, label="Validation Precision-Recall")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.legend(loc='lower left')
plt.show()