In [10]:
import numpy as np
import math
import csv
from random import *
from sklearn.utils import shuffle

def readMatrix(filename):
    binary_classify = True
    data = []
    with open(filename, 'rb') as f:
        reader = csv.reader(f)
        next(reader, None)
        for row in reader:
            r = [row[0]]
            for i in range(4,len(row)):
                r.append(row[i])
            data.append(r)

    data = np.asarray(data)
    data = data.astype(np.float)
    np.random.shuffle(data)

    height = data.shape[0]
    width = data.shape[1]
    features = np.concatenate((np.ones((height, 1)), data[:,0:width-1]), axis=1)
    values = data[:,width-1:width]

    binary_values = np.sign(values)
    return features, binary_values    

    
def getCoefficients(A, b):
    return np.matmul(np.linalg.pinv(A), b)
    
    
def makePrediction(feat, coeff):
    return np.sign(np.matmul(feat,coeff))


def getConfusion(predictions, values):
    confusion = np.zeros((2,2))
    #TP FN
    #FP TN
    for pred_row, val_row in zip(predictions, values):
        pred = pred_row[0]
        val = val_row[0]
        if(pred == 1. and val == 1.):
            confusion[0,0] += 1 #TP
        elif(pred == 1. and val == -1.):
            confusion[1,0] += 1 #FP
        elif(pred == -1. and val == 1.): 
            confusion[0,1] += 1 #FN
        else:
            confusion[1,1] += 1 #TN
    return confusion
    
    
def main():    
    feat, val = readMatrix('copd_perc_all.csv')

    train_num = 7*len(feat)/10
    
    train_feat = feat[0:train_num,:]
    test_feat = feat[train_num:len(feat),:]
    
    train_val = val[0:train_num,:]
    test_val = val[train_num:len(feat),:]
    
    coefficients = getCoefficients(train_feat, train_val)
    train_pred = makePrediction(train_feat, coefficients)
    con_train = getConfusion(train_pred, train_val)
    
    test_pred = makePrediction(test_feat, coefficients)
    con_test = getConfusion(test_pred, test_val)
    
    print "Training confusion matrix"
    print "\ty-hat = 1\ty-hat = -1"
    print "y=1\t", con_train[0,0], "\t\t", con_train[0,1]
    print "y=-1\t", con_train[1,0], "\t\t", con_train[1,1]
    train_err = (con_train[1,0] + con_train[0,1])/np.sum(con_train)
    print "Training error: ", train_err, "%"
    
    print "Test confusion matrix"
    print "\ty-hat = 1\ty-hat = -1"
    print "y=1\t", con_test[0,0], "\t\t", con_test[0,1]
    print "y=-1\t", con_test[1,0], "\t\t", con_test[1,1]
    test_err = (con_test[1,0] + con_test[0,1])/np.sum(con_test)
    print "Test error: ", test_err, "%"

    return


if __name__ == '__main__':
    main()

    

Training confusion matrix
	y-hat = 1	y-hat = -1
y=1	1537.0 		1680.0
y=-1	945.0 		2486.0
Training error:  0.394855595668 %
Test confusion matrix
	y-hat = 1	y-hat = -1
y=1	721.0 		737.0
y=-1	419.0 		973.0
Test error:  0.405614035088 %
