In [30]:
import numpy as np
import math
import csv
from random import *
from sklearn.utils import shuffle

def readMatrix(filename):
    """Load a CSV file and build features plus 6-class labels from it.

    The header row is skipped.  From every remaining row, column 0 and
    columns 4 onward are kept (columns 1-3 are dropped) and converted to
    float.  The rows are then shuffled in place with np.random.shuffle.
    The last kept column is the target; the remaining kept columns,
    prefixed by a column of ones, form the feature matrix.

    The target is binned with the edges -0.2, -0.1, 0, 0.1, 0.2 (a value
    equal to an edge falls into the bin above it), giving the labels
    -2, -1, 0, 1, 2, 3 from the lowest bin to the highest.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A tuple (features, binary_values, classification):
        features        -- float array, one row per data row, whose first
                           column is all ones.
        binary_values   -- array with 6 columns holding 1 in the column of
                           the row's bin and -1 everywhere else.
        classification  -- 1-D float array of the labels -2 .. 3.

    Raises:
        ValueError: if the file has no data rows after the header.
    """
    import sys

    # The csv module expects a binary handle on Python 2 and a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(filename, 'rb')
    else:
        handle = open(filename, newline='')

    with handle as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        data = [[row[0]] + row[4:] for row in reader]

    if not data:
        raise ValueError("no data rows found in %r" % (filename,))

    # The builtin float replaces the np.float alias, which newer NumPy
    # releases no longer provide.
    data = np.asarray(data).astype(float)
    np.random.shuffle(data)

    height, width = data.shape
    features = np.concatenate(
        (np.ones((height, 1)), data[:, 0:width - 1]), axis=1)
    targets = data[:, width - 1]

    # np.digitize returns 0 for targets below -0.2, then 1 .. 5 for each
    # following half-open interval, matching a chain of "<" comparisons.
    edges = np.array([-0.2, -0.1, 0.0, 0.1, 0.2])
    bin_index = np.digitize(targets, edges)

    binary_values = np.full((height, 6), -1)
    binary_values[np.arange(height), bin_index] = 1

    # Labels are the bin index shifted so that the lowest bin is -2.
    classification = bin_index.astype(float) - 2
    return features, binary_values, classification

    
def getCoefficients(A, b):
    """Return the product of the Moore-Penrose pseudo-inverse of A with b.

    Args:
        A: matrix passed to np.linalg.pinv.
        b: right-hand side multiplied by the pseudo-inverse via np.matmul.

    Returns:
        np.matmul(pinv(A), b).
    """
    pseudo_inverse = np.linalg.pinv(A)
    solution = np.matmul(pseudo_inverse, b)
    return solution
    
    
def makePrediction(feat, coeff):
    """Return the matrix product of feat and coeff.

    Args:
        feat: left operand handed to np.matmul.
        coeff: right operand handed to np.matmul.

    Returns:
        np.matmul(feat, coeff).
    """
    scores = np.matmul(feat, coeff)
    return scores


def getConfusion(predictions, values):
    """Count a 2x2 confusion matrix from +1 / -1 predictions and labels.

    Only the first element of each row of predictions and values is
    inspected.  The returned matrix is laid out as

        [[TP, FN],
         [FP, TN]]

    Any pair that is not (1, 1), (1, -1) or (-1, 1) is counted in the TN
    cell, including pairs holding values other than +1 and -1.

    Args:
        predictions: iterable of rows; element 0 of each row is the
            predicted label.
        values: iterable of rows; element 0 of each row is the true label.

    Returns:
        A 2x2 float array of counts.
    """
    # (row, column) positions of the four counters
    tp_cell, fn_cell, fp_cell, tn_cell = (0, 0), (0, 1), (1, 0), (1, 1)

    confusion = np.zeros((2, 2))
    for pred_row, val_row in zip(predictions, values):
        predicted, actual = pred_row[0], val_row[0]
        if predicted == 1.:
            if actual == 1.:
                cell = tp_cell
            elif actual == -1.:
                cell = fp_cell
            else:
                cell = tn_cell
        elif predicted == -1. and actual == 1.:
            cell = fn_cell
        else:
            cell = tn_cell
        confusion[cell] += 1
    return confusion
    
    
def main():
    """Fit and evaluate a one-vs-rest least-squares classifier.

    Reads 'copd_perc_all.csv' through readMatrix, uses the first 70% of the
    returned rows for training and the remainder for testing, fits one
    linear scorer per class column with getCoefficients, predicts for each
    row the class whose score is largest, and prints the fraction of
    misclassified rows for the training and the test split.

    Raises:
        ValueError: if fewer than two rows are available, because one of
            the two splits would then be empty.
    """
    feat, binary, vals = readMatrix('copd_perc_all.csv')

    total = len(feat)
    if total < 2:
        raise ValueError("need at least 2 rows to form a train/test split")

    # Floor division keeps the split index an integer on both Python 2
    # and Python 3; a float here could not be used as a slice bound.
    train_num = 7 * total // 10

    train_feat, test_feat = feat[:train_num], feat[train_num:]
    train_val, test_val = vals[:train_num], vals[train_num:]
    train_binary = binary[:train_num]

    # All six class columns share the same feature matrix, so a single
    # call solves for every column at once instead of recomputing the
    # pseudo-inverse once per class.
    coefficients = getCoefficients(train_feat, train_binary)

    # The winning column index is shifted by 2 so that it lines up with
    # the labels -2 .. 3 produced by readMatrix.
    train_pred = np.argmax(makePrediction(train_feat, coefficients), axis=1) - 2.0
    train_error = np.mean(train_pred != train_val)
    print("Train error =  %s" % train_error)

    test_pred = np.argmax(makePrediction(test_feat, coefficients), axis=1) - 2.0
    test_error = np.mean(test_pred != test_val)
    print("Test error =  %s" % test_error)


# Run the experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

    

Train error =  0.663357400722
Test error =  0.657192982456
