From 306fc9f6e30e97ceb18ea5b83a02357590f5939d Mon Sep 17 00:00:00 2001
From: Boying Gong
Date: Fri, 11 Dec 2015 11:38:44 -0800
Subject: [PATCH] integrate code for logistic

---
 code/scripts/logistic.py        |  21 ++++---
 code/utils/logistic_function.py | 102 ++++++++++++++++++++++++--------
 2 files changed, 88 insertions(+), 35 deletions(-)

diff --git a/code/scripts/logistic.py b/code/scripts/logistic.py
index b61d8e0..72a080a 100644
--- a/code/scripts/logistic.py
+++ b/code/scripts/logistic.py
@@ -9,9 +9,7 @@
 # Append path to sys
 sys.path.append(pathtofunction)
 
-from logistic_function import plot_roc
-
-pathtofolder = '../../data/'
+from logistic_function import create_confusion, getMin_thrs, plot_roc
 
 pathtofolder = '../../data/'
 
@@ -19,7 +17,9 @@
 beh_lambda = np.array([])
 beh_score = np.array([])
 val_score = np.array([])
-AUC_val = np.array([])
+Min_thrs = np.array([])
+AUC_smr = np.array([])
+fig = plt.figure(figsize=(20,20))
 for i in np.arange(1, nsub+1):
     run1 = np.loadtxt(pathtofolder + 'ds005/sub0'+ str(i).zfill(2)+
                       '/behav/task001_run001/behavdata.txt', skiprows = 1)
@@ -47,13 +47,16 @@
     val_score = np.append(val_score, scores.mean())
     # calculate the AUC and plot ROC curve for each subject
     logreg_proba = logreg.predict_proba(X)
-    fig, AUC = plot_roc(logreg_proba, y)
-    fig.savefig(pathtofolder + 'ds005/models/roc_curve_sub0'+ str(i).zfill(2))
-    AUC_val = np.append(AUC_val, scores.mean())
-
+    confusion = create_confusion(logreg_proba, y)
+    addsub = fig.add_subplot(4, 4, i)
+    addsub, AUC = plot_roc(confusion, addsub)
+    Min_thrs = np.append(Min_thrs, getMin_thrs(confusion))
+    AUC_smr = np.append(AUC_smr, AUC)
 
 np.savetxt(pathtofolder + 'ds005/models/lambda.txt', beh_lambda)
 np.savetxt(pathtofolder + 'ds005/models/reg_score.txt', beh_score)
 np.savetxt(pathtofolder + 'ds005/models/cross_val_score.txt', val_score)
-np.savetxt(pathtofolder + 'ds005/models/AUC_val.txt', AUC_val)
+np.savetxt(pathtofolder + 'ds005/models/Min_thrs.txt', Min_thrs.reshape(16,3))
+np.savetxt(pathtofolder + 'ds005/models/AUC_smr.txt', AUC_smr)
+fig.savefig(pathtofolder + 'ds005/models/roc_curve')
 
diff --git a/code/utils/logistic_function.py b/code/utils/logistic_function.py
index 30cd974..baad02c 100644
--- a/code/utils/logistic_function.py
+++ b/code/utils/logistic_function.py
@@ -1,40 +1,90 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-def plot_roc(logreg_proba, y):
-    """
-    function to plot the ROC (receiver operating characteristic) curve and
-    calculate the corresponding AUC (Area Under Curve).
-    Input:
-    logreg_proba: The estimate probability for each class calculated from
-    logistic regression model.
-    y: The actual class of response,.
-    Output: The ROC curve and and correspong AUC value.
+def create_confusion(logreg_proba, y, thrs_inc=0.01):
     """
-
-    thresholds = np.linspace(1,0,101)
-
-    ROC = np.zeros((101,2))
-
-    for i in range(101):
-        t = thresholds[i]
-
+    Creates the confusion matrix counts across a range of classification
+    probability thresholds
+
+    Parameters
+    ----------
+    logreg_proba: Fitted class probabilities from the logistic regression
+    y: Actual responses, 1-d array with values 0 or 1
+    thrs_inc: increment of threshold probability (default 0.01)
+
+    Returns
+    -------
+    Confusion Matrix : Array of dim (X, 5) where X is the number of different
+    thresholds
+    Column 1: Threshold value between 0 and 1
+    Columns 2-5 show counts for:
+    Column 2: True positive
+    Column 3: True negative
+    Column 4: False positive
+    Column 5: False negative
+    """
+    thrs_array = np.linspace(0, 1, int(1/thrs_inc) + 1)
+    confusion = np.ones((len(thrs_array), 5))
+    confusion[:,0] = thrs_array
+    for i in range(len(thrs_array)):
+        t = thrs_array[i]
         # Classifier / label agree and disagreements for current threshold.
         TP_t = np.logical_and( logreg_proba[:,1] > t, y==1 ).sum()
         TN_t = np.logical_and( logreg_proba[:,1] <=t, y==0 ).sum()
         FP_t = np.logical_and( logreg_proba[:,1] > t, y==0 ).sum()
         FN_t = np.logical_and( logreg_proba[:,1] <=t, y==1 ).sum()
+        confusion[i, 1:5] = [TP_t, TN_t, FP_t, FN_t]
+    return confusion
+
+
+def getMin_thrs(confusion):
+    """
+    Returns the threshold with the smallest number of wrong predictions
+
+    Parameters:
+    -----------
+    confusion: confusion matrix, 2-d array with 5 columns
+
+    Returns:
+    --------
+    thrs: threshold that minimizes the wrong predictions, i.e. column 4 +
+    column 5 (false positives + false negatives)
+    false_pos: number of false positives at that threshold
+    false_neg: number of false negatives at that threshold
+    """
+    thrs_min = np.argmin(confusion[:,3]+ confusion[:,4])
+    col_out = confusion[thrs_min, :]
+    thrs = col_out[0]
+    false_pos = col_out[3]
+    false_neg = col_out[4]
+    return thrs, false_pos, false_neg
+
+def plot_roc(confusion, fig):
+    """
+    Plot the ROC (receiver operating characteristic) curve and
+    calculate the corresponding AUC (Area Under the Curve).
+
+    Parameters:
+    -----------
+    confusion: confusion matrix, 2-d array with 5 columns
+    fig: matplotlib figure or subplot to draw the curve on
+
+    Returns:
+    --------
+    fig: the figure/subplot with the ROC curve drawn on it
+    AUC: the corresponding AUC value
+    """
+    ROC = np.zeros((confusion.shape[0],2))
+    for i in range(confusion.shape[0]):
         # Compute false positive rate for current threshold.
-        FPR_t = FP_t / float(FP_t + TN_t)
+        FPR_t = confusion[i, 3] / float(confusion[i, 3] + confusion[i, 2])
         ROC[i,0] = FPR_t
-
+
         # Compute true positive rate for current threshold.
-        TPR_t = TP_t / float(TP_t + FN_t)
+        TPR_t = confusion[i, 1] / float(confusion[i, 1] + confusion[i, 4])
         ROC[i,1] = TPR_t
-
+
     # Plot the ROC curve.
-    fig = plt.figure(figsize=(6,6))
     plt.plot(ROC[:,0], ROC[:,1], lw=2)
     plt.xlim(-0.1,1.1)
     plt.ylim(-0.1,1.1)
@@ -43,9 +93,9 @@ def plot_roc(logreg_proba, y):
     plt.grid()
 
     AUC = 0.
-    for i in range(100):
+    for i in range(confusion.shape[0]-1):
         AUC += (ROC[i+1,0]-ROC[i,0]) * (ROC[i+1,1]+ROC[i,1])
-    AUC *= 0.5
-
+    AUC *= -0.5 # thresholds ascend so FPR decreases; negate for positive area
+
     plt.title('ROC curve, AUC = %.4f'%AUC)
-    return fig, AUC
\ No newline at end of file
+    return fig, AUC
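Reviewer note (not part of the patch): a minimal sketch of how the three
functions are meant to chain together. It assumes scikit-learn's
LogisticRegression; the toy X, y below stand in for the ds005 design matrix
and 0/1 behavioural response used in logistic.py, and the output file name
is illustrative only.

    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.linear_model import LogisticRegression

    from logistic_function import create_confusion, getMin_thrs, plot_roc

    # Toy stand-in data: 200 trials, 2 regressors, binary response.
    rng = np.random.RandomState(0)
    X = rng.randn(200, 2)
    y = (X[:, 0] + 0.5 * rng.randn(200) > 0).astype(int)

    logreg = LogisticRegression().fit(X, y)
    logreg_proba = logreg.predict_proba(X)  # column 1 holds P(y == 1)

    # Confusion counts at thresholds 0.00, 0.01, ..., 1.00.
    confusion = create_confusion(logreg_proba, y, thrs_inc=0.01)

    # Threshold minimizing false positives + false negatives.
    thrs, false_pos, false_neg = getMin_thrs(confusion)

    # Draw the ROC curve and get the trapezoid-rule AUC; plot_roc draws
    # through pyplot, so the target subplot must be current (it is,
    # immediately after add_subplot).
    fig = plt.figure(figsize=(6, 6))
    ax = fig.add_subplot(1, 1, 1)
    ax, AUC = plot_roc(confusion, ax)
    fig.savefig('roc_curve_example.png')

The sign flip in plot_roc (AUC *= -0.5) works because create_confusion
orders thresholds from 0 to 1, so the ROC points run from (1, 1) down to
(0, 0) and the raw trapezoid sum comes out negative.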