Skip to content

Commit

Permalink
integrate code for logistic
Browse files Browse the repository at this point in the history
  • Loading branch information
boyinggong committed Dec 11, 2015
1 parent 4c69494 commit 306fc9f
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 35 deletions.
21 changes: 12 additions & 9 deletions code/scripts/logistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@
# Append path to sys
sys.path.append(pathtofunction)

from logistic_function import plot_roc

pathtofolder = '../../data/'
from logistic_function import create_confusion, getMin_thrs, plot_roc

pathtofolder = '../../data/'

nsub = 16
beh_lambda = np.array([])
beh_score = np.array([])
val_score = np.array([])
AUC_val = np.array([])
Min_thrs = np.array([])
AUC_smr = np.array([])
fig = plt.figure(figsize=(20,20))
for i in np.arange(1, nsub+1):
run1 = np.loadtxt(pathtofolder + 'ds005/sub0'+ str(i).zfill(2)+
'/behav/task001_run001/behavdata.txt', skiprows = 1)
Expand Down Expand Up @@ -47,13 +47,16 @@
val_score = np.append(val_score, scores.mean())
# calculate the AUC and plot ROC curve for each subject
logreg_proba = logreg.predict_proba(X)
fig, AUC = plot_roc(logreg_proba, y)
fig.savefig(pathtofolder + 'ds005/models/roc_curve_sub0'+ str(i).zfill(2))
AUC_val = np.append(AUC_val, scores.mean())

confusion = create_confusion(logreg_proba, y)
addsub = fig.add_subplot(4, 4, i)
addsub, AUC = plot_roc(confusion, addsub)
Min_thrs = np.append(Min_thrs, getMin_thrs(confusion))
AUC_smr = np.append(AUC_smr, AUC)

np.savetxt(pathtofolder + 'ds005/models/lambda.txt', beh_lambda)
np.savetxt(pathtofolder + 'ds005/models/reg_score.txt', beh_score)
np.savetxt(pathtofolder + 'ds005/models/cross_val_score.txt', val_score)
np.savetxt(pathtofolder + 'ds005/models/AUC_val.txt', AUC_val)
np.savetxt(pathtofolder + 'ds005/models/Min_thrs.txt', Min_thrs.reshape(16,3))
np.savetxt(pathtofolder + 'ds005/models/AUC_smr.txt', AUC_smr)
fig.savefig(pathtofolder + 'ds005/models/roc_curve')

102 changes: 76 additions & 26 deletions code/utils/logistic_function.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,90 @@
import numpy as np
import matplotlib.pyplot as plt

def plot_roc(logreg_proba, y):
"""
function to plot the ROC (receiver operating characteristic) curve and
calculate the corresponding AUC (Area Under Curve).
Input:
logreg_proba: The estimate probability for each class calculated from
logistic regression model.
y: The actual class of response,.
Output: The ROC curve and and correspong AUC value.
def create_confusion(logreg_proba, y, thrs_inc=0.01):
"""

thresholds = np.linspace(1,0,101)

ROC = np.zeros((101,2))

for i in range(101):
t = thresholds[i]

Creates the confusion matrix based on various levels of discriminate
probability thresholds
Parameters
----------
actual: Actual responses, 1-d array with values 0 or 1
fitted: Fitted probabilities, 1-d array with values between 0 and 1
thrs_inc: increment of threshold probability (default 0.05)
Returns
-------
Confusion Matrix : Array of dim (X, 5) where X is the number of different
thresholds
Column 1: Threshold value between 0, 1
Columns 2-5 show counts for:
Column 2: True postive
Column 3: True negative
Column 4: False postive
Column 5: False negative
"""
thrs_array = np.linspace(0, 1, 1/thrs_inc +1)
confusion = np.ones((len(thrs_array), 5))
confusion[:,0] = thrs_array
for i in range(int(1/thrs_inc +1)):
t = thrs_array[i]
# Classifier / label agree and disagreements for current threshold.
TP_t = np.logical_and( logreg_proba[:,1] > t, y==1 ).sum()
TN_t = np.logical_and( logreg_proba[:,1] <=t, y==0 ).sum()
FP_t = np.logical_and( logreg_proba[:,1] > t, y==0 ).sum()
FN_t = np.logical_and( logreg_proba[:,1] <=t, y==1 ).sum()
confusion[i, 1:5] = [TP_t, TN_t, FP_t, FN_t]
return confusion


def getMin_thrs(confusion):
"""
Returns the threshold with the smallest number of wrong predictions
Parameters:
-----------
Confustion matrix: 2-d array with 5 columns
Returns:
--------
thrs: min threshold that gives minimum wrong predictions: columns 3 +
column 4
false_pos: number of incorrect trues
false_neg: number of incorrect falses
"""
thrs_min = np.argmin(confusion[:,3]+ confusion[:,4])
col_out = confusion[thrs_min, :]
thrs = col_out[0]
false_pos = col_out[3]
false_neg = col_out[4]
return thrs, false_pos, false_neg


def plot_roc(confusion, fig):
"""
function to plot the ROC (receiver operating characteristic) curve and
calculate the corresponding AUC (Area Under Curve).
Parameters:
-----------
Confustion matrix: 2-d array with 5 columns
Returns:
--------
fig: The ROC curve
AUC: Correspong AUC value
"""
ROC = np.zeros((confusion.shape[0],2))
for i in range(confusion.shape[0]):
# Compute false positive rate for current threshold.
FPR_t = FP_t / float(FP_t + TN_t)
FPR_t = confusion[i, 3] / float(confusion[i, 3] + confusion[i, 2])
ROC[i,0] = FPR_t

# Compute true positive rate for current threshold.
TPR_t = TP_t / float(TP_t + FN_t)
TPR_t = confusion[i, 1] / float(confusion[i, 1] + confusion[i, 4])
ROC[i,1] = TPR_t

# Plot the ROC curve.
fig = plt.figure(figsize=(6,6))
plt.plot(ROC[:,0], ROC[:,1], lw=2)
plt.xlim(-0.1,1.1)
plt.ylim(-0.1,1.1)
Expand All @@ -43,9 +93,9 @@ def plot_roc(logreg_proba, y):
plt.grid()

AUC = 0.
for i in range(100):
for i in range(confusion.shape[0]-1):
AUC += (ROC[i+1,0]-ROC[i,0]) * (ROC[i+1,1]+ROC[i,1])
AUC *= 0.5

AUC *= -0.5
plt.title('ROC curve, AUC = %.4f'%AUC)
return fig, AUC
return fig, AUC

0 comments on commit 306fc9f

Please sign in to comment.