Merge pull request #110 from boyinggong/dev
Merging for now, add subject label later
BenjaminHsieh committed Dec 12, 2015
2 parents a2565ae + 306fc9f commit 122e406
Showing 2 changed files with 125 additions and 3 deletions.
27 changes: 24 additions & 3 deletions code/scripts/logistic.py
@@ -2,13 +2,24 @@
import matplotlib.pyplot as plt
from sklearn.cross_validation import cross_val_score
from sklearn.linear_model import LogisticRegression
import sys

# Path to function
pathtofunction = '../utils'
# Append path to sys
sys.path.append(pathtofunction)

from logistic_function import create_confusion, getMin_thrs, plot_roc

pathtofolder = '../../data/'

nsub = 16
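# Per-subject accumulators, appended to inside the loop and saved below.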
beh_lambda = np.array([])
beh_score = np.array([])
val_score = np.array([])
Min_thrs = np.array([])
AUC_smr = np.array([])
fig = plt.figure(figsize=(20,20))
for i in np.arange(1, nsub+1):
run1 = np.loadtxt(pathtofolder + 'ds005/sub0'+ str(i).zfill(2)+
'/behav/task001_run001/behavdata.txt', skiprows = 1)
@@ -18,7 +29,7 @@
'/behav/task001_run003/behavdata.txt', skiprows = 1)
behav = np.concatenate((run1, run2, run3), axis=0)
behav = behav[np.logical_or.reduce([behav[:,5] == x for x in [0,1]])]
X = zip(np.ones(len(behav)), behav[:, 1], behav[:, 2])
y = behav[:, 5]
logreg = LogisticRegression(C=1e5)
# C is the inverse of regularization strength; C=1e5 means very weak regularization
@@ -34,8 +45,18 @@
scores = cross_val_score(LogisticRegression(), X, y,
scoring='accuracy', cv=10)
val_score = np.append(val_score, scores.mean())

# calculate the AUC and plot ROC curve for each subject
logreg_proba = logreg.predict_proba(X)
confusion = create_confusion(logreg_proba, y)
addsub = fig.add_subplot(4, 4, i)
addsub, AUC = plot_roc(confusion, addsub)
Min_thrs = np.append(Min_thrs, getMin_thrs(confusion))
AUC_smr = np.append(AUC_smr, AUC)

np.savetxt(pathtofolder + 'ds005/models/lambda.txt', beh_lambda)
np.savetxt(pathtofolder + 'ds005/models/reg_score.txt', beh_score)
np.savetxt(pathtofolder + 'ds005/models/cross_val_score.txt', val_score)
np.savetxt(pathtofolder + 'ds005/models/Min_thrs.txt', Min_thrs.reshape(16,3))
np.savetxt(pathtofolder + 'ds005/models/AUC_smr.txt', AUC_smr)
fig.savefig(pathtofolder + 'ds005/models/roc_curve')
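
A note on the design matrix: zip() returns a list of tuples under Python 2, which this code targets, but a one-shot iterator under Python 3, where scikit-learn would fail to index it. A minimal version-independent sketch of the same construction (the small behav array here is a made-up stand-in for the loaded behavioral data):

import numpy as np

# Hypothetical stand-in for the loaded behavioral data: 6 columns, as in behavdata.txt.
behav = np.zeros((4, 6))

# Intercept column of ones, then the two regressors, stacked with NumPy
# instead of zip() so the result is a proper 2-d array.
X = np.column_stack((np.ones(len(behav)), behav[:, 1], behav[:, 2]))
print(X.shape)  # (4, 3)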

101 changes: 101 additions & 0 deletions code/utils/logistic_function.py
@@ -0,0 +1,101 @@
import numpy as np
import matplotlib.pyplot as plt

def create_confusion(logreg_proba, y, thrs_inc=0.01):
"""
Creates the confusion matrix based on various levels of the discrimination
probability threshold
Parameters
----------
logreg_proba: fitted probabilities from predict_proba, 2-d array whose
second column is the probability of response 1
y: actual responses, 1-d array with values 0 or 1
thrs_inc: increment of the threshold probability (default 0.01)
Returns
-------
Confusion Matrix : Array of dim (X, 5) where X is the number of different
thresholds
Column 1: Threshold value between 0, 1
Columns 2-5 show counts for:
Column 2: True positive
Column 3: True negative
Column 4: False positive
Column 5: False negative
"""
thrs_array = np.linspace(0, 1, int(1/thrs_inc) + 1)
confusion = np.ones((len(thrs_array), 5))
confusion[:,0] = thrs_array
for i in range(len(thrs_array)):
t = thrs_array[i]
# Classifier / label agree and disagreements for current threshold.
TP_t = np.logical_and( logreg_proba[:,1] > t, y==1 ).sum()
TN_t = np.logical_and( logreg_proba[:,1] <=t, y==0 ).sum()
FP_t = np.logical_and( logreg_proba[:,1] > t, y==0 ).sum()
FN_t = np.logical_and( logreg_proba[:,1] <=t, y==1 ).sum()
confusion[i, 1:5] = [TP_t, TN_t, FP_t, FN_t]
return confusion
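
A quick illustration of create_confusion on made-up data (in the scripts the function is imported from logistic_function; the probabilities and labels below are purely hypothetical):

import numpy as np

# Two-column probabilities as returned by predict_proba: P(y=0), P(y=1).
logreg_proba = np.array([[0.9, 0.1],
                         [0.4, 0.6],
                         [0.2, 0.8],
                         [0.7, 0.3]])
y = np.array([0, 1, 1, 0])

# Coarse threshold grid for readability: 0, 0.25, 0.5, 0.75, 1.
confusion = create_confusion(logreg_proba, y, thrs_inc=0.25)
# Each row: threshold, TP, TN, FP, FN at that threshold.
print(confusion)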


def getMin_thrs(confusion):
"""
Returns the threshold with the smallest number of wrong predictions
Parameters:
-----------
confusion: confusion matrix, 2-d array with 5 columns
Returns:
--------
thrs: threshold that minimizes the number of wrong predictions
(column 3 + column 4, false positives plus false negatives)
false_pos: number of false positives at that threshold
false_neg: number of false negatives at that threshold
"""
thrs_min = np.argmin(confusion[:,3]+ confusion[:,4])
col_out = confusion[thrs_min, :]
thrs = col_out[0]
false_pos = col_out[3]
false_neg = col_out[4]
return thrs, false_pos, false_neg
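
Continuing the toy example above, getMin_thrs picks the row whose combined error count (FP + FN) is smallest:

thrs, false_pos, false_neg = getMin_thrs(confusion)
print(thrs, false_pos, false_neg)  # 0.5, 0.0, 0.0 for the toy data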


def plot_roc(confusion, fig):
"""
Plots the ROC (receiver operating characteristic) curve and
calculates the corresponding AUC (Area Under the Curve).
Parameters:
-----------
confusion: confusion matrix, 2-d array with 5 columns
fig: matplotlib subplot to draw the curve on
Returns:
--------
fig: the subplot with the ROC curve drawn
AUC: corresponding AUC value
"""
ROC = np.zeros((confusion.shape[0],2))
for i in range(confusion.shape[0]):
# Compute false positive rate for current threshold.
FPR_t = confusion[i, 3] / float(confusion[i, 3] + confusion[i, 2])
ROC[i,0] = FPR_t

# Compute true positive rate for current threshold.
TPR_t = confusion[i, 1] / float(confusion[i, 1] + confusion[i, 4])
ROC[i,1] = TPR_t

# Plot the ROC curve.
plt.plot(ROC[:,0], ROC[:,1], lw=2)
plt.xlim(-0.1,1.1)
plt.ylim(-0.1,1.1)
plt.xlabel('$FPR(t)$')
plt.ylabel('$TPR(t)$')
plt.grid()

# Trapezoidal-rule integration of TPR over FPR; FPR falls from 1 to 0 as
# the threshold rises, so the signed sum is negated to give a positive area.
AUC = 0.
for i in range(confusion.shape[0]-1):
AUC += (ROC[i+1,0]-ROC[i,0]) * (ROC[i+1,1]+ROC[i,1])
AUC *= -0.5

plt.title('ROC curve, AUC = %.4f'%AUC)
return fig, AUC
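
And a sketch of drawing the toy example's ROC curve, mirroring the per-subject subplot usage in logistic.py (reusing the confusion matrix from the snippet above):

import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)  # plot_roc draws on the current axes
ax, AUC = plot_roc(confusion, ax)
print('AUC = %.4f' % AUC)      # 1.0000 for the separable toy data
fig.savefig('toy_roc.png')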
