Skip to content

Commit

Permalink
Merge pull request #211 from mingujo/min-log_reg_re
Browse files Browse the repository at this point in the history
Min log reg re
  • Loading branch information
mingujo committed Dec 13, 2015
2 parents 85253f6 + 2a296ae commit 4b30121
Show file tree
Hide file tree
Showing 7 changed files with 491 additions and 81 deletions.
Original file line number Diff line number Diff line change
@@ -1,22 +1,49 @@
"""
Purpose:
-----------------------------------------------------------------------------------
We make the extraction of txt file to be easier by building these functions.
To explore the behavior data for each subject, we make it convenient by building these
functions. Especially, logistic regression on behavior data needs the data in pandas.
data_frame. This does it for you.
-----------------------------------------------------------------------------------
"""



import numpy as np
import pandas as pd
import os

project_location="../../"
data_location=project_location+"data/ds005/"
data_location=os.path.join(os.path.dirname(__file__), '../../../data/ds005/')


def load_in_dataframe(subject_number):
""" Return the subject behav data combining all 3 runs and excluding invalid data (-1) in data.frame
Parameters
----------
subject_number : int
Subject Number
Returns
-------
behav_total_run : data.frame
the subject's behav data (combining all 3 runs)
"""

#convert 3 behavior datas in 1 subject into data frames.
run1 = pd.read_table(data_location+'sub'+str(subject_number).zfill(3)+'/behav/task001_run001/behavdata.txt')
run2 = pd.read_table(data_location+'sub'+str(subject_number).zfill(3)+'/behav/task001_run002/behavdata.txt')
run3 = pd.read_table(data_location+'sub'+str(subject_number).zfill(3)+'/behav/task001_run003/behavdata.txt')

#append all the runs in one pandas data frame
r=run1.append(run2)
run_total=r.append(run3)

return run_total

"""load and combine 3 behavior datas from 3 runs"""
def load_behav_txt(subject_number):
""" Return the subject behav data combining all 3 runs and excluding invalid data (-1) in np.array
Parameters
----------
subject_number : int
Expand All @@ -30,9 +57,9 @@ def load_behav_txt(subject_number):
"""

#load texts
behav1=np.loadtxt(data_location+'sub00%s/behav/task001_run001/behavdata.txt'%(subject_number),skiprows=1)
behav2=np.loadtxt(data_location+'sub00%s/behav/task001_run002/behavdata.txt'%(subject_number),skiprows=1)
behav3=np.loadtxt(data_location+'sub00%s/behav/task001_run003/behavdata.txt'%(subject_number),skiprows=1)
behav1=np.loadtxt(data_location+'sub'+str(subject_number).zfill(3)+'/behav/task001_run001/behavdata.txt',skiprows=1)
behav2=np.loadtxt(data_location+'sub'+str(subject_number).zfill(3)+'/behav/task001_run002/behavdata.txt',skiprows=1)
behav3=np.loadtxt(data_location+'sub'+str(subject_number).zfill(3)+'/behav/task001_run003/behavdata.txt',skiprows=1)

#concatenate them to be 1
behav_total_run=np.concatenate((behav1,behav2,behav3),axis=0)
Expand All @@ -47,7 +74,6 @@ def load_behav_txt(subject_number):

def load_behav_text_one(subject_number, run_number):
""" Return the subject behav data (single run) in np.array
Parameters
----------
subject_number : int
Expand All @@ -62,7 +88,7 @@ def load_behav_text_one(subject_number, run_number):
"""

behav=np.loadtxt(data_location+'sub00%s/behav/task001_run00%s/behavdata.txt'%(subject_number,run_number),
behav=np.loadtxt(data_location+'sub'+str(subject_number).zfill(3)+'/behav/task001_run00%s/behavdata.txt'%(run_number),
skiprows=1)

#delete the rows that contain -1 in respcat (these are errors in experiment so we should take them out
Expand All @@ -73,36 +99,3 @@ def load_behav_text_one(subject_number, run_number):



def load_in_dataframe(subject_number):
""" Return the subject behav data combining all 3 runs and excluding invalid data (-1) in data.frame
Parameters
----------
subject_number : int
Subject Number
Returns
-------
behav_total_run : data.frame
the subject's behav data (combining all 3 runs)
"""

#convert 3 behavior datas in 1 subject into data frames.
run1 = pd.read_table(data_location+'sub00%s/behav/task001_run001/behavdata.txt'%(subject_number))
run2 = pd.read_table(data_location+'sub00%s/behav/task001_run002/behavdata.txt'%(subject_number))
run3 = pd.read_table(data_location+'sub00%s/behav/task001_run003/behavdata.txt'%(subject_number))

#append all the runs in one pandas data frame
r=run1.append(run2)
run_total=r.append(run3)

return run_total








79 changes: 46 additions & 33 deletions code/utils/logistic_reg.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
"""
Purpose:
-----------------------------------------------------------------------------------
We try to capture the significance of gain and loss amount condition for each subjects.
We fit the logistic regression line based on their responses on the experiment. The slope
of the fitted line illustrates the subject's sensitivity on either gain or loss amount.
-----------------------------------------------------------------------------------
"""

import pandas as pd
import statsmodels.api as sm
import pylab as pl
Expand All @@ -12,8 +21,19 @@


def add_gainlossratio(run):

""" add gain/loss ratio column """
""" Return the behavioral data of a subject with added 'ratio' column
Parameters
----------
run : data.frame
Behavior data over 3 runs (combined) of a subject
Returns
-------
run : data.frame
behavioral data added the ratio column (ratio : gain/loss)
"""

gain = run.ix[:,1]
loss = run.ix[:,2]
Expand All @@ -22,14 +42,20 @@ def add_gainlossratio(run):
return run

def organize_columns(run):
""" move around the columns to get the data frame be ready for logistic regression
Parameters
----------
run : data.frame
behavioral data added the ratio column (ratio : gain/loss)
"""
Returns
-------
run_final : data.frame
behavioral data frame with organized columns
drop the unnecessary columns in the data frame,
and reorganize the columns for logistic regression
"""

"""

# drop onset column
a = run.drop('onset', 1)
Expand All @@ -55,9 +81,19 @@ def organize_columns(run):


def log_regression(run_final):

"""do logistic regression on train cols to predict the subject's decision"""
"""Do logistic regression on train cols to predict the subject's decision
Parameters
----------
run : data.frame
behavioral data frame with organized columns
Returns
-------
logit_pars : logistic regression result summary
the logistic regression result summary
"""
# do logistic regression
x = logit("respcat ~ gain + loss", run_final).fit()

Expand All @@ -67,30 +103,7 @@ def log_regression(run_final):
#store the parameters of logistic regression
logit_pars = x.params

### START TO PLOT ###
# calculate intercept and slope
intercept = -logit_pars['Intercept'] / logit_pars['gain']
slope = -logit_pars['loss'] / logit_pars['gain']

fig = plt.figure(figsize = (10, 8))

# plot gain and loss for respcat = 1(decides to gamble)
plt.plot(run_final[run_final['respcat'] == 1].values[:,2], run_final[run_final['respcat'] == 1].values[:,1], '.', label = "Gamble", mfc = 'None', mec='red')

# plot gain and loss for respcat = 0(decides to not gamble)
plt.plot(run_final[run_final['respcat'] == 0].values[:,2], run_final[run_final['respcat'] == 0].values[:,1], '.', label = "Not gamble", mfc = 'None', mec='blue')

# draw regression line
plt.plot(run_final['loss'], intercept + slope * run_final['loss'],'-', color = 'green')

plt.xlabel('Loss ($)')
plt.ylabel('Gain ($)')
plt.legend(loc='bottom right')
plt.axis([2, 23, 8, 41])
plt.title("Logistic Regression to predict 1(gamble) 0(not gamble) with gain and loss values\n")
plt.savefig('log_regression.png')
plt.show()
return
return logit_pars

if __name__ == '__main__':
a=add_gainlossratio(int(sys.argv[1]))
Expand Down
78 changes: 72 additions & 6 deletions code/utils/scripts/log_regression_script.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,76 @@
""" Linear Regression on Begavioral data """
"""
Purpose:
-----------------------------------------------------------------------------------
We try to capture the significance of gain and loss amount condition for each subjects.
We fit the logistic regression line based on their responses on the experiment. The slope
of the fitted line illustrates the subject's sensitivity on either gain or loss amount.
import sys
sys.path.append(".././utils")
This script outputs plots for each subject and combine them into one image of subplots.
-----------------------------------------------------------------------------------
"""

from __future__ import absolute_import, division, print_function
import sys, os
#TODO : later change this
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
import numpy as np
import matplotlib.pyplot as plt
import nibabel as nib
from logistic_reg import *
from organize_behavior_data import *



# Create the necessary directories if they do not exist
dirs = ['../../fig','../../fig/log_reg_behav']
for d in dirs:
if not os.path.exists(d):
os.makedirs(d)

# Locate the different paths
#TODO: the current location for this file project-epsilon/code/scripts
project_path = '../../../'
# TODO: change it to relevant path
data_path = project_path+'data/ds005/'
subject_list = ['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16']


fig = plt.figure()
for i,subject in enumerate(subject_list):
behav_df = load_in_dataframe(subject)
add_lambda = add_gainlossratio(behav_df)
columns_changed = organize_columns(add_lambda)
logit_pars = log_regression(columns_changed)

### START TO PLOT ###
# calculate intercept and slope
intercept = -logit_pars['Intercept'] / logit_pars['gain']
slope = -logit_pars['loss'] / logit_pars['gain']
#fig = plt.figure(figsize = (10, 8))
ax = fig.add_subplot(4, 4, i+1)
ax.set_title("Subject_%s_run001"%(str(i+1)), fontsize =10)
ax.set_axis_bgcolor('white')

# plot gain and loss for respcat = 1(decides to gamble)
l1, = ax.plot(behav_df[behav_df['respcat'] == 1].values[:,2], behav_df[behav_df['respcat'] == 1].values[:,1], '.', label = "Gamble", mfc = 'None', mec='red')

# plot gain and loss for respcat = 0(decides to not gamble)
l2, = ax.plot(behav_df[behav_df['respcat'] == 0].values[:,2], behav_df[behav_df['respcat'] == 0].values[:,1], '.', label = "Not gamble", mfc = 'None', mec='blue')

# draw regression line
ax.plot(behav_df['loss'], intercept + slope * behav_df['loss'],'-', color = 'green')

ax.set_xlabel('Loss ($)', fontsize =10)
ax.set_ylabel('Gain ($)', fontsize =10)
ax.set_xlim([2,23])
ax.set_ylim([8,41])
ax.tick_params(axis='x', labelsize=10)
ax.tick_params(axis='y', labelsize=10)

fig.legend((l1,l2), ('Gamble','Not Gamble'), loc = 'lower right', labelspacing = 0.5, fontsize = 10)
fig.tight_layout()
fig.subplots_adjust(top=0.90)
fig.suptitle("Fitted Logistic Regression Line (1(gamble) 0(not gamble) with gain and loss values\n", fontsize=12)
fig.savefig(dirs[1]+'/log_regression_behav_subplots.png',facecolor='white', edgecolor='white')


a=add_gainlossratio(behav_df)
b=organize_columns(a)
log_regression(b)
Loading

0 comments on commit 4b30121

Please sign in to comment.