berkeley-stat159 · briankleinqiu · Dec 12, 2015 · Nov 11, 2015 · Nov 19, 2015 · Nov 19, 2015
diff --git a/Makefile b/Makefile
@@ -1,15 +1,15 @@
-.PHONY: all clean coverage test
+.PHONY: all clean coverage test verbose
 
 all: clean
 
 clean:
 	find . -name "*.so" -o -name "*.pyc" -o -name "*.pyx.md5" | xargs rm -f
 
 coverage:
-	nosetests code/utils data --with-coverage --cover-package=data  --cover-package=utils
+	nosetests code/utils/tests data/tests --with-coverage --cover-package=data/data.py  --cover-package=code/utils/functions
 
 test:
-	nosetests code/utils data
+	nosetests code/utils/tests data/tests
 
 verbose:
-	nosetests -v code/utils data
+	nosetests -v code/utils/tests data/tests
diff --git a/README.md b/README.md
@@ -1,5 +1,13 @@
-# project-template
+# UC Berkeley Stat 159/259 Fall 2015
+## Project-Theta
+
 [![Build Status](https://travis-ci.org/berkeley-stat159/project-theta.svg?branch=master)](https://travis-ci.org/berkeley-stat159/project-theta?branch=master)
 [![Coverage Status](https://coveralls.io/repos/berkeley-stat159/project-theta/badge.svg?branch=master)](https://coveralls.io/r/berkeley-stat159/project-theta?branch=master)
 
-Fall 2015 group project
+_**Group members:**_ Siyao Chang ([`changsiyao`](https://github.com/changsiyao)) , Boying Gong ([`boyinggong`](https://github.com/boyinggong)), Benjamin Hsieh ([`BenjaminHsieh`](https://github.com/BenjaminHsieh)), Brian Qiu ([`brianqiu`](https://github.com/brianqiu)), Jiang Zhu ([`pigriver123`](https://github.com/pigriver123))
+
+_**Topic:**_ [The Neural Basis of Loss Aversion in Decision Making Under Risk] (https://openfmri.org/dataset/ds000005/)
+[SOME SUMMARY OF PAPER]
+
+## Instructions
+[NEED TO ADD]
diff --git a/code/Makefile b/code/Makefile
@@ -1,5 +1,7 @@
+.PHONY: test coverage
+
 test:
-	nosetests code/utils -w ..
+	nosetests code/utils/tests -w ..
 
 coverage:
-	nosetests code/utils -w .. --with-coverage --cover-package=utils 
+	nosetests code/utils/tests -w .. --with-coverage --cover-package=utils/functions
diff --git a/code/scripts/find_mask_threshold.py b/code/scripts/find_mask_threshold.py
@@ -2,7 +2,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import sys
-sys.path.append('../utils')
+sys.path.append('../utils/functions')
 import smooth_gaussian
 import os
 

diff --git a/code/scripts/findoutlier.py b/code/scripts/findoutlier.py
@@ -1,7 +1,7 @@
 import numpy as np
 import json
 import sys
-pathtofunction = '../utils'
+pathtofunction = '../utils/functions'
 # Append the sys path
 sys.path.append(pathtofunction)
 from outlierfunction import outlier

diff --git a/code/scripts/graph_lindiag_script.py b/code/scripts/graph_lindiag_script.py
@@ -4,7 +4,7 @@
 import sys
 
 # Path to function
-pathtofunction = '../utils'
+pathtofunction = '../utils/functions'
 # Append path to sys
 sys.path.append(pathtofunction)
 

diff --git a/code/scripts/graphoutlier.py b/code/scripts/graphoutlier.py
@@ -9,11 +9,17 @@
 # Paths
 pathtodata = '../../data/'
 pathtofig = '../../paper/figures/'
-pathtofunction = '../utils'
+pathtofunction = '../utils/functions'
+pathtographing = '../utils/graphing'
 # Append fuction path
 sys.path.append(pathtofunction)
 # Import function
-from graphoutlier_functions import loadnib_dict, loadtxt_dict, plot_dvars, plot_fd, plot_meanSig
+from basic_util import loadnib_dict, loadtxt_dict
+
+# Append graphing path
+sys.path.append(pathtographing)
+# Import function
+from graphoutlier_functions import plot_dvars, plot_fd, plot_meanSig
 
 
 # load outlier files

diff --git a/code/scripts/lme_script.py b/code/scripts/lme_script.py
@@ -1,32 +1,23 @@
 from __future__ import division
-from statsmodels.regression.mixed_linear_model import MixedLM
-from scipy import stats  
-import pandas as pd
-import matplotlib.pyplot as plt
 import nibabel as nib
 import numpy as np
-import os
-from scipy.stats import gamma
-import math
-import numpy.linalg as npl
 import json
 import sys
 
 # Path to function
-pathtofunction = '../utils'
+pathtofunction = '../utils/functions'
 # Append path to sys
 sys.path.append(pathtofunction)
 
 from behavtask_tr import events2neural_extend, merge_cond
-from regression_functions import hrf, getRegressor, calcBeta, calcMRSS, deleteOutliers
+from regression_functions import hrf, getRegressor, deleteOutliers
 from lme_functions import calcBetaLme, calcSigProp, calcAnov, anovStat
 
 n_vols=240
 TR=2
 tr_times = np.arange(0, 30, TR)
 hrf_at_trs = hrf(tr_times)
 
-
 pathtofolder = '../../data/'
 
 dvars_out = json.load(open(pathtofolder + "dvarsOutliers.txt"))
@@ -75,7 +66,7 @@
     sig_gain_prop[i-1], sig_loss_prop[i-1] = calcSigProp(beta, sig_level)
     write=pathtofolder + 'ds005/sub0'+str(i).zfill(2)+'/model/model001/onsets/sub0'+str(i).zfill(2)+'_lme_beta.txt'
     np.savetxt(write, beta)
-    anov_test = calcAnov(data_full, run_group)
+    anov_test = calcAnov(data_full, run_group, thrshd)
     anov_prop[i-1] = anovStat(anov_test)
 
 write=pathtofolder + 'ds005/models/lme_sig_gain_prop.txt'

diff --git a/code/scripts/logistic.py b/code/scripts/logistic.py
@@ -2,13 +2,24 @@
 import matplotlib.pyplot as plt
 from sklearn.cross_validation import cross_val_score
 from sklearn.linear_model import LogisticRegression
+import sys
+
+# Path to function
+pathtofunction = '../utils/functions'
+# Append path to sys
+sys.path.append(pathtofunction)
+
+from logistic_function import create_confusion, getMin_thrs, plot_roc
 
 pathtofolder = '../../data/'
 
 nsub = 16
 beh_lambda = np.array([])
 beh_score = np.array([])
 val_score = np.array([])
+Min_thrs = np.array([])
+AUC_smr = np.array([])
+fig = plt.figure(figsize=(20,20))
 for i in np.arange(1, nsub+1):
     run1 = np.loadtxt(pathtofolder + 'ds005/sub0'+ str(i).zfill(2)+
                       '/behav/task001_run001/behavdata.txt', skiprows = 1)
@@ -18,7 +29,7 @@
                       '/behav/task001_run003/behavdata.txt', skiprows = 1)
     behav = np.concatenate((run1, run2, run3), axis=0)
     behav = behav[np.logical_or.reduce([behav[:,5] == x for x in [0,1]])]
-    X = zip(np.ones(len(behav)), behav[:, 1],behav[:, 2])
+    X = zip(np.ones(len(behav)), behav[:, 1], behav[:, 2])
     y = behav[:, 5]
     logreg = LogisticRegression(C=1e5)
     # C=1e5 specifies a regularization strength
@@ -34,8 +45,27 @@
     scores = cross_val_score(LogisticRegression(), X, y, 
         scoring='accuracy', cv=10)
     val_score = np.append(val_score, scores.mean())
-
+    # calculate the AUC and plot ROC curve for each subject
+    logreg_proba = logreg.predict_proba(X)
+    confusion = create_confusion(logreg_proba, y)
+    addsub = fig.add_subplot(4, 4, i)
+    addsub, ROC, AUC = plot_roc(confusion, addsub)
+    # Plot the ROC curve.
+    plt.plot(ROC[:,0], ROC[:,1], lw=2)
+    plt.xlim(-0.1,1.1)
+    plt.ylim(-0.1,1.1)
+    plt.xlabel('$FPR(t)$')
+    plt.ylabel('$TPR(t)$')
+    plt.grid()
+    plt.title('subject '+ str(i)+', AUC = %.4f'%AUC)
+    #------------------------------------------------------------------------#
+    Min_thrs = np.append(Min_thrs, getMin_thrs(confusion))
+    AUC_smr = np.append(AUC_smr, AUC)
 
 np.savetxt(pathtofolder + 'ds005/models/lambda.txt', beh_lambda)
 np.savetxt(pathtofolder + 'ds005/models/reg_score.txt', beh_score)
-np.savetxt(pathtofolder + 'ds005/models/cross_val_score.txt', val_score)
+np.savetxt(pathtofolder + 'ds005/models/cross_val_score.txt', val_score)
+np.savetxt(pathtofolder + 'ds005/models/Min_thrs.txt', Min_thrs.reshape(16,3))
+np.savetxt(pathtofolder + 'ds005/models/AUC_smr.txt', AUC_smr)
+fig.savefig(pathtofolder + 'ds005/models/roc_curve')
+
diff --git a/code/utils/make_smooth_graphs.py → code/scripts/make_smooth_graphs.py b/code/utils/make_smooth_graphs.py → code/scripts/make_smooth_graphs.py
@@ -1,6 +1,8 @@
 #import numpy as np
 import matplotlib.pyplot as plt
 import nibabel as nib
+import sys
+sys.path.append('../utils/functions')
 import smooth_gaussian
 
 #possibly put in os function to specify path later
@@ -9,6 +11,7 @@
 #script i left it in utils
 
 
+
 img = nib.load("../../data/ds005/sub001/BOLD/task001_run001/bold.nii.gz")
 data = img.get_data()
 
@@ -29,7 +32,7 @@
 ax4.set_title('fwhm = 3mm')
 
 
-plt.savefig("smoothed_images.png")
+plt.savefig("../../paper/figures/smoothed_images.png")
 plt.close()
 
 

diff --git a/code/scripts/regression_script.py b/code/scripts/regression_script.py
@@ -5,7 +5,7 @@
 import sys
 
 # Path to function
-pathtofunction = '../utils'
+pathtofunction = '../utils/functions'
 # Append path to sys
 sys.path.append(pathtofunction)
 

diff --git a/code/utils/functions/basic_util.py b/code/utils/functions/basic_util.py
@@ -0,0 +1,53 @@
+"""
+A collection of basic utility functions for use in outlier detection/graphing
+of fd, dvars, and meanSignal, see graphoutlier_functions.py
+"""
+
+import numpy as np
+import nibabel as nib
+
+
+def loadtxt_dict(file_name, fig_name):
+    """
+    Input: 
+        Txt file specified by file_name
+        Name of figure file wish to output
+
+    Output:
+        Dictonary that attached to file_name the np array after loading 
+    file_name
+    """
+    data = np.loadtxt(file_name)
+    dict_out = {fig_name: data}
+    return dict_out
+
+def loadnib_dict(file_name, fig_name):
+    """
+    Input: 
+        Bold file (bold.nii, bold.nii.gz) specified by file_name 
+        Name of figure file wish to output
+
+    Output:
+    Dictonary that attached to file_name the np array after loading file_name
+    """
+    img = nib.load(file_name)
+    data = img.get_data()
+    dict_out = {fig_name: data}
+    return dict_out
+
+
+# Calculate mean
+def vol_mean(data):
+    """ Return mean across voxels for $D `data`
+
+    Input:
+        np array of data
+    Output: np array of dim (T,)
+        mean of data across all but the last dimension
+    """
+    mean_list = []
+    # Loop over the each volume and outputs the mean of each dimension
+    for i in range(data.shape[-1]):
+        mean = np.mean(data[...,i])
+        mean_list.append(mean)
+    return np.asarray(mean_list)
diff --git a/code/utils/behavtask_tr.py → code/utils/functions/behavtask_tr.py b/code/utils/behavtask_tr.py → code/utils/functions/behavtask_tr.py
@@ -70,10 +70,3 @@ def events2neural_extend(behav_task, tr, n_trs):
         time_course[on:on + dur,9] = RT
     return time_course
 
-def plot_time_course(time_course):
-    """
-    Simple function to plot time_course, an array from return of 
-    events2neural_extend
-    """
-    plt.plot(time_course[:,0])    
-    plt.show()
diff --git a/code/utils/lme_functions.py → code/utils/functions/lme_functions.py b/code/utils/lme_functions.py → code/utils/functions/lme_functions.py
@@ -2,18 +2,9 @@
 from statsmodels.regression.mixed_linear_model import MixedLM
 from scipy import stats  
 import pandas as pd
-import matplotlib.pyplot as plt
-import nibabel as nib
 import numpy as np
-from behavtask_tr import events2neural_extend, merge_cond
-from regression_functions import hrf, getRegressor, calcBeta, calcMRSS, deleteOutliers
-import os
-from scipy.stats import gamma
-import math
-import numpy.linalg as npl
-import json
 
-def calcBetaLme(data_full, gain_full, loss_full, linear_full, quad_full, run_group, thrshd):
+def calcBetaLme(data_full, gain_full, loss_full, linear_full, quad_full, run_group, thrshd=None):
     """ 
     function to calculate beta parameters.
     Input: data from bold file, two list of gain, loss regressor values
@@ -27,15 +18,24 @@ def calcBetaLme(data_full, gain_full, loss_full, linear_full, quad_full, run_gro
     fml = "bold ~ gain + loss"
     for k in np.arange(0,time_by_vox.shape[1]):
         ## set a threshold to idenfity the voxels inside the brain
-        if (np.mean(time_by_vox[:,k]) <= 400):
-            beta[k, :] = [0, 0, 0, 0, 0]
+        if thrshd != None:
+            if (np.mean(time_by_vox[:,k]) <= thrshd):
+                beta[k, :] = [0, 0, 0, 0, 0]
+            else:
+                dt = pd.DataFrame({'gain':gain_full,'loss':loss_full,'run_group':run_group,
+                              'ldrift':linear_full,'qdrift':quad_full,'bold':time_by_vox[:,k]})
+                mod_lme = MixedLM.from_formula(fml, dt, groups=dt["run_group"])
+                lme_result = mod_lme.fit()
+                beta[k, :] = [lme_result.fe_params["gain"], lme_result.pvalues["gain"], 
+                      lme_result.fe_params["loss"], lme_result.pvalues["loss"], lme_result.converged]
         else:
             dt = pd.DataFrame({'gain':gain_full,'loss':loss_full,'run_group':run_group,
-                              'ldrift':linear_full,'qdrift':quad_full,'bold':time_by_vox[:,k]})
+                          'ldrift':linear_full,'qdrift':quad_full,'bold':time_by_vox[:,k]})
             mod_lme = MixedLM.from_formula(fml, dt, groups=dt["run_group"])
             lme_result = mod_lme.fit()
             beta[k, :] = [lme_result.fe_params["gain"], lme_result.pvalues["gain"], 
-                      lme_result.fe_params["loss"], lme_result.pvalues["loss"], lme_result.converged]
+                  lme_result.fe_params["loss"], lme_result.pvalues["loss"], lme_result.converged]
+
     return beta
 
 
@@ -55,7 +55,7 @@ def calcSigProp(beta, sig_level):
     return sig_gain_prop, sig_loss_prop
 
 
-def calcAnov(data_full, run_group):
+def calcAnov(data_full, run_group, thrshold = None):
     """ 
     function to do ANOVA test between runs
     Input: data from bold file, dummy variable indicating the groups
@@ -66,23 +66,30 @@ def calcAnov(data_full, run_group):
     anov_test = np.empty([time_by_vox.shape[1],2])
     for k in np.arange(0,time_by_vox.shape[1]):
         ## set a threshold to idenfity the voxels inside the brain
-        if (np.mean(time_by_vox[:,k]) <= 400):
-            anov_test[k, :] = [0, 0]
+        if thrshold != None:
+            if (np.mean(time_by_vox[:,k]) <= thrshold):
+                anov_test[k, :] = [0, 0]
+            else:
+                anov_test[k, :]  = stats.f_oneway(time_by_vox[:,k][run_group==1], 
+                                                            time_by_vox[:,k][run_group==2],
+                                                            time_by_vox[:,k][run_group==3])
         else:
             anov_test[k, :]  = stats.f_oneway(time_by_vox[:,k][run_group==1], 
-                                                            time_by_vox[:,k][run_group==2],
-                                                            time_by_vox[:,k][run_group==3])  
+                                                        time_by_vox[:,k][run_group==2],
+                                                        time_by_vox[:,k][run_group==3])
+
     return anov_test
 
 
 def anovStat(anov_test):
     """ 
-    function to do ANOVA test between runs
-    Input: data from bold file, dummy variable indicating the groups
-    Output: F test value and p value of anova test
+    function to do check proportions of p-values not equal to 0 and significant
+
+    Input: Result fo calcAnov: array of size (k,2) where k is number of voxels not equal 0
+    Output: Proportions of p-values(voxels) significant (but not 0)
     """    
     mask = anov_test[:, 1] != 0
     nzcount = mask.sum()
     p_value = anov_test[mask, 1]
-    prop_sig = np.sum(p_value <= 0.05/nzcount)/nzcount
+    prop_sig = np.sum(p_value <= 0.05/nzcount)/nzcount # Bonferroni correction
     return prop_sig