Merge branch 'dev' of https://github.com/berkeley-stat159/project-theta…

… into placeholder
berkeley-stat159 · Dec 12, 2015 · 40788a2 · 40788a2
2 parents c66ac0a + 349e1b8
commit 40788a2
Show file tree

Hide file tree

Showing 7 changed files with 203 additions and 32 deletions.
diff --git a/code/scripts/lme_script.py b/code/scripts/lme_script.py
@@ -1,14 +1,6 @@
 from __future__ import division
-from statsmodels.regression.mixed_linear_model import MixedLM
-from scipy import stats  
-import pandas as pd
-import matplotlib.pyplot as plt
 import nibabel as nib
 import numpy as np
-import os
-from scipy.stats import gamma
-import math
-import numpy.linalg as npl
 import json
 import sys
 
@@ -26,7 +18,6 @@
 tr_times = np.arange(0, 30, TR)
 hrf_at_trs = hrf(tr_times)
 
-
 pathtofolder = '../../data/'
 
 dvars_out = json.load(open(pathtofolder + "dvarsOutliers.txt"))
@@ -75,7 +66,7 @@
     sig_gain_prop[i-1], sig_loss_prop[i-1] = calcSigProp(beta, sig_level)
     write=pathtofolder + 'ds005/sub0'+str(i).zfill(2)+'/model/model001/onsets/sub0'+str(i).zfill(2)+'_lme_beta.txt'
     np.savetxt(write, beta)
-    anov_test = calcAnov(data_full, run_group)
+    anov_test = calcAnov(data_full, run_group, thrshd)
     anov_prop[i-1] = anovStat(anov_test)
 
 write=pathtofolder + 'ds005/models/lme_sig_gain_prop.txt'

diff --git a/code/utils/make_smooth_graphs.py → code/scripts/make_smooth_graphs.py b/code/utils/make_smooth_graphs.py → code/scripts/make_smooth_graphs.py
@@ -1,6 +1,8 @@
 #import numpy as np
 import matplotlib.pyplot as plt
 import nibabel as nib
+import sys
+sys.path.append('../utils')
 import smooth_gaussian
 
 #possibly put in os function to specify path later
@@ -9,6 +11,7 @@
 #script i left it in utils
 
 
+
 img = nib.load("../../data/ds005/sub001/BOLD/task001_run001/bold.nii.gz")
 data = img.get_data()
 
@@ -29,7 +32,7 @@
 ax4.set_title('fwhm = 3mm')
 
 
-plt.savefig("smoothed_images.png")
+plt.savefig("../../paper/figures/smoothed_images.png")
 plt.close()
 
 

diff --git a/code/utils/lme_functions.py b/code/utils/lme_functions.py
@@ -2,18 +2,11 @@
 from statsmodels.regression.mixed_linear_model import MixedLM
 from scipy import stats  
 import pandas as pd
-import matplotlib.pyplot as plt
-import nibabel as nib
 import numpy as np
 from behavtask_tr import events2neural_extend, merge_cond
 from regression_functions import hrf, getRegressor, calcBeta, calcMRSS, deleteOutliers
-import os
-from scipy.stats import gamma
-import math
-import numpy.linalg as npl
-import json
 
-def calcBetaLme(data_full, gain_full, loss_full, linear_full, quad_full, run_group, thrshd):
+def calcBetaLme(data_full, gain_full, loss_full, linear_full, quad_full, run_group, thrshd=None):
     """ 
     function to calculate beta parameters.
     Input: data from bold file, two list of gain, loss regressor values
@@ -27,15 +20,24 @@ def calcBetaLme(data_full, gain_full, loss_full, linear_full, quad_full, run_gro
     fml = "bold ~ gain + loss"
     for k in np.arange(0,time_by_vox.shape[1]):
         ## set a threshold to idenfity the voxels inside the brain
-        if (np.mean(time_by_vox[:,k]) <= 400):
-            beta[k, :] = [0, 0, 0, 0, 0]
+        if thrshd != None:
+            if (np.mean(time_by_vox[:,k]) <= thrshd):
+                beta[k, :] = [0, 0, 0, 0, 0]
+            else:
+                dt = pd.DataFrame({'gain':gain_full,'loss':loss_full,'run_group':run_group,
+                              'ldrift':linear_full,'qdrift':quad_full,'bold':time_by_vox[:,k]})
+                mod_lme = MixedLM.from_formula(fml, dt, groups=dt["run_group"])
+                lme_result = mod_lme.fit()
+                beta[k, :] = [lme_result.fe_params["gain"], lme_result.pvalues["gain"], 
+                      lme_result.fe_params["loss"], lme_result.pvalues["loss"], lme_result.converged]
         else:
             dt = pd.DataFrame({'gain':gain_full,'loss':loss_full,'run_group':run_group,
-                              'ldrift':linear_full,'qdrift':quad_full,'bold':time_by_vox[:,k]})
+                          'ldrift':linear_full,'qdrift':quad_full,'bold':time_by_vox[:,k]})
             mod_lme = MixedLM.from_formula(fml, dt, groups=dt["run_group"])
             lme_result = mod_lme.fit()
             beta[k, :] = [lme_result.fe_params["gain"], lme_result.pvalues["gain"], 
-                      lme_result.fe_params["loss"], lme_result.pvalues["loss"], lme_result.converged]
+                  lme_result.fe_params["loss"], lme_result.pvalues["loss"], lme_result.converged]
+
     return beta
 
 
@@ -55,7 +57,7 @@ def calcSigProp(beta, sig_level):
     return sig_gain_prop, sig_loss_prop
 
 
-def calcAnov(data_full, run_group):
+def calcAnov(data_full, run_group, thrshold = None):
     """ 
     function to do ANOVA test between runs
     Input: data from bold file, dummy variable indicating the groups
@@ -66,23 +68,30 @@ def calcAnov(data_full, run_group):
     anov_test = np.empty([time_by_vox.shape[1],2])
     for k in np.arange(0,time_by_vox.shape[1]):
         ## set a threshold to idenfity the voxels inside the brain
-        if (np.mean(time_by_vox[:,k]) <= 400):
-            anov_test[k, :] = [0, 0]
+        if thrshold != None:
+            if (np.mean(time_by_vox[:,k]) <= thrshold):
+                anov_test[k, :] = [0, 0]
+            else:
+                anov_test[k, :]  = stats.f_oneway(time_by_vox[:,k][run_group==1], 
+                                                            time_by_vox[:,k][run_group==2],
+                                                            time_by_vox[:,k][run_group==3])
         else:
             anov_test[k, :]  = stats.f_oneway(time_by_vox[:,k][run_group==1], 
-                                                            time_by_vox[:,k][run_group==2],
-                                                            time_by_vox[:,k][run_group==3])  
+                                                        time_by_vox[:,k][run_group==2],
+                                                        time_by_vox[:,k][run_group==3])
+
     return anov_test
 
 
 def anovStat(anov_test):
     """ 
-    function to do ANOVA test between runs
-    Input: data from bold file, dummy variable indicating the groups
-    Output: F test value and p value of anova test
+    function to do check proportions of p-values not equal to 0 and significant
+
+    Input: Result fo calcAnov: array of size (k,2) where k is number of voxels not equal 0
+    Output: Proportions of p-values(voxels) significant (but not 0)
     """    
     mask = anov_test[:, 1] != 0
     nzcount = mask.sum()
     p_value = anov_test[mask, 1]
-    prop_sig = np.sum(p_value <= 0.05/nzcount)/nzcount
+    prop_sig = np.sum(p_value <= 0.05/nzcount)/nzcount # Bonferroni correction
     return prop_sig
diff --git a/code/utils/tests/test_lme_functions.py b/code/utils/tests/test_lme_functions.py
@@ -0,0 +1,120 @@
+"""
+Test lme_function module the following functions:
+    calcAnov
+    calcBetaLme
+    calcSigProp
+    anovStat
+
+Run with::
+    **Run from project-theta directory or code directory  with 'make test'
+
+"""
+# Loading modules.
+from __future__ import absolute_import, division, print_function
+import numpy as np
+import sys
+from scipy import stats
+from sklearn import linear_model
+from numpy.testing import assert_almost_equal, assert_allclose
+
+
+# Append function path
+sys.path.append('..')
+
+# Path to the first subject, first run, this is used as the test data for 
+# getGainLoss:
+pathtotest = 'code/utils/tests/' 
+
+# Load graph_functions:
+from lme_functions import calcBetaLme, calcSigProp, calcAnov, anovStat
+
+
+
+def test_calcBetaLme():
+    # Test data with large n = 1500
+    X = np.ones((2000, 4))
+    X[:, 0] = np.random.normal(0, 1, 2000)
+    X[:, 1] = np.random.normal(2, 2, 2000)
+    X[:, 2] = np.linspace(-1,1,2000)
+    X[:, 3] = X[:,2]**2
+    Y = np.random.normal(3,1,2000)
+    # Create linear regression object
+    regr = linear_model.LinearRegression()
+    # Train the model using the training sets
+    regr.fit(X, Y)
+    test_betas = regr.coef_
+    # My function, should produce same results if groups are all the same:
+    lme = calcBetaLme(Y, X[:,0], X[:,1], X[:,2], X[:,3], np.repeat(1,2000))
+    # Compare betas
+    my_betas = lme.ravel()[[0,2]]
+    assert max(abs(my_betas - test_betas[:2])) < 0.005
+
+def test_calcSigProp():
+    # Set up test betas
+    t_beta = np.array([[0, 0, 0, 0, 0], [0.4, 0.03, 0.1, 1, 0.17], [2, 0.1, 0.09, 0.2, 0.88], [1, 1.2, 0.9, 0.4, 0.51], [0.31, 0.22, 0.16, 0.05, 0.02]])
+    sig_level = 0.1
+    # Get significant level of 2nd column after reshaping to (5,5)
+    # 0, 0.03, 0.1, 1.2, 0.22
+    notzero_beta = t_beta[t_beta !=0].reshape(-1,5)
+    t_psig_gain = sum(notzero_beta[:,1]<=sig_level)/len(notzero_beta[:,1])
+    # Get significant level of 4th column 
+    # 0, 1, 0.2, 0.4, 0.05
+    t_psig_loss = sum(notzero_beta[:,3]<=sig_level)/len(notzero_beta[:,3]) 
+
+   # My function
+    my_psig_gain, my_psig_loss = calcSigProp(t_beta, sig_level)
+
+    # Assert
+    assert_almost_equal(my_psig_gain, t_psig_gain)
+    assert_almost_equal(my_psig_loss, t_psig_loss)
+
+def test_calcAnova():
+    # Test dataset
+    t_data = np.reshape(np.random.normal(0,1,8), (1,1,1,8))
+    run_group = np.array([1, 2, 1, 3, 3, 2, 2, 1])
+    # split into runs
+    d1 = np.reshape(t_data, (-1, 8)).T[:,0][run_group == 1]
+    d2 = np.reshape(t_data, (-1, 8)).T[:,0][run_group == 2]
+    d3 = np.reshape(t_data, (-1, 8)).T[:,0][run_group == 3]
+    groups = np.array([d1,d2,d3])
+    def ANOVA(G):
+        # variation within groups
+        SSD_W = 0
+        for g in G:
+            SSD_W += np.sum([(i-np.mean(g))**2 for i in g])
+        # a bit awkward, just flattening the list of lists
+        # to get the mean and N
+        T = list()
+        for g in G:
+            T.extend(g)
+        m = np.mean(T)
+
+        # variation between groups (X for 'cross')
+        SSD_X = 0
+        for g in G:
+            SSD_X += len(g)*(np.mean(g)-m)**2
+        N = len(T)  
+        k = len(G)  
+        MS_W = SSD_W*1.0/(N-k)
+        MS_X = SSD_X*1.0/(k-1)
+        F_stat = MS_X/MS_W
+        pval = 1-stats.f.cdf(F_stat, k-1, N-k)
+        return np.array([F_stat, pval])
+
+    test_anova = ANOVA(groups)
+    # My function
+    my_anova = calcAnov(t_data, run_group).ravel()
+
+    # Assert
+    assert_allclose(test_anova, my_anova)
+
+def test_anovStat():
+    # create a test dataset
+    t_data = np.reshape(np.array([2, 0.1, 3, 0.04, 2.1, 0.01, 0, 0, 1.2, 0.05, 2.2, 2]), (-1, 2))
+    # Significance level chosen to be 0.05, 0.05/5 = 0.01 after bonferroni correction
+    # Should give: 0.01 significant out of 0.1, 0.04, 0.01, 0.05, 2
+    test_prop = 1/5
+    # my function
+    my_prop = anovStat(t_data)
+    # Assert
+    assert_allclose(test_prop, my_prop)
diff --git a/code/utils/tests/test_smoothing.py b/code/utils/tests/test_smoothing.py
@@ -0,0 +1,46 @@
+"""
+Test smooth_gaussian module the following functions:
+    fwhm2sigma
+    smooth_time_series
+    smooth_spatial
+
+Run with::
+    **Run from project-theta directory or code directory  with 'make test'
+
+"""
+# Loading modules.
+from __future__ import absolute_import, division, print_function
+import numpy as np
+import sys
+from numpy.testing import assert_allclose
+
+# Append function path
+sys.path.append('..')
+
+# Path to the first subject, first run, this is used as the test data for 
+# getGainLoss:
+pathtotest = 'code/utils/tests/' 
+
+# Load graph_functions:
+from smooth_gaussian import fwhm2sigma, smooth_spatial, smooth_time_series
+
+def test_smooth():
+    # Test data:
+    t_data = np.reshape(np.random.normal(0,1,64), (2,2,4,4))
+    # No smooth
+    fwhm_0 = 0
+    t_fwhm0 = fwhm2sigma(fwhm_0)
+    my_no_ssmooth = smooth_spatial(t_data, t_fwhm0)
+    my_no_tsmooth = smooth_time_series(t_data, t_fwhm0)
+    # Assert the same of as original data
+    assert_allclose(t_data, my_no_ssmooth)
+    assert_allclose(t_data, my_no_tsmooth)
+
+    # Now with smoothing
+    fwhm_1 = 5
+    t_fwhm1 = fwhm2sigma(fwhm_1)
+    my_ssmooth = smooth_spatial(t_data, t_fwhm1)
+    my_tsmooth = smooth_time_series(t_data, t_fwhm1)
+    assert (t_data != my_ssmooth).all()
+    assert (t_data !=  my_tsmooth).all()
+
diff --git a/paper/figures/smoothed_images.png b/paper/figures/smoothed_images.png
diff --git a/requirements.txt b/requirements.txt
@@ -4,3 +4,5 @@ scipy==0.16.0
 matplotlib==1.4.3
 nibabel==2.0.1
 scikit_learn==0.15.2
+pandas==0.17.0
+statsmodels==0.6.1