From 6dfdaee7df819a88e6d33964cd3c885b6ac15184 Mon Sep 17 00:00:00 2001 From: Timothy Yu Date: Sat, 12 Dec 2015 20:03:11 -0800 Subject: [PATCH 1/7] updated ilnear_regression --- .../{ => functions}/linear_regression.py | 111 +++++++++++++++++- 1 file changed, 108 insertions(+), 3 deletions(-) rename code/utils/{ => functions}/linear_regression.py (66%) diff --git a/code/utils/linear_regression.py b/code/utils/functions/linear_regression.py similarity index 66% rename from code/utils/linear_regression.py rename to code/utils/functions/linear_regression.py index 24cfee5..bf87852 100644 --- a/code/utils/linear_regression.py +++ b/code/utils/functions/linear_regression.py @@ -2,6 +2,8 @@ import pandas as pd +import sys, os +sys.path.append(os.path.join(os.path.dirname(__file__), "./")) #import statsmodels.api as sm import statsmodels.formula.api as smf import pylab as pl @@ -11,6 +13,8 @@ from scipy.stats import t import numpy.linalg as npl import math +import logistic_reg +from logistic_reg import * @@ -41,7 +45,7 @@ def load_data(subject, data_dir = "/Users/macbookpro/Desktop/stat159_Project/"): except IOError: - print "Can't find files in such directory! Please enter the directory where you store ds005 dataset!" + print ("Can't find files in such directory! Please enter the directory where you store ds005 dataset!") return run_1=run1.append(run2) @@ -106,6 +110,25 @@ def combine_all_data(data_dir = "/Users/macbookpro/Desktop/stat159_Project/"): return (all_data) + + +def load_each_subject(data_dir = "/Users/macbookpro/Desktop/stat159_Project/"): + + all_subjects = ['001', '002', '003', '004', '005', '006', '007', + '008', '009', '010', '011', '012', '013', '014', '015', '016'] + + l = [] + + for i in all_subjects: + l.append(load_data(i, data_dir)) + + for i in range(len(all_subjects)): + l[i]['ratio'] = l[i]['gain'] / l[i]['loss'] + + return l + + + """ To combine all the behavioral data @@ -163,9 +186,9 @@ def linear_regression(data, y, *arg): for i in range(1,p): print('==============================================================') print(arg[i-1]) - print 'Coefficient: ' + str(beta[i]), 'p-value: ' + str(pvalues[i]) + print ('Coefficient: ' + str(beta[i]), 'p-value: ' + str(pvalues[i])) - return + return beta, pvalues """ def linear_regression_RT_with_gain_and_loss(data): @@ -230,7 +253,89 @@ def linear_regression_fast(data, formula): +def simple_regression_plot(data, dep_var, exp_var): + y = data[dep_var] + x = data[exp_var] + n = len(data) + # Design X matrix + X = np.column_stack((np.ones(n), x)) + # Get the beta + B = npl.pinv(X).dot(y) + # Get the regression line + x_vals = [0, max(x)] + y_vals = [my_line(0), my_line(max(x))] + # Plot the simple linear regression + plt.plot(x, y, '+') + plt.xlabel('ratio (gain/loss)') + plt.ylabel('Response Time') + plt.plot(x_vals, y_vals) + plt.title('Ratio vs Response Time with predicted line') + return + + + +def beta_statistics(): + + data = load_each_subject() + + betas = [0] * len(data) + pvalues = [0] * len(data) + + for i in range(len(data)): + betas[i], pvalues[i] = linear_regression(data[i], 'RT', 'gain', 'loss') + + lambdas = [] + for i in range(len(data)): + lambdas.append( math.log(betas[i][2] / betas[i][1]) ) + + return + +def plot_neural_and_behav_loss_aversion(data, beta = None): + + all_subjects = ['001', '002', '003', '004', '005', '006', '007', + '008', '009', '010', '011', '012', '013', '014', '015', '016'] + + lambdas = [] + loss_aversion = [] + + for i in range(len(all_subjects)): + + a = add_gainlossratio(data[i]) + b = organize_columns(a) + x = logit("respcat ~ gain + loss", b).fit() + logit_pars = x.params + ratio = -logit_pars['loss'] / logit_pars['gain'] + lambdas.append( math.log(ratio) ) + loss_aversion.append( (-logit_pars['loss']) - logit_pars['gain'] ) # This will be changed! + + + X = np.column_stack((np.ones(16), loss_aversion)) + + + + B = npl.pinv(X).dot(lambdas) + + def my_line(x, B = B): + # Best prediction + return B[0] + B[1] * x + + x_vals = [0, max(loss_aversion)] + y_vals = [my_line(0), my_line(max(loss_aversion))] + + plt.plot(loss_aversion, lambdas, '+') + plt.plot(x_vals, y_vals) + + #plt.xlabel('negative loss beta - gain beta') + #plt.ylabel('log of lambda') + + plt.title("Scatterplot of correspondence between neural \nloss aversion and behavioral loss aversion") + plt.xlabel(r'Neural loss aversion [-($\beta[loss]) - \beta[gain]$]') + plt.ylabel(r'Behavioral loss aversion [ln($\lambda)$]') + + plt.show() + + return From e9793b9c499b2ae390cc985f9e1f269d0dffbc35 Mon Sep 17 00:00:00 2001 From: Timothy Yu Date: Sat, 12 Dec 2015 20:03:53 -0800 Subject: [PATCH 2/7] added test file for lin-reg --- code/utils/tests/test_linear_regression.py | 74 ++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 code/utils/tests/test_linear_regression.py diff --git a/code/utils/tests/test_linear_regression.py b/code/utils/tests/test_linear_regression.py new file mode 100644 index 0000000..9008c3c --- /dev/null +++ b/code/utils/tests/test_linear_regression.py @@ -0,0 +1,74 @@ +""" + +Test linear_regression.py + +This is a test module. + +It is designed to be run the with the "nose" testing package (via the +"nosetests" script. + +Nose will look for any functions with "test" in their names, and run them. + +Nose reports any errors, or any failures. + +So we use the tests to check that the results of our function are (still) as we +expect. + + +""" + + +# Python 3 compatibility +from __future__ import absolute_import, division, print_function +import pandas as pd +import numpy as np +from numpy.testing import assert_almost_equal +import sys, os, pdb + +#Specicy the path for functions +sys.path.append(os.path.join(os.path.dirname(__file__), "../functions/")) +import linear_regression +from linear_regression import * + + + + +def test_linear_regression(): + + + # Create a data frame + d = {'y' : pd.Series([95, 85, 80, 75, 70, 65, 60, 55, 50, 45], index=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']), + 'x1' : pd.Series([85, 95, 70, 65, 70, 60, 64, 60, 51, 49], index=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']), + 'x2' : pd.Series([10, 8.8, 8.4, 7.5, 7.4, 7.2, 7.0, 6.4, 5.3, 4], index=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']), + } + + + df = pd.DataFrame(d) + + + beta1, pvalues1 = linear_regression(df, 'y', 'x1') + beta2, pvalues2 = linear_regression(df, 'y', 'x2') + beta3, pvalues3 = linear_regression(df, 'y', 'x1', 'x2') + + + expected_beta1 = np.array([ 0.69128736, 1.00610931]) # Calculated by hands + expected_p1 = np.array([0.95669000991385234, 0.00082441892685309844]) # Calculated by hands + expected_beta2 = np.array([ 2.92830189, 9.03773585]) # Calculated by hands + expected_p2 = [0.64660353670191761, 1.2010523101013017e-05] # Calculated by hands + expected_beta3 = np.array([-0.01554384, 0.20355359, 7.55525124]) # Calculated by hands + expected_p3 = np.array([0.99826544217405555, 0.37237722050579208, 0.0049816157477362418]) # Calculated by hands + + assert_almost_equal(expected_beta1, beta1) + assert_almost_equal(expected_p1, pvalues1) + assert_almost_equal(expected_beta2, beta2) + assert_almost_equal(expected_p2, pvalues2) + assert_almost_equal(expected_beta3, beta3) + assert_almost_equal(expected_p3, pvalues3) + + + + + + + + From 2120cb95b7941ae5ac676ef3073b5650806c4eee Mon Sep 17 00:00:00 2001 From: Timothy Yu Date: Sun, 13 Dec 2015 13:25:16 -0800 Subject: [PATCH 3/7] updated plot function --- code/utils/functions/linear_regression.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/code/utils/functions/linear_regression.py b/code/utils/functions/linear_regression.py index bf87852..79f7901 100644 --- a/code/utils/functions/linear_regression.py +++ b/code/utils/functions/linear_regression.py @@ -323,6 +323,9 @@ def my_line(x, B = B): x_vals = [0, max(loss_aversion)] y_vals = [my_line(0), my_line(max(loss_aversion))] + fig = plt.figure() + ax = fig.add_subplot(111) + plt.plot(loss_aversion, lambdas, '+') plt.plot(x_vals, y_vals) @@ -333,6 +336,10 @@ def my_line(x, B = B): plt.xlabel(r'Neural loss aversion [-($\beta[loss]) - \beta[gain]$]') plt.ylabel(r'Behavioral loss aversion [ln($\lambda)$]') + for xy in zip(loss_aversion, lambdas): + ax.annotate('(%s, %s)' % xy, xy=xy, textcoords='offset points') + + plt.grid() plt.show() return From 267d2f31b3578ad4366f5283cb2bf382aaf32adc Mon Sep 17 00:00:00 2001 From: Timothy Yu Date: Sun, 13 Dec 2015 15:14:06 -0800 Subject: [PATCH 4/7] update test files for lin --- code/utils/functions/linear_regression.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/code/utils/functions/linear_regression.py b/code/utils/functions/linear_regression.py index 79f7901..719db59 100644 --- a/code/utils/functions/linear_regression.py +++ b/code/utils/functions/linear_regression.py @@ -328,17 +328,11 @@ def my_line(x, B = B): plt.plot(loss_aversion, lambdas, '+') plt.plot(x_vals, y_vals) - - #plt.xlabel('negative loss beta - gain beta') - #plt.ylabel('log of lambda') plt.title("Scatterplot of correspondence between neural \nloss aversion and behavioral loss aversion") plt.xlabel(r'Neural loss aversion [-($\beta[loss]) - \beta[gain]$]') plt.ylabel(r'Behavioral loss aversion [ln($\lambda)$]') - for xy in zip(loss_aversion, lambdas): - ax.annotate('(%s, %s)' % xy, xy=xy, textcoords='offset points') - plt.grid() plt.show() From 0c9a0a7735e1c057dde3360cb00822b4ab85cedc Mon Sep 17 00:00:00 2001 From: Timothy Yu Date: Sun, 13 Dec 2015 15:55:54 -0800 Subject: [PATCH 5/7] rewrite functions for linear regression --- code/utils/functions/linear_regression.py | 129 +----------------- .../utils/scripts/linear_regression_script.py | 71 +++++++++- 2 files changed, 77 insertions(+), 123 deletions(-) diff --git a/code/utils/functions/linear_regression.py b/code/utils/functions/linear_regression.py index 719db59..50da141 100644 --- a/code/utils/functions/linear_regression.py +++ b/code/utils/functions/linear_regression.py @@ -19,7 +19,6 @@ - """ Parameters: @@ -34,19 +33,13 @@ def load_data(subject, data_dir = "/Users/macbookpro/Desktop/stat159_Project/"): # Get the directory where data is stored - data_location= data_dir + 'ds005/sub' + subject - - try: + data_location = data_dir + 'ds005/sub' + subject - # Load the data for each run - run1 = pd.read_table(data_location+"/behav/task001_run001/behavdata.txt") - run2 = pd.read_table(data_location+"/behav/task001_run002/behavdata.txt") - run3 = pd.read_table(data_location+"/behav/task001_run003/behavdata.txt") + # Load the data for each run + run1 = pd.read_table(data_location+"/behav/task001_run001/behavdata.txt") + run2 = pd.read_table(data_location+"/behav/task001_run002/behavdata.txt") + run3 = pd.read_table(data_location+"/behav/task001_run003/behavdata.txt") - except IOError: - - print ("Can't find files in such directory! Please enter the directory where you store ds005 dataset!") - return run_1=run1.append(run2) run_total=run_1.append(run3) #append all the data frames of run @@ -54,83 +47,9 @@ def load_data(subject, data_dir = "/Users/macbookpro/Desktop/stat159_Project/"): return(run_total) -""" -To combine all the behavioral data - -Parameters: - - data_dir: The working directory that you store your data - -Return: - - all_data: the behavioral data that contains 16 subjects, each subject has 3 runs """ -def combine_all_data(data_dir = "/Users/macbookpro/Desktop/stat159_Project/"): - - # Get all the subjects - all_subjects = ['001', '002', '003', '004', '005', '006', '007', - '008', '009', '010', '011', '012', '013', '014', '015', '016'] - - all_data = [] - - # Loop for 16 subjects to get the data - for i in all_subjects: - temp = load_data(i, data_dir) - - # if there's no such dataset, then stop running the loop and return nothing - if temp is None: - return - - # combine all the data - all_data.append(temp) - - # Concat the data - all_data = pd.concat(all_data) - - # Calculate the difference for future use - all_data['diff'] = all_data['gain'] - all_data['loss'] - - # Calculate the ratio for future use - all_data['ratio'] = all_data['gain'] / all_data['loss'] - - - all_data['log_gain'] = np.log(all_data['gain']) - - all_data['log_loss'] = np.log(all_data['loss']) - - all_data['log_diff'] = np.log(all_data['diff']) - - all_data['log_ratio'] = np.log(all_data['ratio']) - - # # Remove the rows that have respcat == -1 - #(meaning that the individual chooses to not participate the activity) - all_data = all_data[np.logical_not(all_data['respcat'] == -1)] - - return (all_data) - - - - -def load_each_subject(data_dir = "/Users/macbookpro/Desktop/stat159_Project/"): - - all_subjects = ['001', '002', '003', '004', '005', '006', '007', - '008', '009', '010', '011', '012', '013', '014', '015', '016'] - - l = [] - - for i in all_subjects: - l.append(load_data(i, data_dir)) - - for i in range(len(all_subjects)): - l[i]['ratio'] = l[i]['gain'] / l[i]['loss'] - - return l - - - -""" -To combine all the behavioral data +To perform linear regression Parameters: @@ -239,20 +158,6 @@ def linear_regression_RT_with_gain_and_loss(data): """ - -def linear_regression_fast(data, formula): - #plt.plot(data['diff'], data['RT'], '+') - # It seems like there's no siginificant relationship between diff and Response time - # When we do the regression, it shows that difference between gain and loss does not really matter either. - - - est = smf.ols(formula = formula, data=data).fit() - est.summary() - - return est - - - def simple_regression_plot(data, dep_var, exp_var): y = data[dep_var] x = data[exp_var] @@ -274,27 +179,7 @@ def simple_regression_plot(data, dep_var, exp_var): -def beta_statistics(): - - data = load_each_subject() - - betas = [0] * len(data) - pvalues = [0] * len(data) - - for i in range(len(data)): - betas[i], pvalues[i] = linear_regression(data[i], 'RT', 'gain', 'loss') - - lambdas = [] - - for i in range(len(data)): - lambdas.append( math.log(betas[i][2] / betas[i][1]) ) - - return - -def plot_neural_and_behav_loss_aversion(data, beta = None): - - all_subjects = ['001', '002', '003', '004', '005', '006', '007', - '008', '009', '010', '011', '012', '013', '014', '015', '016'] +def plot_neural_and_behav_loss_aversion(all_subjects, data, beta = None): lambdas = [] loss_aversion = [] diff --git a/code/utils/scripts/linear_regression_script.py b/code/utils/scripts/linear_regression_script.py index fde7320..227229c 100644 --- a/code/utils/scripts/linear_regression_script.py +++ b/code/utils/scripts/linear_regression_script.py @@ -5,7 +5,74 @@ from linear_regression import * # Get the data -data = combine_all_data() +all_subjects= ['001', '002' ,'003', '004', '005', '006', '007', '008', '009', '010', '011', '012', '013', '014', '015', '016'] +data_dir = "/Users/macbookpro/Desktop/stat159_Project/" + + + +######################## +# combine all the data # +######################## + + +all_data = [] + +# Loop for 16 subjects to get the data +for i in all_subjects: + temp = load_data(i, data_dir) + # if there's no such dataset, then stop running the loop and return nothing + if temp is None: + return + + + +all_data.append(temp) + +# Concat the data +all_data = pd.concat(all_data) + +# Calculate the difference for future use +all_data['diff'] = all_data['gain'] - all_data['loss'] + +# Calculate the ratio for future use +all_data['ratio'] = all_data['gain'] / all_data['loss'] + +all_data['log_gain'] = np.log(all_data['gain']) + +all_data['log_loss'] = np.log(all_data['loss']) + +all_data['log_diff'] = np.log(all_data['diff']) + +all_data['log_ratio'] = np.log(all_data['ratio']) + +# # Remove the rows that have respcat == -1 +#(meaning that the individual chooses to not participate the activity) +all_data = all_data[np.logical_not(all_data['respcat'] == -1)] + + +######################## +# Load each subject # +######################## + +data_each = [] + +for i in all_subjects: + data_each.append(load_data(i, data_dir)) + +for i in range(len(all_subjects)): + data_each[i]['ratio'] = data_each[i]['gain'] / data_each[i]['loss'] + + + + + + + +####################### +# Peform regression # +####################### + +data = all_data # Run the linear_regression function to get the summary linear_regression(data, 'RT', 'gain', 'loss') @@ -13,3 +80,5 @@ linear_regression(data, 'RT', 'ratio') linear_regression(data, 'RT', 'diff') + + From e73a674f6b438256c9aa06ea4b063dac22da7be4 Mon Sep 17 00:00:00 2001 From: Timothy Yu Date: Sun, 13 Dec 2015 16:06:45 -0800 Subject: [PATCH 6/7] updated test files --- code/utils/functions/linear_regression.py | 3 ++- code/utils/scripts/linear_regression_script.py | 15 +++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/code/utils/functions/linear_regression.py b/code/utils/functions/linear_regression.py index 50da141..99bfab3 100644 --- a/code/utils/functions/linear_regression.py +++ b/code/utils/functions/linear_regression.py @@ -155,7 +155,7 @@ def linear_regression_RT_with_gain_and_loss(data): print 'Coefficient: ' + str(beta[2]), 'p-value: ' + str(pval_loss) return -""" + def simple_regression_plot(data, dep_var, exp_var): @@ -176,6 +176,7 @@ def simple_regression_plot(data, dep_var, exp_var): plt.plot(x_vals, y_vals) plt.title('Ratio vs Response Time with predicted line') return +""" diff --git a/code/utils/scripts/linear_regression_script.py b/code/utils/scripts/linear_regression_script.py index 227229c..201b48c 100644 --- a/code/utils/scripts/linear_regression_script.py +++ b/code/utils/scripts/linear_regression_script.py @@ -6,8 +6,9 @@ # Get the data all_subjects= ['001', '002' ,'003', '004', '005', '006', '007', '008', '009', '010', '011', '012', '013', '014', '015', '016'] -data_dir = "/Users/macbookpro/Desktop/stat159_Project/" - +# data_dir = "/Users/macbookpro/Desktop/stat159_Project/" +project_path = '../../../' +data_dir = project_path+'data/' ######################## @@ -64,10 +65,6 @@ - - - - ####################### # Peform regression # ####################### @@ -82,3 +79,9 @@ linear_regression(data, 'RT', 'diff') +####################### +# Plot # +####################### + + + From 448842adc61713fce735b85c0bfd8a77e9e7e5c5 Mon Sep 17 00:00:00 2001 From: Timothy Yu Date: Sun, 13 Dec 2015 16:11:17 -0800 Subject: [PATCH 7/7] updated lin --- code/utils/functions/linear_regression.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/code/utils/functions/linear_regression.py b/code/utils/functions/linear_regression.py index 99bfab3..ed71510 100644 --- a/code/utils/functions/linear_regression.py +++ b/code/utils/functions/linear_regression.py @@ -176,11 +176,14 @@ def simple_regression_plot(data, dep_var, exp_var): plt.plot(x_vals, y_vals) plt.title('Ratio vs Response Time with predicted line') return -""" -def plot_neural_and_behav_loss_aversion(all_subjects, data, beta = None): + + + + + def plot_neural_and_behav_loss_aversion(all_subjects, data, beta = None): lambdas = [] loss_aversion = [] @@ -223,6 +226,11 @@ def my_line(x, B = B): plt.show() return +""" + + + +