From 6dfdaee7df819a88e6d33964cd3c885b6ac15184 Mon Sep 17 00:00:00 2001
From: Timothy Yu
 <macbookpro@airbears2-10-142-18-145.airbears2.1918.berkeley.edu>
Date: Sat, 12 Dec 2015 20:03:11 -0800
Subject: [PATCH 1/7] updated ilnear_regression

---
 .../{ => functions}/linear_regression.py      | 111 +++++++++++++++++-
 1 file changed, 108 insertions(+), 3 deletions(-)
 rename code/utils/{ => functions}/linear_regression.py (66%)

diff --git a/code/utils/linear_regression.py b/code/utils/functions/linear_regression.py
similarity index 66%
rename from code/utils/linear_regression.py
rename to code/utils/functions/linear_regression.py
index 24cfee5..bf87852 100644
--- a/code/utils/linear_regression.py
+++ b/code/utils/functions/linear_regression.py
@@ -2,6 +2,8 @@
 
 
 import pandas as pd
+import sys, os
+sys.path.append(os.path.join(os.path.dirname(__file__), "./"))
 #import statsmodels.api as sm
 import statsmodels.formula.api as smf
 import pylab as pl
@@ -11,6 +13,8 @@
 from scipy.stats import t 
 import numpy.linalg as npl
 import math
+import logistic_reg
+from logistic_reg import *
 
 
 
@@ -41,7 +45,7 @@ def load_data(subject, data_dir = "/Users/macbookpro/Desktop/stat159_Project/"):
 
 	except IOError:
 
-		print "Can't find files in such directory! Please enter the directory where you store ds005 dataset!"
+		print ("Can't find files in such directory! Please enter the directory where you store ds005 dataset!")
 		return
 
 	run_1=run1.append(run2)
@@ -106,6 +110,25 @@ def combine_all_data(data_dir = "/Users/macbookpro/Desktop/stat159_Project/"):
 	return (all_data)
 
 
+
+
+def load_each_subject(data_dir = "/Users/macbookpro/Desktop/stat159_Project/"):
+
+	all_subjects = ['001', '002', '003', '004', '005', '006', '007', 
+	'008', '009', '010', '011', '012', '013', '014', '015', '016']
+
+	l = []
+
+	for i in all_subjects:
+		l.append(load_data(i, data_dir))
+
+	for i in range(len(all_subjects)):
+		l[i]['ratio'] = l[i]['gain'] / l[i]['loss']
+
+	return l
+
+
+
 """
 To combine all the behavioral data
 
@@ -163,9 +186,9 @@ def linear_regression(data, y, *arg):
 	for i in range(1,p):
 		print('==============================================================')
 		print(arg[i-1])
-		print 'Coefficient: ' + str(beta[i]), 'p-value: ' + str(pvalues[i])
+		print ('Coefficient: ' + str(beta[i]), 'p-value: ' + str(pvalues[i]))
 
-	return
+	return beta, pvalues
 
 """
 def linear_regression_RT_with_gain_and_loss(data):
@@ -230,7 +253,89 @@ def linear_regression_fast(data, formula):
 
 
 
+def simple_regression_plot(data, dep_var, exp_var):
+	y = data[dep_var]
+	x = data[exp_var]
+	n = len(data)
+	# Design X matrix
+	X = np.column_stack((np.ones(n), x))
+	# Get the beta
+	B = npl.pinv(X).dot(y)
+	# Get the regression line
+	x_vals = [0, max(x)]
+	y_vals = [my_line(0), my_line(max(x))]
+	# Plot the simple linear regression
+	plt.plot(x, y, '+')
+	plt.xlabel('ratio (gain/loss)')
+	plt.ylabel('Response Time')
+	plt.plot(x_vals, y_vals)
+	plt.title('Ratio vs Response Time with predicted line')
+	return
+
+
+
+def beta_statistics():
+
+	data = load_each_subject()
+
+	betas = [0] * len(data)
+	pvalues = [0] * len(data)
+
+	for i in range(len(data)):
+		betas[i], pvalues[i] = linear_regression(data[i], 'RT', 'gain', 'loss')
+
+	lambdas = []
 
+	for i in range(len(data)):
+		lambdas.append( math.log(betas[i][2] / betas[i][1])    )
+
+	return
+
+def plot_neural_and_behav_loss_aversion(data, beta = None):
+
+	all_subjects = ['001', '002', '003', '004', '005', '006', '007', 
+	'008', '009', '010', '011', '012', '013', '014', '015', '016']
+
+	lambdas = []
+	loss_aversion = []
+
+	for i in range(len(all_subjects)):
+
+		a = add_gainlossratio(data[i])
+		b = organize_columns(a)
+		x = logit("respcat ~ gain + loss", b).fit()
+		logit_pars = x.params
+		ratio =  -logit_pars['loss'] / logit_pars['gain'] 
+		lambdas.append( math.log(ratio) )
+		loss_aversion.append( (-logit_pars['loss']) - logit_pars['gain'] ) # This will be changed!
+
+
+	X = np.column_stack((np.ones(16), loss_aversion))
+
+
+	
+	B = npl.pinv(X).dot(lambdas)
+
+	def my_line(x, B = B):
+    	# Best prediction 
+		return B[0] + B[1] * x
+		
+	x_vals = [0, max(loss_aversion)]
+	y_vals = [my_line(0), my_line(max(loss_aversion))]
+
+	plt.plot(loss_aversion, lambdas, '+') 
+	plt.plot(x_vals, y_vals)
+
+	#plt.xlabel('negative loss beta - gain beta')
+	#plt.ylabel('log of lambda')
+	
+	plt.title("Scatterplot of correspondence between neural \nloss aversion and behavioral loss aversion")
+	plt.xlabel(r'Neural loss aversion [-($\beta[loss]) - \beta[gain]$]')
+	plt.ylabel(r'Behavioral loss aversion [ln($\lambda)$]')
+
+	plt.show()
+
+	return
 
 
 

From e9793b9c499b2ae390cc985f9e1f269d0dffbc35 Mon Sep 17 00:00:00 2001
From: Timothy Yu
 <macbookpro@airbears2-10-142-18-145.airbears2.1918.berkeley.edu>
Date: Sat, 12 Dec 2015 20:03:53 -0800
Subject: [PATCH 2/7] added test file for lin-reg

---
 code/utils/tests/test_linear_regression.py | 74 ++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 code/utils/tests/test_linear_regression.py

diff --git a/code/utils/tests/test_linear_regression.py b/code/utils/tests/test_linear_regression.py
new file mode 100644
index 0000000..9008c3c
--- /dev/null
+++ b/code/utils/tests/test_linear_regression.py
@@ -0,0 +1,74 @@
+""" 
+
+Test linear_regression.py 
+
+This is a test module.
+
+It is designed to be run the with the "nose" testing package (via the
+"nosetests" script.
+
+Nose will look for any functions with "test" in their names, and run them.
+
+Nose reports any errors, or any failures.
+
+So we use the tests to check that the results of our function are (still) as we
+expect.
+
+
+"""
+
+
+# Python 3 compatibility
+from __future__ import absolute_import, division, print_function
+import pandas as pd
+import numpy as np
+from numpy.testing import assert_almost_equal
+import sys, os, pdb
+
+#Specicy the path for functions
+sys.path.append(os.path.join(os.path.dirname(__file__), "../functions/"))
+import linear_regression
+from linear_regression import *
+
+
+
+
+def test_linear_regression():
+
+
+	# Create a data frame
+	d = {'y' : pd.Series([95, 85, 80, 75, 70, 65, 60, 55, 50, 45], index=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']),
+		 'x1' : pd.Series([85, 95, 70, 65, 70, 60, 64, 60, 51, 49], index=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']),
+		 'x2' : pd.Series([10, 8.8, 8.4, 7.5, 7.4, 7.2, 7.0, 6.4, 5.3, 4], index=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']),
+		 }
+
+
+	df = pd.DataFrame(d)
+
+
+	beta1, pvalues1 = linear_regression(df, 'y', 'x1')
+	beta2, pvalues2 = linear_regression(df, 'y', 'x2')
+	beta3, pvalues3 = linear_regression(df, 'y', 'x1', 'x2')
+
+
+	expected_beta1 = np.array([ 0.69128736,  1.00610931]) # Calculated by hands
+	expected_p1 = np.array([0.95669000991385234, 0.00082441892685309844]) # Calculated by hands
+	expected_beta2 = np.array([ 2.92830189,  9.03773585]) # Calculated by hands
+	expected_p2 = [0.64660353670191761, 1.2010523101013017e-05] # Calculated by hands
+	expected_beta3 = np.array([-0.01554384,  0.20355359,  7.55525124]) # Calculated by hands
+	expected_p3 = np.array([0.99826544217405555, 0.37237722050579208, 0.0049816157477362418]) # Calculated by hands
+
+	assert_almost_equal(expected_beta1, beta1)
+	assert_almost_equal(expected_p1, pvalues1)
+	assert_almost_equal(expected_beta2, beta2)
+	assert_almost_equal(expected_p2, pvalues2)
+	assert_almost_equal(expected_beta3, beta3)
+	assert_almost_equal(expected_p3, pvalues3)
+
+
+
+
+
+
+
+

From 2120cb95b7941ae5ac676ef3073b5650806c4eee Mon Sep 17 00:00:00 2001
From: Timothy Yu
 <macbookpro@airbears2-10-142-25-222.airbears2.1918.berkeley.edu>
Date: Sun, 13 Dec 2015 13:25:16 -0800
Subject: [PATCH 3/7] updated plot function

---
 code/utils/functions/linear_regression.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/code/utils/functions/linear_regression.py b/code/utils/functions/linear_regression.py
index bf87852..79f7901 100644
--- a/code/utils/functions/linear_regression.py
+++ b/code/utils/functions/linear_regression.py
@@ -323,6 +323,9 @@ def my_line(x, B = B):
 	x_vals = [0, max(loss_aversion)]
 	y_vals = [my_line(0), my_line(max(loss_aversion))]
 
+	fig = plt.figure()
+	ax = fig.add_subplot(111)
+
 	plt.plot(loss_aversion, lambdas, '+') 
 	plt.plot(x_vals, y_vals)
 
@@ -333,6 +336,10 @@ def my_line(x, B = B):
 	plt.xlabel(r'Neural loss aversion [-($\beta[loss]) - \beta[gain]$]')
 	plt.ylabel(r'Behavioral loss aversion [ln($\lambda)$]')
 
+	for xy in zip(loss_aversion, lambdas):                                               
+    	ax.annotate('(%s, %s)' % xy, xy=xy, textcoords='offset points')
+
+	plt.grid()
 	plt.show()
 
 	return

From 267d2f31b3578ad4366f5283cb2bf382aaf32adc Mon Sep 17 00:00:00 2001
From: Timothy Yu
 <macbookpro@airbears2-10-142-18-68.airbears2.1918.berkeley.edu>
Date: Sun, 13 Dec 2015 15:14:06 -0800
Subject: [PATCH 4/7] update test files for lin

---
 code/utils/functions/linear_regression.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/code/utils/functions/linear_regression.py b/code/utils/functions/linear_regression.py
index 79f7901..719db59 100644
--- a/code/utils/functions/linear_regression.py
+++ b/code/utils/functions/linear_regression.py
@@ -328,17 +328,11 @@ def my_line(x, B = B):
 
 	plt.plot(loss_aversion, lambdas, '+') 
 	plt.plot(x_vals, y_vals)
-
-	#plt.xlabel('negative loss beta - gain beta')
-	#plt.ylabel('log of lambda')
 	
 	plt.title("Scatterplot of correspondence between neural \nloss aversion and behavioral loss aversion")
 	plt.xlabel(r'Neural loss aversion [-($\beta[loss]) - \beta[gain]$]')
 	plt.ylabel(r'Behavioral loss aversion [ln($\lambda)$]')
 
-	for xy in zip(loss_aversion, lambdas):                                               
-    	ax.annotate('(%s, %s)' % xy, xy=xy, textcoords='offset points')
-
 	plt.grid()
 	plt.show()
 

From 0c9a0a7735e1c057dde3360cb00822b4ab85cedc Mon Sep 17 00:00:00 2001
From: Timothy Yu
 <macbookpro@airbears2-10-142-18-68.airbears2.1918.berkeley.edu>
Date: Sun, 13 Dec 2015 15:55:54 -0800
Subject: [PATCH 5/7] rewrite functions for linear regression

---
 code/utils/functions/linear_regression.py     | 129 +-----------------
 .../utils/scripts/linear_regression_script.py |  71 +++++++++-
 2 files changed, 77 insertions(+), 123 deletions(-)

diff --git a/code/utils/functions/linear_regression.py b/code/utils/functions/linear_regression.py
index 719db59..50da141 100644
--- a/code/utils/functions/linear_regression.py
+++ b/code/utils/functions/linear_regression.py
@@ -19,7 +19,6 @@
 
 
 
-
 """
 Parameters:
 
@@ -34,19 +33,13 @@
 def load_data(subject, data_dir = "/Users/macbookpro/Desktop/stat159_Project/"):
 	
 	# Get the directory where data is stored
-	data_location= data_dir + 'ds005/sub' + subject
-
-	try:
+	data_location = data_dir + 'ds005/sub' + subject
 
-		# Load the data for each run
-		run1 = pd.read_table(data_location+"/behav/task001_run001/behavdata.txt")
-		run2 = pd.read_table(data_location+"/behav/task001_run002/behavdata.txt")
-		run3 = pd.read_table(data_location+"/behav/task001_run003/behavdata.txt")
+	# Load the data for each run
+	run1 = pd.read_table(data_location+"/behav/task001_run001/behavdata.txt")
+	run2 = pd.read_table(data_location+"/behav/task001_run002/behavdata.txt")
+	run3 = pd.read_table(data_location+"/behav/task001_run003/behavdata.txt")
 
-	except IOError:
-
-		print ("Can't find files in such directory! Please enter the directory where you store ds005 dataset!")
-		return
 
 	run_1=run1.append(run2)
 	run_total=run_1.append(run3) #append all the data frames of run
@@ -54,83 +47,9 @@ def load_data(subject, data_dir = "/Users/macbookpro/Desktop/stat159_Project/"):
 	return(run_total)
 
 
-"""
-To combine all the behavioral data
-
-Parameters:
-
-	data_dir: The working directory that you store your data
-
-Return:
-	
-	all_data: the behavioral data that contains 16 subjects, each subject has 3 runs
 
 """
-def combine_all_data(data_dir = "/Users/macbookpro/Desktop/stat159_Project/"):
-
-	# Get all the subjects
-	all_subjects = ['001', '002', '003', '004', '005', '006', '007', 
-	'008', '009', '010', '011', '012', '013', '014', '015', '016']
-
-	all_data = []
-
-	# Loop for 16 subjects to get the data
-	for i in all_subjects:
-		temp = load_data(i, data_dir)
-		
-		# if there's no such dataset, then stop running the loop and return nothing
-		if temp is None:
-			return
-
-		# combine all the data
-		all_data.append(temp)
-
-	# Concat the data
-	all_data = pd.concat(all_data)
-
-	# Calculate the difference for future use
-	all_data['diff'] = all_data['gain'] - all_data['loss']
-
-	# Calculate the ratio for future use
-	all_data['ratio'] = all_data['gain'] / all_data['loss']
-
-
-	all_data['log_gain'] = np.log(all_data['gain'])
-
-	all_data['log_loss'] = np.log(all_data['loss'])
-
-	all_data['log_diff'] = np.log(all_data['diff'])
-
-	all_data['log_ratio'] = np.log(all_data['ratio'])
-
-	# # Remove the rows that have respcat == -1 
-	#(meaning that the individual chooses to not participate the activity)
-	all_data = all_data[np.logical_not(all_data['respcat'] == -1)]
-
-	return (all_data)
-
-
-
-
-def load_each_subject(data_dir = "/Users/macbookpro/Desktop/stat159_Project/"):
-
-	all_subjects = ['001', '002', '003', '004', '005', '006', '007', 
-	'008', '009', '010', '011', '012', '013', '014', '015', '016']
-
-	l = []
-
-	for i in all_subjects:
-		l.append(load_data(i, data_dir))
-
-	for i in range(len(all_subjects)):
-		l[i]['ratio'] = l[i]['gain'] / l[i]['loss']
-
-	return l
-
-
-
-"""
-To combine all the behavioral data
+To perform linear regression
 
 Parameters:
 
@@ -239,20 +158,6 @@ def linear_regression_RT_with_gain_and_loss(data):
 """
 
 
-
-def linear_regression_fast(data, formula):
-	#plt.plot(data['diff'], data['RT'], '+')
-	# It seems like there's no siginificant relationship between diff and Response time
-	# When we do the regression, it shows that difference between gain and loss does not really matter either.
-
-
-	est = smf.ols(formula = formula, data=data).fit()
-	est.summary()
-
-	return est
-
-
-
 def simple_regression_plot(data, dep_var, exp_var):
 	y = data[dep_var]
 	x = data[exp_var]
@@ -274,27 +179,7 @@ def simple_regression_plot(data, dep_var, exp_var):
 
 
 
-def beta_statistics():
-
-	data = load_each_subject()
-
-	betas = [0] * len(data)
-	pvalues = [0] * len(data)
-
-	for i in range(len(data)):
-		betas[i], pvalues[i] = linear_regression(data[i], 'RT', 'gain', 'loss')
-
-	lambdas = []
-
-	for i in range(len(data)):
-		lambdas.append( math.log(betas[i][2] / betas[i][1])    )
-
-	return
-
-def plot_neural_and_behav_loss_aversion(data, beta = None):
-
-	all_subjects = ['001', '002', '003', '004', '005', '006', '007', 
-	'008', '009', '010', '011', '012', '013', '014', '015', '016']
+def plot_neural_and_behav_loss_aversion(all_subjects, data, beta = None):
 
 	lambdas = []
 	loss_aversion = []
diff --git a/code/utils/scripts/linear_regression_script.py b/code/utils/scripts/linear_regression_script.py
index fde7320..227229c 100644
--- a/code/utils/scripts/linear_regression_script.py
+++ b/code/utils/scripts/linear_regression_script.py
@@ -5,7 +5,74 @@
 from linear_regression import *
 
 # Get the data
-data = combine_all_data()
+all_subjects= ['001', '002' ,'003', '004', '005', '006', '007', '008', '009', '010', '011', '012', '013', '014', '015', '016']
+data_dir = "/Users/macbookpro/Desktop/stat159_Project/"
+
+
+
+########################
+# combine all the data #
+########################
+
+
+all_data = []
+
+# Loop for 16 subjects to get the data
+for i in all_subjects:
+	temp = load_data(i, data_dir)
+	# if there's no such dataset, then stop running the loop and return nothing
+	if temp is None:
+		return
+
+
+
+all_data.append(temp)
+
+# Concat the data
+all_data = pd.concat(all_data)
+
+# Calculate the difference for future use
+all_data['diff'] = all_data['gain'] - all_data['loss']
+
+# Calculate the ratio for future use
+all_data['ratio'] = all_data['gain'] / all_data['loss']
+
+all_data['log_gain'] = np.log(all_data['gain'])
+
+all_data['log_loss'] = np.log(all_data['loss'])
+
+all_data['log_diff'] = np.log(all_data['diff'])
+
+all_data['log_ratio'] = np.log(all_data['ratio'])
+
+# # Remove the rows that have respcat == -1 
+#(meaning that the individual chooses to not participate the activity)
+all_data = all_data[np.logical_not(all_data['respcat'] == -1)]
+
+
+########################
+#  Load each subject   #
+########################
+
+data_each = []
+
+for i in all_subjects:
+	data_each.append(load_data(i, data_dir))
+
+for i in range(len(all_subjects)):
+	data_each[i]['ratio'] = data_each[i]['gain'] / data_each[i]['loss']
+
+
+
+
+
+
+
+#######################
+#  Peform regression  #
+#######################
+
+data = all_data
 
 # Run the linear_regression function to get the summary
 linear_regression(data, 'RT', 'gain', 'loss')
@@ -13,3 +80,5 @@
 linear_regression(data, 'RT', 'ratio')
 
 linear_regression(data, 'RT', 'diff')
+
+

From e73a674f6b438256c9aa06ea4b063dac22da7be4 Mon Sep 17 00:00:00 2001
From: Timothy Yu
 <macbookpro@airbears2-10-142-18-68.airbears2.1918.berkeley.edu>
Date: Sun, 13 Dec 2015 16:06:45 -0800
Subject: [PATCH 6/7] updated test files

---
 code/utils/functions/linear_regression.py      |  3 ++-
 code/utils/scripts/linear_regression_script.py | 15 +++++++++------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/code/utils/functions/linear_regression.py b/code/utils/functions/linear_regression.py
index 50da141..99bfab3 100644
--- a/code/utils/functions/linear_regression.py
+++ b/code/utils/functions/linear_regression.py
@@ -155,7 +155,7 @@ def linear_regression_RT_with_gain_and_loss(data):
 	print 'Coefficient: ' + str(beta[2]), 'p-value: ' + str(pval_loss)
 
 	return
-"""
+
 
 
 def simple_regression_plot(data, dep_var, exp_var):
@@ -176,6 +176,7 @@ def simple_regression_plot(data, dep_var, exp_var):
 	plt.plot(x_vals, y_vals)
 	plt.title('Ratio vs Response Time with predicted line')
 	return
+"""
 
 
 
diff --git a/code/utils/scripts/linear_regression_script.py b/code/utils/scripts/linear_regression_script.py
index 227229c..201b48c 100644
--- a/code/utils/scripts/linear_regression_script.py
+++ b/code/utils/scripts/linear_regression_script.py
@@ -6,8 +6,9 @@
 
 # Get the data
 all_subjects= ['001', '002' ,'003', '004', '005', '006', '007', '008', '009', '010', '011', '012', '013', '014', '015', '016']
-data_dir = "/Users/macbookpro/Desktop/stat159_Project/"
-
+# data_dir = "/Users/macbookpro/Desktop/stat159_Project/"
+project_path = '../../../'
+data_dir = project_path+'data/'
 
 
 ########################
@@ -64,10 +65,6 @@
 
 
 
-
-
-
-
 #######################
 #  Peform regression  #
 #######################
@@ -82,3 +79,9 @@
 linear_regression(data, 'RT', 'diff')
 
 
+#######################
+#        Plot         #
+#######################
+
+
+

From 448842adc61713fce735b85c0bfd8a77e9e7e5c5 Mon Sep 17 00:00:00 2001
From: Timothy Yu
 <macbookpro@airbears2-10-142-18-68.airbears2.1918.berkeley.edu>
Date: Sun, 13 Dec 2015 16:11:17 -0800
Subject: [PATCH 7/7] updated lin

---
 code/utils/functions/linear_regression.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/code/utils/functions/linear_regression.py b/code/utils/functions/linear_regression.py
index 99bfab3..ed71510 100644
--- a/code/utils/functions/linear_regression.py
+++ b/code/utils/functions/linear_regression.py
@@ -176,11 +176,14 @@ def simple_regression_plot(data, dep_var, exp_var):
 	plt.plot(x_vals, y_vals)
 	plt.title('Ratio vs Response Time with predicted line')
 	return
-"""
 
 
 
-def plot_neural_and_behav_loss_aversion(all_subjects, data, beta = None):
+
+
+	
+
+	def plot_neural_and_behav_loss_aversion(all_subjects, data, beta = None):
 
 	lambdas = []
 	loss_aversion = []
@@ -223,6 +226,11 @@ def my_line(x, B = B):
 	plt.show()
 
 	return
+"""
+
+
+
+