# Hypothesis Testing on Subject Performance (Two-Feature)
-----
This notebook accompanies the "two-feature" manuscript (under review, Jan 2016). It tests the null hypothesis that subject performance in Experiments 1, 2, 3 and 5 is not affected by change magnitude or by condition.

In [3]:
import fnmatch # file name matching
import os
from scipy.io import loadmat

from pandas import DataFrame, Series
import pandas as pd
import scipy as sp
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

# 1. Load Data
## Experiment 1: Condition B vs. Condition C
### _N_=4
- Condition B: orientation and color both are relevant
- Condition C orientation: only orientation is present, and relevant
- Condition C color: only color is present, and relevant

In [71]:
# Experiment 1 inputs: data folder, subject IDs, and the codes that
# concat_matfiles combines into the .mat file-name tags.
pathname = 'two_feature_data/Exp1_4/'
subject_names = [
    'AAR', 'HS0', 'JSK', 'KL0',
    'LLT', 'RRS', 'WK0', 'WR0',
]
# exp_ids -> 3: orientation (1F), 4: color (1F), 8 and 9: 2F
exp_ids = [3, 4, 8, 9]
exp_code = 2

In [163]:
# Create a DataFrame from mat files by looping over subjects (subject_names)

# 'Feature' labels keyed by exp_id; these apply whatever the exp_code is.
_FEATURE_LABELS = {
    3: 'Ori',
    4: 'Col',
    5: 'Ori_all',
    6: 'Col_all',
    8: 'Both',  # two-feature (2F) blocks
    9: 'Both',  # two-feature (2F) blocks
}
# Labels that only apply when exp_code == 6.
_FEATURE_LABELS_CODE6 = {1: 'Ori_one', 2: 'Col_one'}


def _feature_label(exp_code, exp_id):
    """Return the 'Feature' label for a block; raise ValueError if it has none."""
    if exp_id in _FEATURE_LABELS:
        return _FEATURE_LABELS[exp_id]
    if exp_code == 6 and exp_id in _FEATURE_LABELS_CODE6:
        return _FEATURE_LABELS_CODE6[exp_id]
    raise ValueError(
        'No feature label defined for exp_code={0}, exp_id={1}'.format(exp_code, exp_id))


def concat_matfiles(exp_code,exp_ids,pathname,subject_names,exp_num=0):
    """Stack the 'datamatrix' of every matching .mat file into one DataFrame.

    Files in ``pathname`` are selected by name. For each exp_id and subject the
    pattern is ``<subject><exp_code>_<exp_id>_*.mat`` when ``exp_num == 0`` and
    ``<subject><exp_code>_<exp_code>_<exp_id>_*.mat`` otherwise.

    Parameters
    ----------
    exp_code : int
        Experiment code embedded in the file names.
    exp_ids : iterable of int
        Block ids to load; each one determines the 'Feature' label.
    pathname : str
        Directory holding the .mat files (a trailing separator is optional).
    subject_names : iterable of str
        Subject identifiers used as file-name prefixes. Subjects without any
        matching file contribute no rows.
    exp_num : int, optional
        0 selects the short file-name pattern, anything else the long one.

    Returns
    -------
    DataFrame
        The data-matrix columns (integer labels) followed by 'Name' (subject)
        and 'Feature' (block label). Rows are ordered by exp_id, then subject,
        then file. An empty DataFrame is returned when nothing matches.

    Raises
    ------
    ValueError
        If an exp_id has no feature label. Previously such blocks silently
        inherited the label of the preceding exp_id (e.g. 2F blocks 8 and 9
        were tagged 'Col').
    """
    file_names = os.listdir(pathname)

    frames = []
    for exp_id in exp_ids:
        # Resolve the label once per block so a stale value can never leak in.
        feat_name = _feature_label(exp_code, exp_id)

        for subject_name in subject_names:
            # Create a tag to find files based on a subject name
            if exp_num == 0:
                subject_tag = subject_name + str(exp_code) + '_' + str(exp_id) + '_'
            else:
                subject_tag = subject_name + str(exp_code) + '_' + str(exp_code) + '_' + str(exp_id) + '_'

            for subject_file in fnmatch.filter(file_names, subject_tag + '*.mat'):
                # Convert a MATLAB matrix to a DataFrame and tag its rows
                block = DataFrame(loadmat(os.path.join(pathname, subject_file))['datamatrix'])
                block['Name'] = subject_name
                block['Feature'] = feat_name
                frames.append(block)

    if not frames:
        return DataFrame()
    # A single concat avoids re-copying the accumulated frame on every iteration.
    return pd.concat(frames, ignore_index=True)

In [164]:
# Load every Experiment 1 block and keep the subject, the feature label and
# data-matrix columns 1, 2 and 8 under descriptive names.
df = concat_matfiles(exp_code,exp_ids,pathname,subject_names)
# .copy() makes df1 an independent frame, so assigning to its columns later
# does not trigger pandas' SettingWithCopyWarning.
df1 = df[['Name','Feature',1,2,8]].copy()
df1.columns = ['Subject','Feature','Delta_Ori','Delta_Col','Correct']

In [74]:
def remap_colors(old_delta):
    """Fold a color change onto 0-180 (values above 180 become 360 - value), then halve it."""
    folded = old_delta if old_delta <= 180 else 360 - old_delta
    return folded / 2.0

In [165]:
# Replace the raw color change with its remapped value, then summarize.
# NOTE: this overwrites Delta_Col, so re-running the cell remaps the values again.
df1['Delta_Col'] = df1['Delta_Col'].apply(remap_colors)
df1.describe()

Unnamed: 0,Delta_Ori,Delta_Col,Correct
count,19200.0,19200.0,19200.0
mean,-0.359018,22.242943,0.35125
std,36.911073,28.901904,0.477373
min,-90.0,0.0,0.0
25%,-0.752089,0.0,0.0
50%,0.0,0.0,0.0
75%,0.250696,44.0,1.0
max,90.0,90.0,1.0


## Experiment 2: Condition B vs. Condition A
- Condition B: orientation and color both are relevant
- Condition A orientation: both features are present, but orientation is relevant
- Condition A color: both features are present, but color is relevant

### _N_=8

In [166]:
# Experiment 2 inputs: data folder, subject IDs, and the codes that
# concat_matfiles combines into the .mat file-name tags.
pathname = 'two_feature_data/Exp1_8/'
subject_names = [
    'AAR', 'HJK', 'HS0', 'JSK',
    'JYP', 'KL0', 'MBC', 'WK0',
]
# exp_ids -> 3: orientation (1F), 4: color (1F), 8 and 9: 2F
exp_ids = [3, 4, 8, 9]
exp_code = 4

In [167]:
# Load every Experiment 2 block and keep the subject, the feature label and
# data-matrix columns 1, 2 and 8 under descriptive names.
df = concat_matfiles(exp_code,exp_ids,pathname,subject_names)
# .copy() makes df2 an independent frame, so assigning to its columns later
# does not trigger pandas' SettingWithCopyWarning.
df2 = df[['Name','Feature',1,2,8]].copy()
df2.columns = ['Subject','Feature','Delta_Ori','Delta_Col','Correct']

In [168]:
# Replace the raw color change with its remapped value, then summarize.
# NOTE: this overwrites Delta_Col, so re-running the cell remaps the values again.
df2['Delta_Col'] = df2['Delta_Col'].apply(remap_colors)
df2.describe()

Unnamed: 0,Delta_Ori,Delta_Col,Correct
count,19200.0,19200.0,19200.0
mean,-0.359018,22.242943,0.35125
std,36.911073,28.901904,0.477373
min,-90.0,0.0,0.0
25%,-0.752089,0.0,0.0
50%,0.0,0.0,0.0
75%,0.250696,44.0,1.0
max,90.0,90.0,1.0


## Experiment 3: Condition A vs. Condition C vs. Condition D
- Condition A: one relevant feature ('two_feature_data/Exp1_4/')
- Condition C: one relevant feature, one changing irrelevant feature ('two_feature_data/Exp3_one_change')
- Condition D: one relevant feature, all changing irrelevant feature ('two_feature_data/Exp3_all_change')

In [169]:
# Data folders and subject lists for Experiment 3
pathnameC = 'one_change_lab/'
pathnameAD = 'two_feature_data/Exp3_all_change/' # this contains both Conditions A and D
subject_namesC = ['HJK','HS0','LLT','WK0','WZ0']
subject_namesAD = ['HJK','HS0','JSK','LLT','LN0','MBC','WK0','WZ0']

# Keep only the subjects that appear in both lists (5 with the lists above),
# so every condition is compared on the same people.
common_subjects = set(subject_namesC) & set(subject_namesAD)
# print(...) with one argument behaves the same on Python 2 and Python 3
print(common_subjects)

set(['WK0', 'HS0', 'HJK', 'WZ0', 'LLT'])


In [170]:
# Conditions A and D
exp_codeAD = 3
# 5: orientation + irrelevant color, 6: color + irrelevant orientation
exp_idsAD = [3, 4, 5, 6]

# Condition C
exp_codeC = 6
# 1: orientation, 2: color
exp_idsC = [1, 2]

In [174]:
# Load the Condition A/D files and the Condition C files (which use the
# longer file-name pattern, exp_num=1) for the common subjects, then stack them.
dfAD = concat_matfiles(exp_codeAD,exp_idsAD,pathnameAD,common_subjects)
dfC = concat_matfiles(exp_codeC,exp_idsC,pathnameC,common_subjects,exp_num=1)
df = pd.concat([dfAD,dfC],ignore_index=True)
# .copy() makes df3 an independent frame, so assigning to its columns later
# does not trigger pandas' SettingWithCopyWarning.
df3 = df[['Name','Feature',1,2,8]].copy()
df3.columns = ['Subject','Feature','Delta_Ori','Delta_Col','Correct']

In [178]:
# Replace the raw color change with its remapped value, then summarize.
# NOTE: this overwrites Delta_Col, so re-running the cell remaps the values again.
df3['Delta_Col'] = df3['Delta_Col'].apply(remap_colors)
df3.describe()

Unnamed: 0,Delta_Ori,Delta_Col,Correct
count,18000.0,18000.0,18000.0
mean,-0.576351,30.277167,0.6875
std,42.391533,30.165285,0.463525
min,-90.0,0.0,0.0
25%,-23.816156,0.0,0.0
50%,0.0,23.0,1.0
75%,21.309192,57.0,1.0
max,90.0,90.0,1.0
