In [1]:
import time
import numpy as np
import pandas as pd
import peakutils as pu
from scipy import signal

## Get session information

In [2]:
class AV_info:
    """Session metadata parsed from an audio-visual stimulus log file.

    The constructor only loads the file into ``self.lines``; call
    ``process()`` to populate ``startsecs``, ``PID`` and ``stimulus_order``.
    Every field is located by a fixed line index and whitespace-token
    position, so the log layout must match what the getters expect.
    """

    def __init__(self, filename):
        """Set default attribute values and read all lines of ``filename``."""
        # presumably both durations are in seconds -- TODO confirm
        self.stimuli_duration = 50
        self.rest_interval = 15
        self.startsecs = 0
        self.PID = ''
        self.stimulus_order = []
        self.stimulus_info = ['native_w_touch', 'native_w/o_touch',
                              'nonnative_w_touch', 'nonnative_w/o_touch',
                              'native_async_w_touch', 'native_async_w/o_touch']
        with open(filename, 'r') as log:
            self.lines = log.readlines()

    def get_start_secs(self):
        """Store the session start as ``time.mktime`` seconds in ``startsecs``.

        The clock comes from the first token of line index 1 (only its first
        three ':'-separated fields are kept) and the date from the third
        token of line index 16.
        """
        clock_token = self.lines[1].split()[0]
        clock = ':'.join(clock_token.split(':')[:3])
        date = self.lines[16].split()[2]
        parsed = time.strptime(date + ' ' + clock, '%m/%d/%Y %H:%M:%S')
        self.startsecs = time.mktime(parsed)

    def get_PID(self):
        """Store the third token of line index 15 as the participant id."""
        tokens = self.lines[15].split()
        self.PID = tokens[2]

    def get_order(self):
        """Store the integers following the third token of line index 18."""
        self.stimulus_order = list(map(int, self.lines[18].split()[3:]))

    def process(self):
        """Run all three getters: start time, participant id, stimulus order."""
        self.get_start_secs()
        self.get_PID()
        self.get_order()

## Get Physiological data and extract features

In [3]:
class Physio_data:
    """Per-trial physiological signals (EDA, BVP, HR, IBI) for one participant.

    ``EDA.csv``, ``BVP.csv`` and ``HR.csv`` are read as one float per line:
    the first value is the recording start timestamp, the second the sampling
    rate, and the rest are samples. ``IBI.csv`` carries the recording start
    timestamp in the first field of its first line, followed by
    ``time,ibi`` rows whose ``time`` is compared against offsets relative to
    that start.

    Call ``process()`` to populate ``EDA``, ``BVP``, ``HR`` and ``IBI``; each
    is a list with one entry per trial.
    """

    # Trial layout; presumably seconds, matching AV_info's defaults -- TODO confirm.
    STIMULI_DURATION = 50
    REST_INTERVAL = 15
    N_TRIALS = 6

    def __init__(self, startsecs, PID):
        """Load the four recordings found in the folder ``PID``.

        Args:
            startsecs: session start time, on the same clock as the
                timestamps stored in the recordings.
            PID: path of the participant folder (a str or path-like object).
        """
        self.startsecs = startsecs
        folder = str(PID)
        self.EDA_ = self._read_signal(folder + '/EDA.csv')
        self.BVP_ = self._read_signal(folder + '/BVP.csv')
        self.HR_ = self._read_signal(folder + '/HR.csv')
        with open(folder + '/IBI.csv', 'r') as f:
            self.IBI_lines = f.readlines()
        self.IBI_ = pd.read_csv(folder + '/IBI.csv', skiprows=[0], names=['time', 'ibi'])

    @staticmethod
    def _read_signal(path):
        """Return the non-blank lines of ``path`` as a float array."""
        with open(path, 'r') as f:
            return np.array([float(line) for line in f if line.strip()])

    def _split_trials(self, raw):
        """Cut ``raw`` (timestamp, rate, samples...) into per-trial sample arrays."""
        start_time = raw[0]
        freq = int(raw[1])
        offset_secs = int(self.startsecs - start_time)
        # The offset is in seconds, so it is scaled by the sampling rate
        # before being used as a sample index (+2 skips the two header values).
        # NOTE(review): a session start earlier than the recording start gives
        # a negative offset and the slice wraps from the end -- confirm inputs.
        samples = raw[2 + offset_secs * freq:]
        period = (self.STIMULI_DURATION + self.REST_INTERVAL) * freq
        length = self.STIMULI_DURATION * freq
        return [samples[period * i:period * i + length] for i in range(self.N_TRIALS)]

    def get_EDA(self):
        """Populate ``self.EDA`` with the per-trial EDA samples."""
        self.EDA = self._split_trials(self.EDA_)

    def get_BVP(self):
        """Populate ``self.BVP`` with the per-trial BVP samples."""
        self.BVP = self._split_trials(self.BVP_)

    def get_HR(self):
        """Populate ``self.HR`` with the per-trial HR samples."""
        self.HR = self._split_trials(self.HR_)

    def get_IBI(self):
        """Populate ``self.IBI`` with the ibi values falling strictly inside each trial."""
        start_time = float(self.IBI_lines[0].split(',')[0])
        offset = int(self.startsecs - start_time)
        period = self.STIMULI_DURATION + self.REST_INTERVAL
        times = self.IBI_['time']
        self.IBI = []
        for i in range(self.N_TRIALS):
            begin = offset + period * i
            end = begin + self.STIMULI_DURATION
            # One combined mask instead of chained boolean indexing.
            in_trial = (times > begin) & (times < end)
            self.IBI.append(self.IBI_.loc[in_trial, 'ibi'].values)

    def process(self):
        """Populate all four per-trial signal lists."""
        self.get_EDA()
        self.get_BVP()
        self.get_HR()
        self.get_IBI()

In [4]:
class Physio_features:
    """Accumulates per-trial physiological feature vectors.

    ``features`` holds one list per trial (six trials). ``extract_bvp_fv``
    appends the first two values named in ``feature_names`` and
    ``extract_eda_fv`` appends the remaining six, so call them in that order
    to keep the values aligned with ``feature_names``. The string 'NA' marks
    a value that could not be computed.
    """

    def __init__(self):
        self.feature_names = ['hr', 'ibi', 'scl_mean', 'scl_std',
                              'scr_rate', 'scr_mean', 'scr_std', 'scr_max']
        self.features = [[] for _ in range(6)]

    def extract_bvp_fv(self, hrs, ibis):
        """Append the mean of ``hrs[i]`` and of ``ibis[i]`` to each trial's features."""
        for i in range(6):
            self.features[i].append(np.mean(hrs[i]))
            self.features[i].append(np.mean(ibis[i]))

    def extract_eda_fv(self, edas):
        """Append SCL mean/std and the four SCR features for each trial.

        When a trial is too short to decompose, six 'NA' values are appended
        so the vector keeps its length.
        """
        for i in range(6):
            scr, scl = self.decompose_eda(edas[i])
            if isinstance(scl, str):
                # decompose_eda returned its 'NA' marker; np.mean('NA') would raise.
                self.features[i].extend(['NA', 'NA'])
            else:
                self.features[i].append(np.mean(scl))
                self.features[i].append(np.std(scl))
            self.features[i].extend(self.compute_scr_fv(scr))

    def decompose_eda(self, eda):
        """Decompose EDA signal into SCR and SCL respectively.

        The signal is low-pass filtered; SCR is the high-passed version of
        that result and SCL is what remains after subtracting SCR.

        Returns:
            ``(scr, scl)``, or ``('NA', 'NA')`` when ``eda`` has fewer than
            20 samples.
        """
        if len(eda) < 20:
            return 'NA', 'NA'
        # Cutoffs are fractions of the Nyquist frequency; the /2 presumably
        # assumes a 4 Hz signal (0.5 Hz and 0.05 Hz cutoffs) -- TODO confirm.
        b, a = signal.butter(4, 0.5/2)
        gsr_filt = signal.filtfilt(b, a, eda)
        b, a = signal.butter(4, 0.05/2, 'highpass')
        scr = signal.filtfilt(b, a, gsr_filt)
        scl = [x - y for x, y in zip(gsr_filt, scr)]
        return scr, scl

    def compute_scr_fv(self, scr):
        """Return ``(scr_rate, scr_mean, scr_std, scr_max)`` from detected peaks.

        Returns four 'NA' values when ``scr`` is the 'NA' marker. When no
        peak is detected the rate is still returned (0.0) and the three peak
        statistics are 'NA'.
        """
        # isinstance rather than ==: comparing an ndarray with a str is elementwise.
        if isinstance(scr, str):
            return 'NA', 'NA', 'NA', 'NA'

        peaks = pu.indexes(scr, 0.6, 15)
        # Signal length divided by 4*60: minutes if sampled at 4 Hz -- TODO confirm rate.
        t = len(scr)/float(4*60)
        scr_rate = len(peaks)/t
        if len(peaks) == 0:
            # np.max of an empty sequence raises; report missing statistics instead.
            return scr_rate, 'NA', 'NA', 'NA'

        responses = [scr[i] for i in peaks]
        scr_mean = np.mean(responses)
        scr_sd = np.std(responses)
        scr_max = np.max(responses)
        return scr_rate, scr_mean, scr_sd, scr_max

## Get eye tracking data and extract features

In [51]:
class ET_data:
    """Eye-tracking samples from a whitespace-delimited recording file.

    The recording date is read from line index 8 and the time from line
    index 13 (text after the first double space); the column names come from
    line index 17 and the samples start at line index 18. Rows recorded
    before ``startsecs`` are skipped at 120 rows per whole second.
    """

    def __init__(self, startsecs, filename):
        """Parse ``filename`` into the numeric DataFrame ``self.df``."""
        with open(filename) as handle:
            raw = handle.readlines()
        # Keep only the first three '-'-separated fields of the time.
        clock_field = raw[13].split('  ')[1]
        clock = '-'.join(clock_field.split('-')[:3])
        date = raw[8].split('  ')[1].strip()
        stamp = time.strptime(date + ' ' + clock, '%m-%d-%Y %H-%M-%S')
        rec_secs = time.mktime(stamp)
        offset = int(startsecs - rec_secs)
        header = raw[17].split()
        first_row = 18 + offset*120
        rows = []
        for line in raw[first_row:]:
            rows.append(line.split())
        table = pd.DataFrame(data=rows, columns=header)
        self.df = table.apply(pd.to_numeric)

    def get_data(self):
        """Store six 50-second windows, spaced 65 seconds apart, in ``self.ETdata``."""
        freq = 120
        period = (50+15)*freq
        window = 50*freq
        self.ETdata = []
        for trial in range(6):
            begin = period*trial
            self.ETdata.append(self.df.iloc[begin:begin+window, :])

    def process(self):
        """Populate ``self.ETdata``."""
        self.get_data()

In [48]:
class ET_features:
    """Gaze-based features for six eye-tracking trials.

    For each trial four values are appended to ``features[i]``: the share of
    samples whose gaze is on screen, and the shares of those on-screen
    samples that land in any region of interest, in the head region and in
    the mouth region. 'NA' marks a share whose denominator is zero.

    Regions are ``((x_min, y_min), (x_max, y_max))`` pixel rectangles; gaze
    coordinates are fractions of the screen and are scaled by 1920x1080
    before being compared, with strict inequalities on every edge.

    ``extract_feature`` appends to ``features``, so calling it twice on the
    same instance yields eight values per trial.
    """

    SCREEN_WIDTH = 1920
    SCREEN_HEIGHT = 1080

    def __init__(self):
        self.head = ((750,400),(1250,560))
        self.mouth = ((850,720),(1130,880))
        self.feature_names = ['attention_pct', 'roi_pct', 'head_pct', 'mouth_pct']
        self.features = [[] for _ in range(6)]

    def extract_feature(self, ETdata):
        """Append the four features of each of the six trials and return ``features``."""
        for i in range(6):
            # compute_attention_percentage also refreshes self.gazePositions,
            # which the three region shares are computed from.
            self.features[i].append(self.compute_attention_percentage(ETdata[i]))
            self.features[i].append(self.compute_roi_percentage(self.gazePositions))
            self.features[i].append(self.compute_eye_pct(self.gazePositions))
            self.features[i].append(self.compute_mouth_pct(self.gazePositions))
        return self.features

    def compute_attention_percentage(self, etdata):
        """Return the fraction of samples with 0 < GazeX < 1 and 0 < GazeY < 1.

        Side effect: stores the on-screen (x, y) pairs in ``self.gazePositions``.

        Returns:
            The fraction, or 'NA' when ``etdata`` has no rows.

        Raises:
            KeyError: if ``etdata`` lacks a ``GazeX`` or ``GazeY`` column.
        """
        sample_num = len(etdata)
        gaze_x = etdata['GazeX']
        gaze_y = etdata['GazeY']
        # NaN compares False on every side, so missing samples are excluded.
        on_screen = (gaze_x > 0) & (gaze_x < 1) & (gaze_y > 0) & (gaze_y < 1)
        self.gazePositions = list(zip(gaze_x[on_screen], gaze_y[on_screen]))
        if sample_num == 0:
            return 'NA'
        return len(self.gazePositions)/float(sample_num)

    def check_hit(self, gazePosition, roi='all'):
        """Return True if ``gazePosition`` falls strictly inside the chosen region(s).

        Args:
            gazePosition: (x, y) as fractions of the screen.
            roi: 'all' (head or mouth), 'head' or 'mouth'. Any other value
                matches no region and returns False.
        """
        pixel_x = gazePosition[0]*self.SCREEN_WIDTH
        pixel_y = gazePosition[1]*self.SCREEN_HEIGHT
        regions = {
            'all': (self.head, self.mouth),
            'head': (self.head,),
            'mouth': (self.mouth,),
        }.get(roi, ())
        return any(x_min < pixel_x < x_max and y_min < pixel_y < y_max
                   for (x_min, y_min), (x_max, y_max) in regions)

    def _hit_percentage(self, gazePositions, roi):
        """Return the fraction of ``gazePositions`` hitting ``roi``, or 'NA' if empty."""
        sample_num = len(gazePositions)
        if sample_num == 0:
            return 'NA'
        hit_num = sum(1 for position in gazePositions if self.check_hit(position, roi))
        return hit_num/float(sample_num)

    def compute_roi_percentage(self, gazePositions):
        """Fraction of positions inside the head or the mouth region ('NA' if empty)."""
        return self._hit_percentage(gazePositions, 'all')

    def compute_eye_pct(self, gazePositions):
        """Fraction of positions inside the head region ('NA' if empty)."""
        return self._hit_percentage(gazePositions, 'head')

    def compute_mouth_pct(self, gazePositions):
        """Fraction of positions inside the mouth region ('NA' if empty)."""
        return self._hit_percentage(gazePositions, 'mouth')

In [71]:
PIDs = ['810v3', '817v4', '818v4', '822v3', '824v3', '826v3', '827v3', '829v2',
        '830v3', '832v2', '833v3', '835v3', '838v3', '843v2', '844v2', '845',
        '847', '848', '849', '851']

In [8]:
from pathlib import Path

In [9]:
data_path = Path('../MADCAP_DATA')

In [69]:
folder = data_path/'823v3'

In [70]:
et_file=list(folder.glob('*.dat'))[0]

In [71]:
txt_file=list(folder.glob('txt*.txt'))[0]

In [72]:
avInfo=AV_info(txt_file)
avInfo.process()

In [73]:
etData = ET_data(avInfo.startsecs, et_file)
etData.process()

In [74]:
etFeature=ET_features()

In [75]:
etFeature.extract_feature(etData.ETdata)

[[0.464, 0.4267241379310345, 0.31214080459770116, 0.11458333333333333],
 [0.049, 0.5170068027210885, 0.18027210884353742, 0.336734693877551],
 [0.2285, 0.5010940919037199, 0.36177972283005105, 0.13931436907366884],
 [0.09316666666666666,
  0.1556350626118068,
  0.13953488372093023,
  0.016100178890876567],
 [0.4445, 0.2857142857142857, 0.029996250468691414, 0.2557180352455943],
 [0.25033333333333335,
  0.6025299600532623,
  0.16644474034620507,
  0.43608521970705727]]

In [19]:
d=etData.ETdata

In [22]:
len(d[5])

6000

In [40]:
etFeature.features

[[0.0, 'NA'], [], [], [], [], []]

## Get EEG features (kind of)

In [4]:
from pathlib import Path

In [11]:
eeg_data_path = Path('MADCAP_POWER')

In [15]:
testfile = list(eeg_data_path.glob('*{}*.txt'.format('810v3')))

In [16]:
testfile

[WindowsPath('MADCAP_POWER/madcap-810v3_20180529_035230_fil_seg_ref_blc_jtf.txt')]

In [18]:
with open(testfile[0]) as f:
    testlines = f.readlines()

In [19]:
testlines

['madcap-810v3_20180529_035230_fil_seg_ref_blc_jtf.txt\n',
 'Tue, May 29, 2018 03:52 PM\n',
 '\n',
 'Power\n',
 '\n',
 'Mean power of individual channels for the channel group: \n',
 '\n',
 'DIN2\n',
 '\t_HydroCel GSN 32 1.0\n',
 '\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t13\t14\t15\t16\t17\t18\t19\t20\t21\t22\t23\t24\t25\t26\t27\t28\t29\t30\t31\t32\tVREF\t\n',
 'madcap-810v3_20180529_035230_fil_seg_ref_blc_jtf.mff: 1\t6.97334\t1.38119\t3.45299\t3.80984\t4.94232\t4.31837\t1.54076\t5.47853\t4.24127\t6.1681\t4.21554\t2.03993\t2.50021\t4.21173\t2.29789\t3.72262\t7.3997\t2.14248\t5.86836\t1.76304\t2.32613\t3.09531\t275.625\t-0.784315\t0.664115\t1.63783\t5.06147\t4.69511\t-0.23446\t0.937914\t11.1005\t5.86036\t4.32117\t\n',
 '\n',
 'Power\n',
 '\n',
 'Mean power averaged over channel group: \n',
 'HydroCel GSN 32 1.0:  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,VREF\n',
 '\n',
 '\n',
 'DIN2\n',
 '\t_HydroCel GSN 32 1.0\n',
 '\n',
 'madcap-810v3_2018

In [21]:
powers = testlines[10].split()[2:]

In [23]:
powers = [float(i) for i in powers]

In [25]:
powers.sort()

In [33]:
len(powers)

33

In [36]:
fake_power = powers[7:25]

In [42]:
theta = [fake_power[i] for i in [-1,-5,-3,-4,-2,-6]]

In [43]:
theta

[5.06147, 4.31837, 4.69511, 4.32117, 4.94232, 4.24127]

In [44]:
import random

In [47]:
i=list(range(0,6))

In [48]:
i

[0, 1, 2, 3, 4, 5]

In [57]:
l = [i/10 for i in l]

In [58]:
l

[0.1, 0.2, 0.4]

In [49]:
random.shuffle(i)

In [54]:
import numpy as np

In [55]:
np.mean(l)

2.3333333333333335

In [53]:
def calculate_power(fakepowers, rng=None):
    """Pick six values each for theta, alpha and beta out of ``fakepowers``.

    No frequency analysis happens here: beta takes six fixed positions
    counted from the end of the list, while alpha and theta split a random
    permutation of the first twelve positions.

    Args:
        fakepowers: sequence with at least twelve values.
        rng: optional object with a ``shuffle`` method (e.g.
            ``random.Random(seed)``) to make the split reproducible. When
            omitted, the global ``random`` module is used as before.

    Returns:
        ``(theta, alpha, beta)``, three lists of six values.
    """
    shuffler = random if rng is None else rng
    beta_idx = [-1,-5,-3,-4,-2,-6]
    idx = list(range(12))
    shuffler.shuffle(idx)
    alpha_idx = idx[0:6]
    theta_idx = idx[6:]
    beta = [fakepowers[i] for i in beta_idx]
    alpha = [fakepowers[i] for i in alpha_idx]
    theta = [fakepowers[i] for i in theta_idx]
    return theta, alpha, beta

In [75]:
def get_fakepower(PID, data_path=None):
    """Return 18 rescaled mid-range values from a participant's power file.

    The values after the first two whitespace-separated tokens of line index
    10 are parsed as floats and sorted; positions 7..24 of the sorted list
    are kept. The kept values are divided by 100 when their mean exceeds
    100, or by 10 when it exceeds 10.

    Args:
        PID: participant id; the file is looked up with the glob
            ``*<PID>*.txt``.
        data_path: optional folder to search. Defaults to the notebook-level
            ``eeg_data_path``.

    Raises:
        IndexError: if no file matches, or the file has fewer than 11 lines.
    """
    root = eeg_data_path if data_path is None else Path(data_path)
    # Sorted so the choice is deterministic when several files match.
    matches = sorted(root.glob('*{}*.txt'.format(PID)))
    if not matches:
        raise IndexError('no EEG power file matching *{}*.txt in {}'.format(PID, root))
    with open(matches[0]) as f:
        lines = f.readlines()
    powers = sorted(float(p) for p in lines[10].split()[2:])
    fakepowers = powers[7:25]
    mean_power = np.mean(fakepowers)
    if mean_power > 100:
        fakepowers = [p/100 for p in fakepowers]
    elif mean_power > 10:
        fakepowers = [p/10 for p in fakepowers]
    return fakepowers

In [69]:
PIDs = ['810v3', '817v4', '818v4', '822v3', '824v3', '826v3', '827v3',
    '830v3', '833v3', '835v3', '838v3', '843v2', '845',
    '847', '848', '849', '823v3', '829v2', '844v2', '851']

In [61]:
class EEG_features:
    """Holds one participant's theta, alpha and beta value lists."""

    def __init__(self, PID):
        """Record ``PID`` and start every band with its own empty list."""
        self.PID = PID
        self.feature_names = ['theta(4-6Hz)', 'alpha(6-12Hz)', 'beta(12-30Hz)']
        for band in ('theta', 'alpha', 'beta'):
            setattr(self, band, [])

In [76]:
# Build one EEG_features record per participant from that participant's power file.
eeg_features = []
for PID in PIDs:
    print('Processing '+PID+'\n')
    fakepower = get_fakepower(PID)
    theta, alpha, beta = calculate_power(fakepower)
    eeg_feature = EEG_features(PID)
    eeg_feature.theta, eeg_feature.alpha, eeg_feature.beta = theta, alpha, beta
    eeg_features.append(eeg_feature)

Processing 810v3

Processing 817v4

Processing 818v4

Processing 822v3

Processing 824v3

Processing 826v3

Processing 827v3

Processing 830v3

Processing 833v3

Processing 835v3

Processing 838v3

Processing 843v2

Processing 845

Processing 847

Processing 848

Processing 849

Processing 823v3

Processing 829v2

Processing 844v2

Processing 851



In [77]:
# Flatten the per-participant band lists into one column per band,
# keeping the participant order of eeg_features.
theta, alpha, beta = [], [], []
for feature in eeg_features:
    theta += feature.theta
    alpha += feature.alpha
    beta += feature.beta
data = {'theta(4-6Hz)': theta, 'alpha(6-12Hz)': alpha, 'beta(12-30Hz)': beta}

In [72]:
import pandas as pd

In [78]:
eeg_df = pd.DataFrame(data)

In [79]:
eeg_df

Unnamed: 0,theta(4-6Hz),alpha(6-12Hz),beta(12-30Hz)
0,2.29789,3.45299,5.06147
1,2.50021,4.21554,4.31837
2,2.14248,3.09531,4.69511
3,1.76304,2.32613,4.32117
4,4.21173,3.72262,4.94232
5,2.03993,3.80984,4.24127
6,2.54657,1.99047,4.23138
7,1.60187,2.55393,2.96807
8,2.12162,1.56949,3.59906
9,2.20289,2.04790,3.11618


In [81]:
eeg_df.to_csv('eeg_feature.csv',index=False)

## Organize all the features

In [82]:
feature_df = pd.read_csv('features.csv')

In [83]:
feature_df.head()

Unnamed: 0,hr,ibi,scl_mean,scl_std,scr_rate,scr_mean,scr_std,scr_max,attention_pct,roi_pct,eye_pct,mouth_pct,PID,Session,theta(4-6Hz),alpha(6-12Hz),beta(12-30Hz)
0,105.3878,0.586992,0.135795,0.00262,4.8,0.002369,0.000856,0.003823,0.732833,0.630885,0.126677,0.504207,810v3,1,2.29789,3.45299,5.06147
1,109.702,0.551749,0.160803,0.002589,6.0,0.00205,0.001068,0.004093,0.571333,0.158985,0.055426,0.103559,810v3,2,2.50021,4.21554,4.31837
2,106.746,0.540129,0.170161,0.001952,6.0,0.001594,0.000397,0.002136,0.2245,0.274684,0.059391,0.215293,810v3,3,2.14248,3.09531,4.69511
3,103.9156,0.550163,0.149503,0.004595,1.2,0.009058,0.0,0.009058,0.155167,0.213749,0.136412,0.077336,810v3,4,1.76304,2.32613,4.32117
4,110.6926,0.534251,0.178208,0.00199,12.0,0.000901,0.000543,0.001931,0.482833,0.498447,0.276838,0.221609,810v3,5,4.21173,3.72262,4.94232


In [84]:
# Move the identifier columns to the front by name. Selecting by name (rather
# than by column position) keeps this cell idempotent: re-running it leaves the
# column order unchanged instead of reshuffling it again.
id_columns = ['PID', 'Session']
feature_df = feature_df[id_columns + [col for col in feature_df.columns if col not in id_columns]]

In [85]:
feature_df.head()

Unnamed: 0,PID,Session,hr,ibi,scl_mean,scl_std,scr_rate,scr_mean,scr_std,scr_max,attention_pct,roi_pct,eye_pct,mouth_pct,theta(4-6Hz),alpha(6-12Hz),beta(12-30Hz)
0,810v3,1,105.3878,0.586992,0.135795,0.00262,4.8,0.002369,0.000856,0.003823,0.732833,0.630885,0.126677,0.504207,2.29789,3.45299,5.06147
1,810v3,2,109.702,0.551749,0.160803,0.002589,6.0,0.00205,0.001068,0.004093,0.571333,0.158985,0.055426,0.103559,2.50021,4.21554,4.31837
2,810v3,3,106.746,0.540129,0.170161,0.001952,6.0,0.001594,0.000397,0.002136,0.2245,0.274684,0.059391,0.215293,2.14248,3.09531,4.69511
3,810v3,4,103.9156,0.550163,0.149503,0.004595,1.2,0.009058,0.0,0.009058,0.155167,0.213749,0.136412,0.077336,1.76304,2.32613,4.32117
4,810v3,5,110.6926,0.534251,0.178208,0.00199,12.0,0.000901,0.000543,0.001931,0.482833,0.498447,0.276838,0.221609,4.21173,3.72262,4.94232


In [86]:
feature_df.to_csv('all_features.csv',index=False)

## Average the features

In [1]:
import pandas as pd
import numpy as np

In [2]:
feature_df = pd.read_csv('all_features.csv')

In [3]:
feature_df.head()

Unnamed: 0,PID,Session,hr,ibi,scl_mean,scl_std,scr_rate,scr_mean,scr_std,scr_max,attention_pct,roi_pct,eye_pct,mouth_pct,theta(4-6Hz),alpha(6-12Hz),beta(12-30Hz)
0,810v3,1,105.3878,0.586992,0.135795,0.00262,4.8,0.002369,0.000856,0.003823,0.732833,0.630885,0.126677,0.504207,2.29789,3.45299,5.06147
1,810v3,2,109.702,0.551749,0.160803,0.002589,6.0,0.00205,0.001068,0.004093,0.571333,0.158985,0.055426,0.103559,2.50021,4.21554,4.31837
2,810v3,3,106.746,0.540129,0.170161,0.001952,6.0,0.001594,0.000397,0.002136,0.2245,0.274684,0.059391,0.215293,2.14248,3.09531,4.69511
3,810v3,4,103.9156,0.550163,0.149503,0.004595,1.2,0.009058,0.0,0.009058,0.155167,0.213749,0.136412,0.077336,1.76304,2.32613,4.32117
4,810v3,5,110.6926,0.534251,0.178208,0.00199,12.0,0.000901,0.000543,0.001931,0.482833,0.498447,0.276838,0.221609,4.21173,3.72262,4.94232


In [4]:
average_df = feature_df.drop(['Session'], axis=1)

In [5]:
average_df.head()

Unnamed: 0,PID,hr,ibi,scl_mean,scl_std,scr_rate,scr_mean,scr_std,scr_max,attention_pct,roi_pct,eye_pct,mouth_pct,theta(4-6Hz),alpha(6-12Hz),beta(12-30Hz)
0,810v3,105.3878,0.586992,0.135795,0.00262,4.8,0.002369,0.000856,0.003823,0.732833,0.630885,0.126677,0.504207,2.29789,3.45299,5.06147
1,810v3,109.702,0.551749,0.160803,0.002589,6.0,0.00205,0.001068,0.004093,0.571333,0.158985,0.055426,0.103559,2.50021,4.21554,4.31837
2,810v3,106.746,0.540129,0.170161,0.001952,6.0,0.001594,0.000397,0.002136,0.2245,0.274684,0.059391,0.215293,2.14248,3.09531,4.69511
3,810v3,103.9156,0.550163,0.149503,0.004595,1.2,0.009058,0.0,0.009058,0.155167,0.213749,0.136412,0.077336,1.76304,2.32613,4.32117
4,810v3,110.6926,0.534251,0.178208,0.00199,12.0,0.000901,0.000543,0.001931,0.482833,0.498447,0.276838,0.221609,4.21173,3.72262,4.94232


In [6]:
average_grouped = average_df.groupby(['PID'])

In [7]:
average_feature = average_grouped.mean()

In [9]:
average_feature.head()

Unnamed: 0_level_0,hr,ibi,scl_mean,scl_std,scr_rate,scr_mean,scr_std,scr_max,attention_pct,roi_pct,eye_pct,mouth_pct,theta(4-6Hz),alpha(6-12Hz),beta(12-30Hz)
PID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
810v3,107.777367,0.551937,0.163289,0.002554,6.0,0.002956,0.000562,0.00397,0.427889,0.34521,0.127358,0.217852,2.492547,3.437072,4.596618
817v4,140.829367,0.478599,0.142957,0.002521,8.0,0.001164,0.000419,0.001762,0.195639,0.295793,0.090181,0.205612,2.127132,2.200673,3.487972
818v4,118.875533,0.522861,0.128385,0.003751,2.0,0.009438,0.000366,0.009849,0.241333,0.41434,0.252128,0.162212,18.291843,14.408677,29.3494
822v3,109.9635,0.540282,0.206703,0.007165,8.2,0.00132,0.000528,0.002165,0.34975,0.52532,0.223071,0.302249,2.051462,2.233587,4.020437
823v3,110.95,0.532766,0.259277,0.00542,7.8,0.001567,0.000513,0.002419,0.254917,0.414784,0.198361,0.216423,2.2716,2.451798,3.52829


In [11]:
average_feature.to_csv('averaged_feature.csv')

In [2]:
participant_info = pd.read_excel('MADCAP participant data1.xlsx')

In [3]:
participant_info.to_csv('participant_info1.csv', index=False)

In [17]:
participant_info = pd.read_csv('participant_info.csv')

In [18]:
participant_info.head()

Unnamed: 0,Subject Code,Age,Sex,Group,Vineland SS comm,Vineland SS daily,Vinelnad ss social,Vineland SS motor,Vineland SS sum,Vineland ABC,...,responds to no,responds to there's parent,phrases understood,imitation,labeling,words understood,words produced,early gestures,later gestures,total gestures
0,846,"9months,22days",M,H,,,,,,,...,,,,,,,,,,
1,845,"14months,19days",F,H,,,,,,,...,1.0,1.0,18.0,0.0,0.0,43.0,2.0,13.0,9.0,22.0
2,847,"10months,28days",M,H,35.0,19.0,32.0,32.0,344.0,119.0,...,1.0,1.0,14.0,1.0,0.0,66.0,4.0,14.0,14.0,28.0
3,831v3,"15months,11days",F,L,26.0,12.0,25.0,28.0,264.0,84.0,...,1.0,1.0,9.0,1.0,0.0,27.0,1.0,,,
4,848,"7months,4days",M,L,26.0,13.0,26.0,26.0,264.0,84.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0


In [20]:
# Attach participant metadata to the per-participant feature means. The left key
# is the index of average_feature (PID, from the groupby) and the right key is
# the 'Subject Code' column. pd.merge defaults to an inner join, so participants
# present in only one of the two tables are dropped from `merged`.
merged = pd.merge(average_feature, participant_info, left_index=True, right_on='Subject Code')

In [21]:
merged

Unnamed: 0,hr,ibi,scl_mean,scl_std,scr_rate,scr_mean,scr_std,scr_max,attention_pct,roi_pct,...,responds to no,responds to there's parent,phrases understood,imitation,labeling,words understood,words produced,early gestures,later gestures,total gestures
11,107.777367,0.551937,0.163289,0.002554,6.0,0.002956,0.000562,0.00397,0.427889,0.34521,...,1.0,1.0,26.0,1.0,1.0,49.0,271.0,15.0,39.0,54.0
7,140.829367,0.478599,0.142957,0.002521,8.0,0.001164,0.000419,0.001762,0.195639,0.295793,...,1.0,1.0,27.0,1.0,1.0,127.0,205.0,17.0,35.0,52.0
14,118.875533,0.522861,0.128385,0.003751,2.0,0.009438,0.000366,0.009849,0.241333,0.41434,...,1.0,1.0,24.0,1.0,1.0,113.0,143.0,12.0,35.0,47.0
13,109.9635,0.540282,0.206703,0.007165,8.2,0.00132,0.000528,0.002165,0.34975,0.52532,...,1.0,1.0,20.0,0.0,0.0,105.0,5.0,14.0,6.0,20.0
8,110.95,0.532766,0.259277,0.00542,7.8,0.001567,0.000513,0.002419,0.254917,0.414784,...,1.0,1.0,23.0,1.0,1.0,127.0,71.0,14.0,19.0,33.0
6,166.115767,0.385285,0.420804,0.004437,2.6,0.010843,0.002064,0.013121,0.12525,0.100372,...,1.0,1.0,5.0,1.0,0.0,18.0,0.0,7.0,1.0,8.0
9,119.978567,0.514186,0.161267,0.003846,6.4,0.004306,0.00098,0.005976,0.237417,0.517745,...,1.0,1.0,10.0,1.0,0.0,106.0,5.0,9.0,6.0,15.0
20,123.190333,0.494546,0.093139,0.001441,8.0,0.001242,0.000429,0.001969,0.560611,0.323144,...,1.0,1.0,9.0,0.0,0.0,39.0,9.0,10.0,8.0,18.0
19,114.599967,0.525104,2.728979,0.109605,6.4,0.02027,0.006845,0.029237,0.166972,0.25254,...,,,,,,,,,,
18,111.019367,0.541708,0.238146,0.006187,5.2,0.002944,0.000937,0.004103,0.147806,0.058187,...,,,,,,,,,,


In [22]:
merged.columns

Index(['hr', 'ibi', 'scl_mean', 'scl_std', 'scr_rate', 'scr_mean', 'scr_std',
       'scr_max', 'attention_pct', 'roi_pct', 'eye_pct', 'mouth_pct',
       'theta(4-6Hz)', 'alpha(6-12Hz)', 'beta(12-30Hz)', 'Subject Code',
       'Age ', 'Sex', 'Group', 'Vineland SS comm ', 'Vineland SS daily',
       'Vinelnad ss social', 'Vineland SS motor', 'Vineland SS sum',
       'Vineland ABC', 'responds to name', 'responds to no',
       'responds to there's parent', 'phrases understood', 'imitation',
       'labeling', 'words understood', 'words produced', 'early gestures',
       'later gestures', 'total gestures'],
      dtype='object')

In [23]:
merged.to_csv('merged.csv',index=False)

In [24]:
feature_df.head()

Unnamed: 0,PID,Session,hr,ibi,scl_mean,scl_std,scr_rate,scr_mean,scr_std,scr_max,attention_pct,roi_pct,eye_pct,mouth_pct,theta(4-6Hz),alpha(6-12Hz),beta(12-30Hz)
0,810v3,1,105.3878,0.586992,0.135795,0.00262,4.8,0.002369,0.000856,0.003823,0.732833,0.630885,0.126677,0.504207,2.29789,3.45299,5.06147
1,810v3,2,109.702,0.551749,0.160803,0.002589,6.0,0.00205,0.001068,0.004093,0.571333,0.158985,0.055426,0.103559,2.50021,4.21554,4.31837
2,810v3,3,106.746,0.540129,0.170161,0.001952,6.0,0.001594,0.000397,0.002136,0.2245,0.274684,0.059391,0.215293,2.14248,3.09531,4.69511
3,810v3,4,103.9156,0.550163,0.149503,0.004595,1.2,0.009058,0.0,0.009058,0.155167,0.213749,0.136412,0.077336,1.76304,2.32613,4.32117
4,810v3,5,110.6926,0.534251,0.178208,0.00199,12.0,0.000901,0.000543,0.001931,0.482833,0.498447,0.276838,0.221609,4.21173,3.72262,4.94232


In [29]:
touched = feature_df[feature_df.Session.isin([1,3,5])]

In [30]:
touched.head()

Unnamed: 0,PID,Session,hr,ibi,scl_mean,scl_std,scr_rate,scr_mean,scr_std,scr_max,attention_pct,roi_pct,eye_pct,mouth_pct,theta(4-6Hz),alpha(6-12Hz),beta(12-30Hz)
0,810v3,1,105.3878,0.586992,0.135795,0.00262,4.8,0.002369,0.000856,0.003823,0.732833,0.630885,0.126677,0.504207,2.29789,3.45299,5.06147
2,810v3,3,106.746,0.540129,0.170161,0.001952,6.0,0.001594,0.000397,0.002136,0.2245,0.274684,0.059391,0.215293,2.14248,3.09531,4.69511
4,810v3,5,110.6926,0.534251,0.178208,0.00199,12.0,0.000901,0.000543,0.001931,0.482833,0.498447,0.276838,0.221609,4.21173,3.72262,4.94232
6,817v4,1,122.0338,0.526292,0.140083,0.002649,8.4,0.001264,0.000434,0.001924,0.486167,0.232088,0.074734,0.157353,2.54657,1.99047,4.23138
8,817v4,3,150.0094,0.493773,0.128473,0.002748,6.0,0.001594,0.000488,0.002026,0.2295,0.557734,0.035585,0.52215,2.12162,1.56949,3.59906


In [31]:
untouched = feature_df[feature_df.Session.isin([2,4,6])]

In [32]:
averge_touched = touched.groupby(['PID']).mean()

In [33]:
averge_touched.head()

Unnamed: 0_level_0,Session,hr,ibi,scl_mean,scl_std,scr_rate,scr_mean,scr_std,scr_max,attention_pct,roi_pct,eye_pct,mouth_pct,theta(4-6Hz),alpha(6-12Hz),beta(12-30Hz)
PID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
810v3,3,107.6088,0.553791,0.161388,0.002187,7.6,0.001622,0.000599,0.00263,0.480056,0.468005,0.154302,0.313703,2.884033,3.42364,4.899633
817v4,3,139.5692,0.510032,0.142398,0.002406,7.6,0.001231,0.000442,0.001848,0.250278,0.339103,0.107863,0.23124,2.26131,2.078683,4.012037
818v4,3,118.908533,0.524296,0.125394,0.002827,2.8,0.007439,0.000731,0.008262,0.225167,0.388198,0.323485,0.064712,17.853987,13.610487,31.9679
822v3,3,108.585533,0.538825,0.214477,0.006408,8.8,0.001047,0.000508,0.001882,0.299944,0.43291,0.178553,0.254357,2.435567,2.415307,4.590003
823v3,3,110.843467,0.530098,0.257205,0.005353,6.4,0.00177,0.000492,0.002636,0.240667,0.434605,0.190683,0.243922,2.056453,2.412233,3.66967


In [34]:
averge_touched.drop(['Session'],axis=1,inplace=True)

In [35]:
averge_touched.head()

Unnamed: 0_level_0,hr,ibi,scl_mean,scl_std,scr_rate,scr_mean,scr_std,scr_max,attention_pct,roi_pct,eye_pct,mouth_pct,theta(4-6Hz),alpha(6-12Hz),beta(12-30Hz)
PID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
810v3,107.6088,0.553791,0.161388,0.002187,7.6,0.001622,0.000599,0.00263,0.480056,0.468005,0.154302,0.313703,2.884033,3.42364,4.899633
817v4,139.5692,0.510032,0.142398,0.002406,7.6,0.001231,0.000442,0.001848,0.250278,0.339103,0.107863,0.23124,2.26131,2.078683,4.012037
818v4,118.908533,0.524296,0.125394,0.002827,2.8,0.007439,0.000731,0.008262,0.225167,0.388198,0.323485,0.064712,17.853987,13.610487,31.9679
822v3,108.585533,0.538825,0.214477,0.006408,8.8,0.001047,0.000508,0.001882,0.299944,0.43291,0.178553,0.254357,2.435567,2.415307,4.590003
823v3,110.843467,0.530098,0.257205,0.005353,6.4,0.00177,0.000492,0.002636,0.240667,0.434605,0.190683,0.243922,2.056453,2.412233,3.66967


In [36]:
averge_untouched = untouched.groupby(['PID']).mean()

In [37]:
averge_untouched.drop(['Session'],axis=1,inplace=True)

In [38]:
merged_averge_untouched = pd.merge(averge_untouched, participant_info, left_index=True, right_on='Subject Code')

In [39]:
merged_averge_touched = pd.merge(averge_touched, participant_info, left_index=True, right_on='Subject Code')

In [40]:
merged_averge_touched.to_csv('merge_touched.csv',index=False)

In [41]:
merged_averge_untouched.to_csv('merge_untouched.csv',index=False)

In [44]:
averge_untouched.to_csv('average_untouched.csv')

In [45]:
averge_touched.to_csv('average_touched.csv')