In [45]:
import numpy as np
import pandas as pd
import os

In [57]:
# Scratch check: run the same per-file cleaning steps that Aggregator.load_file
# applies, on a single raw file, so the intermediate frame can be inspected.
# NOTE: os.listdir() returns names in arbitrary order, so files[2] is not
# guaranteed to be the same file on every machine.
files = os.listdir('../../../Data/0_hvh/Raw/')
print(files[2])
column_names = ['idx', 'gi', 'mi', 'status', 's1', 'color', 'response', 'bp', 'wp', 'rt', 'time', 'ip']
keepers = ['subject', 'color', 'bp', 'wp', 'response', 'rt']
data = pd.read_csv('../../../Data/0_hvh/Raw/' + files[2], names=column_names)
# Keep only rows with a recognised game status; .copy() gives an independent
# frame so the column assignments below do not write into a slice of the
# original (avoids SettingWithCopyWarning / silently lost writes).
data = data.loc[data.status.isin(['in progress', 'win', 'draw']), :].copy()
# The first two distinct ip values become subjects 0 and 1; any further
# distinct ip has no entry in the map and ends up as NaN.
smap = dict(zip(data.ip.unique(), (0, 1)))
data['subject'] = data.ip.map(smap)
# Combined length of the two position strings, used as a sanity filter.
data['bl'] = [len(b + w) for b, w in zip(data.bp, data.wp)]
# Same validity rule as Aggregator.load_file (<= 72); the previous strict
# "< 72" disagreed with the class and rejected rows of combined length 72.
valids = (data.response < 36) & (data.bl <= 72)
data = data.loc[valids, keepers]

AG_SQ_040814_RAW.csv


In [133]:
class Aggregator():
    """Load raw game CSV files from a directory and clean them for modelling.

    Construction lists every file in ``directory`` whose name ends in ``csv``,
    loads each one with :meth:`load_file`, and concatenates the retained rows
    into ``self.data``.  :meth:`format_data` then encodes the ``color`` column
    numerically, blanks the response square in the mover's position string and
    drops rows whose piece counts are inconsistent with the mover.

    Attributes:
        directory: Folder containing the raw files.
        column_names: Names passed to ``pd.read_csv(names=...)`` for raw files.
        keepers: Columns retained in ``self.data``.
        model_keepers: Column subset (and order) used for the model input file.
        files: Sorted list of raw file names found in ``directory``.
        data: DataFrame of retained rows from all files.
    """

    def __init__(self, directory):
        """Record configuration, discover the raw files and load them.

        Args:
            directory: Path of the folder holding the raw CSV files; a trailing
                path separator is optional.

        Raises:
            ValueError: If ``directory`` contains no file ending in ``csv``.
        """
        self.directory = directory
        self.column_names = ['idx', 'gi', 'mi', 'status', 's1', 'color', 'response', 'bp', 'wp', 'rt', 'time', 'ip']
        self.keepers = ['gi', 'color', 'bp', 'wp', 'response', 'rt', 'time', 'subject']
        self.model_keepers = ['subject', 'color', 'bp', 'wp', 'response', 'rt']

        self.files = self.get_files(self.directory)
        self.data = self.load_data()

    @staticmethod
    def get_npiece(p):
        """Return the sum of the digit characters in position string ``p``."""
        return np.array(list(p)).astype(int).sum()

    def get_files(self, directory):
        """Return the names in ``directory`` that end in ``csv``, sorted.

        ``os.listdir`` gives names in arbitrary order; sorting keeps the
        per-file subject ids assigned in :meth:`load_data` reproducible.
        """
        return sorted(f for f in os.listdir(directory) if f[-3:] == 'csv')

    def load_file(self, fname):
        """Read one raw file and return its valid rows restricted to ``keepers``.

        Args:
            fname: File name relative to ``self.directory``.

        Returns:
            DataFrame with the ``self.keepers`` columns.  ``subject`` is 0 or 1
            according to the order in which ``ip`` values first appear.
        """
        path = os.path.join(self.directory, fname)
        # Position columns are digit strings; force str so pandas never parses
        # them as integers (which would strip leading zeros).
        data = pd.read_csv(path, names=self.column_names, dtype={'bp': str, 'wp': str})
        # .copy() so the new columns are written to an independent frame.
        data = data.loc[data.status.isin(['in progress', 'win', 'draw']), :].copy()
        # Only the first two distinct ips get an id; any further ip maps to NaN.
        smap = dict(zip(data.ip.unique(), (0, 1)))
        data['subject'] = data.ip.map(smap)
        # Combined length of both position strings, used as a sanity filter.
        data['bl'] = [len(b + w) for b, w in zip(data.bp, data.wp)]
        return data.loc[(data.response < 36) & (data.bl <= 72), self.keepers]

    def load_data(self):
        """Load every file in ``self.files`` and concatenate the results.

        The i-th file's subject ids are shifted by ``2 * i`` so that ids from
        different files do not collide.

        Raises:
            ValueError: If there are no files to load.
        """
        if not self.files:
            raise ValueError("no csv files found in %r" % (self.directory,))
        D = []
        for i, f in enumerate(self.files):
            d = self.load_file(f)
            # assign() returns a new frame instead of mutating a slice.
            D.append(d.assign(subject=d.subject + 2 * i))
        return pd.concat(D).reset_index(drop=True)

    def format_data(self):
        """Encode colours, blank the response square and drop inconsistent rows.

        Steps, applied to ``self.data`` (which is replaced, not mutated):

        1. ``color`` 'B'/'W' becomes 0/1.
        2. For rows with color 0 the character of ``bp`` at index ``response``
           is set to '0'; for rows with color 1 the same is done on ``wp``.
           (Presumably this restores the board as it was before the recorded
           move -- confirm against the data collection code.)
        3. Rows are kept only when ``get_npiece(bp) - get_npiece(wp)`` equals
           ``color``.

        Calling the method again on already formatted data leaves it unchanged.
        """
        # Accept already encoded 0/1 as well, so a second call is a no-op
        # instead of turning every colour into NaN.
        color_codes = {'B': 0, 'W': 1, 0: 0, 1: 1}
        data = self.data.copy()
        data['color'] = data.color.map(color_codes)

        def correct_position(position, response):
            squares = list(position)
            squares[int(response)] = '0'
            return ''.join(squares)

        # One pass per position column; only rows whose mover owns that column
        # are rewritten (NaN colours match neither code and are left as is).
        for code, column in ((0, 'bp'), (1, 'wp')):
            data[column] = [
                correct_position(p, r) if c == code else p
                for p, r, c in zip(data[column], data.response, data.color)
            ]

        pdif = data.bp.map(self.get_npiece) - data.wp.map(self.get_npiece)
        cleaned = data.loc[data.color == pdif, self.keepers].reset_index(drop=True)
        # Surviving rows all have a numeric colour; store it as an integer even
        # if unmapped colours had forced the column to float.
        self.data = cleaned.assign(color=cleaned.color.astype('int64'))
        

In [136]:
# Load every raw file, then clean the combined table in place on the object.
data = Aggregator('../../../Data/0_hvh/Raw/')
data.format_data()

# Write only the model_keepers columns, without header or index.
data.data.loc[:, data.model_keepers].to_csv(
    '../../../Data/0_hvh/Clean/_summaries/model_input.csv',
    sep=',',
    header=False,
    index=False,
)