In [None]:
import pandas as pd
import os

# Root folder of the experiment data; '~' is expanded to the user's home.
data_dir = os.path.expanduser('~/Google Drive/Bas Zahy Gianni - Games/Data/4_rcn')

# Raw recordings live in one sub-folder per condition.
trained_dir = os.path.join(data_dir, 'Trained_Game')
untrained_dir = os.path.join(data_dir, 'Untrained_Game')

# Keep only the entries whose name ends in 'csv', in directory-listing order.
trained_files = [name for name in os.listdir(trained_dir) if name.endswith('csv')]
untrained_files = [name for name in os.listdir(untrained_dir) if name.endswith('csv')]

In [None]:
def reconstruct_boards(grouped, board_size=36):
    """Rebuild the board state after every move of one trial.

    Both boards start empty. The rows of ``grouped`` are applied in order:
    a row with ``color == 0`` moves for black, any other value moves for
    white. A move on a square that already holds the mover's own piece
    removes that piece; otherwise the mover's piece is placed on the square
    and an opposing piece on the same square, if any, is removed.

    Parameters
    ----------
    grouped : pandas.DataFrame
        Move records of a single trial with ``color`` and ``zet`` columns.
        ``zet`` is the 0-based square index and is cast to ``int``, so a
        float-typed column (e.g. ``3.0``) is accepted. The index of
        ``grouped`` is not used, so repeated labels are harmless.
    board_size : int, optional
        Number of squares in each board string. Defaults to 36.

    Returns
    -------
    pandas.DataFrame
        One row per input row, with a fresh 0..n-1 index and the columns
        ``bp`` and ``wp``: the black and white boards after that move, each
        a string of ``'0'``/``'1'`` characters of length ``board_size``.

    Raises
    ------
    ValueError
        If a ``zet`` value is missing (NaN) or not convertible to ``int``.
    IndexError
        If a ``zet`` value lies outside the board.
    """
    black = ['0'] * board_size
    white = ['0'] * board_size

    positions = []
    # Iterate the two columns positionally instead of looking rows up by label.
    for color, zet in zip(grouped['color'], grouped['zet']):
        own, other = (black, white) if color == 0 else (white, black)
        square = int(zet)

        if own[square] == '1':
            # Own piece already there: the move takes it off again.
            own[square] = '0'
        else:
            # Place own piece; a square never holds both colors, so clearing
            # the opponent's entry is a no-op when it was already empty.
            own[square] = '1'
            other[square] = '0'

        positions.append((''.join(black), ''.join(white)))

    return pd.DataFrame(positions, columns=['bp', 'wp'])

def clean_reconstruction_data(filename, trained=True):
    """Load one raw recording and return it with repaired fields.

    The file is read from ``data_dir/Trained_Game`` or
    ``data_dir/Untrained_Game`` (chosen by ``trained``) as a header-less CSV.
    Three repairs are applied:

    0. ``gi`` (game index) is discarded and recomputed: every ``'reconi'``
       status row starts a new game, numbered from 0, and the following rows
       inherit that number. Rows before the first ``'reconi'`` get NaN.
    1. ``bp``/``wp`` of ``'recon'`` rows are rebuilt from the move records with
       ``reconstruct_boards`` (one fresh board per game); ``'reconf'`` rows
       take the most recent board above them.
    2. ``rt`` becomes the difference between consecutive ``ts`` values, except
       on ``'eyecal'`` rows, which keep their recorded ``rt``.

    Parameters
    ----------
    filename : str
        Name of the CSV file inside the condition folder. The subject label
        is taken from the text after the first underscore, minus the last
        four characters (the extension).
    trained : bool, optional
        Selects the input folder and is stored in the ``trained`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``subject``, ``trained``, ``color``, ``gi``, ``mi``,
        ``status``, ``bp``, ``wp``, ``zet``, ``rt``.
    """
    # 0: Load data, correct game index field
    names = ['idx', 'subid', 'color', 'gi', 'mi', 'status', 'bp', 'wp', 'zet', 'rt', 'ts', 'mt', 'mx']
    directory = 'Trained_Game' if trained else 'Untrained_Game'
    # NOTE(review): bp/wp are read without an explicit dtype; if the files
    # store boards as digit strings pandas may parse them as numbers -- confirm.
    df = pd.read_csv(os.path.join(data_dir, directory, filename), names=names)
    df['subject'] = filename.split('_')[1][:-4]
    df['trained'] = trained

    status = df['status']
    # Running count of 'reconi' rows gives the 0-based game number directly.
    game_index = (status == 'reconi').cumsum() - 1
    before_first_game = game_index < 0
    if before_first_game.any():
        # Rows ahead of the first game belong to no game; mark them missing.
        game_index = game_index.mask(before_first_game)
    df['gi'] = game_index

    # 1: Boards recorded "0" inputs wrong; reconstruct them from moves
    is_move = status == 'recon'
    is_final = status == 'reconf'
    # Object dtype so board strings can be stored whatever dtype was parsed.
    fixed = df[['bp', 'wp']].astype(object)
    # Write each game's boards back by row label, so rows that fall in no
    # game (NaN gi, skipped by groupby) simply keep their recorded boards.
    for _, moves in df.loc[is_move].groupby('gi'):
        rebuilt = reconstruct_boards(moves)
        fixed.loc[moves.index, ['bp', 'wp']] = rebuilt[['bp', 'wp']].values
    # Blank the final-board rows so the forward fill copies the board above.
    fixed.loc[is_final, ['bp', 'wp']] = float('nan')
    fixed = fixed.ffill()

    # 2: Response times are goofed; reconstruct from timestamps
    rt = df['ts'].diff()
    rt = rt.where(status != 'eyecal', df['rt'])

    # 3: Store the corrected values; only recon/reconf rows get new boards
    df['rt'] = rt
    is_board = is_move | is_final
    for column in ('bp', 'wp'):
        df[column] = fixed[column].where(is_board, df[column])

    return df[['subject', 'trained', 'color', 'gi', 'mi', 'status', 'bp', 'wp', 'zet', 'rt']]

# Clean every recording and save it under Clean/Game: all trained files
# first, then all untrained files, named '<condition> <subject>.csv'.
export_jobs = (
    (trained_files, True, 'trained {subject}.csv'),
    (untrained_files, False, 'untrained {subject}.csv'),
)
for source_files, is_trained, name_template in export_jobs:
    for source_name in source_files:
        df = clean_reconstruction_data(source_name, trained=is_trained)
        # Subject label: text after the first underscore, extension removed.
        subject_label = source_name.split('_')[1][:-4]
        filename = name_template.format(subject=subject_label)
        df.to_csv(os.path.join(data_dir, 'Clean/Game', filename), index=False)