In [None]:
import pandas as pd
import numpy as np
import os

# All relative paths used further down (e.g. './5_tai') are resolved against
# this directory, so switch to it before anything is loaded.
DATA_ROOT = '/Users/gianni/Google Drive/Bas Zahy Gianni - Games/Data'
os.chdir(DATA_ROOT)

In [None]:
# Column names assigned to the raw csv files when they are read
# (passed as ``names=`` to ``pd.read_csv``).
oc = (
    'index subject color gi mi '
    'status bp wp response rt '
    'time mouse_t mouse_x'
).split()

# Columns, in output order, written to the 'all_fields.csv' summary.
fc = (
    'subject is_comp color status '
    'bp wp response rt gi mi '
    'computer human time'
).split()

# Columns, in output order, written to the 'model_fields.csv' summary.
mc = 'subject color bp wp response rt condition'.split()

class Data:
    """Hold the experimental data table and the routines that load, clean and export it.

    Relies on the module-level column lists ``oc`` (raw csv column names),
    ``fc`` (full export columns) and ``mc`` (model export columns).

    Attributes set while loading:
        exp_name: the experiment folder passed to ``load``.
        subjects: subject labels, one per raw file (file name without extension).
        subject_dict: maps each subject label to an integer id.
        data: the cleaned concatenation of all raw files.
    """

    # Row statuses that are discarded right after a raw file is read.
    _NON_OBSERVATIONS = ['dummy', 'ready', 'draw offer']
    # Row statuses whose move index is re-based in ``clean`` and that are
    # written out by ``export``.
    _GAME_STATUSES = ['playing', 'win', 'draw', 'timeout']

    def __init__(self, folder):
        """Load and clean every raw file found under ``folder``/Raw."""
        self.data = self.load(folder)

    @staticmethod
    def _count_pieces(board):
        """Return the sum of the digit characters in a board string."""
        return sum(int(square) for square in board)

    def load_file(self, folder, file_name, mouse=False):
        """Read one raw csv file and prepare it for concatenation.

        Args:
            folder: directory that contains the file.
            file_name: name of the csv file; the name without its extension
                is used as the subject label.
            mouse: when True the ``mouse_t`` / ``mouse_x`` columns are kept.

        Returns:
            DataFrame without the non-observation rows and with the
            ``condition``, ``human`` and ``computer`` columns added.
        """
        label = os.path.splitext(file_name)[0]
        print(label)

        # load file, drop nuisance columns, remove non-observations
        drop_cols = ['index'] if mouse else ['index', 'mouse_t', 'mouse_x']
        data = pd.read_csv(
            os.path.join(folder, file_name),
            names=oc,
            # boards are strings of digit characters; never let them be
            # parsed as numbers (that would drop leading zeros)
            dtype={'bp': str, 'wp': str},
        ).drop(drop_cols, axis=1)
        keep = ~data.status.isin(self._NON_OBSERVATIONS)
        data = data.loc[keep, :].reset_index(drop=True)

        is_computer = data.rt == 0
        black_pieces = data.bp.map(self._count_pieces)
        first_move = black_pieces == 1
        second_move = black_pieces == 2

        # the condition is the last character of the raw subject field on
        # human 'playing' rows; every other row inherits the previous value
        human_playing = (data.rt > 0) & (data.status == 'playing')
        condition = pd.Series(np.nan, index=data.index, dtype=object)
        condition[human_playing] = data.loc[human_playing, 'subject'].map(lambda s: s[-1])
        data['condition'] = condition.ffill()

        # assign unique subject label (from filename) and create separate
        # cols for humans and computers
        data.loc[data.rt > 0, 'subject'] = label
        data['human'] = label
        data['computer'] = data['subject'].where(is_computer)
        # NOTE(review): pandas aligns the right-hand side on index labels and
        # the two masks never select the same row, so this appears to blank
        # the first-move rows (later forward-filled) rather than copy the
        # value from the second-move rows. Behaviour kept as is - confirm intent.
        data.loc[first_move, 'computer'] = data.loc[second_move, 'computer']
        data['computer'] = data['computer'].ffill()
        if len(data) > 1:
            # nothing precedes the first row to forward-fill from
            data.loc[0, 'computer'] = data.loc[1, 'computer']

        return data

    def load(self, folder):
        """Load every csv file in ``folder``/Raw and return the cleaned table.

        Also records ``exp_name``, ``subjects`` and ``subject_dict`` on the
        instance.

        Raises:
            ValueError: if the Raw folder contains no csv file.
        """
        self.exp_name = folder
        raw_dir = os.path.join(folder, 'Raw')
        # os.listdir returns entries in arbitrary order; sort so that the
        # integer ids handed out below are reproducible
        files = sorted(f for f in os.listdir(raw_dir) if f.endswith('.csv'))
        if not files:
            raise ValueError('No csv files found in {}'.format(raw_dir))

        self.subjects = [os.path.splitext(f)[0] for f in files]
        self.subject_dict = {name: i for i, name in enumerate(self.subjects)}

        frames = [self.load_file(raw_dir, f) for f in files]
        data = pd.concat(frames, ignore_index=True)

        return self.clean(data)

    def clean(self, df):
        """Apply the cleaning steps that work on all files collectively.

        ``df`` is modified in place and returned. Steps: replace subject
        labels by integer ids, offset computer ids by 1000, remove the
        response from the mover's board, recompute ``color`` from the piece
        counts, add ``is_comp`` and re-base the move index of game rows.
        """
        # these columns hold labels first and integer ids afterwards
        df['subject'] = df['subject'].astype(object)
        df['computer'] = df['computer'].astype(object)

        # anonymize subjects (rows with rt > 0 are human moves)
        human_rows = df.rt > 0
        df.loc[human_rows, 'subject'] = df.loc[human_rows, 'subject'].map(self.subject_dict)
        df['human'] = df['human'].map(self.subject_dict)

        # give computers identifiable names
        computer_rows = df.rt == 0
        df.loc[computer_rows, 'subject'] = df.loc[computer_rows, 'subject'].astype(int) + 1000
        has_computer = df.computer.notnull()
        df.loc[has_computer, 'computer'] = df.loc[has_computer, 'computer'].astype(int) + 1000

        # force remove response from board: clear the responded square on
        # the black board when color == 0, on the white board otherwise
        for i in df.index[df.status != 'EVAL']:
            col = 'bp' if df.at[i, 'color'] == 0 else 'wp'
            board = list(df.at[i, col])
            board[int(df.at[i, 'response'])] = '0'
            df.at[i, col] = ''.join(board)

        # force correct colors: black count minus white count, as a string
        black = df['bp'].map(self._count_pieces)
        white = df['wp'].map(self._count_pieces)
        df['color'] = (black - white).astype(int).astype(str)

        # add is_comp
        df['is_comp'] = df['rt'].map(lambda rt: "0" if rt > 0 else "1")

        # correct move index in games; the same mask must be used on both
        # sides, otherwise rows missing on the right receive NaN
        in_game = df.status.isin(self._GAME_STATUSES)
        df.loc[in_game, 'mi'] = df.loc[in_game, 'mi'] - 1
        return df

    def export_individuals(self, folder):
        """Write one csv per subject to ``folder``/Clean, named after the subject label."""
        out_dir = os.path.join(folder, 'Clean')
        os.makedirs(out_dir, exist_ok=True)
        for label, subject_id in self.subject_dict.items():
            rows = self.data.loc[self.data.human == subject_id, :].reset_index(drop=True)
            # NOTE(review): the full export column list ``fc`` is assumed to
            # be the intended column order here - confirm.
            rows = rows.reindex(columns=fc)
            rows.to_csv(os.path.join(out_dir, label + '.csv'), index=False)

    def export(self, folder):
        """Write the game rows to ``folder``/Clean/_summaries.

        Two files are produced: 'all_fields.csv' (columns ``fc``) and
        'model_fields.csv' (columns ``mc``). ``folder`` may be given with or
        without a trailing slash.
        """
        out_dir = os.path.join(folder, 'Clean', '_summaries')
        os.makedirs(out_dir, exist_ok=True)
        games = self.data.loc[self.data.status.isin(self._GAME_STATUSES), :]
        games.loc[:, fc].to_csv(os.path.join(out_dir, 'all_fields.csv'), index=False)
        games.loc[:, mc].to_csv(os.path.join(out_dir, 'model_fields.csv'), index=False)

In [None]:
# Load and clean the '5_tai' experiment, then write its summary tables.
experiment = './5_tai'
D = Data(experiment)

# export() appends 'Clean/_summaries/' directly, hence the trailing slash
D.export(experiment + '/')