In [None]:
import numpy as np
import pandas as pd
import json
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import os

In [None]:
def load_data(filename,participant_id):
    """Load one participant's events from a headerless CSV file, ordered by time.

    The file is read from ``direc + filename`` (``direc`` is a module-level
    setting) and its four columns are named participant_id, i, ts and info.
    Each ``info`` cell of the rows belonging to ``participant_id`` is decoded
    from JSON.

    Returns:
        list: the decoded events sorted by their ``'event_time'`` entry;
        empty when no row matches ``participant_id``.
    """
    column_names = ['participant_id','i','ts','info']
    table = pd.read_csv(direc + filename, header=None, names=column_names)
    belongs_to_participant = table['participant_id'] == participant_id
    events = list(map(json.loads, table.loc[belongs_to_participant, 'info']))
    events.sort(key=lambda event: event['event_time'])
    return events

def get_events_with_condition(f,condition):
    """Return, in their original order, the events of ``f`` for which ``condition(event)`` is truthy."""
    matches = []
    for event in f:
        if condition(event):
            matches.append(event)
    return matches

def get_event_with_condition(f,condition):
    """Return the single event of ``f`` that satisfies ``condition``.

    When no event, or more than one event, satisfies the condition an empty
    dict is returned instead.
    """
    matches = get_events_with_condition(f,condition)
    return matches[0] if len(matches) == 1 else {}
    
def get_events_with_type(f,event_type):
    """Return all events of ``f`` whose ``'event_type'`` equals ``event_type``.

    Underscores and spaces are treated as the same character on both sides of
    the comparison, so 'your_turn' and 'your turn' match each other.
    """
    def has_requested_type(event):
        return event['event_type'].replace('_',' ') == event_type.replace('_',' ')

    return get_events_with_condition(f, has_requested_type)

def get_event_with_type(f,event_type):
    """Return the unique event of ``f`` with the given type (see get_event_with_condition).

    Underscores and spaces are treated as the same character when comparing
    type names.
    """
    def has_requested_type(event):
        return event['event_type'].replace('_',' ') == event_type.replace('_',' ')

    return get_event_with_condition(f, has_requested_type)

def check_event_counts(data,num_games):
    """Print the number of events of each type and assert that the counts are consistent.

    Event type names are normalised by replacing underscores with spaces, the
    same equivalence get_events_with_type applies, so logs that spell types as
    'start_game' are counted like 'start game'.

    Args:
        data: iterable of event dicts, each with a string ``'event_type'`` entry.
        num_games: number of games the log is expected to contain.

    Raises:
        AssertionError: if the number of 'start game' or 'end game' events
            differs from ``num_games`` (or either type is absent), if
            'user move' and 'your turn' counts differ, if 'opponent move' and
            'waiting for opponent' counts differ, or if the outcomes
            ('opponent win', 'user win', 'draw') do not add up to the number
            of started games.
    """
    event_count_dict = defaultdict(int)
    for e in data:
        event_count_dict[e['event_type'].replace('_',' ')] += 1
    # printed before any lookup below so that only types present in the data are shown
    print(dict(event_count_dict))
    assert 'start game' in event_count_dict and event_count_dict['start game']==num_games, \
        "expected %s 'start game' events, found %s" % (num_games, event_count_dict.get('start game', 0))
    assert 'end game' in event_count_dict and event_count_dict['end game']==num_games, \
        "expected %s 'end game' events, found %s" % (num_games, event_count_dict.get('end game', 0))
    assert event_count_dict['user move']==event_count_dict['your turn'], \
        "'user move' and 'your turn' counts differ"
    assert event_count_dict['opponent move']==event_count_dict['waiting for opponent'], \
        "'opponent move' and 'waiting for opponent' counts differ"
    outcomes = event_count_dict['opponent win'] + event_count_dict['user win'] + event_count_dict['draw']
    assert outcomes == event_count_dict['start game'], \
        "number of game outcomes does not match the number of started games"

In [None]:
# Folder that holds the exported data file. Keep the trailing slash: file paths
# are built by plain string concatenation.
# Locations used previously: 'C:/Users/groganj/OneDrive - Nexus365/Other/MTurk/',
# 'C:/Users/svo/Documents/FourinarowData/'
direc = "C:/Users/groganj/Documents/Work/Experiments/Results/FourInARow/"

# Participant ids to analyze, as they appear in the first column of the data file.
# Ids used previously: '109:R_1NurOLY6ru1XwrL', 'R_10pkDu4x8ARiMHf', 'R_cOwNfKZ17YDGcGR'
usernames = ["debugDGQRE9:debugR28GMK"]

# Name of the data file inside `direc` (an earlier run used 'Pilotdata_8yo.csv').
filename = "trialdata.csv"

# Expected number of games per participant: 20 main games plus 2 practice games.
numGames = 2 + 20

In [None]:
# One time-ordered event list per entry of `usernames`, in the same order.
data = [load_data(filename=filename, participant_id=participant) for participant in usernames]

In [None]:
# Sanity-check every participant's event log.
# numGames is the number of games each participant is expected to have played;
# if an assertion fails, something is wrong with that participant's data.
# The count of every event type in the data is printed along the way.
for d in data:
    check_event_counts(data=d, num_games=numGames)

In [None]:
#helper functions that encode boards and moves as integers for the model fitting pipeline
def encode_board(pieces):
    """Encode a board as an integer bit mask.

    Position ``i`` of ``pieces`` contributes ``2**i`` to the result when the
    character at that position is '1'.

    Returns:
        numpy.int64: the sum of the contributions (0 for a board without '1').
    """
    bit_values = [1 << position for position, cell in enumerate(pieces) if cell == '1']
    return np.sum(bit_values).astype(np.int64)

def encode_move(m):
    """Encode the move on square ``m`` as the integer ``2**m`` (a single set bit)."""
    return pow(2, m)

In [None]:
def display_board(bp,wp,move,color):
    """Draw a board of 4 rows by 9 columns and highlight one square.

    Args:
        bp: indexable of at least 36 characters; a '1' at index i draws a
            black piece on square i.
        wp: same layout as ``bp``, drawn as white pieces.
        move: square index that is marked with an unfilled circle.
        color: matplotlib color of that outline.

    Square i is drawn in column ``i % 9`` and row ``i // 9``, with row 0 at
    the top of the figure. The figure is shown with ``plt.show()``.
    """
    def square_center(square):
        # y is flipped so that the first nine squares form the top row
        row, column = divmod(square, 9)
        return (column, 3 - row)

    figure = plt.figure()
    axes = figure.add_subplot(111, aspect='equal')

    # gray background with grid lines on the borders between squares
    background = patches.Rectangle((-0.5,-0.5), 9, 4, color="gray", fill=True, zorder=1)
    axes.add_patch(background)
    axes.vlines(np.arange(-0.5,9.5,1), -0.5, 3.5)
    axes.hlines(np.arange(-0.5,4.5,1), -0.5, 8.5)

    for square in range(36):
        for occupancy, piece_color in ((bp, "black"), (wp, "white")):
            if occupancy[square] == '1':
                axes.add_patch(patches.Circle(square_center(square), 0.33, color=piece_color, fill=True))

    # outline of the highlighted move
    axes.add_patch(patches.Circle(square_center(move), 0.33, color=color, fill=False))
    axes.axis('off')
    plt.show()

In [None]:
def get_parsed_data(data):
    """Extract one record per user move from a participant's event list.

    The i-th 'your turn' event is paired with the i-th 'user move' event; the
    two lists must have the same length (asserted).

    Returns:
        list of tuples ``(bp, wp, tile, user_color, elapsed)`` where the first
        four values come from the move's ``'event_info'`` and ``elapsed`` is
        the difference between the two events' ``'event_time'`` divided by
        1000 (presumably milliseconds converted to seconds; confirm against
        the logging code).
    """
    turn_starts = get_events_with_type(data,'your turn')
    moves = get_events_with_type(data,'user move')
    assert(len(turn_starts)==len(moves))
    parsed = []
    for turn_start, move in zip(turn_starts, moves):
        info = move['event_info']
        parsed.append((info['bp'], info['wp'], info['tile'], info['user_color'],
                       (move['event_time'] - turn_start['event_time'])/1000))
    return parsed

In [None]:
def _frame_to_tsv(df):
    """Return ``df`` as tab-separated text, one row per line, without header, index or final newline."""
    options = dict(index=False, header=False, sep='\t')
    try:
        text = df.to_csv(None, lineterminator='\n', **options)
    except TypeError:
        # pandas older than 1.5 only accepts the previous spelling of the keyword
        text = df.to_csv(None, line_terminator='\n', **options)
    # drop the newline that terminates the last row
    return text[:-1]


def save_data(data, seed=None):
    """Write every participant's parsed moves as tab-separated files below ``direc``.

    ``data`` is paired element-wise with the module-level ``usernames``; the
    part of each username before the first ':' is used as participant label.
    For the participant at position i (counting from 0) these files are written:

    * ``raw/<label>.csv``: encoded black board, encoded white board,
      upper-cased user color, encoded move, response time, label.
    * ``splits/<i+1>/data.csv``: the same columns with a group number (1 to 5)
      inserted before the label. Groups are assigned through a random
      permutation of the moves.
    * ``splits/<i+1>/<g>.csv`` for g = 1..5: the rows of ``data.csv`` that
      belong to group g.

    Missing directories are created.

    Args:
        data: list with one event list per participant.
        seed: optional seed for the group assignment. With the default
            ``None`` numpy's global random state is used.
    """
    raw_dir = direc + 'raw/'
    splits_dir = direc + 'splits/'
    os.makedirs(raw_dir, exist_ok=True)
    os.makedirs(splits_dir, exist_ok=True)
    rng = np.random if seed is None else np.random.RandomState(seed)
    for i,(d,username) in enumerate(zip(data,usernames)):
        participant = username.split(':')[0]
        rows = [(encode_board(bp),encode_board(wp),c.upper(),encode_move(m),rt)
                for bp,wp,m,c,rt in get_parsed_data(d)]

        # explicit column labels keep the frames well-formed when there are no moves
        raw_df = pd.DataFrame([row + (participant,) for row in rows], columns=range(6))
        with open(raw_dir + participant + '.csv','w') as f:
            f.write(_frame_to_tsv(raw_df))

        # random assignment of every move to one of the groups 1..5
        group = (5*(rng.permutation(len(rows))/len(rows))).astype(int)+1
        split_dir = splits_dir + str(i+1)
        os.makedirs(split_dir, exist_ok=True)
        # built once and reused for data.csv and for the five per-group files
        split_df = pd.DataFrame([row + (g, participant) for row,g in zip(rows,group)], columns=range(7))
        with open(split_dir + '/data.csv','w') as f:
            f.write(_frame_to_tsv(split_df))
        for g in range(1,6):
            with open(split_dir + '/' + str(g) + '.csv','w') as f:
                f.write(_frame_to_tsv(split_df[split_df[5]==g]))


In [None]:
# Write the raw and the five-group split files for every loaded participant.
save_data(data=data)

In [None]:
# Consistency check on every recorded position: the number of black pieces (bp)
# must equal the number of white pieces (wp) or exceed it by exactly one.
# NOTE(review): this presumes black always moves first - confirm against the task.
for d in data:
    for bp,wp,m,c,rt in get_parsed_data(d):
        assert bp.count('1')-wp.count('1') in [0,1], \
            "inconsistent piece counts: bp=%s wp=%s" % (bp, wp)

In [None]:
# Prints every parsed move and draws its board: one figure per move, so the
# output of this cell is very long.
for d in data:
    for record in get_parsed_data(d):
        print(*record)
        display_board(*record[:4])

In [None]:
# Number of parsed user moves for each participant.
[len(get_parsed_data(participant_events)) for participant_events in data]

In [None]:
# NOTE(review): this prints the repr of the os.path module itself, not a
# directory or file path. It looks like leftover debugging - confirm and remove.
print(os.path)

In [None]:
# 