In [32]:
import pandas as pd
import numpy as np
import os, glob
from collections import deque
from itertools import combinations
from helper_funcs import bucket_remap,process_board,get_attributes

# Disable pandas' display truncation: a limit of None means every row and
# every column is shown when a DataFrame is printed or displayed.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

class hl_model():
    """Loads recorded human game-play from CSV files and trains bandit models on it.

    Construction imports the player records found under an experiment directory
    into ``self.main_df``.  ``experiment_train`` then walks through every player
    in that frame and calls ``player_train`` for each one, which (re)builds one
    ``human_bandit_model`` per feature combination.

    Attributes set by ``__init__``:
        main_df: concatenation of all accepted, pre-processed player files.
        debug_df: copy of the last accepted player file (debugging aid).
        model_features: names of the features the underlying models may use.
        combination_types: sizes of feature combinations to build
            (1 -> unary, 2 -> binary combinations, etc.).
        feature_combinations, models: empty until ``init_models`` runs.
    """

    def __init__(self,exp_dir,rules,max_files=2):
        """Import the experiment data and set the model configuration.

        Args:
            exp_dir: directory searched recursively for player ``.csv`` files.
            rules: forwarded to ``data_import`` (which currently does not use it).
            max_files: maximum number of CSV files to read; the default of 2
                keeps the previous hard-coded limit, ``None`` reads every file.
        """
        # Gather data associated with experiment
        self.data_import(exp_dir,rules,max_files)

        # Model configuration
        # List of features to be used for the underlying models
        #self.model_features = ['shape','color','row','col','quadrant','cell','bucket1','bucket2','bucket3','bucket4']
        self.model_features = ['shape','color']
        # Types of feature combinations to be used (1->unary, 2->binary combinations, etc.)
        self.combination_types = [1]
        # Start with empty containers so present_models() is safe to call before training
        self.feature_combinations = []
        self.models = []
        #self.init_models()

    # Utility for returning a tuple of all feature dimensions in a tuple of features
    def calc_dim(self,feat_arr):
        """Return the ``input_space`` of every feature in ``feat_arr`` as a tuple.

        Requires ``init_env_information`` to have populated ``self.feature_info``.
        Raises ``KeyError`` for a feature name that is not in ``feature_info``.
        """
        return tuple(self.feature_info[feat]['input_space'] for feat in feat_arr)

    # In the RL model, much of this information is handled in the environment class
    def init_env_information(self):
        """Create the feature lookup table and the bounded move/board history queues."""
        # Store all information about how to process each feature in following dictionary
        self.feature_info = {'shape':{'input_space':4,'col_name':'shape_ind'},
                        'color':{'input_space':4, 'col_name':'color_ind'}}
                    #    'move_row':{'input_space':self.board_size, 'func':get_row,'reference':1},
                    #    'move_col':{'input_space':self.board_size, 'func':get_col,'reference':1},
                    #    'row':{'input_space':self.board_size, 'func':get_row,'reference':0},
                    #    'col':{'input_space':self.board_size, 'func':get_col,'reference':0},
                    #    'quadrant':{'input_space':4,'func':get_quadrant,'reference':None},
                    #    'cell':{'input_space':self.board_size*self.board_size,'func':get_cell,'reference':None},
                    #    'bucket1':{'input_space':self.bucket_space+1,'func':get_bucket,'reference':1},
                    #    'bucket2':{'input_space':(self.bucket_space+1)**2,'func':get_bucket,'reference':2},
                    #    'bucket3':{'input_space':(self.bucket_space+1)**3,'func':get_bucket,'reference':3},
                    #    'bucket4':{'input_space':(self.bucket_space+1)**4,'func':get_bucket,'reference':4}}

        # Set up lists for storing past actions and board states (each keeps at most 50 entries)
        self.reduced_move_list=deque([],50)
        self.board_list=deque([],50)

    # Import relevant player files
    def data_import(self,exp_dir,rules,max_files=2):
        """Read, filter and pre-process player CSV files into ``self.main_df``.

        Args:
            exp_dir: directory searched recursively for ``.csv`` files.
            rules: accepted for interface compatibility; not used here (the rule
                names are read from each file's ``#ruleSetName`` column instead).
            max_files: maximum number of CSV files to read, taken from the sorted
                file list.  Defaults to 2 (the former hard-coded limit); ``None``
                reads every file.

        Files are skipped when they are empty, when their first ``seriesNo`` is
        not 0, or when they contain more than one player or more than one rule
        set (an error message is printed for the last two cases).

        Raises:
            ValueError: if no file survives the filters, so there is nothing to
                concatenate.
        """
        # Initialize list for concatenating player records
        df_list = []
        # Create a list containing all csv's in the given directory.  glob returns
        # paths in arbitrary order, so sort to make the max_files subset reproducible.
        csv_list = sorted(glob.glob("**/*.csv",root_dir=exp_dir, recursive=True))

        # Loop over the files
        for item in csv_list[:max_files]:
            # Construct the import path and read data
            import_path = os.path.join(exp_dir,item)
            df = pd.read_csv(import_path)

            # A file without rows has no series number to inspect
            if df.empty:
                continue

            # Only work with data from a player's first encounter with the game
            series_num = df['seriesNo'].unique()[0]
            if series_num != 0:
                continue

            # Check the contained rules for multiple rules/players.
            # A malformed file is reported and skipped; the remaining files are still imported.
            file_rules = df["#ruleSetName"].unique()
            players = df['playerId'].unique()
            if len(players)>1:
                print('------ERROR-----')
                print("File contains more than one player: ",import_path)
                print('----------------')
                continue
            if len(file_rules)>1:
                print('------ERROR-----')
                print("Error reading the rules, multiple rules in file: ",import_path)
                print('----------------')
                continue

            # Get rid of finger slips (assumed when the player grabs a movable piece but misses putting it into a bucket)
            finger_slips = (df.code == 0)&(df.bx.isna())&(df.by.isna())
            # Filter and re-index in one step so the result is a fresh frame, not a view of the raw data
            df = df.loc[~finger_slips].reset_index(drop=True)
            if df.empty:
                continue
            # Set the move number to be the corrected index
            df['move']=df.index

            # Add processed columns (helpers come from helper_funcs; see that module for their semantics)
            df['bucket']=df.apply(lambda x: bucket_remap(x['by'],x['bx']),axis=1)
            df['proc_board']=df.apply(lambda x: process_board(x['board']),axis=1)
            df[['shape','color','shape_ind','color_ind','id','cell']]=df.apply(lambda x: get_attributes(x['proc_board'],x['y'],x['x']),axis=1,result_type='expand')
            # df[['shape0','shape1','shape2','shape3','color0','color1','color2','color3',
            #     'c1','c2','c3','c4','c5','c6','c7','c8','c9','c10','c11','c12',
            #     'c13','c14','c15','c16','c17','c18','c19','c20','c21','c22','c23','c24',
            #     'c25','c26','c27','c28','c29','c30','c31','c32','c33','c34','c35','c36']] = df.copy().apply(lambda x:calc_availability(x['proc_board'],shape_order,color_order),axis=1,result_type='expand')

            # Column cleanup
            df = df.rename(columns = {'orderInSeries':'episode','playerId':'player'})
            df = df.drop(columns=['#ruleSetName','seriesNo','precedingRules','timestamp',
                                  'episodeId','experimentPlan','trialListId','board','p0','by','bx',
                                  'moveNo'])

            df_list.append(df)

            # Set for debugging
            self.debug_df = df.copy()

        # pd.concat raises an opaque "No objects to concatenate" on an empty list; say why instead
        if not df_list:
            raise ValueError("No usable player files found under {!r}".format(exp_dir))

        # Concatenate into a single dataframe
        self.main_df = pd.concat(df_list,verify_integrity=True,ignore_index=True)

    def init_models(self):
        """Build one ``human_bandit_model`` per feature combination.

        Resets the environment information first, so any previously built
        models and history queues are replaced.
        """
        # Set up information about the environment
        self.init_env_information()

        # Initialize a list of all feature combinations to be used and populate iteratively using the combinations tool
        # Result will be a list of tuples of feature strings
        self.feature_combinations = []
        for r in self.combination_types:
            self.feature_combinations.extend(combinations(self.model_features,r))

        # Construct a list of models, each of which is constructed with a feature tuple and a dimension tuple
        self.models=[human_bandit_model(feat_arr,self.calc_dim(feat_arr)) for feat_arr in self.feature_combinations]

    # Debugging utility for presenting models and associated q values
    def present_models(self):
        """Print each model's features, feature dimensions and q-table."""
        for model in self.models:
            print(model.feats,model.feat_dims)
            print(model.q_values)

    def player_train(self,player):
        """Train on a single player's experience.

        Currently this only re-initialises the models and prints every row of
        the player's records; no learning step is performed yet.
        """
        # Train on a player's experience
        df = self.main_df.query("player==@player").copy()

        # Initialize models for this player
        self.init_models()

        # Loop over the rows of this player's experience
        for row in df.iterrows():
            print(row)

    def experiment_train(self):
        """Call ``player_train`` once for every distinct player in ``main_df``."""
        # Establish player list
        players = self.main_df.player.unique()

        # Loop over the players
        for player in players:
            print("Experience for player {}".format(player))
            self.player_train(player)

    def select_action(self,states):
        """Placeholder; not implemented yet."""
        pass

    def get_credibilities(self):
        """Placeholder; not implemented yet."""
        pass

class human_bandit_model():
    """Tabular bandit over one combination of piece features.

    The q-table has one row per joint feature value (the product of ``dims``)
    and one column per action (``out_dim`` = 4).  ``credibility`` is a running
    score that rises when a newly observed reward agrees with the value already
    stored for that state/action and drops sharply when it disagrees.
    """

    def __init__(self,feats,dims):
        """Create an all-``init_q_value`` q-table for the given features.

        Args:
            feats: tuple of feature names; each candidate piece passed to
                ``propose_action`` must provide a value under every name.
            dims: tuple with the number of possible values of each feature,
                in the same order as ``feats``.
        """
        # Initialize bandit q-table and credibility
        self.init_q_value = 0
        # Establish rolling memory queues for each model with configurable memory
        # (created here; not read or written by the other methods of this class)
        self.memory = 15
        self.state_memory=deque([],self.memory)
        self.action_memory=deque([],self.memory)
        self.reward_memory=deque([],self.memory)
        # Set up model features
        self.feats = feats
        self.feat_dims = dims
        # np.prod returns a NumPy scalar (a float for an empty tuple); cast to a plain int for use as a shape
        self.in_dim, self.out_dim = int(np.prod(self.feat_dims)), 4
        # NOTE(review): int8 storage assumes rewards fit in [-128, 127] - confirm against the reward scheme
        self.q_values = np.full((self.in_dim,self.out_dim),self.init_q_value,dtype=np.int8)
        self.credibility=0
        # States of the pieces offered in the most recent propose_action() call;
        # created here so learn() can detect "nothing proposed yet"
        self.state_list = []

    def propose_action(self,state_dict_list):
        """Pick a (piece, bucket) pair with the highest stored q-value.

        Args:
            state_dict_list: sequence of dicts, one per candidate piece.  Each
                dict must hold an integer index for every feature in
                ``self.feats`` plus ``'move_row'`` and ``'move_col'`` entries.

        Returns:
            ``(bucket, move_row, move_col, piece_index)`` where ``bucket`` is
            the chosen action column and ``piece_index`` the position of the
            chosen piece in ``state_dict_list``.  Ties between equal q-values
            are broken uniformly at random via ``np.random.choice``.

        Raises:
            ValueError: if ``state_dict_list`` is empty.
        """
        # Flatten each piece's feature values into a single q-table row index
        self.state_list = [
            np.ravel_multi_index(tuple(piece[feat] for feat in self.feats),self.feat_dims)
            for piece in state_dict_list
        ]
        if not self.state_list:
            raise ValueError("propose_action() needs at least one candidate piece")

        # Rows of the q-table for the offered pieces: shape (number of pieces, out_dim)
        q_vals = self.q_values[self.state_list,:]
        # Flat index into q_vals of one maximal entry, chosen at random among ties
        selection = np.random.choice(np.flatnonzero(q_vals==q_vals.max()))
        # Row-major layout: flat index = piece_index * out_dim + bucket
        piece_index, bucket = divmod(selection,self.out_dim)
        selected_piece = state_dict_list[piece_index]
        return (bucket,selected_piece['move_row'],selected_piece['move_col'],piece_index)

    def return_credibility(self):
        """Return the current credibility score."""
        return self.credibility

    def learn(self,action,piece_index,reward):
        """Store the observed reward and update the credibility score.

        Args:
            action: action (bucket) column that was taken.
            piece_index: index, into the list given to the latest
                ``propose_action`` call, of the piece that was moved.
            reward: observed reward; it is truncated with ``int()`` and
                overwrites the stored q-value for that state/action.

        Credibility is only adjusted when the previous q-value differs from
        ``init_q_value``: +1 if it equals the new reward, -20 otherwise.

        Raises:
            IndexError: if no states are recorded (``propose_action`` has not
                been called successfully) or ``piece_index`` is out of range.
        """
        if len(self.state_list)==0:
            # Fail loudly instead of dropping into a debugger, which would hang unattended runs
            raise IndexError("learn() called with no recorded states; call propose_action() first")
        state = self.state_list[piece_index]
        new_val = int(reward)
        old_val = self.q_values[state,action]
        self.q_values[state,action]=new_val
        if old_val!=self.init_q_value:
            if old_val==new_val:
                self.credibility+=1
            else:
                self.credibility-=20

    def return_qvals(self):
        """Return a copy of the q-table so callers cannot mutate the model's state."""
        return np.copy(self.q_values)

In [34]:
# Experiment directory.  Defaults to the original local path, but can be redirected with the
# HL_EXP_DIR environment variable so the notebook also runs on machines with a different layout.
exp_path = os.environ.get("HL_EXP_DIR", "/Users/eric/data_analysis/ambiguity4/ep/1_1_color_3m_cua")
rules = ["1_1_color_3m_cua"]
# Constructing the model imports the player files; experiment_train() then iterates over each player
model = hl_model(exp_path,rules)
model.experiment_train()
#display(model.main_df)

Experience for player A23G1L7KYHK9F2
(0, player                                           A23G1L7KYHK9F2
episode                                                       0
y                                                             6
x                                                             1
code                                                          0
move                                                          0
bucket                                                        0
proc_board    [{'id': 13438, 'color': 'RED', 'shape': 'CIRCL...
shape                                                      STAR
color                                                       RED
shape_ind                                                     3
color_ind                                                     0
id                                                        13446
cell                                                         31
Name: 0, dtype: object)
(1, player                             