In [1]:
import pandas as pd
import scipy.stats

# Analyzing CamelUp Bias
- Simulated 50,000 games
- Four players that perform hierarchically random moves
- Analyze biases of game itself without taking strategy into account

In [2]:
results = pd.read_csv("../game_logs/randombot_4players.csv")
results.head()

Unnamed: 0,game_id,round_id,action_type,active_player,bet_type,camel,camel_c_0_location,camel_c_0_stack_location,camel_c_1_location,camel_c_1_stack_location,...,player_1_trap_location,player_1_trap_type,player_2_coins,player_2_trap_location,player_2_trap_type,player_3_coins,player_3_trap_location,player_3_trap_type,trap_location,trap_type
0,0,0,,,,,1,1,2,1,...,,,2,,,2,,,,
1,0,1,move_trap,0.0,,,1,1,2,1,...,,,2,,,2,,,10.0,1.0
2,0,2,game_bet,1.0,win,c_3,1,1,2,1,...,,,2,,,2,,,,
3,0,3,round_winner_bet,2.0,,c_0,1,1,2,1,...,,,2,,,2,,,,
4,0,4,move_camel,3.0,,c_1,1,1,5,0,...,,,2,,,3,,,,


## Correlation Between Game Start and End Positions
As a first step, we want to analyze whether there is a correlation between camel positions at the beginning and the end of each game.

### Camel position at the beginning and end of games correlates negatively
Comparing the relative places of camels at the beginning and the end of game shows a very minor negative correlation, e.g. camels starting in last place were more likely to win the game and camels starting in first place were more likely to lose.

In [3]:
# Get position at the beginning and end of each game
# Camel position is determined by stack and board location
pos_beg = results.sort_values("round_id").groupby("game_id").first()
pos_beg["camel_c_0_position_beginning"] = pos_beg["camel_c_0_location"].astype(str) + \
                                          pos_beg["camel_c_0_stack_location"].astype(str)
pos_beg["camel_c_1_position_beginning"] = pos_beg["camel_c_1_location"].astype(str) + \
                                          pos_beg["camel_c_1_stack_location"].astype(str)
pos_beg["camel_c_2_position_beginning"] = pos_beg["camel_c_2_location"].astype(str) + \
                                          pos_beg["camel_c_2_stack_location"].astype(str)
pos_beg["camel_c_3_position_beginning"] = pos_beg["camel_c_3_location"].astype(str) + \
                                          pos_beg["camel_c_3_stack_location"].astype(str)
pos_beg["camel_c_4_position_beginning"] = pos_beg["camel_c_4_location"].astype(str) + \
                                          pos_beg["camel_c_4_stack_location"].astype(str)
pos_beg = pos_beg[[
    "camel_c_0_position_beginning", "camel_c_1_position_beginning", 
    "camel_c_2_position_beginning", "camel_c_3_position_beginning", 
    "camel_c_4_position_beginning"]]

pos_beg = pos_beg.rank(axis=1, ascending=False).astype(int)

pos_end = results.sort_values("round_id").groupby("game_id").last()
pos_end["camel_c_0_position_end"] = pos_end["camel_c_0_location"].astype(str) + \
                                    pos_end["camel_c_0_stack_location"].astype(str)
pos_end["camel_c_1_position_end"] = pos_end["camel_c_1_location"].astype(str) + \
                                    pos_end["camel_c_1_stack_location"].astype(str)
pos_end["camel_c_2_position_end"] = pos_end["camel_c_2_location"].astype(str) + \
                                    pos_end["camel_c_2_stack_location"].astype(str)
pos_end["camel_c_3_position_end"] = pos_end["camel_c_3_location"].astype(str) + \
                                    pos_end["camel_c_3_stack_location"].astype(str)
pos_end["camel_c_4_position_end"] = pos_end["camel_c_4_location"].astype(str) + \
                                    pos_end["camel_c_4_stack_location"].astype(str)
pos_end = pos_end[[
    "camel_c_0_position_end", "camel_c_1_position_end", 
    "camel_c_2_position_end", "camel_c_3_position_end", 
    "camel_c_4_position_end"]]

pos_end = pos_end.rank(axis=1, ascending=False).astype(int)

pos = pd.merge(left=pos_beg, right=pos_end, left_index=True, right_index=True)

In [16]:
# Create confusion matrix for each individual camel relating the start and end
# positions. We can add the confusion matrices together as the camels are effectively identical
c_0_cm = pd.crosstab(index=pos["camel_c_0_position_beginning"], columns=pos["camel_c_0_position_end"])
c_1_cm = pd.crosstab(index=pos["camel_c_1_position_beginning"], columns=pos["camel_c_1_position_end"])
c_2_cm = pd.crosstab(index=pos["camel_c_2_position_beginning"], columns=pos["camel_c_2_position_end"])
c_3_cm = pd.crosstab(index=pos["camel_c_3_position_beginning"], columns=pos["camel_c_3_position_end"])
c_4_cm = pd.crosstab(index=pos["camel_c_4_position_beginning"], columns=pos["camel_c_4_position_end"])
camel_cm = c_0_cm + c_1_cm + c_2_cm + c_3_cm + c_4_cm
camel_cm

camel_c_0_position_end,1,2,3,4,5
camel_c_0_position_beginning,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,8599,9703,10431,10556,10711
2,9325,9836,10189,10272,10378
3,9593,9836,10199,10226,10146
4,10166,10111,9908,9936,9879
5,12317,10514,9273,9010,8886


There appears to be an ever-so-small negative correlation between beginning and end positions. A $\chi^2$-test shows that the positions at the beginning and end of the game for any given camel are not independent of each other.

In [17]:
print("p-value of chi2 test = {:.2e}".format(scipy.stats.chi2_contingency(camel_cm)[1]))

p-value of chi2 test = 3.72e-257


Normalizing the contingency table puts it into a more human-readable format. We can see that camels starting in last place have a 25% chance of winning the game. If start and end positions were independent of each other, we would expect this to be 20%.

In [32]:
camel_cm_prob = (camel_cm / len(pos)).round(2)
camel_cm_prob.index.name = "Game Start Position"
camel_cm_prob.columns.name = "Game End Position"
camel_cm_prob

Game End Position,1,2,3,4,5
Game Start Position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.17,0.19,0.21,0.21,0.21
2,0.19,0.2,0.2,0.21,0.21
3,0.19,0.2,0.2,0.2,0.2
4,0.2,0.2,0.2,0.2,0.2
5,0.25,0.21,0.19,0.18,0.18
