In [21]:
import pandas as pd
import numpy as np

In [22]:
# Read the reward log from the CSV file that sits next to this notebook.
rewards = pd.read_csv(filepath_or_buffer="rewards_table.csv")

In [23]:
# Display the frame as loaded; the notebook shows only the first and last rows
# of a long frame, so this is a quick look at the columns and raw values.
rewards

Unnamed: 0.1,Unnamed: 0,game,turn,team_id,team_label,unit_id,total_reward,reward_move,reward_fuel_collected,reward_turn_unit,reward_living_city_tiles,reward_death_city_tile,reward_research_point,reward_no_action,reward_fuel_dropped_at_city,reward_build_city,reward_death_before_end,reward_win,reward_transfer,reward_coal_researched
0,0,0,1,0,random,1,0.15,0.0,0.0,0.0,0.1,-0.0,0.05,,,,,,,
1,1,0,1,1,no_action,2,0.15,,,0.0,0.1,-0.0,0.05,0.0,0.0,,,,,
2,2,0,2,0,random,1,0.10,0.0,0.0,0.0,0.1,-0.0,0.00,,,,,,,
3,3,0,2,1,no_action,2,0.10,,,0.0,0.1,-0.0,0.00,0.0,0.0,,,,,
4,4,0,3,0,random,1,0.10,0.0,,0.0,0.1,-0.0,0.00,,0.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125520,125520,299,189,1,no_action,2,0.10,,,0.0,0.1,-0.0,0.00,0.0,0.0,,,,,
125521,125521,299,190,0,random,1,0.10,0.0,0.0,0.0,0.1,-0.0,0.00,,,,,,,
125522,125522,299,190,1,no_action,2,0.10,,,0.0,0.1,-0.0,0.00,0.0,0.0,,,,,
125523,125523,299,191,1,no_action,2,5.15,,,0.0,0.1,-0.0,0.05,0.0,0.0,,,5.0,,


In [24]:
## cleaning up

# Drop the "Unnamed: ..." columns present in the CSV.
# A new frame is assigned instead of mutating in place, so the step is explicit.
drop_cols = [col for col in rewards.columns if "Unnamed" in col]
rewards = rewards.drop(columns=drop_cols)

# Fill missing reward components with 0.
# The "reward_" substring matches the per-component columns only; "total_reward"
# does not contain it and is therefore left untouched.
# Assigning the filled block back avoids `rewards[col].fillna(..., inplace=True)`,
# which operates on a temporary object under pandas copy-on-write and would
# leave the NaNs in place.
specific_reward_cols = [col for col in rewards.columns if "reward_" in col]
rewards[specific_reward_cols] = rewards[specific_reward_cols].fillna(0)

# Use the identifying columns as the row index. The guard makes the cell safe to
# re-run: once the index is set, these names are no longer columns and a second
# `set_index` would raise KeyError.
index_cols = ["game", "turn", "team_id", "team_label", "unit_id"]
if not set(index_cols).issubset(rewards.index.names):
    rewards = rewards.set_index(index_cols)

In [25]:
def highlight_positive(v, props=''):
    """Element-wise styling helper: pick the CSS for a single cell value.

    Parameters
    ----------
    v : scalar
        The cell value being styled.
    props : str, default ''
        CSS property string to return when the condition holds.

    Returns
    -------
    str or None
        ``props`` when ``v`` is zero or negative, otherwise ``None``.

    NOTE(review): despite the name, the style is returned for NON-positive
    values. This may be deliberate (e.g. dimming everything that is not
    positive) -- confirm against the intended use before relying on it.
    """
    if v <= 0:
        return props
    return None

def highlight_max(s, props=''):
    """Column/row-wise styling helper: mark the maximum, if it is positive.

    Parameters
    ----------
    s : pandas.Series
        The values of one row or column being styled.
    props : str, default ''
        CSS property string to apply to the selected entries.

    Returns
    -------
    numpy.ndarray
        ``props`` at every position whose value equals the NaN-ignoring
        maximum of ``s`` and is strictly greater than zero; ``''`` elsewhere.
    """
    raw = s.values
    peak = np.nanmax(raw)
    # Both conditions must hold: the entry is the maximum AND it is positive.
    selected = (s == peak) & (raw > 0)
    return np.where(selected, props, '')

In [29]:
import seaborn as sns

# Colormaps for the styled reward tables.
# A cubehelix palette used to be assigned to `cm_total` here and was then
# immediately overwritten by the line below; that dead assignment is removed.
cm_total = sns.color_palette("icefire", as_cmap=True)
cm_other = sns.color_palette("vlag", as_cmap=True)

def highlight(df, vmin=-2, vmax=2, reward_cols=None):
    """Return a Styler that shades the reward columns of ``df`` by value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to style.
    vmin, vmax : float, default -2 and 2
        Values mapped to the two ends of the colormap for both gradients.
    reward_cols : list of column labels, optional
        Reward-component columns to shade row-wise. When omitted, every
        column of ``df`` whose name contains ``"reward_"`` is used, so the
        function does not depend on a list computed in another cell.

    Returns
    -------
    pandas.io.formats.style.Styler
        Styler with a row-wise (``axis=1``) gradient over the component
        columns and, if present, a column-wise (``axis=0``) gradient over
        ``"total_reward"``.
    """
    if reward_cols is None:
        reward_cols = [col for col in df.columns if "reward_" in str(col)]

    styled = df.style.background_gradient(
        vmin=vmin, vmax=vmax, axis=1, subset=reward_cols, cmap=cm_other
    )

    # NOTE(review): `cm_total` is defined in this cell but never used;
    # presumably it was meant for this column -- confirm before switching.
    if "total_reward" in df.columns:
        styled = styled.background_gradient(
            axis=0, subset=["total_reward"], vmin=vmin, vmax=vmax, cmap=cm_other
        )
    return styled

# Styled view of game 1, ordered by turn and then by team.
highlight(
    rewards
    .query("game == 1")
    .sort_values(by=["turn", "team_id"])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,total_reward,reward_move,reward_fuel_collected,reward_turn_unit,reward_living_city_tiles,reward_death_city_tile,reward_research_point,reward_no_action,reward_fuel_dropped_at_city,reward_build_city,reward_death_before_end,reward_win,reward_transfer,reward_coal_researched
game,turn,team_id,team_label,unit_id,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,1,0,random,1,0.76484,0.0,0.61484,0.0,0.1,-0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1,no_action,2,0.15,0.0,0.0,0.0,0.1,-0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0,random,1,0.335009,0.0,0.235009,0.0,0.1,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,1,no_action,2,0.1,0.0,0.0,0.0,0.1,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3,0,random,1,0.189723,0.0,0.089723,0.0,0.1,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3,1,no_action,2,0.1,0.0,0.0,0.0,0.1,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4,0,random,1,0.1,0.0,0.0,0.0,0.1,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4,1,no_action,2,0.1,0.0,0.0,0.0,0.1,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5,0,random,1,0.1,0.0,0.0,0.0,0.1,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5,1,no_action,2,0.1,0.0,0.0,0.0,0.1,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Statistics

In [31]:
# Mean cumulative reward of a unit per game:
#   1. sum `total_reward` for each (game, team, unit),
#   2. average those sums per team,
#   3. label the result column and replace the team ids with readable names.
(
    rewards
    .groupby(["game", "team_id", "unit_id"])[["total_reward"]]
    .sum()
    .reset_index()
    .groupby("team_id")[["total_reward"]]
    .mean()
    .rename(
        columns={"total_reward": "mean_cumulative_reward"},
        index={0: "random", 1: "no_action"},
    )
)

Unnamed: 0_level_0,mean_cumulative_reward
team_id,Unnamed: 1_level_1
random,12.208517
no_action,21.400667


In [28]:

# Keep the rows that carry a positive win reward, take the first team id
# recorded for each (game, turn), and average those ids. The ids are 0 and 1,
# so the mean is the fraction of these entries won by team 1.
p = (
    rewards
    .reset_index()
    .query("reward_win > 0")
    .groupby(["game", "turn"])[["team_id"]]
    .first()
    ["team_id"]
    .mean()
)
print("Winrate:", f"0: {1-p:%}", f"1: {p:%}", sep="\n")

Winrate:
0: 17.000000%
1: 83.000000%
