In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the rewards table; the path is relative to the notebook's working directory.
rewards = pd.read_csv("rewards_table.csv")

In [4]:
# Show the frame as loaded, before any cleaning (pandas truncates long frames on display).
rewards

Unnamed: 0.1,Unnamed: 0,game,turn,team_id,team_label,unit_id,total_reward,reward_move,reward_fuel_collected,reward_turn_unit,reward_living_city_tiles,reward_death_city_tile,reward_research_point,reward_no_action,reward_fuel_dropped_at_city,reward_build_city,reward_death_before_end,reward_win,reward_transfer,reward_coal_researched
0,0,0,1,0,random,1,0.844400,0.0,0.744400,0.0,0.1,0.0,0.0,,,,,,,
1,1,0,1,1,no_action,2,0.100000,,,0.0,0.1,0.0,0.0,0.0,0.0,,,,,
2,2,0,2,0,random,1,0.295172,0.0,0.195172,0.0,0.1,0.0,0.0,,,,,,,
3,3,0,2,1,no_action,2,0.100000,,,0.0,0.1,0.0,0.0,0.0,0.0,,,,,
4,4,0,3,0,random,1,0.100000,0.0,0.000000,0.0,0.1,0.0,0.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117866,117866,299,189,1,no_action,2,0.100000,,,0.0,0.1,0.0,0.0,0.0,0.0,,,,,
117867,117867,299,190,0,random,1,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,,,,,,,
117868,117868,299,190,1,no_action,2,0.100000,,,0.0,0.1,0.0,0.0,0.0,0.0,,,,,
117869,117869,299,191,1,no_action,2,19.047368,,,0.0,0.1,0.0,0.0,0.0,0.0,,,18.947368,,


In [5]:
## cleaning up

# Drop every column whose name contains "Unnamed"
# (presumably index columns that were written out with the CSV).
drop_cols = [col for col in rewards.columns if "Unnamed" in col]
rewards = rewards.drop(columns=drop_cols)

# Fill missing reward components with 0.
# Only names containing "reward_" are selected, so "total_reward" is not touched.
# The filled block is assigned back explicitly: calling
# `rewards[col].fillna(0, inplace=True)` operates on a column selection, which
# pandas warns about and which no longer updates `rewards` under Copy-on-Write.
specific_reward_cols = [col for col in rewards.columns if "reward_" in col]
rewards[specific_reward_cols] = rewards[specific_reward_cols].fillna(0)

# Identify each row by (game, turn, team, unit) so later cells can query on index levels.
rewards = rewards.set_index(["game", "turn", "team_id", "team_label", "unit_id"])

In [6]:
def highlight_positive(v, props=''):
    """Return ``props`` for strictly positive values, otherwise ``None``.

    Shaped like an element-wise styling callback (one scalar in, one CSS
    string or ``None`` out); presumably meant for ``Styler.map`` /
    ``Styler.applymap`` -- no call site is visible here.

    The condition previously read ``v <= 0``, which styled the non-positive
    cells and contradicted the function's name.

    Parameters
    ----------
    v : scalar
        Cell value to test. NaN compares false and therefore yields ``None``.
    props : str, default ''
        CSS declaration(s) to apply to positive cells.

    Returns
    -------
    str or None
        ``props`` when ``v > 0``, else ``None`` (no styling).
    """
    return props if v > 0 else None

def highlight_max(s, props=''):
    """Mark the entries equal to the NaN-ignoring maximum, if that maximum is positive.

    Parameters
    ----------
    s : pandas object exposing ``.values`` (e.g. a Series)
        Values to inspect.
    props : str, default ''
        CSS declaration(s) placed at the positions of the positive maximum.

    Returns
    -------
    numpy.ndarray
        ``props`` where the entry equals the maximum and is > 0, ``''`` elsewhere.
    """
    raw = s.values
    peak = np.nanmax(raw)
    # Both conditions must hold: the entry is the maximum AND it is strictly positive.
    is_positive_peak = (s == peak) & (raw > 0)
    return np.where(is_positive_peak, props, '')

In [7]:
import seaborn as sns
# Colormaps for the styled reward tables ("icefire" and "vlag" are seaborn's
# diverging palettes). cm_total used to be assigned a cubehelix palette first
# and then immediately overwritten; only the effective assignment is kept.
# NOTE(review): cm_total is not referenced by the visible `highlight` helper,
# which uses cm_other for every column -- confirm which map total_reward should use.
cm_total = sns.color_palette("icefire", as_cmap=True)
cm_other = sns.color_palette("vlag", as_cmap=True)

def highlight(df, vmin=-2, vmax=2):
    """Return a Styler for ``df`` with background gradients on the reward columns.

    Two gradients are applied with the same colormap (``cm_other``) and the
    same fixed colour range:

    * the columns listed in the notebook-level ``specific_reward_cols``, row-wise;
    * the ``total_reward`` column, column-wise.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``total_reward`` and every column named in
        ``specific_reward_cols``.
    vmin, vmax : float, default -2 and 2
        Values mapped to the two ends of the colormap. The defaults reproduce
        the range that used to be hard-coded; values outside it saturate.

    Returns
    -------
    pandas.io.formats.style.Styler
    """
    # NOTE(review): the notebook also defines `cm_total`, which is not used here;
    # confirm that `cm_other` is really intended for the total_reward column.
    component_styled = df.style.background_gradient(
        vmin=vmin, vmax=vmax, axis=1, subset=specific_reward_cols, cmap=cm_other
    )
    return component_styled.background_gradient(
        vmin=vmin, vmax=vmax, axis=0, subset=["total_reward"], cmap=cm_other
    )

# Styled reward breakdown for game 9, ordered by turn and then by team.
highlight(
    rewards
    .query("game == 9")
    .sort_values(["turn", "team_id"])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,total_reward,reward_move,reward_fuel_collected,reward_turn_unit,reward_living_city_tiles,reward_death_city_tile,reward_research_point,reward_no_action,reward_fuel_dropped_at_city,reward_build_city,reward_death_before_end,reward_win,reward_transfer,reward_coal_researched
game,turn,team_id,team_label,unit_id,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
9,1,0,random,1,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,1,1,no_action,2,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,2,0,random,1,0.539861,0.0,0.439861,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,2,1,no_action,2,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,3,0,random,1,0.274978,0.0,0.174978,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,3,1,no_action,2,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,4,0,random,1,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,4,1,no_action,2,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,5,0,random,1,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,5,1,no_action,2,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Statistics

In [8]:
# Mean cumulative reward per team:
#   1. sum total_reward for every (game, team, unit) -> cumulative reward of a unit in a game
#   2. average those sums per team_id
#   3. relabel the team ids 0/1 as "random"/"no_action" for display
(
    rewards
    .groupby(["game", "team_id", "unit_id"])[["total_reward"]]
    .sum()
    .reset_index()
    .groupby("team_id")[["total_reward"]]
    .mean()
    .rename(
        columns={"total_reward": "mean_cumulative_reward"},
        index={0: "random", 1: "no_action"},
    )
)

Unnamed: 0_level_0,mean_cumulative_reward
team_id,Unnamed: 1_level_1
random,-5.092069
no_action,24.69426


In [9]:

# Win rate per team.
# Rows with a positive reward_win are kept and the first team_id per (game, turn)
# is taken. The mean of those ids is then reported as team 1's share and 1 - p as
# team 0's, which assumes team_id only takes the values 0 and 1.
p = (
    rewards
    .reset_index()
    .query("reward_win > 0")
    .groupby(["game", "turn"])[["team_id"]]
    .first()
    .team_id
    .mean()
)
print("Winrate:")
print(f"0: {1-p:%}")
print(f"1: {p:%}")

Winrate:
0: 18.666667%
1: 81.333333%


In [13]:
# mean reward per turn
import plotly.express as px

# Per (team, turn): mean total_reward and the number of rows that contributed to it.
t = (
    rewards
    .reset_index()
    .groupby(["team_id", "turn"])
    .agg(
        total_reward=("total_reward", "mean"),
        unit_id=("unit_id", "count"),
    )
    .reset_index()
)

# Bar chart of the mean reward at each turn, coloured by team id.
px.bar(t, x="turn", y="total_reward", color="team_id")