In [1]:
import numpy as np
import pandas as pd
import pickle as pkl

In [2]:
DATA_DIR = "aquawar/tourney_saves"

In [3]:
def hitory_entailment(data):
    for turn in data['history']:
        print(f'Player: {turn["player"]}, Game turn: {turn["game_turn"]}, Player turn: {turn["player_turn"]}, move: {turn["move"]} attempt {turn["attempt"]}')
        
#hitory_entailment(data)

In [4]:
def extract_winner(data):
    if data['evaluation']['players']['1']['current_hp'] > data['evaluation']['players']['2']['current_hp']:
        return 1
    return 2

#extract_winner(data)

In [5]:
def extract_validity(data):
    eval = data['evaluation']
    return {player:eval['players'][player]['invalid_moves']['total'] for player in eval['players']}

#extract_validity(data)

In [6]:
def extract_status(data):
    return data['evaluation']['game_status']

In [7]:
def extract_players(data):
    players = data['players']
    return {player: players[player][0]['name'] if type(players[player]) == list else players[player]['name'] for player in players.keys()}

#extract_players(data)

In [44]:

import os

def aggregate_results(directory):
    results = pd.DataFrame(columns=['player_1', 'player_2','p1_invalid_moves', 'p2_invalid_moves', 'winner', 'conclusion'])
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file == 'latest.pkl':
                file_path = os.path.join(root, file)
                data = pkl.load(open(file_path, "rb"))
                row = {}
                players = extract_players(data)
                row['player_1'] = players['1']
                row['player_2'] = players['2']
                
                validity = extract_validity(data)
                row['p1_invalid_moves'] = validity['1']
                row['p2_invalid_moves'] = validity['2']

                winner = extract_winner(data)
                row['winner'] = winner

                conclusion = extract_status(data)
                row['conclusion'] = conclusion

                row['fatal'] = data['history'] == []
                
                results = pd.concat((results, pd.DataFrame(row, index=[0])), ignore_index=True)
    return results

# Example usage:
results = aggregate_results(DATA_DIR)

In [9]:
results.loc[results['conclusion'] == 'error', 'winner'] = 0

In [45]:
results

Unnamed: 0,player_1,player_2,p1_invalid_moves,p2_invalid_moves,winner,conclusion,fatal
0,gpt-oss:20b (Majority 3),gpt-oss:20b (Majority 5),0,0,2,error,True
1,gpt-oss:20b (Majority 3),gpt-oss:20b (Majority 5),0,0,2,error,True
2,gpt-oss:20b (Majority 3),llama3.1:8b (Majority 5),0,0,2,error,True
3,gpt-oss:20b (Majority 3),llama3.1:8b (Majority 5),0,0,2,error,True
4,gpt-oss:20b (Majority 3),mistral-nemo:12b (Majority 5),0,0,2,error,True
...,...,...,...,...,...,...,...
115,qwen3:14b (Single 5),llama3.1:8b (Majority 5),5,0,2,error,False
116,qwen3:14b (Single 5),mistral-nemo:12b (Majority 5),5,0,2,error,False
117,qwen3:14b (Single 5),mistral-nemo:12b (Majority 5),5,0,2,error,False
118,qwen3:14b (Single 5),qwen3:14b (Majority 5),5,0,2,error,False


In [46]:
# Group by player_1 and player_2, then count conclusions and fatal
summary = results.groupby(['player_1', 'player_2']).agg(
    completed_count=('conclusion', lambda x: (x == 'completed').sum()),
    error_count=('conclusion', lambda x: (x == 'error').sum()),
    fatal_count=('fatal', lambda x: (x == True).sum())
).reset_index()
summary.to_csv('aquawar_summary.csv', index=False)

In [75]:
import regex as re

en_size = re.compile(r'\(\w+\s\d\)')

summary['ensamble_size'] = summary['player_1'].apply(lambda x: re.findall(en_size, x)[0]) 

In [72]:
# Aggregate by ensamble_size and count fatal occurrences
ensamble_summary = summary.groupby('ensamble_size').agg(
    fatal_count=('fatal_count', 'sum'),
    completed_games=('completed_count', 'sum'),
    error_games=('error_count', 'sum')
).reset_index()
ensamble_summary

Unnamed: 0,ensamble_size,fatal_count,completed_games,error_games
0,(Majority 3),32,0,32
1,(Single 3),0,46,10
2,(Single 5),0,0,32


In [79]:


#path = path.replace

data = pkl.load(open("aquawar\\tourney_saves\\gpt-oss_20b_S5\\llama3.1_8b_M5\\round_001\\latest.pkl", "rb"))

In [80]:
hitory_entailment(data)

Player: 1, Game turn: 1, Player turn: 1, move: Error: [Errno 111] Connection refused attempt 1
Player: 1, Game turn: 2, Player turn: 1, move: Error: [Errno 111] Connection refused attempt 1
Player: 1, Game turn: 3, Player turn: 1, move: Error: [Errno 111] Connection refused attempt 1
Player: 1, Game turn: 4, Player turn: 1, move: Error: [Errno 111] Connection refused attempt 1
Player: 1, Game turn: 5, Player turn: 1, move: Error: [Errno 111] Connection refused attempt 1


In [37]:
data['history']

[]

In [33]:
data['evaluation']

{'players': {'1': {'current_hp': 1600,
   'damage_dealt': 0,
   'damage_taken': 0,
   'assertions': {'true': 0, 'false': 0, 'skipped': 0},
   'invalid_moves': {'total': 0,
    'by_type': {'invalid_response': 0,
     'invalid_parameter': 0,
     'invalid_action': 0}}},
  '2': {'current_hp': 1600,
   'damage_dealt': 0,
   'damage_taken': 0,
   'assertions': {'true': 0, 'false': 0, 'skipped': 0},
   'invalid_moves': {'total': 0,
    'by_type': {'invalid_response': 0,
     'invalid_parameter': 0,
     'invalid_action': 0}}}},
 'game_status': 'error'}

In [41]:
results[results['conclusion']== 'error'].to_csv('aquawar_error_games.csv', index=False)