# Libraries

In [1]:
import numpy as np
import pandas as pd
import json
import glob
import ast
import gzip
import os
import yaml
from tqdm import tqdm
import re
from collections import defaultdict

# Functions

In [3]:
def process_player_join_event(event, combat_data, combat_id):
    """Record a player's ``init join`` event in the combat's aggregate data.

    Adds the event author to ``player_ids``, creates or refreshes the
    ``player_info`` entry (keyed by character name) from the event's
    ``caster`` payload, and keeps ``total_slots`` / ``total_max_slots`` in
    sync with it.

    When the same character joins more than once, the contribution of its
    previously stored snapshot is removed from the totals before the new one
    is added, so a repeated join no longer inflates the party totals.

    Args:
        event: Parsed event dict; ``author_id``, ``author_name`` and
            ``caster`` are read from it.
        combat_data: Mapping of combat id to the per-combat record that is
            updated in place.
        combat_id: Key of the combat this event belongs to.
    """
    combat = combat_data[combat_id]
    caster_info = event.get("caster", {})
    # Prefer the character name; fall back to the author's name.
    player_name = caster_info.get("name", event.get("author_name"))

    # Add player ID
    combat['player_ids'].add(event.get("author_id"))

    # First join for this character: start from an empty snapshot.
    players = combat['player_info']
    if player_name not in players:
        players[player_name] = {
            'hp_ratio': None,
            'class': [],
            'slots': {},
            'max_slots': {},
            'ac': 0,
            'stats': {},
        }
    player_info = players[player_name]

    total_slots = combat['total_slots']
    total_max_slots = combat['total_max_slots']

    # Back out whatever this character contributed on an earlier join
    # (a no-op for a fresh entry, whose slot dicts are empty).
    for slot, value in player_info['slots'].items():
        total_slots[slot] -= value
    for slot, value in player_info['max_slots'].items():
        total_max_slots[slot] -= value

    # Replace the stored snapshot with the data carried by this event.
    spellbook = caster_info.get('spellbook', {})
    player_info['class'] = list(caster_info.get('levels', {}).get('classes', {}).items())
    player_info['slots'] = spellbook.get('slots', {})
    player_info['max_slots'] = spellbook.get('max_slots', {})
    player_info['ac'] = caster_info.get('ac', 0)
    player_info['stats'] = caster_info.get('stats', {})

    # Add the new snapshot's contribution to the party totals.
    for slot, value in player_info['slots'].items():
        total_slots[slot] += value
    for slot, value in player_info['max_slots'].items():
        total_max_slots[slot] += value

def process_combat_state_update(event, combat_data, combat_id):
    """Register monsters listed in a combat state update, once per monster id.

    Reads ``event["data"]["combatants"]`` and appends a summary dict
    (``monster_id``, ``monster_code``, ``monster_name``, ``level``) to the
    combat's ``monsters`` list for every combatant of type ``"monster"``
    whose id has not been recorded yet.

    Combatants without a ``type`` field are ignored instead of raising, and
    the combat record is only touched when the update actually contains
    monsters.

    Args:
        event: Parsed event dict.
        combat_data: Mapping of combat id to the per-combat record that is
            updated in place.
        combat_id: Key of the combat this event belongs to.
    """
    combatants = event.get("data", {}).get("combatants", [])
    monster_actors = [actor for actor in combatants if actor.get("type") == "monster"]
    if not monster_actors:
        return

    monsters = combat_data[combat_id]['monsters']
    # Build the id set once so each membership check is O(1) rather than a
    # rescan of the whole monster list for every combatant.
    known_ids = {monster['monster_id'] for monster in monsters}

    for actor in monster_actors:
        monster_id = actor["id"]
        if monster_id in known_ids:
            continue
        known_ids.add(monster_id)
        monsters.append({
            'monster_id': monster_id,
            'monster_code': actor.get("name"),
            'monster_name': actor.get('monster_name'),
            'level': actor.get('levels', {}).get('total_level', 0),
        })

def update_player_health_from_human_readable(last_human_readable, combat_data):
    """Fill in round counts and player HP from each combat's last summary text.

    For every ``combat_id -> text`` pair, a ``(round N)`` marker in the text
    sets the combat's ``rounds``; for each known player, a
    ``<name> <current/max HP>`` fragment sets that player's ``hp_ratio`` to
    the tuple ``(current, max)``. Values are left untouched when no match is
    found.

    Args:
        last_human_readable: Mapping of combat id to summary text.
        combat_data: Mapping of combat id to the per-combat record that is
            updated in place.
    """
    for combat_id, summary in last_human_readable.items():
        round_hit = re.search(r'\(round (\d+)\)', summary)
        if round_hit is not None:
            combat_data[combat_id]['rounds'] = int(round_hit.group(1))

        players = combat_data[combat_id]['player_info']
        for name, info in players.items():
            # The name is escaped so regex metacharacters in it match literally.
            hp_hit = re.search(rf"{re.escape(name)} <(\d+)/(\d+) HP>", summary)
            if hp_hit is None:
                continue
            info['hp_ratio'] = (int(hp_hit.group(1)), int(hp_hit.group(2)))

def combat_data_template():
    """Return a new, empty per-combat record.

    Every call builds fresh containers, so records never share state.
    The two slot totals are ``defaultdict(int)`` so unseen slot levels start
    at zero.
    """
    record = {}
    record['start_time'] = None
    record['player_ids'] = set()
    record['player_info'] = {}
    record['total_slots'] = defaultdict(int)
    record['total_max_slots'] = defaultdict(int)
    record['monsters'] = []
    record['rounds'] = 0
    return record

def process_file(file_path, combat_data_template_func):
    """Parse one gzipped JSON-lines event log into per-combat aggregates.

    Each non-blank line is decoded as a JSON event and dispatched on its
    ``event_type``:

    * ``combat_start`` stores the event's ``timestamp`` as ``start_time``;
    * ``command`` with ``command_name == "init join"`` registers the player;
    * ``combat_state_update`` registers the monsters it lists.

    The last non-empty ``human_readable`` text seen per combat is kept and,
    once the file has been read, used to fill in round counts and player HP.

    Blank lines (for example a trailing newline at the end of the file) are
    skipped rather than passed to ``json.loads``, which would raise on them.

    Args:
        file_path: Path to a gzip-compressed, UTF-8 encoded JSON-lines file.
        combat_data_template_func: Zero-argument factory producing an empty
            per-combat record.

    Returns:
        A ``defaultdict`` mapping combat id to its aggregated record.
    """
    combat_data = defaultdict(combat_data_template_func)
    last_human_readable = {}

    with gzip.open(file_path, 'rt', encoding='utf-8') as file:
        for line in file:
            if not line.strip():
                continue  # tolerate blank lines in the stream

            event = json.loads(line)
            combat_id = event.get("combat_id")
            event_type = event.get("event_type")

            if event_type == "combat_start":
                combat_data[combat_id]['start_time'] = event.get("timestamp")

            elif event_type == "command" and event.get("command_name") == "init join":
                process_player_join_event(event, combat_data, combat_id)

            elif event_type == "combat_state_update":
                process_combat_state_update(event, combat_data, combat_id)

            # Remember only the most recent summary text for each combat.
            human_readable = event.get("human_readable")
            if human_readable:
                last_human_readable[combat_id] = human_readable

    update_player_health_from_human_readable(last_human_readable, combat_data)

    return combat_data

def merge_combat_data(existing_data, new_data):
    """Merge per-combat records from ``new_data`` into ``existing_data``.

    Combats that are new are adopted as-is (the record object is shared, not
    copied). For a combat present on both sides:

    * ``start_time`` is taken from the new record when the existing one has
      none, and ``rounds`` becomes the larger of the two values, so neither
      is silently dropped;
    * ``player_ids`` are unioned;
    * ``player_info`` entries are updated field by field, except that a
      missing (``None``) ``hp_ratio`` in the new data never overwrites a
      known one;
    * monsters are appended unless their ``monster_id`` is already present;
    * ``total_slots`` / ``total_max_slots`` are summed per slot level.

    Args:
        existing_data: Mapping of combat id to record; modified in place.
        new_data: Mapping of combat id to record to merge in.

    Returns:
        ``existing_data``, for convenient chaining.
    """
    for combat_id, data in new_data.items():
        if combat_id not in existing_data:
            existing_data[combat_id] = data
            continue

        existing = existing_data[combat_id]

        # Scalar fields: keep whichever side actually carries information.
        if existing.get('start_time') is None:
            existing['start_time'] = data.get('start_time')
        existing['rounds'] = max(existing.get('rounds') or 0, data.get('rounds') or 0)

        existing['player_ids'].update(data['player_ids'])

        players = existing['player_info']
        for name, info in data['player_info'].items():
            if name not in players:
                players[name] = info
                continue
            current = players[name]
            known_hp = current.get('hp_ratio')
            current.update(info)
            # Do not let an unparsed HP value erase one that was already known.
            if current.get('hp_ratio') is None and known_hp is not None:
                current['hp_ratio'] = known_hp

        # Merge monsters, ensuring no duplicates
        monsters = existing['monsters']
        seen_monster_ids = {monster['monster_id'] for monster in monsters}
        for new_monster in data['monsters']:
            if new_monster['monster_id'] not in seen_monster_ids:
                monsters.append(new_monster)
                seen_monster_ids.add(new_monster['monster_id'])

        # Merge slots and max_slots by adding values for each slot
        for slot, value in data['total_slots'].items():
            existing['total_slots'][slot] += value
        for slot, value in data['total_max_slots'].items():
            existing['total_max_slots'][slot] += value

    return existing_data

def process_combat_files(files):
    """Process every log file and fold the results into one combat mapping.

    Each path is parsed with ``process_file`` and merged into the running
    result with ``merge_combat_data``; a progress bar tracks the files.

    Args:
        files: Iterable of paths to the log files.

    Returns:
        The merged mapping of combat id to combat record.
    """
    merged = {}
    progress = tqdm(files, desc="Processing files")
    for path in progress:
        merged = merge_combat_data(merged, process_file(path, combat_data_template))
    return merged

def flatten_combat_data(combat_data):
    """Convert the combat mapping into a list of plain, row-like dicts.

    Sets and defaultdicts are converted to ``list`` / ``dict``, and
    ``party_size`` is the number of ``player_info`` entries.

    Args:
        combat_data: Mapping of combat id to combat record.

    Returns:
        One dict per combat, in the mapping's iteration order.
    """
    return [
        {
            'combat_id': combat_id,
            'start_time': record['start_time'],
            'player_ids': list(record['player_ids']),
            'total_slots': dict(record['total_slots']),
            'total_max_slots': dict(record['total_max_slots']),
            'monsters_info': record['monsters'],
            'party_size': len(record['player_info']),
        }
        for combat_id, record in combat_data.items()
    ]
##############################################################################################

def calculate_averages_for_encounters(encounters_dict):
    """Compute per-encounter monster level totals and party HP percentages.

    For each encounter, the monster value is the sum of its monsters'
    ``level`` (0 when there are none). The player value is
    ``sum(current HP) / sum(max HP) * 100`` over players that have an
    ``hp_ratio``, or 0 when the summed max HP is not positive.

    Args:
        encounters_dict: Mapping of combat id to combat record.

    Returns:
        ``(monster_levels, player_healths, monster_average, player_average)``
        where the first two are per-encounter lists and the last two are
        their ``np.mean``.
    """
    monster_levels = []
    player_healths = []

    for encounter in tqdm(encounters_dict.values(), desc="Calculating averages"):
        # Monster side: total level, or 0 when the encounter has no monsters.
        monsters = encounter['monsters']
        monster_levels.append(sum(m['level'] for m in monsters) if monsters else 0)

        # Player side: accumulate current and max HP over players with data.
        current_hp = 0
        max_hp = 0
        for player in encounter['player_info'].values():
            if player.get('hp_ratio'):
                current_hp += player['hp_ratio'][0]
        for player in encounter['player_info'].values():
            if player.get('hp_ratio'):
                max_hp += player['hp_ratio'][1]

        # 0 stands in for encounters without usable health data.
        player_healths.append(current_hp / max_hp * 100 if max_hp > 0 else 0)

    return monster_levels, player_healths, np.mean(monster_levels), np.mean(player_healths)
##############################################################################################

def calculate_metrics(final_combat_data):
    """Build one DataFrame row of party and monster metrics per combat.

    Each row carries the raw combat fields, per-player lists
    (``player_individual_*``), party totals (``party_total_*``), remaining
    spell slots per level 1-9 (``party_level{N}_spellslots``) and monster
    summaries. ``party_total_hpratio`` is post-combat HP divided by
    pre-combat (max) HP, or the string ``'NA'`` when no max HP is known.

    Args:
        final_combat_data: Mapping of combat id to combat record. Each record
            must provide ``player_ids``, ``player_info``, ``monsters``,
            ``total_slots`` and ``total_max_slots``.

    Returns:
        A DataFrame with ``combat_id`` as its first column. For empty input
        an empty DataFrame with only a ``combat_id`` column is returned
        instead of raising.

    Raises:
        KeyError: If a player's ``slots`` contains a level outside 1-9.
    """
    stat_names = (
        'prof_bonus',
        'strength',
        'dexterity',
        'constitution',
        'intelligence',
        'wisdom',
        'charisma',
    )
    data_rows = []

    for combat_id, combat_info in final_combat_data.items():
        monsters = combat_info.get('monsters', [])
        monster_types = [monster['monster_name'] for monster in monsters]

        # Key order here determines the DataFrame's column order.
        row = {
            'combat_id': combat_id,
            'start_time': combat_info.get('start_time', ''),
            'number_of_rounds': combat_info.get('rounds', 0),
            'player_ids': list(combat_info['player_ids']),
            'player_info': list(combat_info['player_info'].values()),
            'monsters_info': combat_info['monsters'],
            'party_size': len(combat_info['player_ids']),
            'total_slots': dict(combat_info['total_slots']),
            'total_max_slots': dict(combat_info['total_max_slots']),
            'party_classes_with_level': [],
            'party_total_class_composition': [],
            'player_individual_hp_ratios': [],
            'player_individual_ac': [],
            **{f'player_individual_{stat}': [] for stat in stat_names},
            'monster_types': monster_types,
            'monster_number': len(monster_types),
            'monster_total_level': sum(monster['level'] for monster in monsters),
            'party_total_level': 0,
            **{f'party_level{level}_spellslots': 0 for level in range(1, 10)},
        }

        total_precombat_hp = 0
        total_postcombat_hp = 0

        for player_info in combat_info.get('player_info', {}).values():
            classes_with_levels = player_info.get('class', [])
            row['party_classes_with_level'].extend(classes_with_levels)
            row['party_total_class_composition'].extend(cls for cls, _ in classes_with_levels)
            row['party_total_level'] += sum(level for _, level in classes_with_levels)

            # hp_ratio is (current, max): max is treated as the pre-combat HP.
            hp_ratio = player_info.get('hp_ratio')
            if hp_ratio:
                row['player_individual_hp_ratios'].append(hp_ratio)
                total_precombat_hp += hp_ratio[1]
                total_postcombat_hp += hp_ratio[0]

            row['player_individual_ac'].append(player_info.get('ac', 0))

            # Collect and append individual player stats
            stats = player_info.get('stats', {})
            for stat in stat_names:
                row[f'player_individual_{stat}'].append(stats.get(stat, 0))

            # Slot levels arrive as strings; an unexpected level raises KeyError.
            for level, count in player_info.get('slots', {}).items():
                row[f'party_level{int(level)}_spellslots'] += count

        row['party_total_ac'] = sum(row['player_individual_ac'])
        row['party_total_precombat_hp'] = total_precombat_hp
        row['party_total_postcombat_hp'] = total_postcombat_hp
        row['party_total_hpratio'] = (
            total_postcombat_hp / total_precombat_hp if total_precombat_hp > 0 else 'NA'
        )
        for stat in stat_names:
            row[f'party_total_{stat}'] = sum(row[f'player_individual_{stat}'])

        data_rows.append(row)

    if not data_rows:
        # Without rows there is no 'combat_id' column to index on.
        return pd.DataFrame(columns=['combat_id'])

    final_combat_data_df = pd.DataFrame(data_rows).set_index('combat_id').reset_index()
    return final_combat_data_df

# Workflow

In [8]:
# Relative-path alternative to the absolute pattern below (currently disabled):
#file_pattern = '../anonymized/data/*.jsonl.gz'
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
file_pattern = 'C:/Users/josep/OneDrive/Desktop/Erdos/anonymized/data/*.jsonl.gz'
files = glob.glob(file_pattern)
# Number of log files matched by the pattern.
print(len(files))

24748


In [9]:
# Parse every matched log file and merge the results into one mapping keyed by combat id.
final_combat_data = process_combat_files(files)

Processing files: 100%|██████████| 24748/24748 [12:08<00:00, 33.96it/s]


In [10]:
# mls / phs: per-encounter monster level totals and party HP percentages;
# the two averages are their means.
mls, phs, monster_average, player_average = calculate_averages_for_encounters(final_combat_data)
monster_average, player_average
# TODO: check the player average — the recorded value (~2.0e9) is far above 100,
# which is implausible for a current/max HP percentage; inspect phs for outliers.

Calculating averages: 100%|██████████| 24748/24748 [00:00<00:00, 366543.20it/s]


(13.943369161144336, 2000363737.58867)

In [12]:
# Build the per-combat metrics table and preview its first ten rows.
final_combat_data_df = calculate_metrics(final_combat_data)
display(final_combat_data_df.head(10))

Unnamed: 0,combat_id,start_time,number_of_rounds,player_ids,player_info,monsters_info,party_size,total_slots,total_max_slots,party_classes_with_level,...,party_total_precombat_hp,party_total_postcombat_hp,party_total_hpratio,party_total_prof_bonus,party_total_strength,party_total_dexterity,party_total_constitution,party_total_intelligence,party_total_wisdom,party_total_charisma
0,1653718840-26eb6840-30d2-4231-a510-493e98f19b40,1653719000.0,0,[],[],[],0,{},{},[],...,0,0,,0,0,0,0,0,0,0
1,1653817902-15ec25a2-6757-4ceb-ac41-d44352b97705,1653818000.0,1,[209330647903743785],"[{'hp_ratio': (65, 65), 'class': [('Ranger', 1...",[{'monster_id': 'c0cdb522-64c2-42ee-b1a8-6ef22...,1,"{'1': 4, '2': 3, '3': 2, '4': 0, '5': 0, '6': ...","{'1': 4, '2': 3, '3': 2, '4': 0, '5': 0, '6': ...","[(Ranger, 10)]",...,65,65,1.0,4,14,16,12,12,14,11
2,1653818306-26460642-20fa-4d1f-9152-a53493cf7cb3,1653818000.0,0,[209330647903743785],"[{'hp_ratio': (65, 65), 'class': [('Ranger', 1...",[],1,"{'1': 4, '2': 3, '3': 2, '4': 0, '5': 0, '6': ...","{'1': 4, '2': 3, '3': 2, '4': 0, '5': 0, '6': ...","[(Ranger, 10)]",...,65,65,1.0,4,14,16,12,12,14,11
3,1653819786-0644834c-bd73-43a3-98bb-6a36fa3c0fca,1653820000.0,1,[],[],[{'monster_id': '4e0200d1-3fc1-4d96-97cc-3db4d...,0,{},{},[],...,0,0,,0,0,0,0,0,0,0
4,1653820492-5e967803-0dfc-44a3-aaf6-8a2637750af0,1653820000.0,0,[],[],[],0,{},{},[],...,0,0,,0,0,0,0,0,0,0
5,1653820568-5c545c92-95ac-4c7f-b677-0276489affbe,1653821000.0,0,[],[],[],0,{},{},[],...,0,0,,0,0,0,0,0,0,0
6,1653820676-2737863e-d1fe-4249-99a2-2fce622a5b73,1653821000.0,0,[],[],[],0,{},{},[],...,0,0,,0,0,0,0,0,0,0
7,1653820936-807a2ff0-373e-41bf-854e-6799815211ed,1653821000.0,0,[],[],[],0,{},{},[],...,0,0,,0,0,0,0,0,0,0
8,1653853909-2a642a70-bfca-4daa-8056-61d5198075f9,1653854000.0,0,[142344480852772526],"[{'hp_ratio': (57, 72), 'class': [('Bard', 7),...",[],1,"{'1': 5, '2': 2, '3': 3, '4': 1, '5': 0, '6': ...","{'1': 6, '2': 3, '3': 3, '4': 1, '5': 0, '6': ...","[(Bard, 7), (Warlock, 2)]",...,72,57,0.791667,4,11,17,12,12,12,20
9,1653856349-96235919-cd9d-4a0d-9ea9-9a15cf968979,1653856000.0,0,[],[],[{'monster_id': 'f501c8ae-3303-49ad-816e-28af0...,0,{},{},[],...,0,0,,0,0,0,0,0,0,0


In [13]:
# Write the unfiltered metrics table to a CSV file in the working directory.
csv_file_path = './unfiltered_combat_data_df_03-30_I.csv'
# index=True also writes the DataFrame's row index as a leading column.
final_combat_data_df.to_csv(csv_file_path, index=True)