# Libraries

In [21]:
import numpy as np
import pandas as pd
import json
import glob
import ast
import gzip
import os
import yaml
from tqdm import tqdm
import re
from collections import defaultdict
import seaborn as sns
import matplotlib.pyplot as plt

In [48]:
# Load data

# Read the unfiltered extracted CSV; the path is relative to the kernel's working directory.
df = pd.read_csv('../extracted_data_24_4_9.csv')

# Functions

In [49]:
### Run this code on the unfiltered CSV that was extracted from the raw data

# Applying some reasonable filters

def filter_std_player_classes_monsters(df):
    """Keep only rows with standard-class parties, monsters, and sane HP totals.

    A row is kept when all of the following hold:

    * every class entry of every player in ``player_info`` names a standard
      class (names are whitespace-stripped before the comparison),
    * ``monsters_info`` is not the string ``"[]"``,
    * ``party_total_hpratio`` is not missing,
    * ``party_total_precombat_hp`` and ``party_total_postcombat_hp`` are both
      below 1e10.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``player_info`` column holds the string repr of a list of
        player dicts; each dict has a ``'class'`` list whose entries carry the
        class name as their first element.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` that pass every filter.
    """
    allowed = frozenset((
        'Barbarian', 'Bard', 'Cleric', 'Druid', 'Fighter', 'Monk',
        'Paladin', 'Ranger', 'Rogue', 'Sorcerer', 'Warlock', 'Wizard', 'Blood Hunter',
    ))

    def are_all_classes_standard(player_list_str):
        # Parse the stored repr once, then test every (player, class entry) pair.
        members = ast.literal_eval(player_list_str)
        return all(
            entry[0].strip() in allowed
            for member in members
            for entry in member['class']
        )

    # Class filter, with a progress bar because it parses every row.
    tqdm.pandas(desc="Filtering Players")
    class_mask = df['player_info'].progress_apply(are_all_classes_standard)
    standard_only = df[class_mask]

    # Remaining filters, evaluated together on the class-filtered frame.
    keep = (
        (standard_only['monsters_info'] != "[]")                 # at least one monster recorded
        & standard_only['party_total_hpratio'].notna()           # hp ratio must be present
        & (standard_only['party_total_precombat_hp'] < 1e10)     # implausibly large pre-combat HP
        & (standard_only['party_total_postcombat_hp'] < 1e10)    # implausibly large post-combat HP
    )
    return standard_only[keep]

In [50]:
# Apply the baseline filters (standard classes only, monsters present, HP totals present and below 1e10) to the raw frame.
filtered_df = filter_std_player_classes_monsters(df)


Filtering Players: 100%|██████████| 24748/24748 [00:07<00:00, 3446.60it/s]


In [51]:
# Applying agreed upon filters

# Limits used below; a party is dropped as soon as one of its players exceeds any of them.
MAX_PARTY_SIZE = 10       # exclusive: only parties with fewer than 10 players are kept
MAX_TOTAL_LEVEL = 20      # summed over all of a player's classes
MAX_PLAYER_HP = 350       # compared against the player's maximum HP, hp_ratio[1]
MAX_ARMOR_CLASS = 38
MAX_ABILITY_SCORE = 22

# Ability scores that are checked against MAX_ABILITY_SCORE
stats = ['strength', 'dexterity', 'constitution', 'intelligence', 'wisdom', 'charisma']


def _party_within_limits(player_info_str):
    """Return True when every player of a party passes all per-player limits.

    The ``player_info`` string is parsed once and the checks run in the same
    order as the original separate passes (level, HP, AC, ability scores);
    the first failing check rejects the party, so later checks are skipped.

    A party is rejected when any player
      * has a total level (sum over its classes) above MAX_TOTAL_LEVEL,
      * has no ``hp_ratio`` or a maximum HP above MAX_PLAYER_HP,
      * has no ``ac`` or an AC above MAX_ARMOR_CLASS,
      * has no ``stats`` or any listed ability score above MAX_ABILITY_SCORE.

    Parameters
    ----------
    player_info_str : str
        String repr of a list of player dicts with the keys ``'class'``,
        ``'hp_ratio'``, ``'ac'`` and ``'stats'``.

    Returns
    -------
    bool
    """
    players = ast.literal_eval(player_info_str)

    # Levels above 20
    if any(sum(class_lvl[1] for class_lvl in p['class']) > MAX_TOTAL_LEVEL for p in players):
        return False

    # Missing hp ratio or maximum HP above 350
    if any(p['hp_ratio'] is None or p['hp_ratio'][1] > MAX_PLAYER_HP for p in players):
        return False

    # Missing AC or AC above 38
    if any(p['ac'] is None or p['ac'] > MAX_ARMOR_CLASS for p in players):
        return False

    # Missing ability scores or any ability score above 22.
    # any() stops at the first offending player, so a party is rejected exactly once
    # (the previous inner `break` only left the stat loop and kept scanning players).
    if any(
        p['stats'] is None
        or any(key in stats and value > MAX_ABILITY_SCORE for key, value in p['stats'].items())
        for p in players
    ):
        return False

    return True


# Keeping only parties with fewer than 10 players
filtered_df = filtered_df[filtered_df['party_size'] < MAX_PARTY_SIZE]

# Single pass over player_info instead of four separate parse-and-drop loops.
# astype(bool) keeps the mask boolean even when the frame is empty.
within_limits = filtered_df['player_info'].apply(_party_within_limits).astype(bool)
filtered_df = filtered_df[within_limits]

filtered_df.shape

(10842, 43)

In [52]:
# Party-size / monster-count ratios, in both directions
filtered_df['player_monster_ratio'] = filtered_df['party_size'].div(filtered_df['monster_number'])
filtered_df['monster_player_ratio'] = filtered_df['monster_number'].div(filtered_df['party_size'])

# Placeholder column for the party's summed maximum HP; filled row by row below
filtered_df['party_max_hp'] = 0

# Recompute the party HP totals from the per-player records and refresh the hp ratio
for index, row in filtered_df.iterrows():
    members = ast.literal_eval(row['player_info'])

    # A player without 'health_begin' contributes 0 to the pre-combat total
    precombat_hp = sum(member.get('health_begin', 0) for member in members)
    # hp_ratio[1] is read as the player's maximum HP
    max_hp = sum(member['hp_ratio'][1] for member in members)

    # Write the recomputed totals back into the frame
    filtered_df.at[index, 'party_total_precombat_hp'] = precombat_hp
    filtered_df.at[index, 'party_max_hp'] = max_hp

    # Post-combat HP over pre-combat HP; defined as 0 when the party started at 0 HP
    filtered_df.at[index, 'party_total_hpratio'] = (
        0 if precombat_hp == 0 else row['party_total_postcombat_hp'] / precombat_hp
    )

In [53]:
# One-hot encoding all the classes

def _normalise_class_list(value):
    """Return a party's class names as a list of whitespace-stripped strings.

    Accepts either the string repr read from the CSV or an already parsed
    list, so re-running this cell does not fail on ``ast.literal_eval``.
    Every name is stripped (not only ``'Barbarian '``), so the stored list
    always matches the indicator column names created below.
    """
    names = ast.literal_eval(value) if isinstance(value, str) else value
    return [name.strip() for name in names]


filtered_df['party_total_class_composition'] = (
    filtered_df['party_total_class_composition'].apply(_normalise_class_list)
)

# Get all unique classes
all_classes_str = {
    class_name
    for class_list in filtered_df['party_total_class_composition']
    for class_name in class_list
}

# One 0/1 indicator column per class. Iterating in sorted order keeps the column
# order identical across kernel sessions (set iteration order of strings is not stable).
for class_name in sorted(all_classes_str):
    filtered_df[class_name] = (
        filtered_df['party_total_class_composition']
        .apply(lambda class_list, target=class_name: target in class_list)
        .astype('int64')
    )



In [54]:
# Row/column count after the ratio, party_max_hp and per-class indicator columns were added.
filtered_df.shape

(10842, 59)

In [55]:
# List the column names of the frame before it is written out.
filtered_df.columns

Index(['combat_id', 'start_time', 'player_ids', 'player_info', 'monsters_info',
       'party_size', 'total_slots', 'total_max_slots',
       'party_classes_with_level', 'party_total_class_composition',
       'player_individual_hp_ratios', 'player_individual_ac',
       'player_individual_prof_bonus', 'player_individual_strength',
       'player_individual_dexterity', 'player_individual_constitution',
       'player_individual_intelligence', 'player_individual_wisdom',
       'player_individual_charisma', 'monster_types', 'monster_number',
       'monster_total_level', 'party_total_level', 'party_level1_spellslots',
       'party_level2_spellslots', 'party_level3_spellslots',
       'party_level4_spellslots', 'party_level5_spellslots',
       'party_level6_spellslots', 'party_level7_spellslots',
       'party_level8_spellslots', 'party_level9_spellslots', 'party_total_ac',
       'party_total_precombat_hp', 'party_total_postcombat_hp',
       'party_total_hpratio', 'party_total_prof_b

In [56]:
# Write the filtered frame to the kernel's working directory.
# NOTE(review): the file name has no '.csv' extension, and to_csv's default index=True
# writes the DataFrame index as an unnamed first column — confirm both are intended.
filtered_df.to_csv('filtered_24_4_10')