# Libraries

In [1]:
import numpy as np
import pandas as pd
import json
import glob
import ast
import gzip
import os
import yaml
from tqdm import tqdm
import re
from collections import defaultdict

# Functions

In [2]:
def filter_std_player_classes_monsters(df):
    """Keep rows whose players all use standard classes and whose monster list is non-empty.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``player_info`` and ``monsters_info``.
        Each ``player_info`` cell is parsed with ``ast.literal_eval`` and is
        expected to be the string repr of a list of dicts, each with a
        ``'class'`` entry whose items carry the class name at position 0.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` (original index preserved) where every class of
        every player is in the standard set and ``monsters_info`` is not the
        literal string ``"[]"``.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when a ``player_info`` string is
        not a valid Python literal. Malformed data is deliberately not
        swallowed.

    Notes
    -----
    * Rows whose ``player_info`` is not a string (e.g. NaN produced by
      ``pd.read_csv`` for a blank cell) are dropped: their classes cannot be
      verified, and ``ast.literal_eval`` would otherwise raise on them.
    * An empty player list (``"[]"``) passes the class check vacuously.
    """
    # A set gives O(1) membership checks for each class name.
    standard_classes = {
        'Barbarian', 'Bard', 'Cleric', 'Druid', 'Fighter', 'Monk',
        'Paladin', 'Ranger', 'Rogue', 'Sorcerer', 'Warlock', 'Wizard', 'Blood Hunter'
    }

    def are_all_classes_standard(player_list_str):
        # Missing / non-string cells cannot be parsed; treat them as failing the filter
        # instead of letting ast.literal_eval raise and abort the whole run.
        if not isinstance(player_list_str, str):
            return False
        # Parse the string representation of the list only once per row.
        player_list = ast.literal_eval(player_list_str)
        return all(
            class_info[0].strip() in standard_classes
            for player in player_list
            for class_info in player['class']
        )

    # Filter rows based on player_info
    tqdm.pandas(desc="Filtering Players")
    # Coerce to bool so the result is always used as a row mask.
    players_ok = df['player_info'].progress_apply(are_all_classes_standard).astype(bool)
    df_filtered = df[players_ok]

    # Filter rows based on monsters_info (drop encounters with an empty monster list)
    df_filtered = df_filtered[df_filtered['monsters_info'] != "[]"]

    return df_filtered

# Workflow

In [3]:
# Load the full combat export and report its (rows, columns) shape.
combat_data_df = pd.read_csv(filepath_or_buffer="final_combat_data_df_03-25_FULL.csv")
print(combat_data_df.shape)

# Keep only encounters with standard player classes and a non-empty monster list,
# then report the shape again to show how many rows survived.
combat_data_filtered_df = combat_data_df.pipe(filter_std_player_classes_monsters)
print(combat_data_filtered_df.shape)

(24748, 44)


Filtering Players: 100%|██████████| 24748/24748 [00:06<00:00, 3840.21it/s]


(14824, 44)


In [4]:
# Persist the filtered rows; index=True also writes the row index as the first CSV column.
csv_file_path = './filtered_combat_data_df.csv'
combat_data_filtered_df.to_csv(path_or_buf=csv_file_path, index=True)