In [19]:
import pandas as pd
import numpy as np
import re

# Read the CSV file
df = pd.read_csv('raw_battlelogs.csv')

# Extract the specified columns
columns_to_extract = ['event_mode', 'event_map', 'battle_mode', 'battle_type', 'battle_result', 'battle_teams']
extracted_data = df[columns_to_extract]

# Filter out friendly games and draws
filtered_data = extracted_data[(extracted_data['battle_type'] != 'friendly') & (extracted_data['battle_result'] != 'draw')]

# Consolidate 'event_mode' and 'battle_mode' into one column
filtered_data['mode'] = filtered_data.apply(
    lambda row: row['event_mode'] if pd.notna(row['event_mode']) else row['battle_mode'], axis=1
)

# Remove rows where both 'event_mode' and 'battle_mode' are empty
filtered_data = filtered_data.dropna(subset=['mode'])

# Remove rows with specific modes
modes_to_remove = ['bigGame', 'bossFight', 'roboRumble', 'lastStand', 'soloShowdown', 'duoShowdown']
filtered_data = filtered_data[~filtered_data['mode'].isin(modes_to_remove)]

# Drop the original 'event_mode' and 'battle_mode' columns
filtered_data = filtered_data.drop(columns=['event_mode', 'battle_mode'], errors='ignore')


# Function to extract brawler data using regular expressions
def extract_brawler_data(battle_teams):
    if pd.isna(battle_teams):
        return []
    pattern = r"'brawler': \{[^}]+\}"
    matches = re.findall(pattern, battle_teams)
    brawlers = []
    for match in matches:
        brawler_info = eval(match.split(": ", 1)[1])
        brawlers.append(brawler_info['name'])
        brawlers.append(brawler_info['power'])
        brawlers.append(brawler_info['trophies'])
    return brawlers

# Apply the function to extract brawler data
filtered_data['brawler_data'] = filtered_data['battle_teams'].apply(extract_brawler_data)

# Create new column names
new_columns = []
for team in range(1, 3):
    for brawler in range(1, 4):
        new_columns.append(f'team {team} brawler {brawler}')
        new_columns.append(f'team {team} brawler {brawler} power')
        new_columns.append(f'team {team} brawler {brawler} trophies')

# Create a DataFrame with the new columns
brawler_df = pd.DataFrame(filtered_data['brawler_data'].tolist(), columns=new_columns)

# Drop the original 'battle_teams' and 'brawler_data' columns
filtered_data = filtered_data.drop(columns=['battle_teams', 'brawler_data'])

# Concatenate the new columns with the filtered_data DataFrame
final_data = pd.concat([filtered_data, brawler_df], axis=1)
final_data = final_data.dropna()

# Separate 'battle_result' into its own list
battle_result = final_data['battle_result'].tolist()

# Remove 'battle_result' from the filtered data
final_data = final_data.drop(columns=['battle_result'])

# Convert the final DataFrame to a NumPy matrix
numpy_matrix = final_data.to_numpy()

# Print the results
print("Final NumPy Matrix:")
print(numpy_matrix)
print("\nBattle Result List:")
print(battle_result)
print(len(battle_result))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['mode'] = filtered_data.apply(


Final NumPy Matrix:
[['Deep Diner' 'ranked' 'gemGrab' ... 'JESSIE' 10.0 682.0]
 ['Sneaky Fields' 'ranked' 'brawlBall' ... 'FRANK' 11.0 905.0]
 ['Sneaky Fields' 'ranked' 'brawlBall' ... 'DARRYL' 11.0 752.0]
 ...
 ['Goldarm Gulch' 'ranked' 'knockout' ... 'MAX' 11.0 805.0]
 ['Goldarm Gulch' 'ranked' 'knockout' ... 'GRAY' 11.0 15.0]
 ['Goldarm Gulch' 'ranked' 'knockout' ... 'GRAY' 11.0 15.0]]

Battle Result List:
['defeat', 'defeat', 'defeat', 'defeat', 'victory', 'defeat', 'victory', 'defeat', 'defeat', 'victory', 'defeat', 'defeat', 'victory', 'victory', 'defeat', 'victory', 'defeat', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'victory', 'defeat', 'victory', 'victory', 'defeat', 'defeat', 'defeat', 'victory', 'victory', 'victory', 'defeat', 'victory', 'victory', 'defeat', 'victory', 'victory', 'victory', 'victory', 'victory', 'defeat',

Unnamed: 0,event_map,battle_type,mode,team 1 brawler 1,team 1 brawler 1 power,team 1 brawler 1 trophies,team 1 brawler 2,team 1 brawler 2 power,team 1 brawler 2 trophies,team 1 brawler 3,...,team 1 brawler 3 trophies,team 2 brawler 1,team 2 brawler 1 power,team 2 brawler 1 trophies,team 2 brawler 2,team 2 brawler 2 power,team 2 brawler 2 trophies,team 2 brawler 3,team 2 brawler 3 power,team 2 brawler 3 trophies
0,Deep Diner,ranked,gemGrab,GUS,11.0,717.0,CROW,11.0,791.0,DARRYL,...,701.0,MORTIS,11.0,791.0,FANG,11.0,740.0,JESSIE,10.0,682.0
1,Sneaky Fields,ranked,brawlBall,RICO,11.0,782.0,NITA,11.0,837.0,EL PRIMO,...,890.0,MORTIS,11.0,799.0,EMZ,10.0,784.0,FRANK,11.0,905.0
2,Sneaky Fields,ranked,brawlBall,POCO,11.0,673.0,JACKY,10.0,681.0,SPIKE,...,704.0,GALE,11.0,651.0,SURGE,11.0,649.0,DARRYL,11.0,752.0
3,Sneaky Fields,ranked,brawlBall,LEON,10.0,641.0,SANDY,10.0,604.0,BIBI,...,663.0,BIBI,11.0,654.0,JACKY,10.0,625.0,CROW,11.0,674.0
4,Sneaky Fields,ranked,brawlBall,COLETTE,7.0,403.0,BUSTER,11.0,419.0,GENE,...,411.0,MAX,8.0,395.0,COLETTE,7.0,390.0,FANG,7.0,446.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
373561,Goldarm Gulch,ranked,knockout,JACKY,10.0,829.0,SURGE,10.0,756.0,DARRYL,...,861.0,MORTIS,11.0,791.0,SHELLY,10.0,799.0,RUFFS,10.0,765.0
373562,Goldarm Gulch,ranked,knockout,RICO,11.0,792.0,MORTIS,11.0,800.0,TICK,...,795.0,MAX,11.0,826.0,TICK,10.0,829.0,RICO,11.0,812.0
373563,Goldarm Gulch,ranked,knockout,CROW,11.0,872.0,PIPER,11.0,910.0,STU,...,888.0,BELLE,11.0,899.0,PIPER,10.0,908.0,MAX,11.0,805.0
373564,Goldarm Gulch,ranked,knockout,BEA,11.0,14.0,PENNY,11.0,16.0,FANG,...,16.0,PIPER,11.0,12.0,CARL,11.0,15.0,GRAY,11.0,15.0


In [14]:
num_nan_event_map = final_data['mode'].isna().sum()
print(f"\nNumber of NaN values in 'event_map': {num_nan_event_map}")


Number of NaN values in 'event_map': 99862


In [3]:
import re

# The input string
input_string = "[[{'tag': '#LGC0PQ98', 'name': '🍁PassØut™', 'brawler': {'id': 16000029, 'name': 'BEA', 'power': 11, 'trophies': 14}}, {'tag': '#VULUQUY8', 'name': '金|𝔻ανσχ💔', 'brawler': {'id': 16000019, 'name': 'PENNY', 'power': 11, 'trophies': 16}}, {'tag': '#92CRYR8JR', 'name': 'Adͥℝiͣaͫn🌎⚡', 'brawler': {'id': 16000054, 'name': 'FANG', 'power': 11, 'trophies': 16}}], [{'tag': '#PJJJR9JQ2', 'name': 'schwule gans', 'brawler': {'id': 16000015, 'name': 'PIPER', 'power': 11, 'trophies': 12}}, {'tag': '#YVQQJY0', 'name': 'мєℓιн™', 'brawler': {'id': 16000025, 'name': 'CARL', 'power': 11, 'trophies': 15}}, {'tag': '#9GYCJYGP', 'name': 'Armo', 'brawler': {'id': 16000064, 'name': 'GRAY', 'power': 11, 'trophies': 15}}]]"

# Regular expression to match the desired substrings
pattern = r"'brawler': \{[^}]+\}"

# Find all matches
matches = re.findall(pattern, input_string)

# Store matches in a list
brawler_list = matches

# Print the list
for brawler in brawler_list:
    print(brawler)

'brawler': {'id': 16000029, 'name': 'BEA', 'power': 11, 'trophies': 14}
'brawler': {'id': 16000019, 'name': 'PENNY', 'power': 11, 'trophies': 16}
'brawler': {'id': 16000054, 'name': 'FANG', 'power': 11, 'trophies': 16}
'brawler': {'id': 16000015, 'name': 'PIPER', 'power': 11, 'trophies': 12}
'brawler': {'id': 16000025, 'name': 'CARL', 'power': 11, 'trophies': 15}
'brawler': {'id': 16000064, 'name': 'GRAY', 'power': 11, 'trophies': 15}
