# Load JSON

In [10]:
from __future__ import annotations
import json
from pathlib import Path
from collections import Counter
import itertools as it
from typing import Any, Iterable
import pandas as pd

# Location of the tournament export, relative to the notebook's working directory.
JSON_PATH = Path("./tournament_teams.json")

# Decode explicitly as UTF-8: without an encoding argument read_text() falls
# back to the platform locale, which can mis-decode non-ASCII text.
data = json.loads(JSON_PATH.read_text(encoding="utf-8"))

# Report the top-level shape of the document before digging in.
print(f"Top-level type: {type(data).__name__}")
if isinstance(data, dict):
    # Cap the listing at ten keys to keep the output short.
    print(f"Top-level keys: {list(data.keys())[:10]}")
elif isinstance(data, (list, str)):
    # Only sized JSON values have a length; numbers, booleans and null do not.
    print(f"Top-level length: {len(data)}")

def preview(obj: Any, n: int = 3) -> Any:
    """Return a short leading sample of a container for quick inspection.

    Args:
        obj: Value to sample.
        n: Slice bound applied to the list items or to the dict keys.

    Returns:
        ``obj[:n]`` for a list; a new dict built from the first ``n`` keys
        (in iteration order) for a dict; ``obj`` itself for anything else.
    """
    if isinstance(obj, dict):
        leading_keys = list(obj)[:n]
        return {key: obj[key] for key in leading_keys}
    return obj[:n] if isinstance(obj, list) else obj

# preview(data)

Top-level type: dict
Top-level keys: ['generated_at', 'source', 'tournaments']


# Basic Overview

In [34]:
# Position in data["tournaments"] of the tournament to inspect; change it to
# look at a different tournament slice.
TOURNAMENT_INDEX = 8

# Despite its name, first_entry holds the tournament at TOURNAMENT_INDEX when
# data is a dict; it is the first element only when data is a bare list.
if isinstance(data, dict) and data.get("tournaments"):
    first_entry = data["tournaments"][TOURNAMENT_INDEX]
elif isinstance(data, list) and data:
    first_entry = data[0]
else:
    first_entry = {}

print("Keys on first tournament entry:", list(first_entry.keys()))

# Player records of the selected entry; empty list when the key is absent.
players = first_entry.get("players", [])

# Keep only players carrying at least one issue. The truthiness test also
# covers an explicit None under "issues", which would break a len() call.
players_with_issues = [p for p in players if p.get("issues")]
print(f"Players with issues: {len(players_with_issues)}")

# De-duplicate across players: the figure printed next is the number of
# distinct issue values, not the sum of per-player issue counts.
all_issues = {
    issue for player in players_with_issues for issue in player["issues"]
}
print(f"Total issues across all players: {len(all_issues)}")

# Write all issues to a text file, one per line. Sorting gives the file a
# stable order between runs (set iteration order is arbitrary), and the
# explicit encoding keeps the result independent of the platform locale.
with open("all_issues.txt", "w", encoding="utf-8") as f:
    for issue in sorted(all_issues):
        f.write(f"{issue}\n")

print(f"Wrote {len(all_issues)} issues to all_issues.txt")


Keys on first tournament entry: ['tournament_id', 'name', 'date', 'division', 'players']
Players with issues: 701
Total issues across all players: 94
Wrote 94 issues to all_issues.txt


In [None]:
# Display the top-level keys of the loaded JSON document.
data.keys()

dict_keys(['generated_at', 'source', 'tournaments'])

In [28]:
# Tournament ids as integers, in the same order as data["tournaments"], so a
# position in this list is also a valid index into data["tournaments"].
list_tournaments = [int(t["tournament_id"]) for t in data["tournaments"]]

# Find the position of the tournament whose id equals target_id.
target_id = 160
try:
    index_160 = list_tournaments.index(target_id)
except ValueError:
    # The id does not occur in the export.
    index_160 = None
# Report the id actually searched for, so the message stays correct when
# target_id is edited.
print(f"Index of tournament {target_id}: {index_160}")

Index of tournament 160: 8


In [27]:
# Tally, for each key player field, how many records in `players` hold a
# usable value versus an empty one (None, empty string, or empty list).
fields = ["player_name", "country", "placing", "is_valid", "issues"]
counts = {
    name: Counter(
        "present" if entry.get(name) not in (None, "", []) else "missing"
        for entry in players
    )
    for name in fields
}
counts

{'player_name': Counter({'present': 92}),
 'country': Counter({'present': 92}),
 'placing': Counter({'present': 92}),
 'is_valid': Counter({'present': 92}),
 'issues': Counter({'missing': 92})}

# Sanity Checks

In [4]:
# Anomaly scan over `players`; change a predicate or the final slice to
# investigate further.
empty_team_players = list(filter(lambda entry: not entry.get("pokemon"), players))
missing_showdown = list(filter(lambda entry: not entry.get("showdown_team"), players))
# Only an explicit False counts as invalid; a missing or None flag does not.
invalid_players = list(filter(lambda entry: entry.get("is_valid") is False, players))

print(
    f"Players with empty teams: {len(empty_team_players)}",
    f"Players missing showdown string: {len(missing_showdown)}",
    f"Players flagged invalid: {len(invalid_players)}",
    sep="\n",
)

# Show the first few invalid records for debugging.
invalid_players[:3]

Players with empty teams: 92
Players missing showdown string: 92
Players flagged invalid: 0


[]