# Scrape

In [1]:
from bs4 import BeautifulSoup
import requests
import polars as pl
import re

nemelee_tournaments_url = 'https://braacket.com/league/nemelee/tournament?rows=200'

# Define scraper components

In [2]:
def scrape_tournament_tags(url):
    response = requests.get(url)
    tournament_soup = BeautifulSoup(response.content, 'html.parser')
    num_pages = (
        tournament_soup
        .find(class_='search-pagination')
        .find_all(class_='input-group-addon')[-1]
        .text.split(' ')[-1].strip()
    )

    tournament_tags = []
    for i in range(1, int(num_pages) + 1):
        url = f'{nemelee_tournaments_url}&page={i}'
        response = requests.get(url)
        tournament_soup = BeautifulSoup(response.content, 'html.parser')
        tournament_tags += list(
            map(
                lambda x: x.find_parent(class_='panel'),
                tournament_soup.find_all(string='Detail')
            )
        )
    return tournament_tags

tournament_tags = scrape_tournament_tags(nemelee_tournaments_url)

In [3]:
tournament_tags[0].find(class_='country_flag').find('img').get('src', None)

'/assets/images/country/flag/us.png'

In [4]:
def extract_tournament_data(tournament_tags):
    tournaments = []
    for tournament_tag in tournament_tags:
        data = {
            'url': tournament_tag.find(class_='panel-heading').find('a')['href'],
            'name': tournament_tag.find(class_='panel-heading').find('a').text.strip(),
            'date': tournament_tag.find(string='Date').parent
                     .find_next_sibling().text.strip() if tournament_tag.find(string='Date').parent.find_next_sibling() else None,
            'country': (tournament_tag.find(class_='country_flag').find('img').get('src', None)
                        if tournament_tag.find(class_='country_flag') else None),
            'region': (tournament_tag.find(class_='country_region_flag').get('src', None)
                       if tournament_tag.find(class_='country_region_flag') else None),
            'number_of_players': (int(tournament_tag.find(data_original_title_='Imported players').text.strip())
                                  if tournament_tag.find(data_original_title_='Imported players') else None)
        }
        tournaments.append(data)
    return tournaments

tournaments = extract_tournament_data(tournament_tags)
# Convert the list of dictionaries to a Polars DataFrame
tournaments_df = pl.DataFrame(tournaments, schema={col: pl.String for col in tournaments[0].keys()}).filter(pl.col('url').is_not_null())
# Save the DataFrame to a CSV file
tournaments_df


url,name,date,country,region,number_of_players
str,str,str,str,str,str
"""/tournament/633C3E6E-A97A-45F9…","""Mass Madness 50 - 10th Anniver…","""03 May 2025""","""/assets/images/country/flag/us…","""/assets/images/country/regions…",
"""/tournament/8E4620F6-BE69-49E7…","""Prodigy Smash Weekly #181""","""02 May 2025""","""/assets/images/country/flag/us…","""/assets/images/country/regions…",
"""/tournament/C9E99448-000D-4EFE…","""HoG: Phoenix 8.2""","""01 May 2025""","""/assets/images/country/flag/us…","""/assets/images/country/regions…",
"""/tournament/0AE52887-C69F-4345…","""Pichu Party Resurrected #7""","""01 May 2025""","""/assets/images/country/flag/us…","""/assets/images/country/regions…",
"""/tournament/4481369B-1408-4DA1…","""One Up Melee 4.30.25""","""30 April 2025""","""/assets/images/country/flag/us…","""/assets/images/country/regions…",
…,…,…,…,…,…
"""/tournament/6C5C5707-D7E8-44C9…","""New Game Plus Ultra 5""","""01 November 2017""",,,
"""/tournament/44A73460-4370-4763…","""OBELISK 74 + NEMESIS 45""","""01 November 2017""",,,
"""/tournament/E751F098-C096-4658…","""Multishine 103017""","""31 October 2017""",,,
"""/tournament/564F913B-F7D2-48EA…","""SFTP 26: A Stock is a Terrible…","""31 October 2017""",,,


In [9]:
match_url = f'https://braacket.com/{tournaments[0]['url']}/match'
print(match_url)

def scrape_stage_urls(match_url):
    response = requests.get(match_url)
    match_soup = BeautifulSoup(response.content, 'html.parser')
    stage_urls = list(set(
        map(
            lambda x: x['href'],
            match_soup.find_all(attrs={"href": re.compile(r"^/tournament/.*/stage/.*$")}),
        )
    ))
    return stage_urls

stage_urls = scrape_stage_urls(match_url)

https://braacket.com//tournament/633C3E6E-A97A-45F9-B1B4-1ADA348C350C/match


In [30]:
def scrape_stage_matches(tournament_url, stage_url):
    matches = []
    response = requests.get(f'https://braacket.com/{stage_url}')
    stage_soup = BeautifulSoup(response.content, 'html.parser')

    stage_encounters = stage_soup.find_all(class_='tournament_encounter-row')
    for encounter in stage_encounters:
        data = {
            'encounter_id': encounter.find(class_='tournament_encounter-id').text.strip(),
            'winner_url': encounter.find(class_=['tournament_encounter_opponent', 'winner']).find('a')['href'],
            'winner': encounter.find(class_=['tournament_encounter_opponent', 'winner']).text.strip(),
            'loser_url': encounter.find(class_=['tournament_encounter_opponent', 'loser']).find('a')['href'],
            'loser': encounter.find(class_=['tournament_encounter_opponent', 'loser']).text.strip(),
            'winner_score': encounter.find(class_=['tournament_encounter-score', 'winner']).text.strip(),
            'loser_score': encounter.find(class_=['tournament_encounter-score', 'loser']).text.strip(),
            'tournament_url': tournament_url,
        }
        matches.append(data)
    return matches

matches = scrape_stage_matches(tournaments[0]['url'], stage_urls[0])
# Convert the list of dictionaries to a Polars DataFrame
matches_df = pl.DataFrame(matches, schema={col: pl.String for col in matches[0].keys()})
# Save the DataFrame to a CSV file
matches_df


encounter_id,winner_url,winner,loser_url,loser,winner_score,loser_score,tournament_url
str,str,str,str,str,str,str,str
"""6""","""/tournament/633C3E6E-A97A-45F9…","""tedkittenski""","""/tournament/633C3E6E-A97A-45F9…","""tedkittenski""","""tedkittenski""","""3""","""/tournament/633C3E6E-A97A-45F9…"
"""12""","""/tournament/633C3E6E-A97A-45F9…","""Kumatora""","""/tournament/633C3E6E-A97A-45F9…","""Kumatora""","""Kumatora""","""3""","""/tournament/633C3E6E-A97A-45F9…"
"""16""","""/tournament/633C3E6E-A97A-45F9…","""Andrew""","""/tournament/633C3E6E-A97A-45F9…","""Andrew""","""Andrew""","""3""","""/tournament/633C3E6E-A97A-45F9…"
"""14""","""/tournament/633C3E6E-A97A-45F9…","""witdashifts | ren""","""/tournament/633C3E6E-A97A-45F9…","""witdashifts | ren""","""witdashifts | ren""","""3""","""/tournament/633C3E6E-A97A-45F9…"
"""10""","""/tournament/633C3E6E-A97A-45F9…","""Ferrari""","""/tournament/633C3E6E-A97A-45F9…","""Ferrari""","""0""","""Ferrari""","""/tournament/633C3E6E-A97A-45F9…"
…,…,…,…,…,…,…,…
"""252""","""/tournament/633C3E6E-A97A-45F9…","""oz""","""/tournament/633C3E6E-A97A-45F9…","""oz""","""oz""","""3""","""/tournament/633C3E6E-A97A-45F9…"
"""253""","""/tournament/633C3E6E-A97A-45F9…","""Ant""","""/tournament/633C3E6E-A97A-45F9…","""Ant""","""Ant""","""3""","""/tournament/633C3E6E-A97A-45F9…"
"""255""","""/tournament/633C3E6E-A97A-45F9…","""hc | saucymain""","""/tournament/633C3E6E-A97A-45F9…","""hc | saucymain""","""hc | saucymain""","""3""","""/tournament/633C3E6E-A97A-45F9…"
"""256""","""/tournament/633C3E6E-A97A-45F9…","""Ant""","""/tournament/633C3E6E-A97A-45F9…","""Ant""","""0""","""Ant""","""/tournament/633C3E6E-A97A-45F9…"


## Run Scraper

In [32]:
tournament_tags = scrape_tournament_tags(nemelee_tournaments_url)

tournaments = extract_tournament_data(tournament_tags)
tournaments_df = pl.DataFrame(tournaments, schema={col: pl.String for col in tournaments[0].keys()}).filter(pl.col('url').is_not_null())
tournaments_df.write_csv('data/tournaments.csv')

matches = []
for tournament in tournaments:
    tournament_matches = []
    try:
        match_url = f'https://braacket.com/{tournament["url"]}/match'
        stage_urls = scrape_stage_urls(match_url)
        for stage_url in stage_urls:
            tournament_matches += scrape_stage_matches(tournament['url'], stage_url)
    except Exception as e:
        print(f"Error processing tournament {tournament['name']}, {tournament['url']}: {e}")
        continue
    matches += tournament_matches

matches_df = pl.DataFrame(matches, schema={col: pl.String for col in matches[0].keys()})
matches_df.write_csv('data/matches.csv')

Error processing tournament One Up Melee 9.4.24, /tournament/343299CA-F3AE-47B1-9899-E6E0E1B17DFF: 'NoneType' object is not subscriptable
Error processing tournament New Game Plus Revival 6.9, /tournament/8DB1E88B-E0BF-414A-BD4C-18962149751B: 'NoneType' object is not subscriptable
Error processing tournament Pho Tai Melee: #28 - DOUBLES EDITION - $50 Pot Bonus!, /tournament/E0585517-99CA-4A62-AEAA-EE9B30C097F0: 'NoneType' object is not subscriptable
Error processing tournament New Game Plus Revival 4.17, /tournament/F8B2ADC3-BC70-4C78-B1B2-435D5B8DD5E7: 'NoneType' object is not subscriptable
Error processing tournament New Game Plus Ultra 13, /tournament/1D0685B3-41D2-42D7-A179-C3376527155C: 'NoneType' object is not subscriptable


In [29]:
matches_df

encounter_id,winner_url,winner,loser_url,loser,winner_score,loser_score
str,str,str,str,str,str,str
"""6""","""/tournament/633C3E6E-A97A-45F9…","""tedkittenski""","""/tournament/633C3E6E-A97A-45F9…","""tedkittenski""","""tedkittenski""","""3"""
"""12""","""/tournament/633C3E6E-A97A-45F9…","""Kumatora""","""/tournament/633C3E6E-A97A-45F9…","""Kumatora""","""Kumatora""","""3"""
"""16""","""/tournament/633C3E6E-A97A-45F9…","""Andrew""","""/tournament/633C3E6E-A97A-45F9…","""Andrew""","""Andrew""","""3"""
"""14""","""/tournament/633C3E6E-A97A-45F9…","""witdashifts | ren""","""/tournament/633C3E6E-A97A-45F9…","""witdashifts | ren""","""witdashifts | ren""","""3"""
"""10""","""/tournament/633C3E6E-A97A-45F9…","""Ferrari""","""/tournament/633C3E6E-A97A-45F9…","""Ferrari""","""0""","""Ferrari"""
…,…,…,…,…,…,…
"""43""","""/tournament/633C3E6E-A97A-45F9…","""bonfire10""","""/tournament/633C3E6E-A97A-45F9…","""bonfire10""","""bonfire10""","""3"""
"""7""","""/tournament/633C3E6E-A97A-45F9…","""MATE | Kalvar""","""/tournament/633C3E6E-A97A-45F9…","""MATE | Kalvar""","""MATE | Kalvar""","""3"""
"""45""","""/tournament/633C3E6E-A97A-45F9…","""bonfire10""","""/tournament/633C3E6E-A97A-45F9…","""bonfire10""","""bonfire10""","""3"""
"""46""","""/tournament/633C3E6E-A97A-45F9…","""Ember""","""/tournament/633C3E6E-A97A-45F9…","""Ember""","""Ember""","""3"""


# Calculate truskill

In [23]:
import trueskill

In [24]:
players = {}

def rate_players(row: dict) -> dict:
    # Create TrueSkill rating objects for the players
    players[row['winner_url']] = trueskill.Rating()
    players[row['loser_url']] = trueskill.Rating()

    result = {
        'winner_rating': players[row['winner_url']],
        'loser_rating': players[row['loser_url']],
    }

    # Update the ratings based on the match outcome
    new_winner_rating, new_loser_rating = trueskill.rate_1vs1(players[row['winner_url']], players[row['loser_url']])
    players[row['winner_url']] = new_winner_rating
    players[row['loser_url']] = new_loser_rating

    result.update({
        'new_winner_rating': new_winner_rating,
        'new_loser_rating': new_loser_rating,
    })
    return result

In [25]:
players = {}

matches_with_ratings = matches_df.with_columns(
    pl.struct(['winner_url', 'loser_url']).map_elements(rate_players, return_dtype=pl.Struct).alias('new_ratings')
).unnest('new_ratings')

In [26]:
matches_with_ratings.write_csv('data/matches-with-ratings.csv')

## Export ranking

In [27]:
ranking = pl.DataFrame({
    'player_url': [player_url for player_url in players.keys()],
    'rating': [rating for rating in players.values()],
})

In [28]:
ranking

player_url,rating
str,f64
"""/tournament/633C3E6E-A97A-45F9…",20.604168
"""/tournament/633C3E6E-A97A-45F9…",20.604168
"""/tournament/633C3E6E-A97A-45F9…",20.604168
"""/tournament/633C3E6E-A97A-45F9…",20.604168
"""/tournament/633C3E6E-A97A-45F9…",20.604168
…,…
"""/tournament/633C3E6E-A97A-45F9…",20.604168
"""/tournament/633C3E6E-A97A-45F9…",20.604168
"""/tournament/633C3E6E-A97A-45F9…",20.604168
"""/tournament/633C3E6E-A97A-45F9…",20.604168
