In [1]:
%matplotlib inline

In [2]:
from bs4 import BeautifulSoup
import datetime
from dataclasses import dataclass, field
from IPython.display import display, Markdown, HTML
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
from typing import Callable, Dict, List

## Load data

In [3]:
def read_ctc(filename: str, table_id: str) -> pd.DataFrame:
    """Parse one result table from a saved Coast to Coast (CTC) results page.

    How to get the data: open https://ctcultra.com/result-2024/ and save the
    page as HTML (page only).

    Args:
        filename: Path of the saved HTML page.
        table_id: ``id`` attribute of the element that holds the wanted table.

    Returns:
        DataFrame with columns ``name`` (stripped, title-cased), ``gender``
        ('f' or 'm') and ``time`` (``pd.Timedelta``, or ``pd.NaT`` when the
        time cell is empty).

    Raises:
        AssertionError: If no element carries ``table_id``, a row does not
            have exactly 9 cells, or the genders found are not exactly
            {'f', 'm'}.
    """
    with open(filename) as html_file:
        soup = BeautifulSoup(html_file.read(), 'html.parser')

    matches = soup.find_all(id=table_id)
    assert len(matches), f"{table_id} not found"

    # Only the first <tbody> of the first matching element is read.
    rows = matches[0].find_all('tbody')[0].find_all('tr')

    records = []
    for row in rows:
        cells = row.find_all('td')
        assert len(cells)==9
        # Cell 1 feeds the name, cell 2 the gender and cell 5 the time column.
        records.append((cells[1].get_text(), cells[2].get_text(), cells[5].get_text()))

    def parse_time(text):
        return pd.Timedelta(text) if text else pd.NaT

    df = pd.DataFrame({
        'name': [name for name, _, _ in records],
        'gender': [gender for _, gender, _ in records],
        'time': [time for _, _, time in records],
    })
    df['name'] = df['name'].apply(lambda s: s.strip().title())
    df['gender'] = df['gender'].replace('FEMALE', 'f').replace('MALE', 'm')
    assert set(df['gender'].unique()) == {'f', 'm'}
    df['time'] = df['time'].apply(parse_time)

    return df

def read_scorenow(filename: str) -> pd.DataFrame:
    """Load race results from an Excel sheet with bib/name/gender/time columns.

    Rows without a bib are dropped, as are rows whose ``time`` is one of
    'DNS', 'DQ', 'Not started' or 'Unofficial Winner'. Remaining rows whose
    time is 'cot', 'dnf' or 'dq' (case-insensitive) are kept with
    ``time = pd.NaT``.

    Args:
        filename: Path of the Excel file.

    Returns:
        DataFrame with columns ``name`` (stripped, title-cased), ``gender``
        ('f' or 'm') and ``time`` (``pd.Timedelta`` or ``pd.NaT``). The index
        is NOT reset, so it keeps the row labels of the source sheet.

    Raises:
        AssertionError: If the genders found are not exactly {'f', 'm'}.
    """
    excluded_times = ['DNS', 'DQ', 'Not started', 'Unofficial Winner']
    unfinished_markers = ['cot', 'dnf', 'dq']

    def parse_time(raw):
        # Time cells are expected to be strings; markers become missing times.
        if raw.lower() not in unfinished_markers:
            return pd.Timedelta(raw)
        return pd.NaT

    df = pd.read_excel(filename)
    keep = df['bib'].notnull() & ~df['time'].isin(excluded_times)
    df = df[keep]
    df = df[['name', 'gender', 'time']]

    df['name'] = df['name'].apply(lambda s: s.strip().title())
    df['gender'] = df['gender'].replace('Female', 'f').replace('Male', 'm')
    assert set(df['gender'].unique()) == {'f', 'm'}
    df['time'] = df['time'].apply(parse_time)
    return df

def read_btr(filename: str) -> pd.DataFrame:
    """Load BTR results from a spreadsheet.

    How to get the data: open the result table and copy its contents, one
    page at a time, into an OpenOffice Calc sheet.

    Args:
        filename: Path of the spreadsheet file.

    Returns:
        Whatever ``read_scorenow`` returns for that file.
    """
    return read_scorenow(filename=filename)

def read_mantra(filename: str) -> pd.DataFrame:
    """Load Mantra results from a saved JSON response.

    How to get the data: open the result page, look at the XHR requests and
    copy the JSON.

    Args:
        filename: Path of the JSON file. Its top-level object must have a
            ``result`` list of records with at least the keys ``fullname``,
            ``gender``, ``age`` and ``time``.

    Returns:
        DataFrame with columns ``name`` (stripped, title-cased), ``gender``
        ('f' or 'm'), ``age`` (int, missing values become 0) and ``time``
        (``pd.Timedelta``, or ``pd.NaT`` when the raw value is falsy).

    Raises:
        AssertionError: If the genders found are not exactly {'f', 'm'}.
    """
    with open(filename) as json_file:
        payload = json.load(json_file)

    records = payload['result']
    # The column set comes from the first record; every record must have those keys.
    columns = list(records[0].keys())
    table = pd.DataFrame({col: [rec[col] for rec in records] for col in columns})

    table = table[['fullname', 'gender', 'age', 'time']]
    table = table.rename(columns={'fullname': 'name'})

    def parse_time(raw):
        return pd.Timedelta(raw) if raw else pd.NaT

    table['name'] = table['name'].apply(lambda s: s.strip().title())
    table['gender'] = table['gender'].replace('Female', 'f').replace('Male', 'm')
    assert set(table['gender'].unique()) == {'f', 'm'}
    table['age'] = table['age'].fillna(0).astype(int)
    table['time'] = table['time'].apply(parse_time)
    return table.reset_index(drop=True)

def read_bromotengger(filename: str) -> pd.DataFrame:
    """Load Bromo Tengger results from a CSV file.

    How to get the data: use the result spreadsheet provided by the race
    committee.

    Args:
        filename: Path (or anything ``pd.read_csv`` accepts) of a CSV with at
            least the columns ``fullname``, ``gender`` and ``time``.

    Returns:
        DataFrame with columns ``name`` (stripped, title-cased), ``gender``
        ('f' or 'm') and ``time`` (``pd.Timedelta``, or ``pd.NaT`` when the
        raw value is falsy).

    Raises:
        AssertionError: If the genders found are not exactly {'f', 'm'}.
    """
    def parse_time(raw):
        return pd.Timedelta(raw) if raw else pd.NaT

    table = pd.read_csv(filename)
    table = table[['fullname', 'gender', 'time']].rename(columns={'fullname': 'name'})

    table['name'] = table['name'].apply(lambda s: s.strip().title())
    table['gender'] = table['gender'].replace('FEMALE', 'f').replace('MALE', 'm')
    assert set(table['gender'].unique()) == {'f', 'm'}
    table['time'] = table['time'].apply(parse_time)

    return table.reset_index(drop=True)

def read_sixrace(filename: str) -> pd.DataFrame:
    """Parse a result page saved from sixrace.id.

    The page must contain a ``<table id="example">`` whose body rows have
    exactly 4 cells: cell 2 is the runner name and cell 3 holds the time
    followed by the status, separated by whitespace.

    Args:
        filename: Path of the saved HTML page.

    Returns:
        DataFrame with columns ``name`` (stripped, title-cased), ``time``
        (``pd.Timedelta``; ``pd.NaT`` for 'DNF' rows) and ``status``.
        'DNS' rows are removed and the index is reset.

    Raises:
        AssertionError: If the table is missing, a row does not have 4 cells,
            the time/status cell has fewer than two tokens, or a status other
            than 'Finished', 'DNF', 'DQ' (after removing 'DNS') is found.
    """
    with open(filename) as f:
        doc = f.read()
    soup = BeautifulSoup(doc, 'html.parser')
    tabs = soup.find_all('table', id='example')
    assert len(tabs), "table id=example not found"

    tab = tabs[0]
    tbody = tab.find_all('tbody')[0]
    trs = tbody.find_all('tr')

    names = []
    times = []
    statuses = []

    for tr in trs:
        tds = tr.find_all('td')
        assert len(tds)==4

        names.append(tds[2].get_text())
        infos = tds[3].get_text().split()
        # Fail with a readable message instead of an IndexError on infos[1].
        assert len(infos) >= 2, f"Unexpected time/status cell: {infos}"
        times.append(infos[0])
        statuses.append(infos[1])

    df = pd.DataFrame({'name': names, 'time': times, 'status': statuses})
    df['name'] = df['name'].apply(lambda s: s.strip().title())
    df['time'] = df['time'].apply(lambda s: pd.Timedelta(s) if s else pd.NaT )

    df = df[ df['status'] != 'DNS' ].reset_index(drop=True)
    statuses = df['status'].unique()
    assert set(statuses).issubset({'Finished', 'DNF', 'DQ'}), f"Found unknown status: {statuses}"

    # NOTE(review): 'DQ' rows keep their time and are therefore counted as
    # finishers downstream, unlike the other readers which drop DQ rows — confirm.
    df.loc[ df['status']=='DNF', 'time'] = pd.NaT
    return df

def read_msr(filename: str) -> pd.DataFrame:
    """Load MSR results from a saved sixrace.id page.

    How to get the data: go to https://sixrace.id/result/view.php?r=r243 and
    save the page as HTML, one race category per file.

    Args:
        filename: Path of the saved HTML page.

    Returns:
        Whatever ``read_sixrace`` returns for that file.
    """
    return read_sixrace(filename)

def read_mmdt(filename: str) -> pd.DataFrame:
    """Load MMDT results from a saved sixrace.id page.

    How to get the data: go to https://sixrace.id/result/view.php?r=r247 and
    save the page as HTML, one race category per file.

    Args:
        filename: Path of the saved HTML page.

    Returns:
        Whatever ``read_sixrace`` returns for that file.
    """
    return read_sixrace(filename)

def read_slu(filename: str) -> pd.DataFrame:
    """Load SLU results from a saved JSON response.

    How to get the data: open the result page, look at the XHR requests and
    copy the JSON.

    Args:
        filename: Path of the JSON file: a list of records, each with a
            ``value`` object that has ``name``, ``city`` and ``time`` keys.

    Returns:
        DataFrame with columns ``name`` (stripped, title-cased), ``gender``
        ('f' or 'm') and ``time`` (``pd.Timedelta``, or ``pd.NaT`` when the
        raw value is falsy).

    Raises:
        AssertionError: If the genders found are not exactly {'f', 'm'}.
    """
    with open(filename) as json_file:
        records = json.load(json_file)

    # NOTE(review): gender is read from the 'city' key; presumably the feed
    # stores 'Pria'/'Wanita' there — confirm against the source data.
    rows = [
        {
            'name': rec['value']['name'].strip().title(),
            'gender': rec['value']['city'],
            'time': rec['value']['time'],
        }
        for rec in records
    ]

    def parse_time(raw):
        return pd.Timedelta(raw) if raw else pd.NaT

    table = pd.DataFrame(rows)
    table['gender'] = table['gender'].replace('Pria', 'm').replace('Wanita', 'f')
    found_genders = set(table['gender'].unique())
    assert found_genders == {'f', 'm'}, 'Found ' + str(found_genders)
    table['time'] = table['time'].apply(parse_time)
    return table.reset_index(drop=True)

def read_itra(filename: str) -> pd.DataFrame:
    """Load results copied from an ITRA race-result table.

    How to get the data: copy the table from the ITRA race result page and
    paste it into an OpenOffice Calc sheet.

    Args:
        filename: Path of the spreadsheet; it must have the columns ``name``,
            ``time``, ``age`` and ``gender``.

    Returns:
        DataFrame with columns ``name`` (stripped, title-cased), ``time``
        (``pd.Timedelta`` or ``pd.NaT``), ``age`` and ``gender`` ('f' or 'm').

    Raises:
        AssertionError: If the genders found are not exactly {'f', 'm'}.
    """
    def normalise_gender(raw):
        # The sheet uses 'H' and 'F'; they are mapped to 'm' and 'f'.
        return raw.strip().replace('H', 'm').replace('F', 'f')

    def parse_time(text):
        # Drop an ' AM' suffix before parsing; empty text means no time.
        cleaned = text.replace(' AM', '') if text else ''
        return pd.Timedelta(cleaned) if cleaned else pd.NaT

    table = pd.read_excel(filename)
    table = table[['name', 'time', 'age', 'gender']]
    table['name'] = table['name'].apply(lambda s: s.strip().title())
    table['gender'] = table['gender'].apply(normalise_gender)
    found_genders = set(table['gender'].unique())
    assert found_genders == {'f', 'm'}, 'Found ' + str(found_genders)
    table['time'] = table['time'].astype(str).apply(parse_time)
    return table.reset_index(drop=True)

def read_pickmyrace(filename: str, table_id: str) -> pd.DataFrame:
    """Parse one result table from a saved pickmyrace.id page.

    How to get the data: open the event page (e.g.
    https://pickmyrace.id/events/dieng-caldera-race-2024/) and save it as
    HTML (page only).

    Args:
        filename: Path of the saved HTML page.
        table_id: ``id`` attribute of the ``<table>`` to read.

    Returns:
        DataFrame with columns ``name`` (stripped, title-cased), ``gender``,
        ``time`` and ``status``, sorted by time with a fresh index. Rows with
        status 'DNS' or 'DQ' are removed. ``time`` is a ``pd.Timedelta``
        rounded to seconds, or ``pd.NaT`` when the cell is empty, the status
        is 'COT', or the time is 99 hours or more.

    Raises:
        AssertionError: If the table is missing, a row has fewer than 10
            cells, a gender other than 'f'/'m' is found, or a status other
            than 'FINISHER', 'DNF', 'COT' remains after filtering.
    """
    with open(filename) as f:
        doc = f.read()
    soup = BeautifulSoup(doc, 'html.parser')
    tabs = soup.find_all('table', id=table_id)
    assert len(tabs), f"{table_id} not found"

    tab = tabs[0]
    tbody = tab.find_all('tbody')[0]
    trs = tbody.find_all('tr')

    names = []
    genders = []
    times = []
    statuses = []

    for tr in trs:
        tds = tr.find_all('td')
        # Cells up to index 9 are read below, so 10 cells are the real minimum
        # (the previous check of 9 let a short row through to an IndexError).
        assert len(tds) >= 10, f"Expected at least 10 cells, found {len(tds)}"

        names.append(tds[1].get_text().strip().title())
        genders.append(tds[2].get_text().strip())
        times.append(tds[8].get_text().strip())
        statuses.append(tds[9].get_text().strip())

    df = pd.DataFrame({'name': names, 'gender': genders, 'time': times, 'status': statuses })
    df['gender'] = df['gender'].replace('FEMALE', 'f').replace('MALE', 'm')
    assert set(df['gender'].unique()).issubset({'f', 'm'}), 'Found ' + str(set(df['gender'].unique()))

    df = df[ ~df['status'].isin(['DNS', 'DQ']) ]
    assert set(df['status'].unique()).issubset({'FINISHER', 'DNF', 'COT'}), 'Found ' + str(set(df['status'].unique()))

    df['time'] = df['time'].apply(lambda s: pd.Timedelta(s).round('1s') if s else pd.NaT )
    df = df.sort_values('time')

    # Runners over the cut-off time are treated as non-finishers.
    df.loc[ df['status']=='COT', 'time'] = pd.NaT

    # DNF and DNS rows carry a time of 99 hours or more; treat them as missing.
    df.loc[ df['time'] >= pd.Timedelta(hours=99), 'time'] = pd.NaT

    df = df.reset_index(drop=True)
    return df

def read_dcr(filename: str, table_id: str) -> pd.DataFrame:
    """Load one DCR result table from a saved pickmyrace.id page.

    How to get the data: go to
    https://pickmyrace.id/events/dieng-caldera-race-2024/ and save the page
    as HTML (page only).

    Args:
        filename: Path of the saved HTML page.
        table_id: ``id`` attribute of the ``<table>`` to read.

    Returns:
        Whatever ``read_pickmyrace`` returns for that table.
    """
    return read_pickmyrace(filename=filename, table_id=table_id)

def read_bromar(filename: str, category: str) -> pd.DataFrame:
    """Load one race category from a saved bromomarathon.com JSON response.

    How to get the data: go to https://bromomarathon.com/past-results and
    check the XHR requests.

    Args:
        filename: Path of the JSON file: a list of records with the keys
            ``full_name``, ``gender``, ``finish_time``, ``category``,
            ``status`` and ``nationality``.
        category: Value of the ``category`` field to keep (e.g. '21K').

    Returns:
        DataFrame with columns ``name``, ``gender`` ('f'/'m'), ``time``
        (``pd.Timedelta``, or ``pd.NaT`` when the raw value is falsy),
        ``category``, ``status`` and ``country``. Only rows of ``category``
        are kept and rows with status 'DNS' or 'DQF' are removed.

    Raises:
        AssertionError: If the genders across the whole file (all categories)
            are not exactly {'f', 'm'}.
    """
    with open(filename) as json_file:
        records = json.load(json_file)

    rows = [
        {
            'name': rec['full_name'].strip().title(),
            'gender': rec['gender'],
            'time': rec['finish_time'],
            'category': rec['category'],
            'status': rec['status'],
            'country': rec['nationality'],
        }
        for rec in records
    ]

    def parse_time(raw):
        return pd.Timedelta(raw) if raw else pd.NaT

    table = pd.DataFrame(rows)
    table['gender'] = table['gender'].replace('Female', 'f').replace('Male', 'm')
    found_genders = set(table['gender'].unique())
    assert found_genders == {'f', 'm'}, 'Found ' + str(found_genders)
    table['time'] = table['time'].apply(parse_time)

    # Filtering happens after validation and parsing, so every row is checked.
    table = table[ table['category']==category ]
    table = table[ ~table['status'].isin(['DNS', 'DQF']) ]
    return table.reset_index(drop=True)

def read_rinjani100(filename: str) -> pd.DataFrame:
    """Load Rinjani100 results from a spreadsheet.

    How to get the data: open the result table and copy its contents, one
    page at a time, into an OpenOffice Calc sheet.

    Rows without a bib are dropped, as are rows whose ``time`` is 'DNS' or
    'DQ'. Remaining rows whose time is 'cot', 'dnf' or 'dq'
    (case-insensitive) are kept with ``time = pd.NaT``.

    Args:
        filename: Path of the Excel file.

    Returns:
        DataFrame with columns ``name`` (stripped, title-cased), ``gender``
        ('f' or 'm') and ``time`` (``pd.Timedelta`` or ``pd.NaT``), with a
        fresh index.

    Raises:
        AssertionError: If the genders found are not exactly {'f', 'm'}.
    """
    unfinished_markers = ['cot', 'dnf', 'dq']

    def parse_time(raw):
        if raw.lower() not in unfinished_markers:
            return pd.Timedelta(raw)
        return pd.NaT

    df = pd.read_excel(filename)
    keep = df['bib'].notnull() & ~df['time'].isin(['DNS', 'DQ'])
    df = df[keep]
    df = df[['name', 'gender', 'time']]

    df['name'] = df['name'].apply(lambda s: s.strip().title())
    df['gender'] = df['gender'].replace('Female', 'f').replace('Male', 'm')
    found_genders = set(df['gender'].unique())
    assert found_genders == {'f', 'm'}, 'Found ' + str(found_genders)
    df['time'] = df['time'].apply(parse_time)
    return df.reset_index(drop=True)

def read_but(filename: str, table_id: str) -> pd.DataFrame:
    """Load one BUT result table from a saved pickmyrace.id page.

    How to get the data: go to
    https://pickmyrace.id/preliminary-bali-ultra-trail-2024/ and save the
    page as HTML (page only).

    Args:
        filename: Path of the saved HTML page.
        table_id: ``id`` attribute of the ``<table>`` to read.

    Returns:
        Whatever ``read_pickmyrace`` returns for that table.
    """
    return read_pickmyrace(filename=filename, table_id=table_id)

def read_jbu(filename: str) -> pd.DataFrame:
    """Load Jabar Ultra results from a spreadsheet.

    How to get the data: open https://jabarultra.com/race-result-2024/ and
    save the table to Excel.

    Args:
        filename: Path of the Excel file.

    Returns:
        Whatever ``read_scorenow`` returns for that file.
    """
    return read_scorenow(filename=filename)

def read_ui_trail(filename: str, table_id: str) -> pd.DataFrame:
    """Load one UI Trail Race 2024 result table from a saved pickmyrace.id page.

    How to get the data: go to
    https://pickmyrace.id/preliminary-result-ui-trail-race-2024/ and save the
    page.

    Args:
        filename: Path of the saved HTML page.
        table_id: ``id`` attribute of the ``<table>`` to read.

    Returns:
        Whatever ``read_pickmyrace`` returns for that table.
    """
    return read_pickmyrace(filename=filename, table_id=table_id)

def read_race_id(filename:str) -> pd.DataFrame:
    """Load results copied from https://result.race.id.

    How to get the data:
    - Go to the race result page
      (e.g. https://result.race.id/results.aspx?CId=20127&RId=6002&EId=5).
    - Copy/paste the table, page by page, into an OpenOffice Calc sheet.

    Args:
        filename: Path of the spreadsheet file.

    Returns:
        Whatever ``read_scorenow`` returns for that file.
    """
    return read_scorenow(filename=filename)

def read_bdg100(filename:str) -> pd.DataFrame:
    """Load BDG100 results copied from https://result.race.id.

    How to get the data:
    - Go to the race result page
      (e.g. https://result.race.id/results.aspx?CId=20127&RId=6002&EId=5).
    - Copy/paste the table, page by page, into an OpenOffice Calc sheet.
    - Save as Excel.

    Args:
        filename: Path of the Excel file.

    Returns:
        Whatever ``read_race_id`` returns for that file.
    """
    return read_race_id(filename=filename)

def read_dtr(filename: str, table_id: str) -> pd.DataFrame:
    """Load one DTR result table from a saved pickmyrace.id page.

    How to get the data: go to
    https://pickmyrace.id/preliminary-result-dieng-trail-run-2024/ and save
    the page as HTML (page only).

    Args:
        filename: Path of the saved HTML page.
        table_id: ``id`` attribute of the ``<table>`` to read.

    Returns:
        Whatever ``read_pickmyrace`` returns for that table.
    """
    return read_pickmyrace(filename=filename, table_id=table_id)


## Daftar Race

In [4]:
%%time

@dataclass
class Race:
    """One race (distance category) of an event, plus its loaded results.

    The first three fields are required and are passed positionally in the
    ``events`` list. ``df``, ``dnf``, ``times``, ``mean`` and ``median`` are
    filled in by ``load_races`` when ``load_fn`` is not None; ``median`` may
    also be given by hand for a race that has no result file.
    """
    distance: float          # distance; printed with a 'K' suffix / in a 'km' column
    eg: float                # printed with an 'm' suffix (table column 'Eg (m)')
    load_fn: Callable        # zero-argument callable returning the result DataFrame, or None
    quals: List[str] = field(default_factory=list)  # ITRA and UTMB qualifications
    dist_name: str = None    # name if distance is same for the same race
    peaks: List[str] = None  # Info about peaks, views
    ml: int = None           # ITRA mountain level
    fl: int = None           # ITRA finisher level
    nl: bool = None          # ITRA national league
    df: pd.DataFrame = None  # DataFrame
    dnf: int = None          # Number of DNFs
    times: pd.Series = None  # Finisher time
    mean: float = None       # in hours
    median: float = None     # in hours
    # Not annotated, so this is a plain class attribute rather than a
    # dataclass field: it is not an __init__ parameter and is left out of the
    # generated repr/eq. load_races sets it on each instance.
    event = None             # Parent event

@dataclass
class Event:
    """A running event and the races held under it."""
    code: str               # short label; used in plot titles, tables and anchors
    title: str              # full event name; the README lists events sorted by it
    date: str               # date string parsed with pd.Timestamp (e.g. '2024-05-12')
    instagram_handle: str   # appended to https://www.instagram.com/ to build links
    url: str                # homepage, with or without scheme; '' when there is none
    itra_id: int = None     # id used in the https://itra.run/Races/RaceDetails/ link
    races: List[Race] = field(default_factory=list)  # races belonging to this event

# Event codes: short labels shown in plot titles and in the README tables
BRO = 'Bromo Mar'
BTG = 'Bromo Tgger'
BDG = 'BDG100'
BTR = 'BTR'
BUT = 'BUT'
CTC = 'CTC'
DCR = 'Dieng Caldera'
DTR = 'Dieng Trail Run'
JUT = 'JUT'
MAN = 'Mantra'
MMD = 'MMDT'
MSR = 'MSR'
MST = 'MesaStila100'
RIN = 'Rinjani100'
SLU = 'SLU'
SSC = 'SSC'
UIT = 'UI Trail'
VTM = 'V. Telomoyo'

# ITRA and UTMB qualifications; each value is also the base name of the
# icon file images/<value>.png referenced by the README tables
ITRA0  = 'ITRA0'
ITRA1  = 'ITRA1'
ITRA2  = 'ITRA2'
ITRA3  = 'ITRA3'
ITRA4  = 'ITRA4'
ITRA5  = 'ITRA5'
ITRA6  = 'ITRA6'
UT20K  = 'UTMB20K'
UT50K  = 'UTMB50K'
UT100K = 'UTMB100K'
UT100M = 'UTMB100M'

# Registry of all events and their races.
# Positional arguments follow the dataclass field order:
#   Event(code, title, date, instagram_handle, url, itra_id, races)
#   Race(distance, eg, load_fn, quals, ...)
# Each load_fn is a lambda so that no file is read until load_races() runs.
events = [
    Event(BTR, 'Bali Trail Run', '2024-05-12', 'balitrailrunning', 'balitrailrunning.com', 89832, [
        Race(7, 300, lambda:read_btr('data/2024/btr7k.xlsx')),
        Race(15, 973, lambda:read_btr('data/2024/btr15k.xlsx'), [UT20K], peaks=['Batur']),
        # peaks must be a list like everywhere else (was the bare string 'Batur')
        Race(30, 1340, lambda:read_btr('data/2024/btr30k.xlsx'), [ITRA1, UT20K],
             peaks=['Batur'], ml=4, fl=230, nl=True),
        Race(55, 3778, lambda:read_btr('data/2024/btr55k.xlsx'), [ITRA3, UT50K],
             peaks=['Batur', 'Abang'], ml=9, fl=270, nl=True),
        Race(85, 5250, lambda:read_btr('data/2024/btr85k.xlsx'), [ITRA4, UT100K],
             peaks=['Batur', 'Abang'], ml=7, fl=320, nl=True),
    ]),
    Event(BUT, 'Bali Ultra Trail', '2024-08-03', 'baliultratrail.official', 'baliultratrail.com', 86937, [
        Race(12, 330, lambda:read_but('data/2024/but.html', 'table_1'), [ITRA0], ml=3, fl=170, nl=True),
        Race(25, 1650, lambda:read_but('data/2024/but.html', 'table_2'), [ITRA2, UT20K],
             peaks=['Batur'], ml=7, fl=190, nl=1, ),
        Race(50, 2730, lambda:read_but('data/2024/but.html', 'table_3'), [ITRA3, UT50K],
             peaks=['Batur2x'], ml=6, fl=270, nl=1, ),
        Race(80, 4400, lambda:read_but('data/2024/but.html', 'table_4'), [ITRA4, UT100K],
             peaks=['Batur2x'], fl=300, nl=1, ),
    ]),
    # NOTE(review): fl=10 on the 27K-161K races is far below every other
    # finisher level in this list; it looks like a placeholder — confirm.
    Event(BDG, 'BDG100', '2024-09-01', 'bdg100_official', 'bdg100.id', 91617, [
        Race(13, 724, lambda:read_race_id('data/2024/bdg100-13k.xlsx'), [ITRA0, UT20K],
             ml=5, fl=290, nl=1),
        Race(27, 1265, lambda:read_race_id('data/2024/bdg100-27k.xlsx'), [ITRA1, UT20K],
             ml=5, fl=10, nl=1),
        Race(64, 3482, lambda:read_race_id('data/2024/bdg100-64k.xlsx'), [ITRA3, UT50K],
             ml=6, fl=10, nl=1),
        Race(98, 5678, lambda:read_race_id('data/2024/bdg100-98k.xlsx'), [ITRA4, UT100K],
             ml=7, fl=10, nl=1),
        Race(161, 8995, lambda:read_race_id('data/2024/bdg100-161k.xlsx'), [ITRA6, UT100M],
             ml=8, fl=10, nl=1),
    ]),
    Event(BRO, 'Bromo Marathon', '2023-09-03', 'bromomarathon', 'bromomarathon.com', None, [
        Race(5, 241, lambda:read_bromar('data/2023/bromar.json', '5K')),
        Race(10, 426, lambda:read_bromar('data/2023/bromar.json', '10K')),
        Race(21, 977, lambda:read_bromar('data/2023/bromar.json', '21K'), peaks=['Bromo']),
        Race(42, 1930, lambda:read_bromar('data/2023/bromar.json', '42K'), peaks=['Bromo']),
    ]),
    Event(BTG, 'Bromo Tengger Trail Run', '2024-07-28', 'bromo.tenggertrailrun', '', 93006, [
        Race(11, 480, lambda:read_bromotengger('data/2024/bromo11k.csv')),
        Race(21, 1000, lambda:read_bromotengger('data/2024/bromo21k.csv'), [ITRA1, UT20K],
             peaks=['Bromo'], ml=5, fl=190, nl=True),
    ]),
    Event(CTC, 'Coast to Coast', '2024-02-25', 'ctc.ultra', 'ctcultra.com', 88662, [
        # No result file for the 5K: the median is supplied by hand.
        Race(5, 30, None, [], median=0.6),
        Race(15, 300, lambda:read_ctc('data/2024/ctc.html', 'table_1'), [ITRA0],
             ml=2, fl=160, nl=1),
        Race(30, 1040, lambda:read_ctc('data/2024/ctc.html', 'table_2'), [ITRA0, UT20K],
             ml=3, fl=250, nl=1),
        Race(50, 1620, lambda:read_ctc('data/2024/ctc.html', 'table_3'), [ITRA2, UT50K],
             ml=3, fl=290, nl=1),
        Race(80, 2550, lambda:read_ctc('data/2024/ctc.html', 'table_4'), [ITRA3, UT100K],
             ml=3, fl=370, nl=1),
    ]),
    Event(DCR, 'Dieng Caldera Race', '2024-06-09', 'diengcalderarace', 'diengcalderarace.com', 93604, [
        Race(10, 495, lambda:read_dcr('data/2024/dcr.html', 'table_1'), [ITRA0],
             ml=5, fl=130, nl=1),
        Race(21, 1185, lambda:read_dcr('data/2024/dcr.html', 'table_2'), [ITRA1, UT20K],
             ml=6, fl=190, nl=1),
        Race(42, 2630, lambda:read_dcr('data/2024/dcr.html', 'table_3'), [ITRA2, UT50K],
             ml=7, fl=260, nl=1),
        Race(75, 4850, lambda:read_dcr('data/2024/dcr.html', 'table_4'), [ITRA4, UT100K],
             ml=8, fl=290, nl=1),
    ]),
    Event(DTR, 'Dieng Trail Run', '2024-09-22', 'diengtrailrun', 'diengtrailrun.id', 95482, [
        Race(12, 920, lambda:read_dtr('data/2024/dtr.html', 'table_1'), [ITRA0],
             ml=6, fl=190, nl=1, peaks=['Prau']),
        Race(25, 1470, lambda:read_dtr('data/2024/dtr.html', 'table_2'), [ITRA1, UT20K],
             ml=5, fl=200, nl=1, peaks=['Prau']),
        Race(63, 3807, lambda:read_dtr('data/2024/dtr.html', 'table_3'), [ITRA3, UT50K],
             ml=7, fl=300, nl=1, peaks=['Prau']),
        # Was 'table_3', i.e. the 63K results loaded a second time. Every other
        # pickmyrace event maps its Nth race to table_N, so this reads table_4.
        # NOTE(review): confirm that dtr.html really contains table_4.
        Race(100, 6800, lambda:read_dtr('data/2024/dtr.html', 'table_4'), [ITRA5, UT100K],
             ml=9, fl=390, nl=1, peaks=['Prau', 'Sindoro']),
    ]),
    Event(JUT, 'Jabar Ultra Trail', '2024-06-09', 'jabarultra', 'jabarultra.com', 92806, [
        Race(22, 2500, lambda:read_jbu('data/2024/jabarultra-22k.xlsx'), [ITRA2, UT20K],
             peaks=['Ciremai'], ml=12, fl=180, nl=1),
        Race(55, 6010, lambda:read_jbu('data/2024/jabarultra-55k.xlsx'), [ITRA3, UT100K],
             peaks=['Ciremai 3x'], ml=12, fl=220, nl=1),
    ]),
    Event(MAN, 'Mantra 116', '2024-07-07', 'mantra116.id', 'mantra116.com', 90032, [
        Race(10, 620, lambda:read_mantra('data/2024/mantra116-10k.json'), [ITRA0],
             ml=6, fl=140, nl=1),
        Race(17, 1000, lambda:read_mantra('data/2024/mantra116-17k.json'), [ITRA1, UT20K],
             ml=6, fl=220, nl=1),
        Race(34, 3050, lambda:read_mantra('data/2024/mantra116-34k.json'), [ITRA2, UT50K],
             dist_name='Arjuno', peaks=['Arjuno'], ml=12, fl=200, nl=1),
        Race(38, 2750, lambda:read_mantra('data/2024/mantra116-38k.json'), [ITRA2, UT50K],
             dist_name='Welirang', peaks=['Welirang'], ml=9, fl=220, nl=1),
        Race(68, 5000, lambda:read_mantra('data/2024/mantra116-68k.json'), [ITRA4, UT100K],
             peaks=['Arjuno', 'Welirang'], ml=10, fl=280, nl=1),
        Race(116, 7400, lambda:read_mantra('data/2024/mantra116-116k.json'), [ITRA5, UT100M],
             peaks=['Arjuno', 'Welirang'], ml=9, fl=380, nl=1),
    ]),
    Event(MMD, 'Merapi Merbabu De Trail', '2024-08-04', 'merapi_merbabu.detrail',
          'www.merapimerbabudetrail.com', 90917, [
        Race(5, 700, lambda:read_mmdt('data/2024/mmdt-5k.html'), [ITRA0],
             ml=11, fl=150, nl=1),
        Race(10, 1320, lambda:read_mmdt('data/2024/mmdt-10k.html'), [ITRA0, UT20K],
             peaks=['Merbabu'], ml=12, fl=140, nl=1),
        Race(20, 2940, lambda:read_mmdt('data/2024/mmdt-20k.html'), [ITRA2, UT20K],
             peaks=['Merbabu 2x'], ml=12, fl=140, nl=1),
    ]),
    Event(MSR, 'Merbabu Sky Race', '2024-04-28', 'merbabu_skyrace', 'merbabuskyrace.com', 84613, [
        Race(5, 170, lambda:read_msr('data/2024/msr5k.html'), []),
        Race(10, 810, lambda:read_msr('data/2024/msr10k.html'), [ITRA0],
            ml=9, fl=210, nl=1),
        Race(20, 1830, lambda:read_msr('data/2024/msr20k.html'), [ITRA1, UT20K],
             peaks=['Merbabu'], ml=12, nl=1),
        Race(40, 4290, lambda:read_msr('data/2024/msr40k.html'), [ITRA3, UT50K],
             peaks=['Merbabu 2x'], ml=12, nl=1),
        Race(50, 5970, lambda:read_msr('data/2024/msr50k.html'), [ITRA3, UT100K],
             peaks=['Merbabu 3x'], ml=12, fl=220, nl=1),
    ]),
    Event(MST, 'Mesastila 100', '2023-10-08', 'mesastila100', 'mesastila100.com', 82926, [
        Race(21, 1230, lambda:read_itra('data/2023/mesastila100-21k.xlsx'), [ITRA0],
             ml=6, fl=220, nl=1),
    ]),
    Event(RIN, 'Rinjani 100', '2024-05-26', 'rinjani100.official', 'fonesport.id/rinjani100', 91507, [
        Race(27, 1847, lambda:read_rinjani100('data/2024/rinjani100-27k.xlsx'), [ITRA1, UT20K],
             ml=7, fl=210, nl=1),
        Race(36, 3179, lambda:read_rinjani100('data/2024/rinjani100-36k.xlsx'), [ITRA2, UT50K],
             peaks=['Rinjani'], ml=11, nl=1),
        Race(60, 5493, lambda:read_rinjani100('data/2024/rinjani100-60k.xlsx'), [ITRA3, UT100K],
             peaks=['Rinjani'], ml=12, fl=280, nl=1),
        Race(100, 9194, lambda:read_rinjani100('data/2024/rinjani100-100k.xlsx'), [ITRA5, UT100M],
             peaks=['Rinjani'], ml=12, fl=350, nl=1),
        Race(162, 13646, lambda:read_rinjani100('data/2024/rinjani100-162k.xlsx'), [ITRA6, UT100M],
             peaks=['Rinjani'], ml=12, nl=1),
    ]),
    Event(SLU, 'Siksorogo Lawu Ultra', '2023-12-03', 'siksorogolawuultra', 'siksorogo.id', 88372, [
        Race(7, 400, lambda:read_slu('data/2023/slu7k.json')),
        Race(15, 1200, lambda:read_slu('data/2023/slu15k.json'), [ITRA0, ],
             ml=6, nl=1),
        Race(30, 1800, lambda:read_slu('data/2023/slu30k.json'), [ITRA1, UT20K],
             ml=7, fl=210, nl=1),
        Race(50, 3800, lambda:read_slu('data/2023/slu50k.json'), [ITRA3, UT50K],
             peaks=['Lawu'], ml=8, fl=280, nl=1),
        Race(80, 5400, lambda:read_slu('data/2023/slu80k.json'), [ITRA4, UT100K],
             peaks=['Lawu'], ml=9, fl=290, nl=1),
    ]),
    Event(SSC, 'Sindoro Sumbing Challenge', '2024-05-05', 'sindoro_sumbing_challenge',
          'www.sindorosumbingchallenge.com/', 89387, [
        Race(20, 1963, lambda:read_itra('data/2024/ssc-sumbing.xlsx'), [ITRA1],
             dist_name='Sumbing', peaks=['Sumbing'], ml=12, fl=190, nl=1),
        Race(20, 2076, lambda:read_itra('data/2024/ssc-sindoro.xlsx'), [ITRA1],
             dist_name='Sindoro', peaks=['Sindoro'], ml=12, fl=190, nl=1),
        Race(35, 4046, lambda:read_itra('data/2024/ssc-40k.xlsx'), [ITRA3],
             peaks=['Sindoro', 'Sumbing'], ml=12, fl=180, nl=1),
    ]),
    Event(UIT, 'UI Trail Race', '2024-08-11', 'uitrailrace', 'uitrailrun.com', 94755, [
        Race(5, 180, lambda:read_ui_trail('data/2024/uitrailrace.html', 'table_1'),
             ml=3, fl=130, nl=1),
        Race(10, 650, lambda:read_ui_trail('data/2024/uitrailrace.html', 'table_2'), [ITRA0],
             ml=6, fl=130, nl=1),
        Race(20, 1500, lambda:read_ui_trail('data/2024/uitrailrace.html', 'table_3'),[ITRA1],
             ml=7, fl=150, nl=1),
        Race(40, 2400, lambda:read_ui_trail('data/2024/uitrailrace.html', 'table_4'), [ITRA2],
             ml=7, fl=220, nl=1),
        Race(80, 4800, lambda:read_ui_trail('data/2024/uitrailrace.html', 'table_5'), [ITRA4],
             ml=8, fl=320, nl=1),
    ]),
    Event(VTM, 'Vertical Telomoyo', '2023-10-01', 'vertical_telomoyo', 'verticaltelomoyo.com', 85282, [
        Race(7, 810, lambda:read_msr('data/2023/telomoyo7k.html'), [ITRA0],
             peaks=['Telomoyo'], ml=11, fl=190, nl=1),
        Race(27, 1420, lambda:read_msr('data/2023/telomoyo27k.html'), [ITRA1, UT20K],
             peaks=['Telomoyo', 'Andong'], ml=5, fl=230, nl=1, ),
    ]),
]


def load_races():
    """Load the result data of every race in ``events`` and compute its stats.

    For each race the parent ``event`` is attached. When the race has a
    ``load_fn`` it is called and the race gets ``df``, ``dnf`` (number of
    rows with a missing time), ``times`` (finish times of the remaining rows,
    in hours), ``mean`` and ``median``. Races without a ``load_fn`` keep
    whatever values they were constructed with. Progress is printed on a
    single, carriage-return-overwritten line.
    """
    n_events = len(events)
    for event_no, event in enumerate(events, start=1):
        n_races = len(event.races)
        for race_no, race in enumerate(event.races, start=1):
            print(f'Reading race data {event_no}/{n_events} {event.code} {race_no}/{n_races}..         \r', end='')
            race.event = event
            if race.load_fn is None:
                continue

            race.df = race.load_fn()
            finish_time = race.df['time']
            race.dnf = sum(pd.isnull(finish_time))
            # Seconds -> hours, finishers only.
            race.times = race.df.loc[ finish_time.notnull(), 'time'].dt.total_seconds() / 3600
            race.mean = race.times.mean()
            race.median = race.times.median()

    print('\nDone')

load_races()

Reading race data 18/18 V. Telomoyo 2/2..            
Done
CPU times: user 19.4 s, sys: 459 ms, total: 19.8 s
Wall time: 19.8 s


## Generate README

In [5]:
def create_race_title(race: Race) -> str:
    """Build the plot title for a race.

    Format: ``<code> <distance>K <eg>m [(<dist_name>) ]<year> finishers: <n>``
    followed by ``, dnf: <count> (<rate>)`` when the race has any DNF.

    Args:
        race: Race with ``event``, ``times`` and ``dnf`` already set.

    Returns:
        The title string.
    """
    year = pd.Timestamp(race.event.date).year
    variant = f'({race.dist_name}) ' if race.dist_name else ''
    n_finishers = len(race.times)

    parts = [
        f'{race.event.code} {race.distance:.0f}K {race.eg:.0f}m {variant}{year}',
        f' finishers: {n_finishers}',
    ]
    if race.dnf:
        # DNF rate is relative to everyone with a row: finishers + DNFs.
        dnf_rate = race.dnf/(race.dnf+n_finishers)
        parts.append(f', dnf: {race.dnf} ({dnf_rate:.0%})')
    return ''.join(parts)

def plot_distribution(race: Race, names: List[str] = [], ax=None):
    """Plot the finish-time histogram of a race with mean/median markers.

    Args:
        race: Race whose ``df`` holds a ``time`` column of timedeltas.
        names: Regex patterns; for each pattern, the first runner whose name
            matches gets a vertical line at their finish time. The legend
            label is the pattern with everything but letters and spaces
            removed. Patterns without a match are skipped.
        ax: Axes to draw on. When omitted, a new 8x5 figure is created and
            shown with ``plt.show()``.
    """
    n_bins = 30
    results = race.df
    # Finish times in hours, rows with a missing time excluded.
    hours = results.loc[ results['time'].notnull(), 'time'].dt.total_seconds() / 3600

    standalone = not ax
    if standalone:
        ax = hours.hist(bins=n_bins, alpha=0.6, figsize=(8,5))
    else:
        ax.hist(hours, bins=n_bins, alpha=0.6)
        ax.grid()

    ax.set_xlabel('Finished Time (Hour)')
    ax.set_ylabel('Number of runners')
    ax.set_title(create_race_title(race))

    for label, value, style in (('mean', hours.mean(), ':'),
                                ('median', hours.median(), '--')):
        ax.axvline(x=value, linestyle=style,
                   color='k', alpha=1, zorder=100, label=label)

    if race.quals:
        # Qualification list goes in the top-right corner of the axes.
        x_right = ax.get_xlim()[1]
        y_top = ax.get_ylim()[1]
        ax.text(x_right, y_top*0.99, ', '.join(race.quals),
                horizontalalignment='right',
                verticalalignment='top', fontweight='book')

    # Colour cycle starts at C1; it only advances for runners that are found.
    color_index = 1
    for pattern in names:
        matches = results[ results['name'].str.contains(pattern) ]
        if len(matches) == 0:
            continue

        runner = matches.iloc[0]
        runner_hours = runner['time'].total_seconds() / 3600
        legend_label = ''.join(ch for ch in pattern if ch.isalpha() or ch.isspace())
        ax.axvline(x=runner_hours, color=f'C{color_index}', alpha=1, zorder=10,
                   label=legend_label)
        color_index += 1

    ax.legend(loc='upper left')
    if standalone:
        plt.show()
        
def plot_age_distribution(ages: pd.Series):
    """Plot a 20-bin histogram of ages strictly between 10 and 100.

    Args:
        ages: Series of ages; values outside the open interval (10, 100) are
            ignored. The title reports how many values were plotted.
    """
    plausible = (ages>10) & (ages<100)
    ages = ages[plausible]
    ax = ages.hist(bins=20, figsize=(8,5))
    ax.set_title(f'Age Distribution (finishers: {len(ages)})')


In [12]:
#%%time

from urllib.parse import urljoin
import matplotlib.image as pltimg
import os

# README.md template (Indonesian). generate_readme() substitutes the literal
# '{list_events}' placeholder via str.replace and appends the per-bucket
# tables after this text.
readme = '''# Statistik Event Trail/Ultra Running Indonesia

Berikut distribusi finisher dari lomba-lomba yang terdata, diurutkan bds
median finish time, agar bisa dikira-kira tingkat kesulitan dari lomba itu.

Tapii... harap diwaspadai, distribusi hanya menghitung finish time dari finisher.
Harap diperhatikan juga DNF ratenya. Kalau median finish time lebih rendah tapi
DNF rate lebih tinggi, kemungkinan lombanya lebih berat (misalnya lomba2 MSR).

Beberapa event juga tidak memberikan data peserta yang over COT atau DNF (misalnya
CTC). Dari bentuk distribusinya, kalau puncaknya di kanan (left skewed, misalnya
CTC 30K, 50K) maka kemungkinan banyak peserta yg DNF/over COT.

Untuk tiap lomba juga ditampilkan kualifikasi [ITRA](https://itra.run) and
[UTMB](https://utmb.world/), [ITRA mountain level](https://itra.run/FAQ/Runner) dan
[ITRA finisher level](https://itra.run/FAQ/Runner), dan finish time saya dan bbrp
teman yg saya tahu dan selebriti (namanya engga disebut lengkap) biar mantau posisi
aja hehe.

Event-event yg terdata:

{list_events}

Enjoy dan fork/PR ya.
'''


# Regex patterns (partly masked names) of the runners highlighted in every
# distribution plot; each one is matched with Series.str.contains.
list_nama = [
    '^..nny Prij',
    '^..gus Mahap',
    '^..odong Cah',
    '^..lfin Bah',
    '..griawan Su',
    '^.iza Satr',
    '.ranindo',
    '^.ommy Bas',
    '^.akka Fau',
    '^.wan Bud',
    '.alih Apr',
    '.lham Sam',
    '^.aldy Mas',
    '^.eny Roh',
    '^.endi Dw',
    'mad Suhu.$',
    'ad Hariy',
    '^.udhi *Nugr',
    '^.ani Chi',
]

def create_img_link(img: str, url: str, method=0) -> str:
    """Return markup for an image that links to ``url``.

    Args:
        img: Image path or URL.
        url: Link target.
        method: Which of the five markdown/HTML variants (0-4) to produce.

    Returns:
        The markup string for the chosen variant.

    Raises:
        AssertionError: If ``method`` is not one of 0-4.
    """
    # The position in the tuple is the method number.
    variants = (
        f'[![img]({img})]({url})',
        f'[<img src="{img}">]({url}) ',
        f'<a href="{url}">![Foo]({img})</a>',
        f'<a href="{url}"><img src="{img}"></a>',
        f'[![image]({img} "icon")]({url})',
    )
    for number, markup in enumerate(variants):
        if method==number:
            return markup
    assert False
    
def get_event_links(e: Event, which: str = 'web instagram itra') -> str:
    """Build the ' | '-separated markdown links of an event.

    Args:
        e: Event providing ``url``, ``instagram_handle`` and ``itra_id``.
        which: Space-separated selection of link kinds. 'web' selects the
            homepage, 'insta' (so also 'instagram') the Instagram profile and
            'itra' the ITRA race page. A link is only produced when the
            matching attribute is truthy.

    Returns:
        The selected links joined with ' | ', or '' when there are none.
    """
    links = []
    if e.url and 'web' in which:
        # Use the URL as is when it already carries a scheme. The previous
        # code fell back to the not-yet-assigned local `url` in that case.
        if e.url.startswith(('http://', 'https://')):
            url = e.url
        else:
            url = 'https://' + e.url
        links.append(f'[homepage]({url})')
    if e.instagram_handle and 'insta' in which:
        url = f'https://www.instagram.com/{e.instagram_handle}/'
        links.append(f' [instagram]({url})')
    if e.itra_id and 'itra' in which:
        url = f'https://itra.run/Races/RaceDetails/{e.itra_id}/'
        links.append(f'[ITRA]({url})')
    return ' | '.join(links)

def create_race_anchor(race: Race) -> str:
    """Return the HTML anchor name of a race for in-page README links.

    The anchor is the event code, the distance and (when set) the
    ``dist_name``, with every character other than ASCII letters, digits,
    '_' and '-' removed. Event codes may contain spaces or dots
    (e.g. 'V. Telomoyo'), which are not valid inside a markdown
    ``(#anchor)`` link target. ``dist_name`` keeps races with the same
    distance in one event from sharing an anchor.

    Args:
        race: Race with ``event`` set.

    Returns:
        The anchor string, e.g. 'BTR30' or 'SSC20Sumbing'.
    """
    raw = f'{race.event.code}{race.distance:.0f}{race.dist_name or ""}'
    return re.sub(r'[^0-9A-Za-z_-]', '', raw)
    
def create_race_table(races: List[Race]) -> str:
    """Render races as a markdown table, one row per race.

    Columns: event (anchor + Instagram link), distance, elevation gain,
    number of finishers, median / max finish time in hours, DNF count with
    rate, qualification icons, ITRA mountain level and ITRA finisher level.
    Values that are missing are left blank; the max time shows '-' when the
    race has no ``times``.

    Args:
        races: Races to list, in the order they should appear.

    Returns:
        The markdown table, header included, ending with a newline.
    """
    lines = [
        '| Event | Jarak (km) | Eg (m) | Finish ers | Med / Max (jam) | DNF (Rate) | Kualif. | Mtn Lvl | Fns Lvl |\n',
        '|-------|------------|--------|-----------|-----------------|------------|---------|---------|---------|\n',
    ]
    for race in races:
        event_link = f'[{race.event.code}](https://www.instagram.com/{race.event.instagram_handle}/)'
        anchor_tag = f'<A name="{create_race_anchor(race)}"></A>'

        if race.dnf:
            dnf_count = str(race.dnf)
            dnf_rate = f'({race.dnf/(race.dnf+len(race.times)):.0%})'
        else:
            dnf_count = '   '
            dnf_rate = ''

        has_times = race.times is not None
        finishers = len(race.times) if has_times else ''
        median_hours = f'{race.median:.1f}' if race.median else ''
        slowest_hours = f'{race.times.max():.1f}' if has_times else '-'

        # One icon per qualification, loaded from images/<qual>.png.
        icons = ''.join(f' <img src="images/{qual}.png" height="16">' for qual in race.quals)

        lines.append(
            f'| {anchor_tag} {event_link} | {race.distance:.0f} | {race.eg:.0f} | {finishers} | {median_hours} / {slowest_hours} | {dnf_count} {dnf_rate} | {icons} | {race.ml or ""} | {race.fl or ""} |\n'
        )
    return ''.join(lines)
    

def generate_readme():
    """Generate README.md and the per-bucket histogram images.

    Steps:
      1. Sort the global ``events`` by title (the global is rebound to the
         sorted list) and build the event list for the README template.
      2. Group all races into buckets by median finish time (hours). For
         each non-empty bucket, append a section with the race table and
         save ``images/<from>-<to>h.png`` with one histogram per race that
         has result data.
      3. Write the result to ``README.md`` in the current directory.

    Progress is printed on a single, carriage-return-overwritten line.
    """
    global events
    md = '' + readme

    list_events = ''
    events = sorted(events, key=lambda e: e.title.lower())
    races = []
    for ie, e in enumerate(events):
        list_events += f'{ie+1}. **{e.title}** ({e.code}): tanggal {e.date}\n'
        rcs = sorted(e.races, key=lambda r: r.distance)
        dists = [f'[{str(r.distance)}](#{create_race_anchor(r)})' for r in rcs]
        list_events += f'    - {len(e.races)} lomba: {", ".join(dists)} km\n'
        list_events += f'    - diikuti sekitar {sum([(len(r.df) if r.df is not None else 0) for r in rcs])} peserta\n'
        list_events += f'    - links: {get_event_links(e)}\n'
        races.extend( e.races )

    md = md.replace('{list_events}', list_events)

    # Upper bounds (hours) of the median-finish-time buckets.
    hours = [2, 4, 6, 8, 10, 12, 16, 24, 100]
    ncols = 2

    prev_h = 0
    for h in hours:
        print(f'Generating {prev_h}-{h}h..   \r', end='')
        # For the table, list all races even without finisher data.
        # A race with neither data nor a hand-set median cannot be placed in
        # a bucket, so it is left out instead of raising on the comparison.
        lst = [ r for r in races if r.median is not None and prev_h <= r.median < h ]
        if not lst:
            # prev_h is not advanced here, so an empty bucket is merged into
            # the next non-empty one (e.g. "12 - 24" if 12-16 is empty).
            continue
        lst = sorted(lst, key=lambda r: r.median)

        fname = f'images/{prev_h}-{h}h.png'
        md += f'## {prev_h} - {h} jam\n\n'
        md += create_race_table(lst)

        # Only races with finisher data get a histogram.
        plotted = [ r for r in lst if r.df is not None ]
        if plotted:
            md += f'\n![stat]({fname} "Statistik")\n\n'

            nrows = (len(plotted)+ncols-1)//ncols
            # squeeze=False keeps axs two-dimensional even for a single row.
            fig, axs = plt.subplots(nrows, ncols, figsize=(ncols*6.5, nrows*4),
                                    facecolor='aliceblue', squeeze=False)
            flat_axes = axs.flatten()  # row-major: left to right, top to bottom
            for ax, race in zip(flat_axes, plotted):
                plot_distribution(race, list_nama, ax=ax)

            # Remove the unused axes at the end of the last row.
            for ax in flat_axes[len(plotted):]:
                fig.delaxes(ax)

            fig.tight_layout()
            fig.savefig(fname)
            plt.close(fig)
        else:
            # No image for this bucket; keep a blank line before the next section.
            md += '\n'

        prev_h = h

    md += '\n\n(Catatan: file ini dihasilkan oleh kode di .ipynb)\n'
    with open('README.md', 'wt', encoding='utf-8') as f:
        f.write(md)

    print('\nDone.')

generate_readme()

Generating 24-100h..   
Done.
