# WCA - Resumen del 2024


In [1]:
# Imports

from pathlib import Path

import pandas as pd
from sqlalchemy import create_engine

In [2]:
## Database

user = "user"
password = "pass"
host = "127.0.0.1"
port = 3306
database = "wca_db"

conn_str = f"mariadb+mariadbconnector://{user}:{password}@{host}:{port}/{database}"
engine = create_engine(conn_str)

## Reportes


In [3]:
# Global Variables

COUNTRY = "Colombia"
YEAR = 2024

In [4]:
def process_report(name: str, query: str) -> pd.DataFrame:
    df = pd.read_sql_query(query, engine)

    dst_file = Path(f"output/{COUNTRY}/{name}.csv")
    dst_file.parent.mkdir(parents=True, exist_ok=True)

    df.to_csv(dst_file, sep=";", index=False)
    print("Results saved in", dst_file)

    return df

### 1. Top de competidores con más podios


In [5]:
df = process_report(
    "1 - Top de competidores con más podios",
    f"""
        select r.personName nombre, count(*) conteo
        from Results r
        join Competitions c on r.competitionId = c.id
        where 1=1
        and c.year = {YEAR}
        -- and c.countryId != '{COUNTRY}'
        and r.personCountryId = '{COUNTRY}'
        and r.roundTypeId in ('f', 'b', 'c')
        and r.pos in ('1', '2', '3')
        group by 1
        order by 2 desc
        limit 50
    """,
)
df.head(10)

Results saved in output/Colombia/1 - Top de competidores con más podios.csv


Unnamed: 0,nombre,conteo
0,Manuel Popayán,135
1,Diego Alejandro Casas Jimenez,107
2,Alejandro Restrepo Echeverri,102
3,Hector David Martinez Argaez,100
4,Juan Miguel Saboya Soto,80
5,Dennis Rosero,79
6,Eduard Esteban García Domínguez,54
7,Gabriel Santiago Velez Gonzalez,50
8,Ramses Amaya,49
9,Haiver Lenin Reyes Garcia,49


### 2. Top de competidores con más podios en una competencia


In [6]:
df = process_report(
    "2 - Top de competidores con más podios en una competencia",
    f"""
        with p1 as (
            select
                r.personName nombre,
                c.name competencia,
                c.month mes,
                c.day dia,
                count(*) conteo
            from Results r
            join Competitions c on r.competitionId = c.id
            where 1=1
            and c.year = {YEAR}
            -- and c.countryId != '{COUNTRY}'
            and r.personCountryId = '{COUNTRY}'
            and r.roundTypeId in ('f', 'b', 'c')
            and r.pos in ('1', '2', '3')
            group by 1, 2
        ), p2 as (
            select *, row_number() over (partition by nombre order by conteo desc, mes, dia) rn
            from p1
        )
        select p2.nombre, p2.competencia, p2.conteo
        from p2 where rn = 1
        order by 3 desc, p2.mes, p2.dia
        limit 50
    """,
)
df.head(10)

Results saved in output/Colombia/2 - Top de competidores con más podios en una competencia.csv


Unnamed: 0,nombre,competencia,conteo
0,Diego Alejandro Casas Jimenez,Málaga Al Cubo 2024,10
1,Oscar Nieto,Nacionales El Salvador 2024,10
2,Julian Alejandro Forero Gonzalez,Bogotá No Duerme 2024,10
3,Hector David Martinez Argaez,The New School Medellín 2024,9
4,Samuel Martinez Sanchez,Prenacionales Colombia 2024,8
5,Michael Andres Castillo Lemus,NxN Cali 2024,8
6,Jerónimo Espinosa,Málaga Al Cubo 2024,7
7,Alejandro Restrepo Echeverri,Copa Beillín 2024,7
8,Juan Esteban Posada Ochoa,The New School Medellín 2024,7
9,Gabriel Santiago Velez Gonzalez,Encanto Soacha II 2024,7


### 3. Top de competidores con más oros


In [7]:
df = process_report(
    "3 - Top de competidores con más oros",
    f"""
        select r.personName nombre, count(*) conteo
        from Results r
        join Competitions c on r.competitionId = c.id
        where 1=1
        and c.year = {YEAR}
        -- and c.countryId != '{COUNTRY}'
        and r.personCountryId = '{COUNTRY}'
        and r.roundTypeId in ('f', 'b', 'c')
        and r.pos = '1'
        group by 1
        order by 2 desc
        limit 50
    """,
)
df.head(10)

Results saved in output/Colombia/3 - Top de competidores con más oros.csv


Unnamed: 0,nombre,conteo
0,Juan Miguel Saboya Soto,53
1,Manuel Popayán,49
2,Hector David Martinez Argaez,47
3,Alejandro Restrepo Echeverri,40
4,Gabriel Santiago Velez Gonzalez,34
5,Diego Alejandro Casas Jimenez,30
6,Ramses Amaya,24
7,Michael Andres Castillo Lemus,24
8,David Rendón Martínez,20
9,Jerónimo Espinosa,18


### 4. Top de competidores con más oros en una competencia


In [8]:
df = process_report(
    "4 - Top de competidores con más oros en una competencia",
    f"""
        with o1 as (
            select
                r.personName nombre,
                c.name competencia,
                c.month mes,
                c.day dia,
                count(*) conteo
            from Results r
            join Competitions c on r.competitionId = c.id
            where 1=1
            and c.year = {YEAR}
            -- and c.countryId != '{COUNTRY}'
            and r.personCountryId = '{COUNTRY}'
            and r.roundTypeId in ('f', 'b', 'c')
            and r.pos = '1'
            group by 1, 2
        ), o2 as (
            select *, row_number() over (partition by nombre order by conteo desc, mes, dia) rn
            from o1
        )
        select o2.nombre, o2.competencia, o2.conteo
        from o2 where rn = 1
        order by 3 desc, o2.mes, o2.dia
        limit 50
    """,
)
df.head(10)

Results saved in output/Colombia/4 - Top de competidores con más oros en una competencia.csv


Unnamed: 0,nombre,competencia,conteo
0,Diego Alejandro Casas Jimenez,Málaga Al Cubo 2024,7
1,Gabriel Santiago Velez Gonzalez,Encanto Soacha II 2024,7
2,Julian Alejandro Forero Gonzalez,Bogotá No Duerme 2024,7
3,Hector David Martinez Argaez,The New School Medellín 2024,6
4,Alejandro Restrepo Echeverri,Wembley Quimbaya II 2024,6
5,Juan Miguel Saboya Soto,Ritmo Boyacá Innovo Duitama 2024,5
6,Rey Danilo Florez,Speedcubing La Guajira 2024,5
7,Brayan Alexander Sandoval Camacho,Regionales Santander 2024,5
8,Manuel Popayán,Meridiano Mosquera 2024,5
9,David Rendón Martínez,San Juan V 2024,4


### 5. Top PPT ponderado


In [9]:
df = process_report(
    "5 - Top PPT ponderado",
    f"""
        with pods as (
            select
                r.personName nombre,
                count(case when r.pos = '1' then 1 else null end) as oros,
                count(case when r.pos = '2' then 1 else null end) as platas,
                count(case when r.pos = '3' then 1 else null end) as bronces,
                sum(4-r.pos) puntos
            from Results r
            join Competitions c on r.competitionId = c.id
            where 1=1
            and c.year = {YEAR}
            -- and c.countryId != '{COUNTRY}'
            and r.personCountryId = '{COUNTRY}'
            and r.roundTypeId in ('f', 'b', 'c')
            and r.pos in ('1', '2', '3')
            and r.best != '-1'
            group by 1
        ), comps as (
            select r.personName, count(distinct r.competitionId) cnt
            from Results r
            join Competitions c on r.competitionId = c.id
            where 1=1
            and c.year = {YEAR}
            -- and c.countryId != '{COUNTRY}'
            and r.personCountryId = '{COUNTRY}'
            group by 1
        )
        select p.*, c.cnt comps, p.puntos/c.cnt ppt
        from pods p
        join comps c on p.nombre = c.personName
        where c.cnt >= 5
        order by 7 desc
        limit 50
    """,
)
df.head(10)

Results saved in output/Colombia/5 - Top PPT ponderado.csv


Unnamed: 0,nombre,oros,platas,bronces,puntos,comps,ppt
0,Gabriel Santiago Velez Gonzalez,34,10,6,128.0,13,9.8462
1,Juan Miguel Saboya Soto,53,18,9,204.0,23,8.8696
2,Julian Alejandro Forero Gonzalez,15,6,5,62.0,7,8.8571
3,Samuel Martinez Sanchez,12,19,16,90.0,12,7.5
4,David Rendón Martínez,20,14,6,94.0,13,7.2308
5,Alejandro Restrepo Echeverri,40,37,21,215.0,30,7.1667
6,Hector David Martinez Argaez,47,28,25,222.0,32,6.9375
7,Michael Andres Castillo Lemus,24,18,4,112.0,17,6.5882
8,Juan José Ramírez Beyaert,9,14,10,65.0,10,6.5
9,Jhonier Andres Díaz Brito,16,6,8,68.0,11,6.1818


### 6. Top de competidores con más competencias


In [10]:
df = process_report(
    "6 - Top de competidores con más competencias",
    f"""
        select r.personName nombre, count(distinct r.competitionId) conteo
        from Results r
        join Competitions c on r.competitionId = c.id
        where 1=1
        and c.year = {YEAR}
        -- and c.countryId != '{COUNTRY}'
        and r.personCountryId = '{COUNTRY}'
        group by 1
        order by 2 desc
        limit 50
    """,
)
df.head(10)

Results saved in output/Colombia/6 - Top de competidores con más competencias.csv


Unnamed: 0,nombre,conteo
0,Eduard Esteban García Domínguez,84
1,Dennis Rosero,84
2,Diego Alejandro Casas Jimenez,63
3,Francia Perez,54
4,Manuel Popayán,49
5,Shanty Rodríguez,46
6,Catalina Herrera López,46
7,Juan Camilo González Barragán,42
8,Haiver Lenin Reyes Garcia,39
9,Rafael Ricardo Gómez Algarra,39


### 7. Top de competidores con más ciudades visitadas


In [11]:
df = process_report(
    "7 - Top de competidores con más ciudades visitadas",
    f"""
        with cities as (
            select distinct
                r.personName,
                c.cityName,
                case
                    when c.cityName in ('Bogotá', 'Bogotá D.C.', 'Bogotá, Cundinamarca') then 'Bogotá'
                    when c.cityName in ('Quimbaya') then 'Quimbaya, Quindío'
                    when c.cityName in ('Bucaramanga') then 'Bucaramanga, Santander'
                    when c.cityName in ('Yumbo, Valle del Cauca.') then 'Yumbo, Valle del Cauca'
                    when c.cityName in ('Sabaneta, Antioquia.') then 'Sabaneta, Antioquia'
                    when c.cityName in ('Mosquera') then 'Mosquera, Cundinamarca'
                    else c.cityName
                end as normalized_city
            from Results r
            join Competitions c on r.competitionId = c.id
            where 1=1
            and c.cityName not in ('Multiple cities', 'Multiple locations')
            and c.year = {YEAR}
            -- and c.countryId != '{COUNTRY}'
            and r.personCountryId = '{COUNTRY}'
            order by normalized_city
        )
        select
            personName nombre,
            count(distinct normalized_city) conteo
        from cities
        group by 1
        order by 2 desc
        limit 50
    """,
)
df.head(10)

Results saved in output/Colombia/7 - Top de competidores con más ciudades visitadas.csv


Unnamed: 0,nombre,conteo
0,Diego Alejandro Casas Jimenez,26
1,Haiver Lenin Reyes Garcia,20
2,Eduard Esteban García Domínguez,18
3,Dennis Rosero,18
4,Manuel Popayán,13
5,Alejandro Restrepo Echeverri,11
6,Francia Perez,11
7,Catalina Herrera López,9
8,José David Castiblanco Marin,9
9,Jhon Edison Arcila Quintero,9


### 8. Número de novatos en el año


In [12]:
df = process_report(
    "8 - Numero de novatos en el año",
    f"""
        select
            count(*) conteo
        from Persons p
        where 1=1
        and p.countryId = '{COUNTRY}'
        and p.id like '{YEAR}%'
    """,
)
df.head(10)

Results saved in output/Colombia/8 - Numero de novatos en el año.csv


Unnamed: 0,conteo
0,1037


### 9. Top de novatos más rápidos en 333


In [13]:
df = process_report(
    "9 - Top de novatos más rápidos en 333",
    f"""
        with n as (
            select
                r.personId,
                r.personName,
                c.name,
                c.cityName,
                c.countryId,
                r.average,
                rank() over (partition by r.personId order by c.year, c.month, c.day, r.average) rnk
            from Results r
            join Competitions c on c.id = r.competitionId
            where 1=1
            and r.personId like '{YEAR}%'
            and r.personCountryId = '{COUNTRY}'
            and r.eventId = '333'
            and r.average != -1
        )
        select
            n.personId wca_id,
            n.personName nombre,
            n.name torneo,
            n.cityName ciudad,
            n.countryId pais,
            n.average avg
        from n
        where n.rnk = 1
        order by n.average
        limit 50
    """,
)
df.head(10)

Results saved in output/Colombia/9 - Top de novatos más rápidos en 333.csv


Unnamed: 0,wca_id,nombre,torneo,ciudad,pais,avg
0,2024GUAR01,Diego Andrés Torres Guarín,Longjum'Open 2024,Longjumeau,France,1030
1,2024CHAM17,Ivan Dario Mipaz Chamorro,Popayán al Cubo 2024,"Popayán, Cauca",Colombia,1146
2,2024REYE24,Luis Alejandro Silva Reyes,Yopal Open 2024,"Yopal, Casanare",Colombia,1227
3,2024SOLE02,Jhonatan Alejandro Alvarez Soler,Yopal Open 2024,"Yopal, Casanare",Colombia,1230
4,2024SERN03,Jesús Serna,Trebolis El Porvenir Bogotá 2024,Bogotá,Colombia,1308
5,2024ESCO02,David Fernando González Escobar,Popayán 2024,"Popayán, Cauca",Colombia,1313
6,2024ORTI07,Juan Daniel Tangarife Ortiz,Unicentro Pereira 2024,Pereira,Colombia,1313
7,2024GARC31,Laura Esthefany Ramírez García,Centro Suba Bogotá 2024,Bogotá D.C.,Colombia,1321
8,2024MUNO09,Pablo Esteban Sotelo Muñoz,Popayán 2024,"Popayán, Cauca",Colombia,1321
9,2024AREV03,Thomas Arevalo,Miami Spring 2024,"Miami, Florida",USA,1343


### 10. Top torneos con más novatos


In [14]:
df = process_report(
    "10 - Top torneos con más novatos",
    f"""
        with n as (
            select
                c.name,
                c.cityName,
                r.personId,
                rank() over (partition by r.personId order by c.year, c.month, c.day) rnk
            from Results r
            join Competitions c on c.id = r.competitionId
            where 1=1
            and r.personId like '{YEAR}%'
            and c.countryId = '{COUNTRY}'
        ), nc as (
            select distinct *
            from n
            where rnk = 1
        )
        select
            name competencia,
            cityName ciudad,
            count(*) conteo
        from nc
        group by 1
        order by 3 desc
        limit 50
    """,
)
df.head(10)

Results saved in output/Colombia/10 - Top torneos con más novatos.csv


Unnamed: 0,competencia,ciudad,conteo
0,Popayán 2024,"Popayán, Cauca",50
1,Ritmo Boyacá Innovo Duitama 2024,"Duitama, Boyacá.",48
2,Medellin Explora 2024,"Medellín, Antioquia",31
3,NxN Cali 2024,"Cali, Valle del Cauca",31
4,Centro Mayor 2024,Bogotá D.C.,29
5,Avenida Chile VII 2024,Bogotá,27
6,Cali Vuelve 2024,"Cali, Valle del Cauca",23
7,Unicentro Pereira 2024,Pereira,22
8,Cubomanía Monteria 2024,"Monteria, Cordoba",21
9,Popayán al Cubo 2024,"Popayán, Cauca",19


### 11. Records del año


In [15]:
df = process_report(
    "11 - Records del año",
    f"""
        with r as (
            select
                r.*,
                case
                    when regionalSingleRecord is not null and regionalAverageRecord is not null then 2
                    when regionalSingleRecord is not null then 1
                    when regionalAverageRecord is not null then 1
                    else 0
                end records
            from Results r
            join Competitions c on c.id = r.competitionId
            where 1=1
            and (r.regionalSingleRecord is not null or r.regionalAverageRecord is not null)
            and r.personCountryId = '{COUNTRY}'
            and c.year = {YEAR}
        )
        select personName nombre, sum(records) conteo
        from r
        group by 1
        order by 2 desc
    """,
)
df.head(10)

Results saved in output/Colombia/11 - Records del año.csv


Unnamed: 0,nombre,conteo
0,Juan Miguel Saboya Soto,16.0
1,Gabriel Santiago Velez Gonzalez,6.0
2,Shemuel Cuellar Ramirez,4.0
3,Hector David Martinez Argaez,4.0
4,Jefferson Durango,2.0
5,Julian Alejandro Forero Gonzalez,2.0
6,Mauricio Arias,2.0
7,Shanty Rodríguez,2.0
8,Alejandro Restrepo Echeverri,1.0
9,Juan Manuel Rueda Jaramillo,1.0


### 12. Top mujeres 333


In [16]:
df = process_report(
    "12 - Top mujeres 333",
    f"""
        with f as (
            select *
            from Persons p
            where 1=1
            and p.gender = 'f'
            and p.countryId = '{COUNTRY}'
        ), r as (
            select r.personId, min(r.average) avg
            from Competitions c
            join Results r on r.competitionId = c.id
            where 1=1
            and r.average != -1
            and c.year = {YEAR}
            and r.eventId = '333'
            and r.personCountryId = '{COUNTRY}'
            group by 1
        )
        select f.id wca_id, f.name nombre, r.avg
        from r
        join f on f.id = r.personId
        order by 3
        limit 50
    """,
)
df.head(10)

Results saved in output/Colombia/12 - Top mujeres 333.csv


Unnamed: 0,wca_id,nombre,avg
0,2021RODR03,Isabela Quesada Rodríguez,970
1,2019BUIT01,Victoria Alejandra Zambrano Buitrago,1060
2,2017SILV16,Juliana Gonzalez Silva,1171
3,2024GARC31,Laura Esthefany Ramírez García,1257
4,2022NARV03,Paula Sofía Chamat Narváez,1438
5,2012GAIT01,Luna Gaitán,1455
6,2022TIBA01,Sara Lucia Florez Tibacan,1698
7,2022ORTI06,Mariana Henao Ortiz,1712
8,2024ARTU01,Isabella Artunduaga,1763
9,2017MUNO06,Valentina Sánchez Muñoz,1778


In [1]:
!jupyter nbconvert --to html wca_eoy_2024.ipynb;

[NbConvertApp] Converting notebook wca_eoy_2024.ipynb to html
[NbConvertApp] Writing 337557 bytes to wca_eoy_2024.html
