In [11]:
import numpy as np
import pandas as pd
import re
import requests
import time
from collections import defaultdict
import datetime
from bs4 import BeautifulSoup

In [12]:
# Load every input table from ../data. The files are read in the same order as
# before; they are only grouped so related tables sit on one statement.
RIDERS, RIDER_TEAMS = map(pd.read_csv, (
    "../data/riders.csv",
    "../data/rider_teams.csv",
))
MANAGERS, MANAGER_TEAMS, MANAGER_CHEAPO_TEAMS = map(pd.read_csv, (
    "../data/managers.csv",
    "../data/manager_teams.csv",
    "../data/manager_cheapo_teams.csv",
))
POINTS_SYSTEM, RACES = map(pd.read_csv, (
    "../data/points_system.csv",
    "../data/races.csv",
))
# One results file per season.
RESULTS_2023, RESULTS_2024, RESULTS_2025 = map(pd.read_csv, (
    "../data/results/results_2023_full.csv",
    "../data/results/results_2024_full.csv",
    "../data/results/results_2025_full.csv",
))
CHEAPO_BANS = pd.read_csv("../data/cheapo_bans.csv")

In [13]:
# Left-join the manager team rows onto the rider table (MANAGER_TEAMS.RiderName
# matches RIDERS.RiderName_Zweeler) and keep only the three columns used below.
# Rows without a match in RIDERS keep NaN in RiderName_PCS / RiderPrice.
TEAMS_OVERVIEW = MANAGER_TEAMS.merge(
    RIDERS,
    how = "left",
    left_on = "RiderName",
    right_on = "RiderName_Zweeler",
)[["ManagerName", "RiderName_PCS", "RiderPrice"]]

def pad_price(price, name, width = 5):
    """Build a string sort key: the price as fixed-width text followed by the name.

    The price is converted with ``str`` and left-padded with zeros to ``width``
    characters. If the text is longer than ``width``, only its LAST ``width``
    characters are kept, so the leading (most significant) digits are dropped.
    Pass a ``width`` at least as long as the longest price text to get keys
    that order correctly as strings.

    Parameters
    ----------
    price : any
        Value whose ``str()`` form is used as the price part of the key.
    name : str
        Text appended after the padded price.
    width : int, default 5
        Exact number of characters in the price part. The default reproduces
        the historical behaviour.

    Returns
    -------
    str
        ``width`` price characters followed by ``name``.

    Raises
    ------
    ValueError
        If ``width`` is smaller than 1.
    """
    if width < 1:
        raise ValueError("width must be at least 1")
    # rjust pads short values; the slice trims long ones to the same fixed width.
    padded_price = str(price).rjust(width, "0")[-width:]
    return padded_price + name

# Sort key per row: padded price text followed by the rider name.
TEAMS_OVERVIEW["RankCol"] = [
    pad_price(price, rider)
    for price, rider in zip(TEAMS_OVERVIEW["RiderPrice"], TEAMS_OVERVIEW["RiderName_PCS"])
]

# Dense rank of the key within each manager, highest key first (rank 1).
TEAMS_OVERVIEW["TeamRank"] = (
    TEAMS_OVERVIEW
    .groupby("ManagerName")["RankCol"]
    .rank(method = "dense", ascending = False)
    .astype(int)
)

# One OVERVIEW row per distinct rank value, in ascending order.
nums = sorted(TEAMS_OVERVIEW["TeamRank"].unique())
OVERVIEW = pd.DataFrame(nums, columns = ["Rank"])

def get_rider(manager, rank):
    """Return the rider name a manager holds at a given team rank.

    Looks up rows of the module-level ``TEAMS_OVERVIEW`` frame whose
    ``ManagerName`` equals ``manager`` and whose ``TeamRank`` equals ``rank``,
    and returns the first distinct ``RiderName_PCS`` value among them.

    Returns an empty string when the manager has no rider at that rank. This
    can happen because the rank list is shared by all managers, so a manager
    with fewer distinct ranks used to trigger an IndexError here.
    """
    at_rank = (TEAMS_OVERVIEW["ManagerName"] == manager) & (TEAMS_OVERVIEW["TeamRank"] == rank)
    names = TEAMS_OVERVIEW.loc[at_rank, "RiderName_PCS"].unique()
    if len(names) == 0:
        return ""
    return names[0]

# Add one OVERVIEW column per manager, holding that manager's rider for each rank.
ms = list(TEAMS_OVERVIEW["ManagerName"].unique())

for m in ms:
    OVERVIEW[m] = [get_rider(m, rank) for rank in OVERVIEW["Rank"]]

In [14]:
def _fetch_startlist(URL, timeout):
    """Download a startlist page and return its rider names, or None if the page has no usable startlist."""
    # Without a timeout a stalled connection blocks the whole run indefinitely.
    page = requests.get(URL, timeout = timeout)
    soup = BeautifulSoup(page.content, "html.parser")

    # soup.title is None when the document has no <title> element.
    title = soup.title.text if soup.title is not None else ""
    if title.startswith("Page not found"):
        return None

    tables = soup.find_all("table", class_ = "basic")
    if not tables:
        return None

    startlist_table = tables[0]
    # Fall back to the table itself when the markup has no explicit <tbody>.
    body = startlist_table.find("tbody") or startlist_table

    startlist = []
    for row in body.find_all("tr"):
        cols = row.find_all("td")
        # The name is read from the third cell; rows with fewer cells
        # (e.g. header rows) are skipped instead of raising IndexError.
        if len(cols) > 2:
            startlist.append(cols[2].text.strip().title())
    return startlist


def _highlight_startlist(table_html, startlist):
    """Tag every table cell whose full text equals a startlist name with the "startlist" class."""
    for rider in startlist:
        to_replace = "<td>" + rider + "</td>"
        replacement = '<td class = "startlist">' + rider + "</td>"
        table_html = table_html.replace(to_replace, replacement)
    return table_html


def get_startlist(URL, RACENAME, timeout = 30):
    """Fetch a race startlist and write the team overview with starters highlighted.

    Downloads ``URL``, reads the rider names from the first ``table.basic`` on
    the page, title-cases them, renders the module-level ``OVERVIEW`` frame as
    an HTML table, marks each cell that exactly matches a starter, and writes
    the page to ``../Løbsstartlister/<RACENAME>.md``.

    Parameters
    ----------
    URL : str
        Address of the startlist page.
    RACENAME : str
        Used as the output file name (without extension) and in the progress message.
    timeout : float, default 30
        Seconds to wait for the HTTP request before ``requests`` raises.

    Returns
    -------
    list or None
        An empty list when the page is a "Page not found" page or contains no
        startlist table (nothing is written); otherwise ``None`` after the
        file has been written.
    """
    startlist = _fetch_startlist(URL, timeout)
    if startlist is None:
        print("SKIPPED (no startlist):", RACENAME)
        return []

    txt = _highlight_startlist(OVERVIEW.to_html(), startlist)

    txt = """<html>
    <head>
    <style>
      body {
        font-family: 'Trebuchet MS', 'Lucida Sans Unicode', 'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
      }

      table {
        font-size: 11px;
        border-collapse: collapse;
        width: 90%;
      }
      
      td, th {
        border: 1px solid #dddddd;
        text-align: left;
        padding: 8px;
      }
    .startlist {
    background-color: green;
    }
    </style>
    </head>
    <body>
    """ + txt + """

    </body>
    </html>"""

    # Explicit UTF-8: rider names may contain characters that the platform
    # default encoding cannot represent.
    with open("../Løbsstartlister/" + RACENAME + ".md", "w", encoding = "utf-8") as file:
        file.write(txt)

    # Report success only once the file is actually on disk.
    print("DONE:", RACENAME)

In [None]:
# Re-read the race calendar and pick the races whose end falls inside the
# window from two days ago to 200 days ahead.
races = pd.read_csv("../data/races.csv")

NOW = datetime.datetime.now()
FROM = NOW - datetime.timedelta(days = 2)
TO = NOW + datetime.timedelta(days = 200)

# RACES_TO_GET is the same object as `races` here, so the datetime conversion
# below also updates `races` in place.
RACES_TO_GET = races
for date_col in ("RaceStart", "RaceEnd"):
    RACES_TO_GET[date_col] = pd.to_datetime(RACES_TO_GET[date_col], format = "%Y-%m-%d %H:%M:00")

in_window = (races["RaceEnd"] > FROM) & (races["RaceEnd"] < TO)
RACES_TO_GET = races[in_window]

for i, r in RACES_TO_GET.iterrows():
    # Output name is "<start date> <race name>".
    racestart = r["RaceStart"].strftime("%Y-%m-%d")
    racename = racestart + " " + r["RaceName"]
    url = r["RaceLink_PCS"] + "/startlist/alphabetical"
    get_startlist(url, racename)
    # Pause between requests.
    time.sleep(15)

DONE: 2025-07-05 Tour de France


KeyboardInterrupt: 