# In [2]:
import re
import time
import random
import os
from bs4 import BeautifulSoup
from datetime import datetime
from dateutil.parser import parse
import pandas as pd
import requests

# Scrape FIS cross-country result pages for a range of race ids and collect
# one row per listed athlete in ``table_competition``.
base_url = "https://www.fis-ski.com/DB/general/results.html?sectorcode=CC&raceid="
start_raceid = 41638
end_raceid = 41655

# Seconds to wait for the server before giving up on a single race page;
# without a timeout ``requests.get`` can block indefinitely.
REQUEST_TIMEOUT = 30

# ``class`` attribute values of the cells read from each result row.  Each
# value is passed to BeautifulSoup as one string, exactly as written here.
PLACE_CLASS = "g-lg-1 g-md-1 g-sm-1 g-xs-2 justify-right pr-1 bold"
# NOTE(review): the second candidate is the same cell the finish place is read
# from, so a row without a dedicated start-number cell reports its finish
# place as the start number -- confirm this is intended.
NUMBER_CLASSES = (
    "g-lg-1 g-md-1 g-sm-1 justify-right hidden-xs pr-1 gray",
    PLACE_CLASS,
)
FISCODE_CLASS = "pr-1 g-lg-2 g-md-2 g-sm-2 hidden-xs justify-right gray"
# Candidate classes for the athlete-name cell, tried in this order.
NAME_CLASSES = (
    "g-lg-14 g-md-14 g-sm-13 g-xs-11 justify-left bold",
    "g-lg-12 g-md-12 g-sm-11 g-xs-8 justify-left bold",
    "g-lg-12 g-md-12 g-sm-11 g-xs-8 justify-left bold-sm",
    "g-lg-8 g-md-8 g-sm-7 g-xs-8 justify-left bold",
    "g-lg-15 g-md-15 g-sm-14 g-xs-11 justify-left bold",
    "g-lg-18 g-md-18 g-sm-16 g-xs-16 justify-left bold",
)
HANDICAP_SPAN_CLASS = "hidden-md-up visible-sm"
HANDICAP_DIV_CLASS = "g-lg-2 g-md-2 g-sm-3 g-xs-5 justify-right bold"
FINISH_CLASS = "g-lg-2 g-md-2 justify-right blue bold hidden-sm hidden-xs"

# [[H:]MM:]SS[.fraction] -- hours and minutes are optional.
_FINISH_TIME_RE = re.compile(r"(?:(?:(\d+):)?([0-5]?\d):)?([0-5]?\d)(?:\.(\d+))?")


def _clean_text(element):
    """Return the stripped text of *element*, or "" when *element* is None."""
    return "" if element is None else element.get_text().strip()


def _find_first(row, tag, class_names):
    """Return the first *tag* in *row* matching one of *class_names*, else None."""
    for class_name in class_names:
        element = row.find(tag, class_=class_name)
        if element is not None:
            return element
    return None


def _to_int(text):
    """Return *text* converted to int, or "" when it is not a whole number."""
    try:
        return int(text)
    except ValueError:
        return ""


def _format_finish_time(raw):
    """Normalise a finish time to ``HH:MM:SS.ff`` (fraction cut to two digits).

    The value is parsed explicitly instead of with ``dateutil.parser.parse``,
    which reads a two-field value such as "12:34.5" as hours:minutes
    (12:34:30) rather than minutes:seconds.  Text that is not a time is
    returned unchanged.
    """
    match = _FINISH_TIME_RE.fullmatch(raw)
    if match is None:
        return raw
    hours, minutes, seconds, fraction = match.groups()
    # Pad/truncate (never round) the fraction to exactly two digits.
    hundredths = ((fraction or "") + "00")[:2]
    return f"{int(hours or 0):02d}:{int(minutes or 0):02d}:{int(seconds):02d}.{hundredths}"


def _parse_skier_row(row):
    """Return [place, number, fiscode, name, nation, finish, handicap] for *row*.

    Cells that are missing from the row are reported as "" instead of raising.
    """
    place = _to_int(_clean_text(row.find("div", class_=PLACE_CLASS)))
    number = _clean_text(_find_first(row, "div", NUMBER_CLASSES))
    fiscode = _to_int(_clean_text(row.find("div", class_=FISCODE_CLASS)))
    name = _clean_text(_find_first(row, "div", NAME_CLASSES))
    nation = _clean_text(row.find("span", class_="country__name-short"))

    # A row that contains the span is given a handicap of 0.0; otherwise the
    # text of the handicap cell is used ("" when that cell is absent too).
    if row.find("span", class_=HANDICAP_SPAN_CLASS) is not None:
        gandicap_time = float(0)
    else:
        gandicap_time = _clean_text(row.find("div", class_=HANDICAP_DIV_CLASS))

    finish_time = _format_finish_time(_clean_text(row.find("div", class_=FINISH_CLASS)))
    return [place, number, fiscode, name, nation, finish_time, gandicap_time]


def _fetch_soup(url):
    """Download *url* and return its parsed HTML, or None if the request fails."""
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as error:
        # Skip this page instead of losing the rows collected so far.
        print(f"Request failed for {url}: {error}")
        return None
    return BeautifulSoup(response.text, "html.parser")


table_competition = []

for raceid in range(start_raceid, end_raceid + 1):
    checked_soup = _fetch_soup(base_url + str(raceid))

    date_competition_element = None
    if checked_soup is not None:
        date_competition_element = checked_soup.find("span", class_="date__full")

    # Pages without a date element are skipped entirely.
    if date_competition_element is not None:
        date_competition = datetime.strptime(_clean_text(date_competition_element), "%B %d, %Y")
        # Assigned at module level: the export code after the loop reads it.
        place_competition = _clean_text(
            checked_soup.find("h1", class_="heading heading_l2 heading_white heading_off-sm-style")
        )
        discipline = _clean_text(checked_soup.find("div", class_="event-header__kind"))
        discipline = re.sub(r"\s+|/", " ", discipline)

        results_competition = checked_soup.find("div", id="events-info-results")
        if results_competition is not None:
            for skier_row in results_competition.find_all("div", class_="g-row justify-sb"):
                table_competition.append(
                    [date_competition, place_competition, discipline, *_parse_skier_row(skier_row)]
                )

    # Random pause of 1 to 4 seconds between requests (presumably to avoid
    # hammering the server).
    time.sleep(random.uniform(1, 4))


# Folder the CSV export is written to; created if it does not exist yet.
save_folder = "C:/cross-country/"
os.makedirs(save_folder, exist_ok=True)

# The file name starts with the venue of the last race page that was parsed.
# ``place_competition`` is only assigned when at least one page was parsed,
# so fall back to a generic label instead of failing with NameError.
try:
    competition_label = place_competition
except NameError:
    competition_label = None
# Scraped text may be None/empty or contain characters that are not allowed
# in Windows file names; replace those so the export cannot fail on the name.
competition_label = re.sub(r'[\\/:*?"<>|\r\n\t]', "_", (competition_label or "results").strip())

# Folder path plus file name; the race-id range comes from the scrape settings
# rather than being repeated as a literal.
filename = os.path.join(
    save_folder,
    competition_label + " 2022-2023 " + str(start_raceid) + "-" + str(end_raceid) + ".csv",
)

df = pd.DataFrame(
    table_competition,
    columns=[
        "Date",
        "Competition",
        "Discipline",
        "Finish place",
        "Start number",
        "Number FIS",
        "Name",
        "Nation",
        "Finish",
        "Gandicap_time",
    ],
)
df.to_csv(filename, index=False, sep=';')



