In [1]:
import requests
import pandas as pd
import json

import time
from datetime import datetime

startTime = time.time()
print("")
print("")
print(f"--- *** {datetime.now()} *** ---")
print("")
print("")


########################################################################################################################
# RUN
########################################################################################################################


# Define the API endpoint for seasons
seasons_url = "https://api.motogp.pulselive.com/motogp/v1/results/seasons"
# Request data from the API
seasons_response = requests.get(seasons_url).json()
# Create seasons_dict with seasons year as keys and their id as values
seasons_dict = {str(season["year"]): season["id"] for season in seasons_response}

filtered_dict = {key: value for key, value in seasons_dict.items() if "202" in key}

# Find the MotoGP category id
motogp_id = None
# Define the API endpoints for categories
categories_url = "https://api.motogp.pulselive.com/motogp/v1/results/categories?eventUuid=14bd179e-a4bc-4b0d-bd2e-69a9ed99cb60"
# Request data from the API
categories_response = requests.get(categories_url).json()

for category in categories_response:
    if "MotoGP" in category["name"]:
        motogp_id = category["id"]
        break


# Initialize an empty list to store event data
all_events_data = []

# Iterate through each season and fetch event data
for season_year, season_id in filtered_dict.items():
    # Define the API endpoint for events of a specific season
    events_url = f"https://api.motogp.pulselive.com/motogp/v1/results/events?seasonUuid={season_id}&isFinished=true"
    # Request data from the API
    events_response = requests.get(events_url).json()

    # Create a DataFrame with relevant columns from events_response
    events_data = []
    for event in events_response:
        events_data.append(
            {
                "season_year": season_year,
                "event_name": event["name"],
                "event_id": event["id"],
                "country": event.get("country", {}).get("name"),
                "circuit_name": event.get("circuit", {}).get("name"),
            }
        )

    # Append the event data for this season to the overall list
    all_events_data.extend(events_data)

# Create a DataFrame containing information from all seasons
events_df = pd.DataFrame(all_events_data)

# Filter and drop rows where "TEST" is in the "event_name" column
events_df = events_df[~events_df["event_name"].str.contains("TEST")]

# Reset the index
events_df.reset_index(drop=True, inplace=True)


# Initialize an empty list to store race data
all_races_data = []

# Iterate through each event and fetch race data
for event_id in events_df["event_id"]:
    # Define the API endpoint for race sessions of a specific event
    events_types_url = f"https://api.motogp.pulselive.com/motogp/v1/results/sessions?eventUuid={event_id}&categoryUuid={motogp_id}"

    # Request data from the API
    events_types_response = requests.get(events_types_url).json()

    # Filter and extract data for races (type == "RAC")
    races_data = []
    for session in events_types_response:
        if session["type"] == "RAC":
            races_data.append(
                {
                    "event_id": event_id,
                    "circuit_name": session["circuit"],
                    "race_id": session["id"],
                }
            )

    # Append the race data for this event to the overall list
    all_races_data.extend(races_data)

# Create a DataFrame containing race information from all events
events_types_df = pd.DataFrame(all_races_data)

# Merge event_df and events_types_df on 'event_id'
races_df = events_df.merge(events_types_df, on="event_id", how="left")


races_df = races_df.drop(["circuit_name_y"], axis=1)
races_df = races_df.rename(columns={"circuit_name_x": "circuit_name"})

# # Drop duplicates
races_df = races_df.drop_duplicates()

# # Reset the index
races_df.reset_index(drop=True, inplace=True)
# Initialize an empty list to store championship rank data
championship_ranks_data = []
counter = 0
# Define the base URL for fetching championship ranks
base_url = "https://api.motogp.pulselive.com/motogp/v1/results/session"

# Iterate through each race_id in races_df
for race_id in races_df["race_id"]:
    # Define the API endpoint for fetching championship ranks
    ranks_url = f"{base_url}/{race_id}/classification?test=false"
    counter += 1
    print(counter)

    try:
        # Request data from the API
        ranks_response = requests.get(ranks_url)
        ranks_response.raise_for_status()  # Raise an error for non-OK responses

        # Parse the JSON response
        ranks_data = ranks_response.json()

        # Iterate through the ranks in the response and extract relevant information
        for rank in ranks_data.get("classification", []):
            rider_info = rank.get("rider", {})
            country_info = rider_info.get("country", {})
            team_info = rank.get("team", {})
            constructor_info = rank.get("constructor", {})

            championship_ranks_data.append(
                {
                    "season_year": races_df.loc[
                        races_df["race_id"] == race_id, "season_year"
                    ].values[0],
                    "event_name": races_df.loc[
                        races_df["race_id"] == race_id, "event_name"
                    ].values[0],
                    "event_id": races_df.loc[
                        races_df["race_id"] == race_id, "event_id"
                    ].values[
                        0
                    ],  # Keep event_id
                    "circuit_country": races_df.loc[
                        races_df["race_id"] == race_id, "country"
                    ].values[0],
                    "circuit_name": races_df.loc[
                        races_df["race_id"] == race_id, "circuit_name"
                    ].values[0],
                    "race_id": race_id,
                    "rider_name": rider_info.get("full_name", ""),
                    "rider_country": country_info.get("name", ""),
                    "team_name": team_info.get("name", ""),
                    "constructor_name": constructor_info.get("name", ""),
                    "position": rank.get("position", ""),
                }
            )
    except requests.exceptions.HTTPError as e:
        # Handle HTTP error, such as 404 Not Found or 500 Internal Server Error
        print(f"HTTP Error: {e}")
    except json.JSONDecodeError as e:
        # Handle JSON decode error (invalid JSON response)
        print(f"JSON Decode Error: {e}")
    except Exception as e:
        # Handle other exceptions
        print(f"An error occurred: {e}")
# Create the championship_ranks_df DataFrame
championship_ranks_df = pd.DataFrame(championship_ranks_data)
# Drop duplicates
championship_ranks_df = championship_ranks_df.drop_duplicates()

# Reset the index
championship_ranks_df.reset_index(drop=True, inplace=True)
# Export into CSV
championship_ranks_df.to_csv("motogp_datas.csv", index=False)


########################################################################################################################
# TIMER
########################################################################################################################
endTime = time.time()
executionTime = endTime - startTime
print("")
print("")
print(
    f"--- *** Total execution time in seconds: {executionTime}, in minutes: {executionTime/60} and in hours: {executionTime/60/60} *** ---"
)
print("")
print("")

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
HTTP Error: 400 Client Error: Bad Request for url: https://api.motogp.pulselive.com/motogp/v1/results/session/nan/classification?test=false
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
HTTP Error: 400 Client Error: Bad Request for url: https://api.motogp.pulselive.com/motogp/v1/results/session/nan/classification?test=false
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
