In [1]:
import requests
import json
import pandas as pd
import time
import random

In [2]:
from datetime import datetime, timedelta

# Current time in UTC
now = datetime.utcnow()

# Calculate the start time and end time
start_time = now.strftime("%Y-%m-%dT%H:%M:%SZ")  # Format as ISO 8601
end_time = (now + timedelta(weeks=1)).strftime("%Y-%m-%dT%H:%M:%SZ")  # 3 weeks later

# Construct the URL
print(start_time)
print(end_time)

2025-01-20T21:27:15Z
2025-01-27T21:27:15Z


In [3]:
import requests
import time
import random
import pandas as pd

# Initialize an empty list to store each batch of matches data
all_matches = []

# Loop over drillDownTagIds ranges in steps of 900
for start in range(1400, 13700, 200):
    # Generate a comma-separated string of drillDownTagIds for the current range
    drilldown_ids = ','.join(str(i) for i in range(start, start + 200))

    # Define the request URL
    url = (
        f"https://content.toto.nl/content-service/api/v1/q/event-list?"
        "startTimeFrom=2025-01-19T16:00:00Z&"
        "startTimeTo=2025-02-17T22:59:59Z&"
        "liveNow=false&"
        "maxEvents=190&"
        "orderEventsBy=popularity&"
        "orderMarketsBy=displayOrder&"
        "marketSortsIncluded=--,CS,DC,DN,HH,HL,MH,MR,WH&"
        "marketGroupTypesIncluded=CUSTOM_GROUP,DOUBLE_CHANCE,DRAW_NO_BET,MATCH_RESULT,"
        "MATCH_WINNER,MONEYLINE,ROLLING_SPREAD,ROLLING_TOTAL,STATIC_SPREAD,STATIC_TOTAL&"
        "eventSortsIncluded=MTCH&"
        "includeChildMarkets=true&"
        "prioritisePrimaryMarkets=true&"
        "includeCommentary=true&"
        "includeMedia=true&"
        f"drilldownTagIds={drilldown_ids}&"
        "categoryCode=tennis&"  # Specify the desired category
        "lang=nl-NL&"
        "channel=I"
    )

    # Headers for the request
    headers = {
        'accept': 'application/json',
        'accept-language': 'en-US,en;q=0.9,nl;q=0.8',
    }

    try:
        # Make the request
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise HTTPError for bad responses

        # Parse the JSON response
        data = response.json()

        # Extract match data if available
        matches = []
        for event in data.get('data', {}).get('events', []):
            match = {
                "event_id": event.get("id"),
                "match_name": event.get("name"),
                "start_time": event.get("startTime"),
                "home_team": next((team['name'] for team in event.get('teams', []) if team['side'] == "HOME"), None),
                "away_team": next((team['name'] for team in event.get('teams', []) if team['side'] == "AWAY"), None),
                "competition": event.get('type', {}).get('name'),
                "country": event.get('class', {}).get('name'),
                "sport": event.get('category', {}).get('name'),
            }

            # Extract odds if available
            outcomes = event.get("markets", [{}])[0].get("outcomes", [])
            match["home_odds"] = next((outcome['prices'][0]['decimal'] for outcome in outcomes if outcome.get('subType') == "H"), None)
            match["away_odds"] = next((outcome['prices'][0]['decimal'] for outcome in outcomes if outcome.get('subType') == "A"), None)
            
            matches.append(match)

        # Add the batch of matches to the all_matches list
        all_matches.extend(matches)

        # Sleep for a random duration between 1 and 4 seconds, with variability of 0.1 ms
        time.sleep(random.uniform(1, 2.001))

    except Exception as e:
        print(f"An error occurred for drilldown ID range {start}–{start + 200}: {e}")

# Combine all match data into a single DataFrame
final_matches_df = pd.DataFrame(all_matches)

# Display the resulting DataFrame
print(final_matches_df)

     event_id                              match_name            start_time  \
0     6627725    VC Activia vs Regio Zwolle Volleybal  2025-01-21T19:00:00Z   
1     6513636                       India vs Engeland  2025-01-22T13:30:00Z   
2     6618447                  Pakistan vs West Indië  2025-01-25T04:30:00Z   
3     6628093                   Australië vs Engeland  2025-01-23T08:15:00Z   
4     6630563       Durbar Rajshahi vs Rangpur Riders  2025-01-23T07:30:00Z   
...       ...                                     ...                   ...   
1384  6620069              Tom Ford vs Stuart Bingham  2025-01-21T13:00:00Z   
1385  6627155                      Tsjechië vs Italië  2025-01-21T17:00:00Z   
1386  6239787           Weili Zhang vs Tatiana Suarez  2025-02-09T03:40:00Z   
1387  6310322  Zsc Lions Zurich vs Geneve Servette HC  2025-01-21T19:15:00Z   
1388  6627156                  Zwitserland vs Tunesië  2025-01-21T14:30:00Z   

             home_team               away_team  \
0

In [4]:
len(final_matches_df)

1389

In [5]:
final_matches_df

Unnamed: 0,event_id,match_name,start_time,home_team,away_team,competition,country,sport,home_odds,away_odds
0,6627725,VC Activia vs Regio Zwolle Volleybal,2025-01-21T19:00:00Z,VC Activia,Regio Zwolle Volleybal,Eredivisie Dames,Nederland,Volleybal,1.51,2.40
1,6513636,India vs Engeland,2025-01-22T13:30:00Z,India,Engeland,T20 Series,Internationaal,Cricket,1.63,2.30
2,6618447,Pakistan vs West Indië,2025-01-25T04:30:00Z,Pakistan,West Indië,Test Series,Internationaal,Cricket,1.36,5.25
3,6628093,Australië vs Engeland,2025-01-23T08:15:00Z,Australië,Engeland,T20 Series Vrouwen,Internationaal,Cricket,1.36,3.15
4,6630563,Durbar Rajshahi vs Rangpur Riders,2025-01-23T07:30:00Z,Durbar Rajshahi,Rangpur Riders,Bangladesh Premier League,Bangladesh,Cricket,2.40,1.59
...,...,...,...,...,...,...,...,...,...,...
1384,6620069,Tom Ford vs Stuart Bingham,2025-01-21T13:00:00Z,Tom Ford,Stuart Bingham,Championship League Snooker,Groot-Brittannië,Snooker,2.35,1.59
1385,6627155,Tsjechië vs Italië,2025-01-21T17:00:00Z,Tsjechië,Italië,Wereld Kampioenschap,Wereldwijd,Handbal,1.87,2.35
1386,6239787,Weili Zhang vs Tatiana Suarez,2025-02-09T03:40:00Z,Weili Zhang,Tatiana Suarez,UFC 312: Du Plessis vs. Strickland 2,UFC,Mixed Martial Arts,1.54,2.35
1387,6310322,Zsc Lions Zurich vs Geneve Servette HC,2025-01-21T19:15:00Z,Zsc Lions Zurich,Geneve Servette HC,Champions Hockey League,Europees,IJshockey,,


In [6]:
final_matches_df[final_matches_df['sport'] == 'Voetbal']

Unnamed: 0,event_id,match_name,start_time,home_team,away_team,competition,country,sport,home_odds,away_odds
6,6543708,Airdrieonians FC vs Dunfermline Athletic FC,2025-01-25T15:00:00Z,Airdrieonians FC,Dunfermline Athletic FC,Schotland Championship,Schotland,Voetbal,4.25,1.77
7,6247780,Al Ain FC vs Al-Nasr Dubai SC,2025-01-21T16:00:00Z,Al Ain FC,Al-Nasr Dubai SC,VAE Arabian Gulf League,Verenigde Arabische Emiraten,Voetbal,1.50,4.50
8,6543732,Alloa Athletic FC vs Inverness Caledonian This...,2025-01-25T15:00:00Z,Alloa Athletic FC,Inverness Caledonian Thistle FC,Schotland League One,Schotland,Voetbal,2.40,2.70
9,6543730,Annan Athletic FC vs Montrose,2025-01-25T15:00:00Z,Annan Athletic FC,Montrose,Schotland League One,Schotland,Voetbal,2.60,2.45
10,6543757,Arbroath FC vs Kelty Hearts FC,2025-01-25T17:30:00Z,Arbroath FC,Kelty Hearts FC,Schotland League One,Schotland,Voetbal,1.83,3.75
...,...,...,...,...,...,...,...,...,...,...
1081,6440019,Botafogo Fr RJ vs CR Flamengo RJ,2025-02-02T19:00:00Z,Botafogo Fr RJ,CR Flamengo RJ,Brazil Supercup,Brazilië,Voetbal,2.60,2.60
1148,6458066,Blooming vs CD El Nacional,2025-02-07T00:30:00Z,Blooming,CD El Nacional,Copa Libertadores,Americas,Voetbal,2.75,2.45
1149,6457981,Monagas SC vs Defensor Sporting,2025-02-05T00:30:00Z,Monagas SC,Defensor Sporting,Copa Libertadores,Americas,Voetbal,3.00,2.35
1150,6458069,Nacional Asunción vs Alianza Lima,2025-02-06T00:30:00Z,Nacional Asunción,Alianza Lima,Copa Libertadores,Americas,Voetbal,2.20,3.15


In [7]:
import requests
import time
import random
import pandas as pd

# Initialize an empty list to accumulate all data
all_data = []

# Set batch size
batch_size = 100

# Loop over event_ids in batches of 500
for i in range(0, len(final_matches_df['event_id']), batch_size):
    try:
        # Get a batch of event_ids
        batch_ids = final_matches_df['event_id'][i:i + batch_size]
        
        # Concatenate event_ids for the URL
        event_ids_str = ','.join(map(str, batch_ids))
        
        # Define the URL for the current batch
        url = (
            f"https://content.toto.nl/content-service/api/v1/q/events-by-ids?"
            f"eventIds={event_ids_str}&"
            "includeChildMarkets=true&includeCollections=true&includePriorityCollectionChildMarkets=true&"
            "includePriceHistory=false&includeCommentary=true&includeIncidents=false&includeRace=false&"
            "includePools=false&includeNonFixedOdds=false&lang=nl-NL&channel=I"
        )
        
        headers = {
            'accept': 'application/json',
            'accept-language': 'en-US,en;q=0.9,nl;q=0.8',
            'Cookie': 'incap_ses_1581_2280942=FvpNPI919nl1UsyZNNfwFR6GL2cAAAAA8yX5UHfYkaWD/ax05SY+Jg==; visid_incap_2280942=CzAo1Y6yTMa654jo8RfVRtBiL2cAAAAAQkIPAAAAAAAE7rpFtkP20364qTw5VQQQ'
        }
        
        # Send request
        response = requests.get(url, headers=headers)
        json_data = response.json()

        # Initialize lists to store the data for this batch
        event_ids = []
        event_names = []
        market_names = []
        outcome_names = []
        odds_decimal = []
        prices_numerator = []
        prices_denominator = []
        outcome_types = []
        outcome_subtypes = []

        # Process the JSON response for each event
        for event in json_data['data']['events']:  # Iterate over each event
            event_id = event['id']
            event_name = event['name']
            
            for market in event['markets']:  # Iterate over each market in the event
                market_name = market['name']
                
                for outcome in market['outcomes']:  # Iterate over outcomes for each market
                    outcome_name = outcome['name']
                    odds_decimal_value = outcome['prices'][0]['decimal']
                    numerator = outcome['prices'][0]['numerator']
                    denominator = outcome['prices'][0]['denominator']
                    outcome_type = outcome['type']
                    outcome_subtype = outcome['subType']
                    
                    # Append captured values to the respective lists
                    event_ids.append(event_id)
                    event_names.append(event_name)
                    market_names.append(market_name)
                    outcome_names.append(outcome_name)
                    odds_decimal.append(odds_decimal_value)
                    prices_numerator.append(numerator)
                    prices_denominator.append(denominator)
                    outcome_types.append(outcome_type)
                    outcome_subtypes.append(outcome_subtype)

        # Create a temporary DataFrame for this batch and append it to all_data
        temp_df = pd.DataFrame({
            'event_id': event_ids,
            'Event Name': event_names,
            'Market Name': market_names,
            'Outcome Name': outcome_names,
            'Odds (Decimal)': odds_decimal,
            'Price Numerator': prices_numerator,
            'Price Denominator': prices_denominator,
            'Outcome Type': outcome_types,
            'Outcome SubType': outcome_subtypes
        })
        
        all_data.append(temp_df)
        
        # Wait between 1 and 3 seconds
        time.sleep(random.uniform(1, 3.001))
    except:
        None

# Concatenate all individual DataFrames into a final DataFrame
final_df = pd.concat(all_data, ignore_index=True)

# Show the final DataFrame
print(final_df)

      event_id                                   Event Name  \
0      6619004  Northern Districts (W) vs Central Hinds (W)   
1      6619004  Northern Districts (W) vs Central Hinds (W)   
2      6619004  Northern Districts (W) vs Central Hinds (W)   
3      6619004  Northern Districts (W) vs Central Hinds (W)   
4      6619004  Northern Districts (W) vs Central Hinds (W)   
...        ...                                          ...   
28438  6308709        Sharaputdin Magomedov vs Michael Page   
28439  6239787                Weili Zhang vs Tatiana Suarez   
28440  6239787                Weili Zhang vs Tatiana Suarez   
28441  6239065         Dricus Du Plessis vs Sean Strickland   
28442  6239065         Dricus Du Plessis vs Sean Strickland   

                                             Market Name  \
0      Innings 1 Kerry-Anne Tomlinson Aantal Runs - O...   
1      Innings 1 Kerry-Anne Tomlinson Aantal Runs - O...   
2                     Winnaar Wedstrijd (Inc Super Over)   
3  

In [8]:
# Perform the left join on 'event_id' to get 'sport' and 'group_name' from events_df
final_df = final_df.merge(final_matches_df[['event_id', 'sport', 'competition']], on='event_id', how='left').drop_duplicates()
# Remove columns that end with '_x'
final_df = final_df[[col for col in final_df.columns if not col.endswith('_x')]]

# Rename columns to remove '_y'
final_df.columns = final_df.columns.str.replace('_y', '', regex=False)

In [9]:
# Replace 'A' with 2 and 'H' with 1 in the outcome_subtype column
final_df['Outcome SubType'] = final_df['Outcome SubType'].replace({'A': '2', 'H': '1'})

# Display the updated final_df
print(final_df)

      event_id                                   Event Name  \
0      6619004  Northern Districts (W) vs Central Hinds (W)   
3      6619004  Northern Districts (W) vs Central Hinds (W)   
6      6619004  Northern Districts (W) vs Central Hinds (W)   
9      6619004  Northern Districts (W) vs Central Hinds (W)   
12     6619004  Northern Districts (W) vs Central Hinds (W)   
...        ...                                          ...   
39121  6308709        Sharaputdin Magomedov vs Michael Page   
39122  6239787                Weili Zhang vs Tatiana Suarez   
39123  6239787                Weili Zhang vs Tatiana Suarez   
39124  6239065         Dricus Du Plessis vs Sean Strickland   
39125  6239065         Dricus Du Plessis vs Sean Strickland   

                                             Market Name  \
0      Innings 1 Kerry-Anne Tomlinson Aantal Runs - O...   
3      Innings 1 Kerry-Anne Tomlinson Aantal Runs - O...   
6                     Winnaar Wedstrijd (Inc Super Over)   
9  

In [10]:
set(final_df[final_df['sport'].str.contains('Voetbal')].competition)

{'Algerije Ligue 1',
 'Australië W-League',
 'Brazil Gaucho A1',
 'Brazil Supercup',
 'Brazilië Mineiro Modulo I',
 'Copa Libertadores',
 'Engeland FA Super League Vrouwen',
 'Ethiopië Premier League',
 'Europa Super League',
 'Europees Kampioenschap',
 'Finland Liiga Cup',
 'Guatemala Liga Nacional',
 'India Super League',
 'Indonesië Liga 1',
 'Italy Serie C',
 'La Liga 23/24',
 'Mexico League MX Vrouwen',
 'Myanmar National League',
 'Nederlandse competitie',
 'Noord-Ierland Premiership',
 'Panama Liga de Fútbol',
 'Portugal Liga 3',
 'Portugal Liga Revelacao U23',
 'Premier League 23/24',
 'Qatar Stars League',
 'Schotland Championship',
 'Schotland League One',
 'Schotland League Two',
 'Serie A 23/24',
 'Thailand League 1',
 'VAE Arabian Gulf League',
 'Vriendschappelijk',
 'WK Voetbal',
 'WK Vrouwen'}

In [11]:
final_df[(final_df['Event Name'].str.contains('Milan')) & final_df['sport']=='Voetbal']

Unnamed: 0,event_id,Event Name,Market Name,Outcome Name,Odds (Decimal),Price Numerator,Price Denominator,Outcome Type,Outcome SubType,sport,competition


In [12]:
[i for i in list(set(final_df['Market Name'])) if 'No Bet' in i]

['1e Helft: Draw No Bet', 'Draw No Bet', '2e Helft: Draw No Bet']

In [13]:
final_output_name = 'totoAllSports' + start_time + '.csv'
final_df.to_csv(final_output_name)

## Check id ranges

In [14]:
# import requests
# import time
# import random
# import pandas as pd

# # Initialize an empty list to store all match data
# all_matches = []

# # Initialize a dictionary to store sports and competitions by drillDownTagIds range
# range_summary = {}

# # Loop over drillDownTagIds ranges in steps of 1000
# for start in range(0, 18000, 200):
#     # Generate a comma-separated string of drillDownTagIds for the current range
#     drilldown_ids = ','.join(str(i) for i in range(start, start + 200))

#     # Define the request URL
#     url = (
#         f"https://content.toto.nl/content-service/api/v1/q/event-list?"
#         "startTimeFrom=2025-01-15T23:00:00Z&"
#         "startTimeTo=2025-01-30T22:59:59Z&"
#         "liveNow=false&"
#         "maxEvents=190&"
#         "orderEventsBy=popularity&"
#         "orderMarketsBy=displayOrder&"
#         "marketSortsIncluded=--,CS,DC,DN,HH,HL,MH,MR,WH&"
#         "marketGroupTypesIncluded=CUSTOM_GROUP,DOUBLE_CHANCE,DRAW_NO_BET,MATCH_RESULT,"
#         "MATCH_WINNER,MONEYLINE,ROLLING_SPREAD,ROLLING_TOTAL,STATIC_SPREAD,STATIC_TOTAL&"
#         "eventSortsIncluded=MTCH&"
#         "includeChildMarkets=true&"
#         "prioritisePrimaryMarkets=true&"
#         "includeCommentary=true&"
#         "includeMedia=true&"
#         f"drilldownTagIds={drilldown_ids}&"
#         "categoryCode=tennis&"  # Specify the desired category
#         "lang=nl-NL&"
#         "channel=I"
#     )

#     # Headers for the request
#     headers = {
#         'accept': 'application/json',
#         'accept-language': 'en-US,en;q=0.9,nl;q=0.8',
#     }

#     try:
#         # Make the request
#         response = requests.get(url, headers=headers)
#         response.raise_for_status()  # Raise HTTPError for bad responses

#         # Parse the JSON response
#         data = response.json()

#         # Extract match data if available
#         matches = []
#         sports_set = set()
#         competitions_set = set()

#         for event in data.get('data', {}).get('events', []):
#             # Collect match data
#             match = {
#                 "event_id": event.get("id"),
#                 "match_name": event.get("name"),
#                 "start_time": event.get("startTime"),
#                 "home_team": next((team['name'] for team in event.get('teams', []) if team['side'] == "HOME"), None),
#                 "away_team": next((team['name'] for team in event.get('teams', []) if team['side'] == "AWAY"), None),
#                 "competition": event.get('type', {}).get('name'),
#                 "country": event.get('class', {}).get('name'),
#                 "sport": event.get('category', {}).get('name'),
#             }

#             # Collect sports and competitions
#             sports_set.add(match["sport"])
#             competitions_set.add(match["competition"])

#             # Extract odds if available
#             outcomes = event.get("markets", [{}])[0].get("outcomes", [])
#             match["home_odds"] = next((outcome['prices'][0]['decimal'] for outcome in outcomes if outcome.get('subType') == "H"), None)
#             match["away_odds"] = next((outcome['prices'][0]['decimal'] for outcome in outcomes if outcome.get('subType') == "A"), None)

#             matches.append(match)

#         # Add matches to the all_matches list
#         all_matches.extend(matches)

#         # Add sports and competitions to the range summary
#         range_summary[f"{start}-{start + 200}"] = {
#             "sports": list(sports_set),
#             "competitions": list(competitions_set)
#         }

#         # Sleep for a random duration between 1 and 2 seconds
#         time.sleep(random.uniform(1, 2.001))

#     except Exception as e:
#         print(f"An error occurred for drilldown ID range {start}–{start + 200}: {e}")

# # Combine all match data into a single DataFrame
# final_matches_df = pd.DataFrame(all_matches)

# # Display the range summary
# for range_key, summary in range_summary.items():
#     print(f"\nDrillDownTagIds Range: {range_key}")
#     print(f"Sports: {summary['sports']}")
#     print(f"Competitions: {summary['competitions']}")

# # Save the match data to a CSV or Excel file if needed
# final_matches_df.to_csv("matches_output.csv", index=False)

# # Optionally, save the range summary to a file
# pd.DataFrame.from_dict(range_summary, orient="index").to_csv("range_summary.csv")

In [15]:
# range starts at 1500 and stops at 13000

In [16]:
# range_summary

In [17]:
# keys_with_basketbal = [key for key, value in range_summary.items() if 'Voetbal' in value.get('sports', [])]
# keys_with_basketbal

In [18]:
# keys_with_voetbal = [key for key, value in range_summary.items() if 'Basketbal' in value.get('sports', [])]
# keys_with_voetbal