In [41]:
import requests
import json
import pandas as pd
import time
import random

In [42]:
from datetime import datetime, timedelta

# Current time in UTC
now = datetime.utcnow()

# Calculate the start time and end time
start_time = now.strftime("%Y-%m-%dT%H:%M:%SZ")  # Format as ISO 8601
end_time = (now + timedelta(weeks=1)).strftime("%Y-%m-%dT%H:%M:%SZ")  # 3 weeks later

# Construct the URL
print(start_time)
print(end_time)

2025-01-07T10:16:22Z
2025-01-14T10:16:22Z


In [43]:
import requests
import time
import random
import pandas as pd

# Initialize an empty list to store each batch of matches data
all_matches = []

# Loop over drillDownTagIds ranges in steps of 900
for start in range(1400, 13700, 200):
    # Generate a comma-separated string of drillDownTagIds for the current range
    drilldown_ids = ','.join(str(i) for i in range(start, start + 200))

    # Define the request URL
    url = (
        f"https://content.toto.nl/content-service/api/v1/q/event-list?"
        "startTimeFrom=2025-01-06T23:00:00Z&"
        "startTimeTo=2025-01-18T22:59:59Z&"
        "liveNow=false&"
        "maxEvents=190&"
        "orderEventsBy=popularity&"
        "orderMarketsBy=displayOrder&"
        "marketSortsIncluded=--,CS,DC,DN,HH,HL,MH,MR,WH&"
        "marketGroupTypesIncluded=CUSTOM_GROUP,DOUBLE_CHANCE,DRAW_NO_BET,MATCH_RESULT,"
        "MATCH_WINNER,MONEYLINE,ROLLING_SPREAD,ROLLING_TOTAL,STATIC_SPREAD,STATIC_TOTAL&"
        "eventSortsIncluded=MTCH&"
        "includeChildMarkets=true&"
        "prioritisePrimaryMarkets=true&"
        "includeCommentary=true&"
        "includeMedia=true&"
        f"drilldownTagIds={drilldown_ids}&"
        "categoryCode=tennis&"  # Specify the desired category
        "lang=nl-NL&"
        "channel=I"
    )

    # Headers for the request
    headers = {
        'accept': 'application/json',
        'accept-language': 'en-US,en;q=0.9,nl;q=0.8',
    }

    try:
        # Make the request
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise HTTPError for bad responses

        # Parse the JSON response
        data = response.json()

        # Extract match data if available
        matches = []
        for event in data.get('data', {}).get('events', []):
            match = {
                "event_id": event.get("id"),
                "match_name": event.get("name"),
                "start_time": event.get("startTime"),
                "home_team": next((team['name'] for team in event.get('teams', []) if team['side'] == "HOME"), None),
                "away_team": next((team['name'] for team in event.get('teams', []) if team['side'] == "AWAY"), None),
                "competition": event.get('type', {}).get('name'),
                "country": event.get('class', {}).get('name'),
                "sport": event.get('category', {}).get('name'),
            }

            # Extract odds if available
            outcomes = event.get("markets", [{}])[0].get("outcomes", [])
            match["home_odds"] = next((outcome['prices'][0]['decimal'] for outcome in outcomes if outcome.get('subType') == "H"), None)
            match["away_odds"] = next((outcome['prices'][0]['decimal'] for outcome in outcomes if outcome.get('subType') == "A"), None)
            
            matches.append(match)

        # Add the batch of matches to the all_matches list
        all_matches.extend(matches)

        # Sleep for a random duration between 1 and 4 seconds, with variability of 0.1 ms
        time.sleep(random.uniform(1, 2.001))

    except Exception as e:
        print(f"An error occurred for drilldown ID range {start}–{start + 200}: {e}")

# Combine all match data into a single DataFrame
final_matches_df = pd.DataFrame(all_matches)

# Display the resulting DataFrame
print(final_matches_df)

     event_id                                      match_name  \
0     6473087                      Nieuw-Zeeland vs Sri Lanka   
1     6483192             Fortune Barishal vs Sylhet Strikers   
2     6492630  AS Cannes Volley Ball vs Montpellier Volley UC   
3     6491812              Adelaide Strikers vs Brisbane Heat   
4     6492275                Allister Carter vs Chris Wakelin   
...       ...                                             ...   
1182  6492259                          SI Jiahui vs Jak Jones   
1183  6492273                         SI Jiahui vs Pang Junxu   
1184  6492260                           SI Jiahui vs Ryan Day   
1185  6308687         Santiago Ponzinibbio vs Carlston Harris   
1186  6308693                     Thiago Moises vs Trey Ogden   

                start_time              home_team              away_team  \
0     2025-01-08T01:00:00Z          Nieuw-Zeeland              Sri Lanka   
1     2025-01-07T12:30:00Z       Fortune Barishal        Sylhet Str

In [46]:
len(final_matches_df)

1194

In [47]:
final_matches_df

Unnamed: 0,event_id,match_name,start_time,home_team,away_team,competition,country,sport,home_odds,away_odds
0,6473087,Nieuw-Zeeland vs Sri Lanka,2025-01-08T01:00:00Z,Nieuw-Zeeland,Sri Lanka,ODI Series,Internationaal,Cricket,1.40,3.00
1,6483192,Fortune Barishal vs Sylhet Strikers,2025-01-07T12:30:00Z,Fortune Barishal,Sylhet Strikers,Bangladesh Premier League,Bangladesh,Cricket,1.44,2.80
2,6492630,AS Cannes Volley Ball vs Montpellier Volley UC,2025-01-07T19:00:00Z,AS Cannes Volley Ball,Montpellier Volley UC,Frankrijk Coupe de France,Frankrijk,Volleybal,2.75,1.40
3,6491812,Adelaide Strikers vs Brisbane Heat,2025-01-11T09:00:00Z,Adelaide Strikers,Brisbane Heat,Australië Twenty20 Big Bash,Australië,Cricket,1.95,1.87
4,6492275,Allister Carter vs Chris Wakelin,2025-01-07T11:00:00Z,Allister Carter,Chris Wakelin,Championship League Snooker,Groot-Brittannië,Snooker,1.78,2.05
...,...,...,...,...,...,...,...,...,...,...
1189,6492259,SI Jiahui vs Jak Jones,2025-01-07T16:30:00Z,SI Jiahui,Jak Jones,Championship League Snooker,Groot-Brittannië,Snooker,1.78,2.05
1190,6492273,SI Jiahui vs Pang Junxu,2025-01-07T12:30:00Z,SI Jiahui,Pang Junxu,Championship League Snooker,Groot-Brittannië,Snooker,1.71,2.15
1191,6492260,SI Jiahui vs Ryan Day,2025-01-07T11:00:00Z,SI Jiahui,Ryan Day,Championship League Snooker,Groot-Brittannië,Snooker,1.63,2.30
1192,6308687,Santiago Ponzinibbio vs Carlston Harris,2025-01-12T01:20:00Z,Santiago Ponzinibbio,Carlston Harris,UFC Fight Night: Dern vs. Ribas 2,UFC,Mixed Martial Arts,1.83,1.91


In [48]:
import requests
import time
import random
import pandas as pd

# Initialize an empty list to accumulate all data
all_data = []

# Set batch size
batch_size = 100

# Loop over event_ids in batches of 500
for i in range(0, len(final_matches_df['event_id']), batch_size):
    try:
        # Get a batch of event_ids
        batch_ids = final_matches_df['event_id'][i:i + batch_size]
        
        # Concatenate event_ids for the URL
        event_ids_str = ','.join(map(str, batch_ids))
        
        # Define the URL for the current batch
        url = (
            f"https://content.toto.nl/content-service/api/v1/q/events-by-ids?"
            f"eventIds={event_ids_str}&"
            "includeChildMarkets=true&includeCollections=true&includePriorityCollectionChildMarkets=true&"
            "includePriceHistory=false&includeCommentary=true&includeIncidents=false&includeRace=false&"
            "includePools=false&includeNonFixedOdds=false&lang=nl-NL&channel=I"
        )
        
        headers = {
            'accept': 'application/json',
            'accept-language': 'en-US,en;q=0.9,nl;q=0.8',
            'Cookie': 'incap_ses_1581_2280942=FvpNPI919nl1UsyZNNfwFR6GL2cAAAAA8yX5UHfYkaWD/ax05SY+Jg==; visid_incap_2280942=CzAo1Y6yTMa654jo8RfVRtBiL2cAAAAAQkIPAAAAAAAE7rpFtkP20364qTw5VQQQ'
        }
        
        # Send request
        response = requests.get(url, headers=headers)
        json_data = response.json()

        # Initialize lists to store the data for this batch
        event_ids = []
        event_names = []
        market_names = []
        outcome_names = []
        odds_decimal = []
        prices_numerator = []
        prices_denominator = []
        outcome_types = []
        outcome_subtypes = []

        # Process the JSON response for each event
        for event in json_data['data']['events']:  # Iterate over each event
            event_id = event['id']
            event_name = event['name']
            
            for market in event['markets']:  # Iterate over each market in the event
                market_name = market['name']
                
                for outcome in market['outcomes']:  # Iterate over outcomes for each market
                    outcome_name = outcome['name']
                    odds_decimal_value = outcome['prices'][0]['decimal']
                    numerator = outcome['prices'][0]['numerator']
                    denominator = outcome['prices'][0]['denominator']
                    outcome_type = outcome['type']
                    outcome_subtype = outcome['subType']
                    
                    # Append captured values to the respective lists
                    event_ids.append(event_id)
                    event_names.append(event_name)
                    market_names.append(market_name)
                    outcome_names.append(outcome_name)
                    odds_decimal.append(odds_decimal_value)
                    prices_numerator.append(numerator)
                    prices_denominator.append(denominator)
                    outcome_types.append(outcome_type)
                    outcome_subtypes.append(outcome_subtype)

        # Create a temporary DataFrame for this batch and append it to all_data
        temp_df = pd.DataFrame({
            'event_id': event_ids,
            'Event Name': event_names,
            'Market Name': market_names,
            'Outcome Name': outcome_names,
            'Odds (Decimal)': odds_decimal,
            'Price Numerator': prices_numerator,
            'Price Denominator': prices_denominator,
            'Outcome Type': outcome_types,
            'Outcome SubType': outcome_subtypes
        })
        
        all_data.append(temp_df)
        
        # Wait between 1 and 3 seconds
        time.sleep(random.uniform(1, 3.001))
    except:
        None

# Concatenate all individual DataFrames into a final DataFrame
final_df = pd.concat(all_data, ignore_index=True)

# Show the final DataFrame
print(final_df)

      event_id                              Event Name  \
0      6492275        Allister Carter vs Chris Wakelin   
1      6492275        Allister Carter vs Chris Wakelin   
2      6492275        Allister Carter vs Chris Wakelin   
3      6492275        Allister Carter vs Chris Wakelin   
4      6492275        Allister Carter vs Chris Wakelin   
...        ...                                     ...   
27858  6310323  Geneve Servette HC vs Zsc Lions Zurich   
27859  6310323  Geneve Servette HC vs Zsc Lions Zurich   
27860  6310323  Geneve Servette HC vs Zsc Lions Zurich   
27861  6310323  Geneve Servette HC vs Zsc Lions Zurich   
27862  6310323  Geneve Servette HC vs Zsc Lions Zurich   

                          Market Name                      Outcome Name  \
0               Handicap Winnaar -1.5                   Allister Carter   
1               Handicap Winnaar -1.5                     Chris Wakelin   
2                           Wedstrijd                     Chris Wakelin   
3  

In [49]:
# Perform the left join on 'event_id' to get 'sport' and 'group_name' from events_df
final_df = final_df.merge(final_matches_df[['event_id', 'sport', 'competition']], on='event_id', how='left').drop_duplicates()
# Remove columns that end with '_x'
final_df = final_df[[col for col in final_df.columns if not col.endswith('_x')]]

# Rename columns to remove '_y'
final_df.columns = final_df.columns.str.replace('_y', '', regex=False)

In [50]:
# Replace 'A' with 2 and 'H' with 1 in the outcome_subtype column
final_df['Outcome SubType'] = final_df['Outcome SubType'].replace({'A': '2', 'H': '1'})

# Display the updated final_df
print(final_df)

      event_id                              Event Name  \
0      6492275        Allister Carter vs Chris Wakelin   
2      6492275        Allister Carter vs Chris Wakelin   
4      6492275        Allister Carter vs Chris Wakelin   
6      6492275        Allister Carter vs Chris Wakelin   
8      6492275        Allister Carter vs Chris Wakelin   
...        ...                                     ...   
37361  6310323  Geneve Servette HC vs Zsc Lions Zurich   
37362  6310323  Geneve Servette HC vs Zsc Lions Zurich   
37363  6310323  Geneve Servette HC vs Zsc Lions Zurich   
37364  6310323  Geneve Servette HC vs Zsc Lions Zurich   
37365  6310323  Geneve Servette HC vs Zsc Lions Zurich   

                          Market Name                      Outcome Name  \
0               Handicap Winnaar -1.5                   Allister Carter   
2               Handicap Winnaar -1.5                     Chris Wakelin   
4                           Wedstrijd                     Chris Wakelin   
6  

In [51]:
set(final_df[final_df['sport'].str.contains('Voetbal')].competition)

{'Australië W-League',
 'Egypte Second Division',
 'England FA Cup Women',
 'Ethiopië Premier League',
 'Europa Super League',
 'Europees Kampioenschap',
 'Frankrijk Division 1 Vrouwen',
 'India I-League',
 'India Super League',
 'Indonesië Liga 1',
 'La Liga 23/24',
 'Nederlandse competitie',
 'Noord Ierland League Cup',
 'Noord-Ierland Premiership',
 'Oeganda Premier League',
 'Portugal League Cup',
 'Portugal Liga 3',
 'Portugal Liga Revelacao U23',
 'Premier League 23/24',
 'Qatar Stars League',
 'Schotland Championship',
 'Schotland League One',
 'Schotland League Two',
 'Serie A 23/24',
 'Spain Primera División Vrouwen',
 'Supercopa',
 'Thailand League 1',
 'WK Voetbal',
 'WK Vrouwen'}

In [33]:
final_df[(final_df['Event Name'].str.contains('Milan')) & final_df['sport']=='Voetbal']

Unnamed: 0,event_id,Event Name,Market Name,Outcome Name,Odds (Decimal),Price Numerator,Price Denominator,Outcome Type,Outcome SubType,sport,competition


In [21]:
[i for i in list(set(final_df['Market Name'])) if 'No Bet' in i]

['1e Periode - Draw No Bet',
 '3e Periode - Draw No Bet',
 '2e Periode - Draw No Bet',
 '2e Helft: Draw No Bet',
 'Draw No Bet',
 '1e Helft: Draw No Bet']

In [52]:
final_df.to_csv('totoAllSports07012025.csv')

In [54]:
import requests
import time
import random
import pandas as pd

# Initialize an empty list to store all match data
all_matches = []

# Initialize a dictionary to store sports and competitions by drillDownTagIds range
range_summary = {}

# Loop over drillDownTagIds ranges in steps of 1000
for start in range(1400, 13800, 200):
    # Generate a comma-separated string of drillDownTagIds for the current range
    drilldown_ids = ','.join(str(i) for i in range(start, start + 200))

    # Define the request URL
    url = (
        f"https://content.toto.nl/content-service/api/v1/q/event-list?"
        "startTimeFrom=2025-01-06T23:00:00Z&"
        "startTimeTo=2025-01-18T22:59:59Z&"
        "liveNow=false&"
        "maxEvents=190&"
        "orderEventsBy=popularity&"
        "orderMarketsBy=displayOrder&"
        "marketSortsIncluded=--,CS,DC,DN,HH,HL,MH,MR,WH&"
        "marketGroupTypesIncluded=CUSTOM_GROUP,DOUBLE_CHANCE,DRAW_NO_BET,MATCH_RESULT,"
        "MATCH_WINNER,MONEYLINE,ROLLING_SPREAD,ROLLING_TOTAL,STATIC_SPREAD,STATIC_TOTAL&"
        "eventSortsIncluded=MTCH&"
        "includeChildMarkets=true&"
        "prioritisePrimaryMarkets=true&"
        "includeCommentary=true&"
        "includeMedia=true&"
        f"drilldownTagIds={drilldown_ids}&"
        "categoryCode=tennis&"  # Specify the desired category
        "lang=nl-NL&"
        "channel=I"
    )

    # Headers for the request
    headers = {
        'accept': 'application/json',
        'accept-language': 'en-US,en;q=0.9,nl;q=0.8',
    }

    try:
        # Make the request
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise HTTPError for bad responses

        # Parse the JSON response
        data = response.json()

        # Extract match data if available
        matches = []
        sports_set = set()
        competitions_set = set()

        for event in data.get('data', {}).get('events', []):
            # Collect match data
            match = {
                "event_id": event.get("id"),
                "match_name": event.get("name"),
                "start_time": event.get("startTime"),
                "home_team": next((team['name'] for team in event.get('teams', []) if team['side'] == "HOME"), None),
                "away_team": next((team['name'] for team in event.get('teams', []) if team['side'] == "AWAY"), None),
                "competition": event.get('type', {}).get('name'),
                "country": event.get('class', {}).get('name'),
                "sport": event.get('category', {}).get('name'),
            }

            # Collect sports and competitions
            sports_set.add(match["sport"])
            competitions_set.add(match["competition"])

            # Extract odds if available
            outcomes = event.get("markets", [{}])[0].get("outcomes", [])
            match["home_odds"] = next((outcome['prices'][0]['decimal'] for outcome in outcomes if outcome.get('subType') == "H"), None)
            match["away_odds"] = next((outcome['prices'][0]['decimal'] for outcome in outcomes if outcome.get('subType') == "A"), None)

            matches.append(match)

        # Add matches to the all_matches list
        all_matches.extend(matches)

        # Add sports and competitions to the range summary
        range_summary[f"{start}-{start + 200}"] = {
            "sports": list(sports_set),
            "competitions": list(competitions_set)
        }

        # Sleep for a random duration between 1 and 2 seconds
        time.sleep(random.uniform(1, 2.001))

    except Exception as e:
        print(f"An error occurred for drilldown ID range {start}–{start + 200}: {e}")

# Combine all match data into a single DataFrame
final_matches_df = pd.DataFrame(all_matches)

# Display the range summary
for range_key, summary in range_summary.items():
    print(f"\nDrillDownTagIds Range: {range_key}")
    print(f"Sports: {summary['sports']}")
    print(f"Competitions: {summary['competitions']}")

# Save the match data to a CSV or Excel file if needed
final_matches_df.to_csv("matches_output.csv", index=False)

# Optionally, save the range summary to a file
pd.DataFrame.from_dict(range_summary, orient="index").to_csv("range_summary.csv")


DrillDownTagIds Range: 1400-1600
Sports: ['Snooker', 'Basketbal', 'Volleybal', 'Voetbal', 'Cricket', 'IJshockey']
Competitions: ['Estland-Letland Basketball League', 'Bangladesh Premier League', 'The Masters', 'Schotland Championship', 'Frankrijk Coupe de France', 'Schotland League One', 'Thailand League 1', 'Schotland League Two', 'New Zealand Super Smash, Women', 'Litouwen LKL', 'Denemarken Superisligaen', 'Vijay Hazare Trophy', 'Oostenrijk Eishockey Liga', 'South Africa SA20 League', 'ODI Series', 'ODI Series Women', 'België Liga Mannen', 'Noord-Ierland Premiership', 'Spain Primera División Vrouwen', "PBA Commissioner's Cup", 'Australië Twenty20 Big Bash', 'Championship League Snooker', 'Noorwegen GET-ligaen', 'Finland Korisliiga']

DrillDownTagIds Range: 1600-1800
Sports: ['Tafeltennis', 'Wintersport', 'Basketbal', 'Badminton', 'Volleybal', 'Voetbal', 'Tennis', 'IJshockey']
Competitions: ['CEV Cup Vrouwen', 'Slowakije Extraliga', 'WTT Star Contender Doha, Doubles', 'Czech Liga Pro

In [55]:
# range starts at 1500 and stops at 13000

In [56]:
range_summary

{'1400-1600': {'sports': ['Snooker',
   'Basketbal',
   'Volleybal',
   'Voetbal',
   'Cricket',
   'IJshockey'],
  'competitions': ['Estland-Letland Basketball League',
   'Bangladesh Premier League',
   'The Masters',
   'Schotland Championship',
   'Frankrijk Coupe de France',
   'Schotland League One',
   'Thailand League 1',
   'Schotland League Two',
   'New Zealand Super Smash, Women',
   'Litouwen LKL',
   'Denemarken Superisligaen',
   'Vijay Hazare Trophy',
   'Oostenrijk Eishockey Liga',
   'South Africa SA20 League',
   'ODI Series',
   'ODI Series Women',
   'België Liga Mannen',
   'Noord-Ierland Premiership',
   'Spain Primera División Vrouwen',
   "PBA Commissioner's Cup",
   'Australië Twenty20 Big Bash',
   'Championship League Snooker',
   'Noorwegen GET-ligaen',
   'Finland Korisliiga']},
 '1600-1800': {'sports': ['Tafeltennis',
   'Wintersport',
   'Basketbal',
   'Badminton',
   'Volleybal',
   'Voetbal',
   'Tennis',
   'IJshockey'],
  'competitions': ['CEV Cup V

In [None]:
keys_with_basketbal = [key for key, value in range_summary.items() if 'Voetbal' in value.get('sports', [])]
keys_with_basketbal

In [58]:
keys_with_voetbal = [key for key, value in range_summary.items() if 'Basketbal' in value.get('sports', [])]
keys_with_voetbal

['1400-1600',
 '1600-1800',
 '1800-2000',
 '3200-3400',
 '3400-3600',
 '3600-3800',
 '4200-4400',
 '5800-6000',
 '8800-9000',
 '9000-9200']