## Set up matching DF's

In [72]:
import pandas as pd
# Set pandas to display all columns
pd.set_option('display.max_columns', None)

In [73]:
toto_raw = pd.read_csv('Data/scrapers/Toto/totoAllSports25122024.csv', index_col=0).drop_duplicates()
kambi_raw = pd.read_csv('Data/scrapers/unibet/unibet_allSports_odds_25122024.csv', index_col=0).drop_duplicates()
# Apply the condition and update outcome_label
toto_raw_basketbal = toto_raw[toto_raw['sport'] == 'Basketbal']
kambi_raw_basketbal = kambi_raw[kambi_raw['sport'] == 'BASKETBALL']

Below is a translation of the abbreviations for the column outcome types into their likely meanings in the context of betting odds:

- `--`: Undefined or No Outcome (e.g., no odds available).
- `AG`: Anytime Goal Scorer (player to score at any time in the match).
- `CS`: Correct Score (predict the exact final score of the event).
- `DC`: Double Chance (cover two outcomes, e.g., Home Win or Draw).
- `DN`: Draw No Bet (stake refunded if the match ends in a draw).
- `FS`: First Scorer (player to score the first goal/point in the event).
- `H1`: First Half Result (outcome at the end of the first half).
- `H2`: Second Half Result (outcome in the second half only).
- `HF`: Half-Time/Full-Time (predict the result at both half-time and full-time).
- `HH`: Head-to-Head (comparison between two participants, e.g., players or teams).
- `HL`: Handicap Line (spread betting; adjust the line to level the playing field).
- `LS`: Last Scorer (player to score the last goal/point in the event).
- `MH`: Match Handicap (spread betting for the entire match).
- `MR`: Match Result (predict the overall winner or draw for the match).
- `OE`: Odd/Even (predict whether the total points/goals scored will be odd or even).
- `WH`: Winning Half (which half will have the higher score).
- `WM`: Winning Margin (predict the margin by which a team/player will win).

In [74]:
# filter down to suitable betting opps
kambi_filtered_basketbal = kambi_raw_basketbal[kambi_raw_basketbal['bet_offer_type_english_name'].isin(['Match', 'Odd/Even', 'Player Occurrence Line', 'Asian Over/Under', 'Over/Under', 'Handicap', 'Asian Handicap', 'Yes/No', 'Head to Head'])]
toto_filtered_basketbal = toto_raw_basketbal[toto_raw_basketbal['Outcome Type'].isin(['--', 'DN', 'OE', 'HH', 'HL', 'AG'])]

kambi_filtered_basketbal = kambi_raw_basketbal
toto_filtered_basketbal = toto_raw_basketbal[toto_raw_basketbal['Outcome Type'].isin(['DN', 'OE', 'HH', 'HL', 'AG'])]

# Remove duplicates in toto_filtered_basketbal based on specified columns
toto_filtered_basketbal = toto_filtered_basketbal.drop_duplicates(
    subset=['Event Name', 'Market Name', 'Outcome Name']
)
# Remove duplicates in kambi_filtered_basketbal based on specified columns
kambi_filtered_basketbal = kambi_filtered_basketbal.drop_duplicates(
    subset=['event_name', 'outcome_label', 'criterion_label']
)

## Winnaar

In [75]:
from rapidfuzz import process, fuzz
import pandas as pd

# Filter kambi_filtered_basketbal based on criterion_label
filtered_kambi_winnaar = kambi_filtered_basketbal[
    kambi_filtered_basketbal['criterion_label'] == 'Resultaat aan het Eind van 4e Kwart'
]

# Filter kambi_filtered_basketbal based on criterion_label
filtered_toto_winnaar = toto_filtered_basketbal[
    toto_filtered_basketbal['Market Name'] == 'Winnaar'
]

# Preprocess strings: Remove extra whitespace and lowercase
def preprocess(text):
    return ' '.join(text.lower().split())

In [76]:
# Create a dictionary for faster lookups of event_name in kambi_filtered_basketbal
kambi_events = filtered_kambi_winnaar['event_name'].tolist()

# Function to find the best match for 'Event Name' in toto_filtered_basketbal
def find_best_match(event_name):
    # Use a token-based similarity for better handling of substrings
    result = process.extractOne(event_name, kambi_events, scorer=fuzz.token_set_ratio, score_cutoff=70)
    if result is None:  # No match found
        return None
    match, score, _ = result
    return match

# Apply matching function to toto_filtered_basketbal
toto_filtered_basketbal['matched_event'] = toto_filtered_basketbal['Event Name'].apply(find_best_match)

In [77]:
# Filter rows in toto_filtered_basketbal where Market Name == 'Winnaar'
toto_filtered_basketbal = toto_filtered_basketbal[toto_filtered_basketbal['Market Name'] == 'Winnaar']

# Perform a left join on the matched_event_name (from toto) and event_name (from kambi)
merged_df = toto_filtered_basketbal.merge(
    filtered_kambi_winnaar,
    left_on='matched_event',
    right_on='event_name',
    how='left'
)

In [78]:
merged_df[merged_df['Event Name'].str.contains('Alba')]

Unnamed: 0,event_id_x,Event Name,Market Name,Outcome Name,Odds (Decimal),Price Numerator,Price Denominator,Outcome Type,Outcome SubType,sport_x,competition,matched_event,bet_offer_id,criterion_id,criterion_label,criterion_english_label,occurrence_type,lifetime,bet_offer_type_id,bet_offer_type_name,bet_offer_type_english_name,event_id_y,outcome_id,outcome_label,outcome_english_label,odds,line,participant,type,changed_date,odds_fractional,odds_american,status,cash_out_status,home_score,away_score,event_name,sport_y,group_name
46,6255852,Alba Berlin vs Real Madrid,Winnaar,Real Madrid,1.18,9,50,HH,2,Basketbal,Euroleague Mannen,Alba Berlin vs Real Madrid Baloncesto,2481232000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021227000.0,3598105000.0,2,2,1190.0,,Real Madrid Baloncesto,2,2024-12-24T17:23:59Z,2/11,-530.0,OPEN,ENABLED,,,Alba Berlin vs Real Madrid Baloncesto,BASKETBALL,
47,6255852,Alba Berlin vs Real Madrid,Winnaar,Real Madrid,1.18,9,50,HH,2,Basketbal,Euroleague Mannen,Alba Berlin vs Real Madrid Baloncesto,2481232000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021227000.0,3598105000.0,1,1,5250.0,,Alba Berlin,1,2024-12-24T17:23:59Z,17/4,425.0,OPEN,ENABLED,,,Alba Berlin vs Real Madrid Baloncesto,BASKETBALL,
48,6255852,Alba Berlin vs Real Madrid,Winnaar,Real Madrid,1.18,9,50,HH,2,Basketbal,Euroleague Mannen,Alba Berlin vs Real Madrid Baloncesto,2481232000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021227000.0,3598105000.0,X,X,18000.0,,,OT_CROSS,2024-12-24T17:23:59Z,17/1,1700.0,OPEN,ENABLED,,,Alba Berlin vs Real Madrid Baloncesto,BASKETBALL,
49,6255852,Alba Berlin vs Real Madrid,Winnaar,Alba Berlin,5.25,17,4,HH,1,Basketbal,Euroleague Mannen,Alba Berlin vs Real Madrid Baloncesto,2481232000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021227000.0,3598105000.0,2,2,1190.0,,Real Madrid Baloncesto,2,2024-12-24T17:23:59Z,2/11,-530.0,OPEN,ENABLED,,,Alba Berlin vs Real Madrid Baloncesto,BASKETBALL,
50,6255852,Alba Berlin vs Real Madrid,Winnaar,Alba Berlin,5.25,17,4,HH,1,Basketbal,Euroleague Mannen,Alba Berlin vs Real Madrid Baloncesto,2481232000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021227000.0,3598105000.0,1,1,5250.0,,Alba Berlin,1,2024-12-24T17:23:59Z,17/4,425.0,OPEN,ENABLED,,,Alba Berlin vs Real Madrid Baloncesto,BASKETBALL,
51,6255852,Alba Berlin vs Real Madrid,Winnaar,Alba Berlin,5.25,17,4,HH,1,Basketbal,Euroleague Mannen,Alba Berlin vs Real Madrid Baloncesto,2481232000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021227000.0,3598105000.0,X,X,18000.0,,,OT_CROSS,2024-12-24T17:23:59Z,17/1,1700.0,OPEN,ENABLED,,,Alba Berlin vs Real Madrid Baloncesto,BASKETBALL,


In [79]:
from rapidfuzz import fuzz

# Define a function to check similarity
def is_too_similar(outcome_name, participant):
    # Compute similarity score
    similarity = fuzz.ratio(str(outcome_name), str(participant))
    # Return True if similarity is greater than or equal to 70
    return similarity >= 70

# Filter merged_df to keep only rows where Outcome Name and participant are NOT too similar
filtered_merged_df = merged_df[
    ~merged_df.apply(lambda row: is_too_similar(row['Outcome Name'], row['participant']), axis=1)
]

filtered_merged_df_match_winner = filtered_merged_df[~(filtered_merged_df['matched_event'].isna()) & ~(filtered_merged_df['participant'].isna())]

# Display the filtered DataFrame
filtered_merged_df_match_winner.drop_duplicates(subset=['event_id_x', 'Event Name', 'Market Name', 'Outcome Name',
       'Odds (Decimal)', 'Price Numerator', 'Price Denominator',
       'Outcome Type', 'Outcome SubType', 'sport_x', 'competition',
       'matched_event', 'bet_offer_id', 'criterion_id', 'criterion_label',
       'criterion_english_label', 'occurrence_type', 'lifetime',
       'bet_offer_type_id', 'bet_offer_type_name',
       'bet_offer_type_english_name', 'event_id_y', 'outcome_id',
       'outcome_label', 'outcome_english_label', 'odds', 'line', 'participant',
       'type', 'changed_date', 'odds_fractional', 'odds_american', 'status',
       'cash_out_status', 'home_score', 'away_score', 'event_name', 'sport_y'])

Unnamed: 0,event_id_x,Event Name,Market Name,Outcome Name,Odds (Decimal),Price Numerator,Price Denominator,Outcome Type,Outcome SubType,sport_x,competition,matched_event,bet_offer_id,criterion_id,criterion_label,criterion_english_label,occurrence_type,lifetime,bet_offer_type_id,bet_offer_type_name,bet_offer_type_english_name,event_id_y,outcome_id,outcome_label,outcome_english_label,odds,line,participant,type,changed_date,odds_fractional,odds_american,status,cash_out_status,home_score,away_score,event_name,sport_y,group_name
9,5095868,New York Knicks vs San Antonio Spurs,Winnaar,San Antonio Spurs,4.15,63,20,HH,2,Basketbal,NBA,New York Knicks vs San Antonio Spurs,2481419000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021416000.0,3598716000.0,1,1,1250.0,,New York Knicks,1,2024-12-25T11:02:34Z,1/4,-400.0,OPEN,ENABLED,,,New York Knicks vs San Antonio Spurs,BASKETBALL,
11,5095868,New York Knicks vs San Antonio Spurs,Winnaar,New York Knicks,1.24,10,41,HH,1,Basketbal,NBA,New York Knicks vs San Antonio Spurs,2481419000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021416000.0,3598716000.0,2,2,4250.0,,San Antonio Spurs,2,2024-12-25T11:02:34Z,13/4,325.0,OPEN,ENABLED,,,New York Knicks vs San Antonio Spurs,BASKETBALL,
15,5095905,Dallas Mavericks vs Minnesota Timberwolves,Winnaar,Minnesota Timberwolves,2.75,7,4,HH,2,Basketbal,NBA,Dallas Mavericks vs Minnesota Timberwolves,2481416000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021416000.0,3598712000.0,1,1,1520.0,,Dallas Mavericks,1,2024-12-25T10:49:08Z,13/25,-195.0,OPEN,ENABLED,,,Dallas Mavericks vs Minnesota Timberwolves,BASKETBALL,
17,5095905,Dallas Mavericks vs Minnesota Timberwolves,Winnaar,Dallas Mavericks,1.47,10,21,HH,1,Basketbal,NBA,Dallas Mavericks vs Minnesota Timberwolves,2481416000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021416000.0,3598712000.0,2,2,2700.0,,Minnesota Timberwolves,2,2024-12-25T10:49:08Z,17/10,170.0,OPEN,ENABLED,,,Dallas Mavericks vs Minnesota Timberwolves,BASKETBALL,
21,5095904,Boston Celtics vs Philadelphia 76ers,Winnaar,Philadelphia 76ers,3.95,59,20,HH,2,Basketbal,NBA,Boston Celtics vs Philadelphia 76ers,2481281000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021416000.0,3598255000.0,1,1,1300.0,,Boston Celtics,1,2024-12-25T10:54:13Z,3/10,-335.0,OPEN,ENABLED,,,Boston Celtics vs Philadelphia 76ers,BASKETBALL,
23,5095904,Boston Celtics vs Philadelphia 76ers,Winnaar,Boston Celtics,1.27,10,37,HH,1,Basketbal,NBA,Boston Celtics vs Philadelphia 76ers,2481281000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021416000.0,3598255000.0,2,2,3700.0,,Philadelphia 76ers,2,2024-12-25T10:54:13Z,27/10,270.0,OPEN,ENABLED,,,Boston Celtics vs Philadelphia 76ers,BASKETBALL,
27,5095863,Golden State Warriors vs Los Angeles Lakers,Winnaar,Los Angeles Lakers,2.45,29,20,HH,2,Basketbal,NBA,Golden State Warriors vs Los Angeles Lakers,2481424000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021416000.0,3598718000.0,1,1,1630.0,,Golden State Warriors,1,2024-12-25T11:03:05Z,8/13,-159.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,
29,5095863,Golden State Warriors vs Los Angeles Lakers,Winnaar,Golden State Warriors,1.58,10,17,HH,1,Basketbal,NBA,Golden State Warriors vs Los Angeles Lakers,2481424000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021416000.0,3598718000.0,2,2,2430.0,,Los Angeles Lakers,2,2024-12-25T11:03:05Z,7/5,143.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,
32,5095864,Phoenix Suns vs Denver Nuggets,Winnaar,Phoenix Suns,2.15,23,20,HH,1,Basketbal,NBA,Phoenix Suns vs Denver Nuggets,2481427000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021416000.0,3598724000.0,2,2,1710.0,,Denver Nuggets,2,2024-12-25T11:01:39Z,7/10,-141.0,OPEN,ENABLED,,,Phoenix Suns vs Denver Nuggets,BASKETBALL,
36,5095864,Phoenix Suns vs Denver Nuggets,Winnaar,Denver Nuggets,1.74,20,27,HH,2,Basketbal,NBA,Phoenix Suns vs Denver Nuggets,2481427000.0,1001212000.0,Resultaat aan het Eind van 4e Kwart,Result at end of Quarter 4,POINTS,FULL_TIME,2.0,Wedstrijd,Match,1021416000.0,3598724000.0,1,1,2250.0,,Phoenix Suns,1,2024-12-25T11:01:39Z,5/4,125.0,OPEN,ENABLED,,,Phoenix Suns vs Denver Nuggets,BASKETBALL,


## Over/Under

In [80]:
# filter down to suitable betting opps
kambi_filtered_basketbal = kambi_raw_basketbal[kambi_raw_basketbal['bet_offer_type_english_name'].isin(['Match', 'Odd/Even', 'Player Occurrence Line', 'Asian Over/Under', 'Over/Under', 'Handicap', 'Asian Handicap', 'Yes/No', 'Head to Head'])]
toto_filtered_basketbal = toto_raw_basketbal[toto_raw_basketbal['Outcome Type'].isin(['--', 'DN', 'OE', 'HH', 'HL', 'AG'])]

kambi_filtered_basketbal = kambi_raw_basketbal
toto_filtered_basketbal = toto_raw_basketbal[toto_raw_basketbal['Outcome Type'].isin(['DN', 'OE', 'HH', 'HL', 'AG'])]

# Remove duplicates in toto_filtered_basketbal based on specified columns
toto_filtered_basketbal = toto_filtered_basketbal.drop_duplicates(
    subset=['Event Name', 'Market Name', 'Outcome Name']
)
# Remove duplicates in kambi_filtered_basketbal based on specified columns
kambi_filtered_basketbal = kambi_filtered_basketbal.drop_duplicates(
    subset=['event_name', 'outcome_label', 'criterion_label']
)

In [89]:
kambi_filtered_basketbal_overunder = kambi_filtered_basketbal[kambi_filtered_basketbal['bet_offer_type_name'].str.contains('Over')]
toto_filtered_basketbal_overunder = toto_filtered_basketbal[(toto_filtered_basketbal['Outcome Name'].str.contains('Over')) | (toto_filtered_basketbal['Outcome Name'].str.contains('Under'))]

kambi_filtered_basketbal_overunder['Participant_1'] = kambi_filtered_basketbal_overunder['event_name'].str.split(' vs ').str[0]
kambi_filtered_basketbal_overunder['Participant_2'] = kambi_filtered_basketbal_overunder['event_name'].str.split(' vs ').str[1]
# Fill the 'overunderparty' column based on conditions
kambi_filtered_basketbal_overunder['OverUnderType'] = kambi_filtered_basketbal_overunder.apply(
    lambda row: 1 if row['Participant_1'] in row['criterion_label'] 
                else 2 if row['Participant_2'] in row['criterion_label'] 
                else 3 if 'Totaal Aantal' in row['criterion_label']
                else None, 
    axis=1
)
kambi_filtered_basketbal_overunder['TimeScale'] = kambi_filtered_basketbal_overunder.apply(
    lambda row: '1e Kwart' if '1e Kwart'in row['criterion_label'] 
                else '2e Kwart' if '2e Kwart' in row['criterion_label'] 
                else '3e Kwart' if '3e Kwart' in row['criterion_label'] 
                else '4e Kwart' if '4e Kwart' in row['criterion_label'] 
                else '1e Helft' if '1e Helft' in row['criterion_label'] 
                else '2e Helft' if '2e Helft' in row['criterion_label']                
                else 'Inclusief Extra Tijd' if 'Full Time' in row['criterion_label'] 
                else None, 
    axis=1
)

toto_filtered_basketbal_overunder['Participant_1'] = toto_filtered_basketbal_overunder['Event Name'].str.split(' vs ').str[0]
toto_filtered_basketbal_overunder['Participant_2'] = toto_filtered_basketbal_overunder['Event Name'].str.split(' vs ').str[1]
toto_filtered_basketbal_overunder['Line'] = toto_filtered_basketbal_overunder['Market Name'].str.split('Under ').str[1]

# Fill the 'overunderparty' column based on conditions
toto_filtered_basketbal_overunder['OverUnderType'] = toto_filtered_basketbal_overunder.apply(
    lambda row: 1 if row['Participant_1'] in row['Market Name'] 
                else 2 if row['Participant_2'] in row['Market Name'] 
                else 3 if 'Aantal Punten' in row['Market Name'] 
                else None, 
    axis=1
)
toto_filtered_basketbal_overunder['TimeScale'] = toto_filtered_basketbal_overunder.apply(
    lambda row: '1e Helft' if '1e Helft' in row['Market Name']          
                else 'Inclusief Extra Tijd' if 'Full Time' in row['Market Name']
                else None, 
    axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kambi_filtered_basketbal_overunder['Participant_1'] = kambi_filtered_basketbal_overunder['event_name'].str.split(' vs ').str[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kambi_filtered_basketbal_overunder['Participant_2'] = kambi_filtered_basketbal_overunder['event_name'].str.split(' vs ').str[1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gu

In [91]:
from rapidfuzz import process, fuzz
import pandas as pd

# Helper function to find the best fuzzy match
def fuzzy_match(value, choices, threshold=70):
    match = process.extractOne(
        value, choices, scorer=fuzz.ratio
    )  # Get the best match and its score
    if match and match[1] >= threshold:  # Check if score meets the threshold
        return match[0]  # Return the matched value
    return None  # No match found

# Create fuzzy-matched columns for Participant_1 and Participant_2
kambi_filtered_basketbal_overunder['Fuzzy_Participant_1'] = kambi_filtered_basketbal_overunder['Participant_1'].apply(
    lambda x: fuzzy_match(x, toto_filtered_basketbal_overunder['Participant_1'].unique())
)

kambi_filtered_basketbal_overunder['Fuzzy_Participant_2'] = kambi_filtered_basketbal_overunder['Participant_2'].apply(
    lambda x: fuzzy_match(x, toto_filtered_basketbal_overunder['Participant_2'].unique())
)

# Merge the DataFrames using the fuzzy-matched columns
merged_basketbal_overunder = pd.merge(
    kambi_filtered_basketbal_overunder,
    toto_filtered_basketbal_overunder,
    left_on=['Fuzzy_Participant_1', 'Fuzzy_Participant_2', 'OverUnderType', 'TimeScale'],
    right_on=['Participant_1', 'Participant_2', 'OverUnderType', 'TimeScale'],
    suffixes=('_kambi', '_toto'),
    how='inner'  # Perform an inner join
)

# Display the merged DataFrame
# print(merged_basketbal_overunder)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kambi_filtered_basketbal_overunder['Fuzzy_Participant_1'] = kambi_filtered_basketbal_overunder['Participant_1'].apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kambi_filtered_basketbal_overunder['Fuzzy_Participant_2'] = kambi_filtered_basketbal_overunder['Participant_2'].apply(


In [92]:
merged_basketbal_overunder

Unnamed: 0,bet_offer_id,criterion_id,criterion_label,criterion_english_label,occurrence_type,lifetime,bet_offer_type_id,bet_offer_type_name,bet_offer_type_english_name,event_id_kambi,outcome_id,outcome_label,outcome_english_label,odds,line,participant,type,changed_date,odds_fractional,odds_american,status,cash_out_status,home_score,away_score,event_name,sport_kambi,group_name,Participant_1_kambi,Participant_2_kambi,OverUnderType,TimeScale,Fuzzy_Participant_1,Fuzzy_Participant_2,event_id_toto,Event Name,Market Name,Outcome Name,Odds (Decimal),Price Numerator,Price Denominator,Outcome Type,Outcome SubType,sport_toto,competition,Participant_1_toto,Participant_2_toto,Line
0,2481382764,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021227408,3598606900,Meer dan,Over,1960.0,172000.0,,OT_OVER,2024-12-24T17:18:42Z,19/20,-105.0,OPEN,ENABLED,,,AS Monaco vs Anadolu Efes,BASKETBALL,,AS Monaco,Anadolu Efes,3,,AS Monaco,Anadolu Efes Istanbul,6255851,AS Monaco vs Anadolu Efes Istanbul,Aantal Punten - Over/Under 171.5,Over,1.91,91,100,HL,1,Basketbal,Euroleague Mannen,AS Monaco,Anadolu Efes Istanbul,171.5
1,2481382764,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021227408,3598606900,Meer dan,Over,1960.0,172000.0,,OT_OVER,2024-12-24T17:18:42Z,19/20,-105.0,OPEN,ENABLED,,,AS Monaco vs Anadolu Efes,BASKETBALL,,AS Monaco,Anadolu Efes,3,,AS Monaco,Anadolu Efes Istanbul,6255851,AS Monaco vs Anadolu Efes Istanbul,Aantal Punten - Over/Under 171.5,Under,1.87,87,100,HL,L,Basketbal,Euroleague Mannen,AS Monaco,Anadolu Efes Istanbul,171.5
2,2481382764,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021227408,3598606900,Meer dan,Over,1960.0,172000.0,,OT_OVER,2024-12-24T17:18:42Z,19/20,-105.0,OPEN,ENABLED,,,AS Monaco vs Anadolu Efes,BASKETBALL,,AS Monaco,Anadolu Efes,3,,AS Monaco,Anadolu Efes Istanbul,6255851,AS Monaco vs Anadolu Efes Istanbul,Aantal Punten - Over/Under 172.0,Over,1.95,19,20,HL,1,Basketbal,Euroleague Mannen,AS Monaco,Anadolu Efes Istanbul,172.0
3,2481382764,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021227408,3598606900,Meer dan,Over,1960.0,172000.0,,OT_OVER,2024-12-24T17:18:42Z,19/20,-105.0,OPEN,ENABLED,,,AS Monaco vs Anadolu Efes,BASKETBALL,,AS Monaco,Anadolu Efes,3,,AS Monaco,Anadolu Efes Istanbul,6255851,AS Monaco vs Anadolu Efes Istanbul,Aantal Punten - Over/Under 172.0,Under,1.83,83,100,HL,L,Basketbal,Euroleague Mannen,AS Monaco,Anadolu Efes Istanbul,172.0
4,2481382764,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021227408,3598606900,Meer dan,Over,1960.0,172000.0,,OT_OVER,2024-12-24T17:18:42Z,19/20,-105.0,OPEN,ENABLED,,,AS Monaco vs Anadolu Efes,BASKETBALL,,AS Monaco,Anadolu Efes,3,,AS Monaco,Anadolu Efes Istanbul,6255851,AS Monaco vs Anadolu Efes Istanbul,Aantal Punten - Over/Under 171.0,Under,1.93,93,100,HL,L,Basketbal,Euroleague Mannen,AS Monaco,Anadolu Efes Istanbul,171.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275,2481529701,1001159947,Totaal Aantal Punten - 1e Helft,Total Points - 1st Half,POINTS,,6,Over/Onder,Over/Under,1021415721,3599166505,Meer dan,Over,1670.0,116500.0,,OT_OVER,2024-12-25T11:01:40Z,4/6,-150.0,OPEN,ENABLED,,,Phoenix Suns vs Denver Nuggets,BASKETBALL,,Phoenix Suns,Denver Nuggets,3,1e Helft,Phoenix Suns,Denver Nuggets,5095864,Phoenix Suns vs Denver Nuggets,1e Helft - Aantal Punten - Over/Under 119.5,Under,1.80,4,5,HL,L,Basketbal,NBA,Phoenix Suns,Denver Nuggets,119.5
276,2481529701,1001159947,Totaal Aantal Punten - 1e Helft,Total Points - 1st Half,POINTS,,6,Over/Onder,Over/Under,1021415721,3599166508,Minder dan,Under,2100.0,116500.0,,OT_UNDER,2024-12-25T11:01:40Z,11/10,110.0,OPEN,ENABLED,,,Phoenix Suns vs Denver Nuggets,BASKETBALL,,Phoenix Suns,Denver Nuggets,3,1e Helft,Phoenix Suns,Denver Nuggets,5095864,Phoenix Suns vs Denver Nuggets,1e Helft - Aantal Punten - Over/Under 118.5,Over,1.83,83,100,HL,1,Basketbal,NBA,Phoenix Suns,Denver Nuggets,118.5
277,2481529701,1001159947,Totaal Aantal Punten - 1e Helft,Total Points - 1st Half,POINTS,,6,Over/Onder,Over/Under,1021415721,3599166508,Minder dan,Under,2100.0,116500.0,,OT_UNDER,2024-12-25T11:01:40Z,11/10,110.0,OPEN,ENABLED,,,Phoenix Suns vs Denver Nuggets,BASKETBALL,,Phoenix Suns,Denver Nuggets,3,1e Helft,Phoenix Suns,Denver Nuggets,5095864,Phoenix Suns vs Denver Nuggets,1e Helft - Aantal Punten - Over/Under 118.5,Under,1.93,93,100,HL,L,Basketbal,NBA,Phoenix Suns,Denver Nuggets,118.5
278,2481529701,1001159947,Totaal Aantal Punten - 1e Helft,Total Points - 1st Half,POINTS,,6,Over/Onder,Over/Under,1021415721,3599166508,Minder dan,Under,2100.0,116500.0,,OT_UNDER,2024-12-25T11:01:40Z,11/10,110.0,OPEN,ENABLED,,,Phoenix Suns vs Denver Nuggets,BASKETBALL,,Phoenix Suns,Denver Nuggets,3,1e Helft,Phoenix Suns,Denver Nuggets,5095864,Phoenix Suns vs Denver Nuggets,1e Helft - Aantal Punten - Over/Under 119.5,Over,1.95,19,20,HL,1,Basketbal,NBA,Phoenix Suns,Denver Nuggets,119.5


In [88]:
[i for i in list(set(toto_filtered_basketbal_overunder['Market Name']))]

['Okapi Aalstar Aantal Punten - Over/Under 80.5',
 'Okapi Aalstar Aantal Punten - Over/Under 80.0',
 'Olympiacos Piraeus Aantal Punten - Over/Under 85.0',
 'Real Madrid Aantal Punten - Over/Under 91.5',
 'Aantal Punten - Over/Under 201.0',
 'Aantal Punten - Over/Under 171.0',
 'Fortaleza Basquete Cearense Aantal Punten - Over/Under 73.5',
 'Kangoeroes Basket Mechelen Aantal Punten - Over/Under 79.0',
 'Bendigo Spirit Aantal Punten - Over/Under 79.0',
 'AS Monaco Aantal Punten - Over/Under 88.5',
 'Limburg United Aantal Punten - Over/Under 78.5',
 'Turk Telekom Aantal Punten - Over/Under 81.5',
 'Melbourne United Aantal Punten - Over/Under 99.5',
 'Aantal Punten - Over/Under 222.5',
 'Guandong Southern Tigers Aantal Punten - Over/Under 111.0',
 'Aantal Punten - Over/Under 153.5',
 'Mogi Das Cruzes Aantal Punten - Over/Under 77.0',
 'Adelaide Lightning Aantal Punten - Over/Under 73.5',
 'Bendigo Spirit Aantal Punten - Over/Under 77.5',
 'Real Madrid Aantal Punten - Over/Under 90.0',
 'De

In [66]:
toto_filtered_basketbal_overunder.to_excel('totodev_overunder.xlsx')
kambi_filtered_basketbal_overunder.to_excel('kambidev_overunder.xlsx')

In [67]:
kambi_filtered_basketbal_overunder[kambi_filtered_basketbal_overunder['OverUnderType']==3]

Unnamed: 0,bet_offer_id,criterion_id,criterion_label,criterion_english_label,occurrence_type,lifetime,bet_offer_type_id,bet_offer_type_name,bet_offer_type_english_name,event_id,outcome_id,outcome_label,outcome_english_label,odds,line,participant,type,changed_date,odds_fractional,odds_american,status,cash_out_status,home_score,away_score,event_name,sport,group_name,Participant_1,Participant_2,OverUnderType
37247,2481231962,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021227418,3598105298,Meer dan,Over,1870.0,169000.0,,OT_OVER,2024-12-24T17:23:59Z,17/20,-115.0,OPEN,ENABLED,,,Alba Berlin vs Real Madrid Baloncesto,BASKETBALL,,Alba Berlin,Real Madrid Baloncesto,3
37249,2481231962,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021227418,3598105304,Minder dan,Under,1950.0,169000.0,,OT_UNDER,2024-12-24T17:23:59Z,19/20,-106.0,OPEN,ENABLED,,,Alba Berlin vs Real Madrid Baloncesto,BASKETBALL,,Alba Berlin,Real Madrid Baloncesto,3
37281,2481382764,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021227408,3598606900,Meer dan,Over,1960.0,172000.0,,OT_OVER,2024-12-24T17:18:42Z,19/20,-105.0,OPEN,ENABLED,,,AS Monaco vs Anadolu Efes,BASKETBALL,,AS Monaco,Anadolu Efes,3
37283,2481382764,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021227408,3598606901,Minder dan,Under,1870.0,172000.0,,OT_UNDER,2024-12-24T17:18:42Z,17/20,-115.0,OPEN,ENABLED,,,AS Monaco vs Anadolu Efes,BASKETBALL,,AS Monaco,Anadolu Efes,3
37303,2481250457,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021227415,3598108409,Meer dan,Over,1910.0,164500.0,,OT_OVER,2024-12-23T09:57:10Z,10/11,-110.0,OPEN,ENABLED,,,Maccabi Tel Aviv vs Žalgiris Kaunas,BASKETBALL,,Maccabi Tel Aviv,Žalgiris Kaunas,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67402,2481524574,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1022413366,3599089445,Minder dan,Under,1950.0,132500.0,,OT_UNDER,2024-12-25T07:14:27Z,19/20,-106.0,OPEN,ENABLED,,,Hawaii Rainbow Warriors vs Oakland Golden Griz...,BASKETBALL,,Hawaii Rainbow Warriors,Oakland Golden Grizzlies,3
67426,2481467182,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1022412723,3598908427,Meer dan,Over,1850.0,136500.0,,OT_OVER,2024-12-24T22:26:57Z,17/20,-118.0,OPEN,ENABLED,,,Nebraska Cornhuskers vs Oregon State Beavers,BASKETBALL,,Nebraska Cornhuskers,Oregon State Beavers,3
67428,2481467182,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1022412723,3598908428,Minder dan,Under,1950.0,136500.0,,OT_UNDER,2024-12-24T22:26:57Z,19/20,-106.0,OPEN,ENABLED,,,Nebraska Cornhuskers vs Oregon State Beavers,BASKETBALL,,Nebraska Cornhuskers,Oregon State Beavers,3
72590,2481232038,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021173057,3598105715,Meer dan,Over,1940.0,171000.0,,OT_OVER,2024-12-23T10:05:54Z,23/25,-107.0,OPEN,ENABLED,,,Ratiopharm Ulm vs Besiktas,BASKETBALL,EuroCup,Ratiopharm Ulm,Besiktas,3


In [68]:
toto_filtered_basketbal_overunder[toto_filtered_basketbal_overunder['OverUnderType']==3]

Unnamed: 0,event_id,Event Name,Market Name,Outcome Name,Odds (Decimal),Price Numerator,Price Denominator,Outcome Type,Outcome SubType,sport,competition,Participant_1,Participant_2,Line,OverUnderType
5,5983675,Guandong Southern Tigers vs Zhejiang Golden Bulls,Aantal Punten - Over/Under 217.5,Under,1.71,71,100,HL,L,Basketbal,China CBA,Guandong Southern Tigers,Zhejiang Golden Bulls,217.5,3
6,5983675,Guandong Southern Tigers vs Zhejiang Golden Bulls,Aantal Punten - Over/Under 217.5,Over,2.00,1,1,HL,1,Basketbal,China CBA,Guandong Southern Tigers,Zhejiang Golden Bulls,217.5,3
13,5983675,Guandong Southern Tigers vs Zhejiang Golden Bulls,Aantal Punten - Over/Under 219.5,Under,1.59,59,100,HL,L,Basketbal,China CBA,Guandong Southern Tigers,Zhejiang Golden Bulls,219.5,3
14,5983675,Guandong Southern Tigers vs Zhejiang Golden Bulls,Aantal Punten - Over/Under 219.5,Over,2.20,6,5,HL,1,Basketbal,China CBA,Guandong Southern Tigers,Zhejiang Golden Bulls,219.5,3
35,5983675,Guandong Southern Tigers vs Zhejiang Golden Bulls,Aantal Punten - Over/Under 215.5,Over,1.87,87,100,HL,1,Basketbal,China CBA,Guandong Southern Tigers,Zhejiang Golden Bulls,215.5,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24346,6296133,Et Land Elephants vs Lg Sakers,Aantal Punten - Over/Under 153.5,Over,1.83,83,100,HL,1,Basketbal,Zuid-Korea KBL,Et Land Elephants,Lg Sakers,153.5,3
24391,6296133,Et Land Elephants vs Lg Sakers,Aantal Punten - Over/Under 153.0,Under,1.91,91,100,HL,L,Basketbal,Zuid-Korea KBL,Et Land Elephants,Lg Sakers,153.0,3
24392,6296133,Et Land Elephants vs Lg Sakers,Aantal Punten - Over/Under 153.0,Over,1.80,4,5,HL,1,Basketbal,Zuid-Korea KBL,Et Land Elephants,Lg Sakers,153.0,3
24395,6296133,Et Land Elephants vs Lg Sakers,Aantal Punten - Over/Under 154.0,Under,1.80,4,5,HL,L,Basketbal,Zuid-Korea KBL,Et Land Elephants,Lg Sakers,154.0,3


In [181]:
toto_filtered_basketbal_overunder[toto_filtered_basketbal_overunder['Event Name'].str.contains('Golden State')]

Unnamed: 0,event_id,Event Name,Market Name,Outcome Name,Odds (Decimal),Price Numerator,Price Denominator,Outcome Type,Outcome SubType,sport,competition,Line
532,5095863,Golden State Warriors vs Los Angeles Lakers,Aantal Punten - Over/Under 225.5,Over,1.95,19,20,HL,1,Basketbal,NBA,225.5
533,5095863,Golden State Warriors vs Los Angeles Lakers,Aantal Punten - Over/Under 225.5,Under,1.73,73,100,HL,L,Basketbal,NBA,225.5
534,5095863,Golden State Warriors vs Los Angeles Lakers,Aantal Punten - Over/Under 223.5,Over,1.87,87,100,HL,1,Basketbal,NBA,223.5
535,5095863,Golden State Warriors vs Los Angeles Lakers,Aantal Punten - Over/Under 223.5,Under,1.95,19,20,HL,L,Basketbal,NBA,223.5
536,5095863,Golden State Warriors vs Los Angeles Lakers,Los Angeles Lakers Aantal Punten - Over/Under ...,Under,1.87,87,100,HL,L,Basketbal,NBA,110.5
537,5095863,Golden State Warriors vs Los Angeles Lakers,Los Angeles Lakers Aantal Punten - Over/Under ...,Over,1.87,87,100,HL,1,Basketbal,NBA,110.5
538,5095863,Golden State Warriors vs Los Angeles Lakers,Aantal Punten - Over/Under 224.5,Over,1.95,19,20,HL,1,Basketbal,NBA,224.5
539,5095863,Golden State Warriors vs Los Angeles Lakers,Aantal Punten - Over/Under 224.5,Under,1.87,87,100,HL,L,Basketbal,NBA,224.5
540,5095863,Golden State Warriors vs Los Angeles Lakers,Golden State Warriors Aantal Punten - Over/Und...,Over,1.83,83,100,HL,1,Basketbal,NBA,113.5
541,5095863,Golden State Warriors vs Los Angeles Lakers,Golden State Warriors Aantal Punten - Over/Und...,Under,1.91,91,100,HL,L,Basketbal,NBA,113.5


In [177]:
kambi_filtered_basketbal_overunder[kambi_filtered_basketbal_overunder['event_name'].str.contains('Golden State')]

Unnamed: 0,bet_offer_id,criterion_id,criterion_label,criterion_english_label,occurrence_type,lifetime,bet_offer_type_id,bet_offer_type_name,bet_offer_type_english_name,event_id,outcome_id,outcome_label,outcome_english_label,odds,line,participant,type,changed_date,odds_fractional,odds_american,status,cash_out_status,home_score,away_score,event_name,sport,group_name
49552,2481277702,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021415723,3598246183,Meer dan,Over,2650.0,230000.0,,OT_OVER,2024-12-25T11:03:05Z,33/20,165.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,
49554,2481277702,1001159509,Totaal Aantal Punten - Inclusief Extra Tijd,Total Points - Including Overtime,POINTS,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021415723,3598246185,Minder dan,Under,1450.0,230000.0,,OT_UNDER,2024-12-25T11:03:05Z,4/9,-225.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,
49676,2481423956,1001159837,Totaal Aantal Punten door Los Angeles Lakers -...,Total Points by Los Angeles Lakers - Including...,,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021415723,3598718068,Meer dan,Over,2070.0,111500.0,,OT_OVER,2024-12-25T11:03:05Z,21/20,107.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,
49678,2481423956,1001159837,Totaal Aantal Punten door Los Angeles Lakers -...,Total Points by Los Angeles Lakers - Including...,,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021415723,3598718071,Minder dan,Under,1720.0,111500.0,,OT_UNDER,2024-12-25T11:03:05Z,7/10,-139.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,
49782,2481423983,1001159855,Totaal Aantal Punten door Golden State Warrior...,Total Points by Golden State Warriors - Includ...,,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021415723,3598718122,Meer dan,Over,2050.0,114500.0,,OT_OVER,2024-12-25T11:03:05Z,21/20,105.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,
49784,2481423983,1001159855,Totaal Aantal Punten door Golden State Warrior...,Total Points by Golden State Warriors - Includ...,,FULL_TIME_OVERTIME,6,Over/Onder,Over/Under,1021415723,3598718124,Minder dan,Under,1740.0,114500.0,,OT_UNDER,2024-12-25T11:03:05Z,8/11,-136.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,
50536,2481543437,1001159947,Totaal Aantal Punten - 1e Helft,Total Points - 1st Half,POINTS,,6,Over/Onder,Over/Under,1021415723,3599172421,Meer dan,Over,1670.0,112500.0,,OT_OVER,2024-12-25T11:03:05Z,4/6,-150.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,
50538,2481543437,1001159947,Totaal Aantal Punten - 1e Helft,Total Points - 1st Half,POINTS,,6,Over/Onder,Over/Under,1021415723,3599172426,Minder dan,Under,2100.0,112500.0,,OT_UNDER,2024-12-25T11:03:05Z,11/10,110.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,
50556,2481543442,1001159802,Totaal Aantal Punten - 1e Kwart,Total Points - Quarter 1,POINTS,,6,Over/Onder,Over/Under,1021415723,3599172434,Meer dan,Over,1750.0,56500.0,,OT_OVER,2024-12-25T11:03:05Z,3/4,-134.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,
50558,2481543442,1001159802,Totaal Aantal Punten - 1e Kwart,Total Points - Quarter 1,POINTS,,6,Over/Onder,Over/Under,1021415723,3599172435,Minder dan,Under,2020.0,56500.0,,OT_UNDER,2024-12-25T11:03:05Z,Evens,102.0,OPEN,ENABLED,,,Golden State Warriors vs Los Angeles Lakers,BASKETBALL,


In [156]:
import pandas as pd
from fuzzywuzzy import fuzz, process


# Step 1: Standardize Event Names with Fuzzy Matching
def match_event_names(kambi_events, toto_events):
    matched_events = {}
    for event in kambi_events:
        match, score = process.extractOne(event, toto_events, scorer=fuzz.token_sort_ratio)
        if score > 90:  # Adjust the threshold as necessary
            matched_events[event] = match
        else:
            matched_events[event] = event  # Fallback to original if no good match
    return matched_events

# Map matched events from kambi to toto
matched_events = match_event_names(kambi_filtered_basketbal['event_name'].unique(), toto_filtered_basketbal['Event Name'].unique())
kambi_filtered_basketbal['standard_event_name'] = kambi_filtered_basketbal['event_name'].map(matched_events)

# Reverse the matched events dictionary for use in the toto table
reversed_matched_events = {v: k for k, v in matched_events.items()}
toto_filtered_basketbal['standard_event_name'] = toto_filtered_basketbal['Event Name'].map(reversed_matched_events).fillna(toto_filtered_basketbal['Event Name'])

# Step 2: Ensure Line Consistency
toto_filtered_basketbal['Line'] = kambi_filtered_basketbal['line'] / 10000  # Adjust to decimal
# kambi['Line'] = kambi['Line'].fillna('Placeholder')
toto_filtered_basketbal['Line'] = toto_filtered_basketbal['Market Name'].str.extract(r'(\d+\.?\d*)').astype(float)
# toto['Line'] = toto['Line'].fillna('Placeholder')

# Step 3: Standardize Betting Types
bet_type_mapping = {
    'goals over/under': 'Total Goals',
    'Winnaar Gevecht': 'Wedstrijdnotering'
    # Add more mappings as necessary
}
kambi_filtered_basketbal['standard_bet_type'] = kambi_filtered_basketbal['criterion_label'].replace(bet_type_mapping)
toto_filtered_basketbal['standard_bet_type'] = toto_filtered_basketbal['Market Name'].replace(bet_type_mapping)

# Step 4
kambi_filtered_basketbal['standard_outcome'] = kambi_filtered_basketbal['outcome_label'].apply(
    lambda x: ' '.join(x.split(', ')[::-1]) if ',' in x else x
)

# Assign kambi 'outcome_label' directly to 'standard_outcome' as it's already in the desired format
toto_filtered_basketbal['standard_outcome'] = toto_filtered_basketbal['Outcome Name']

# Step 5: Merge DataFrames on Standardized Columns
merged_data = pd.merge(
    kambi_filtered_basketbal,
    toto_filtered_basketbal,
    left_on=['standard_event_name', 'standard_bet_type', 'standard_outcome', 'line'],
    right_on=['standard_event_name', 'standard_bet_type', 'standard_outcome', 'Line'],
    suffixes=('_kambi', '_toto')
)

# Output merged results with odds side by side
print(merged_data[['event_name', 'Market Name', 'criterion_label', 'outcome_label', 'Line']])

Empty DataFrame
Columns: [event_name, Market Name, criterion_label, outcome_label, Line]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kambi_filtered_basketbal['standard_event_name'] = kambi_filtered_basketbal['event_name'].map(matched_events)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kambi_filtered_basketbal['standard_bet_type'] = kambi_filtered_basketbal['criterion_label'].replace(bet_type_mapping)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a

In [118]:
merged_data = merged_data[['event_id_toto', 'sport_toto','event_id_kambi', 'sport_kambi', 'standard_event_name', 'standard_bet_type', 'standard_outcome', 'Outcome Type', 'bet_offer_type_name','Line', 'odds', 'Odds (Decimal)']].drop_duplicates()
merged_data['odds'] = merged_data['odds'] / 1000

In [119]:
merged_data

Unnamed: 0,event_id_toto,sport_toto,event_id_kambi,sport_kambi,standard_event_name,standard_bet_type,standard_outcome,Outcome Type,bet_offer_type_name,Line,odds,Odds (Decimal)


## Analyze results match

In [35]:
# Calculate the absolute difference between 'odds' and 'Odds (Decimal)'
merged_data['difference'] = abs(merged_data['odds'] - merged_data['Odds (Decimal)'])

# Only keep records where there are 2 outcomes
merged_data = merged_data.groupby(
    ['event_id_toto', 'sport_toto', 'event_id_kambi', 'sport_kambi', 'standard_event_name', 'standard_bet_type']
).filter(lambda x: len(x) == 2)

# Sort by the difference in descending order
top_differences = merged_data.sort_values(by='difference', ascending=False)

# Get the top records with the biggest differences
top_records = top_differences.head(20)

top_records

Unnamed: 0,event_id_toto,sport_toto,event_id_kambi,sport_kambi,standard_event_name,standard_bet_type,standard_outcome,Line,odds,Odds (Decimal),difference


In [36]:
# Split 'standard_event_name' into two parts
split_names = merged_data['standard_event_name'].str.split(' vs ', expand=True)

# Recode 'standard_outcome'
merged_data['standard_outcome'] = merged_data.apply(
    lambda row: '1' if row['standard_outcome'] == split_names.loc[row.name, 0] 
                else ('2' if row['standard_outcome'] == split_names.loc[row.name, 1] else row['standard_outcome']),
    axis=1
)

merged_data['standard_outcome'] = merged_data['standard_outcome'].astype(str)

ValueError: Columns must be same length as key

In [None]:
merged_data[merged_data['standard_bet_type'] == 'Jannik Sinner Wint een Set']

Unnamed: 0,event_id_toto,sport_toto,event_id_kambi,sport_kambi,standard_event_name,standard_bet_type,standard_outcome,Line,odds,Odds (Decimal),difference
2,5944135,Tennis,1022071872,TENNIS,Jannik Sinner vs Taylor Fritz,Jannik Sinner Wint een Set,Ja,,1.04,1.04,0.0
3,5944135,Tennis,1022071872,TENNIS,Jannik Sinner vs Taylor Fritz,Jannik Sinner Wint een Set,Nee,,12.5,8.25,4.25


In [None]:
## Get odds for single event on 1 record
# Filter groups with exactly two rows
filtered_df = merged_data.groupby(
    ['event_id_toto', 'sport_toto', 'event_id_kambi', 'sport_kambi', 'standard_event_name', 'standard_bet_type']
).filter(lambda x: len(x) == 2)

# Proceed with pivoting only if there are any valid groups left
if not filtered_df.empty:
    # Pivot the filtered DataFrame
    reshaped_df = filtered_df.pivot(
        index=['event_id_toto', 'sport_toto', 'event_id_kambi', 'sport_kambi', 'standard_event_name', 'standard_bet_type'],
        columns='standard_outcome',
        values=['Line', 'odds', 'Odds (Decimal)', 'difference']
    )

    # Flatten the multi-index columns for readability
    reshaped_df.columns = ['_'.join(col).strip() for col in reshaped_df.columns.values]

    # Reset index to turn multi-index into columns
    reshaped_df.reset_index(inplace=True)
else:
    reshaped_df = pd.DataFrame()  # Create an empty DataFrame if no groups qualify

reshaped_df

Unnamed: 0,event_id_toto,sport_toto,event_id_kambi,sport_kambi,standard_event_name,standard_bet_type,Line_1,Line_2,Line_Ja,Line_Nee,odds_1,odds_2,odds_Ja,odds_Nee,Odds (Decimal)_1,Odds (Decimal)_2,Odds (Decimal)_Ja,Odds (Decimal)_Nee,difference_1,difference_2,difference_Ja,difference_Nee
0,5468089,Mixed Martial Arts,1021633417,MARTIAL_ARTS,Jon Jones vs Stipe Miocic,Wedstrijdnotering,,,,,1.15,5.75,,,1.14,5.25,,,0.01,0.5,,
1,5699912,Voetbal,1020719873,FOOTBALL,San Marino vs Gibraltar,Beide Teams Scoren,,,,,,,2.35,1.53,,,2.33,1.54,,,0.02,0.01
2,5825551,Mixed Martial Arts,1021633419,MARTIAL_ARTS,Bo Nickal vs Paul Craig,Wedstrijdnotering,,,,,1.11,7.0,,,1.08,6.75,,,0.03,0.25,,
3,5848540,Voetbal,1021874423,FOOTBALL,Ecuador vs Bolivia,Beide Teams Scoren,,,,,,,2.08,1.65,,,2.32,1.54,,,0.24,0.11
4,5850619,Voetbal,1021874421,FOOTBALL,Uruguay vs Colombia,Beide Teams Scoren,,,,,,,2.17,1.6,,,2.12,1.64,,,0.05,0.04
5,5850658,Voetbal,1021874447,FOOTBALL,Bolivia vs Paraguay,Beide Teams Scoren,,,,,,,2.14,1.65,,,2.1,1.65,,,0.04,0.0
6,5850661,Voetbal,1021874448,FOOTBALL,Colombia vs Ecuador,Beide Teams Scoren,,,,,,,2.63,1.44,,,2.47,1.48,,,0.16,0.04
7,5865420,Mixed Martial Arts,1022019255,MARTIAL_ARTS,Chris Weidman vs Eryk Anders,Wedstrijdnotering,,,,,2.18,1.68,,,2.1,1.67,,,0.08,0.01,,
8,5880676,Mixed Martial Arts,1022019259,MARTIAL_ARTS,Jonathan Martinez vs Marcus McGhee,Wedstrijdnotering,,,,,2.1,1.74,,,2.1,1.67,,,0.0,0.07,,
9,5885309,Mixed Martial Arts,1022019261,MARTIAL_ARTS,Jim Miller vs Damon Jackson,Wedstrijdnotering,,,,,2.23,1.65,,,2.25,1.59,,,0.02,0.06,,


In [None]:
reshaped_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,Line,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,odds,Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),Odds (Decimal),difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference,difference
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,standard_outcome,Adam Mould,Bo Nickal,Cameron Menzies,Charles Oliveira,Chris Weidman,Damon Jackson,Danny Noppert,Eryk Anders,Ja,James Llontop,James Wade,Jermaine Wattimena,Jim Miller,Johann Brouwer,Jon Jones,Jonathan Martinez,Josh Rock,Lourence Ilagan,Luke Littler,Marcus McGhee,Martin Lukeman,Mauricio Ruffy,Mensur Suljovic,Mickey Gall,Nee,Paul Craig,Ramiz Brahimaj,Reece Robinson,Richie Burnett,Ritchie Edhouse,Rob Cross,Ross Smith,Stephen Bunting,Stipe Miocic,Tommy Morris,Adam Mould,Bo Nickal,Cameron Menzies,Charles Oliveira,Chris Weidman,Damon Jackson,Danny Noppert,Eryk Anders,Ja,James Llontop,James Wade,Jermaine Wattimena,Jim Miller,Johann Brouwer,Jon Jones,Jonathan Martinez,Josh Rock,Lourence Ilagan,Luke Littler,Marcus McGhee,Martin Lukeman,Mauricio Ruffy,Mensur Suljovic,Mickey Gall,Nee,Paul Craig,Ramiz Brahimaj,Reece Robinson,Richie Burnett,Ritchie Edhouse,Rob Cross,Ross Smith,Stephen Bunting,Stipe Miocic,Tommy Morris,Adam Mould,Bo Nickal,Cameron Menzies,Charles Oliveira,Chris Weidman,Damon Jackson,Danny Noppert,Eryk Anders,Ja,James Llontop,James Wade,Jermaine Wattimena,Jim Miller,Johann Brouwer,Jon Jones,Jonathan Martinez,Josh Rock,Lourence Ilagan,Luke Littler,Marcus McGhee,Martin Lukeman,Mauricio Ruffy,Mensur Suljovic,Mickey Gall,Nee,Paul Craig,Ramiz Brahimaj,Reece Robinson,Richie Burnett,Ritchie Edhouse,Rob Cross,Ross Smith,Stephen Bunting,Stipe Miocic,Tommy Morris,Adam Mould,Bo Nickal,Cameron Menzies,Charles Oliveira,Chris Weidman,Damon Jackson,Danny Noppert,Eryk Anders,Ja,James Llontop,James Wade,Jermaine Wattimena,Jim Miller,Johann Brouwer,Jon Jones,Jonathan Martinez,Josh Rock,Lourence Ilagan,Luke Littler,Marcus McGhee,Martin Lukeman,Mauricio Ruffy,Mensur Suljovic,Mickey Gall,Nee,Paul Craig,Ramiz Brahimaj,Reece Robinson,Richie Burnett,Ritchie Edhouse,Rob Cross,Ross Smith,Stephen Bunting,Stipe Miocic,Tommy Morris
event_id_toto,sport_toto,event_id_kambi,sport_kambi,standard_event_name,standard_bet_type,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2,Unnamed: 83_level_2,Unnamed: 84_level_2,Unnamed: 85_level_2,Unnamed: 86_level_2,Unnamed: 87_level_2,Unnamed: 88_level_2,Unnamed: 89_level_2,Unnamed: 90_level_2,Unnamed: 91_level_2,Unnamed: 92_level_2,Unnamed: 93_level_2,Unnamed: 94_level_2,Unnamed: 95_level_2,Unnamed: 96_level_2,Unnamed: 97_level_2,Unnamed: 98_level_2,Unnamed: 99_level_2,Unnamed: 100_level_2,Unnamed: 101_level_2,Unnamed: 102_level_2,Unnamed: 103_level_2,Unnamed: 104_level_2,Unnamed: 105_level_2,Unnamed: 106_level_2,Unnamed: 107_level_2,Unnamed: 108_level_2,Unnamed: 109_level_2,Unnamed: 110_level_2,Unnamed: 111_level_2,Unnamed: 112_level_2,Unnamed: 113_level_2,Unnamed: 114_level_2,Unnamed: 115_level_2,Unnamed: 116_level_2,Unnamed: 117_level_2,Unnamed: 118_level_2,Unnamed: 119_level_2,Unnamed: 120_level_2,Unnamed: 121_level_2,Unnamed: 122_level_2,Unnamed: 123_level_2,Unnamed: 124_level_2,Unnamed: 125_level_2,Unnamed: 126_level_2,Unnamed: 127_level_2,Unnamed: 128_level_2,Unnamed: 129_level_2,Unnamed: 130_level_2,Unnamed: 131_level_2,Unnamed: 132_level_2,Unnamed: 133_level_2,Unnamed: 134_level_2,Unnamed: 135_level_2,Unnamed: 136_level_2,Unnamed: 137_level_2,Unnamed: 138_level_2,Unnamed: 139_level_2,Unnamed: 140_level_2,Unnamed: 141_level_2,Unnamed: 142_level_2,Unnamed: 143_level_2,Unnamed: 144_level_2,Unnamed: 145_level_2
5468088,Mixed Martial Arts,1021633418,MARTIAL_ARTS,Charles Oliveira vs Michael Chandler,Wedstrijdnotering,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.38,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.36,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.02,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5468089,Mixed Martial Arts,1021633417,MARTIAL_ARTS,Jon Jones vs Stipe Miocic,Wedstrijdnotering,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.15,,,,,,,,,,,,,,,,,,,5.75,,,,,,,,,,,,,,,,1.14,,,,,,,,,,,,,,,,,,,5.25,,,,,,,,,,,,,,,,0.01,,,,,,,,,,,,,,,,,,,0.5,
5699912,Voetbal,1020719873,FOOTBALL,San Marino vs Gibraltar,Beide Teams Scoren,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.35,,,,,,,,,,,,,,,,1.53,,,,,,,,,,,,,,,,,,,2.33,,,,,,,,,,,,,,,,1.54,,,,,,,,,,,,,,,,,,,0.02,,,,,,,,,,,,,,,,0.01,,,,,,,,,,
5825551,Mixed Martial Arts,1021633419,MARTIAL_ARTS,Bo Nickal vs Paul Craig,Wedstrijdnotering,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.11,,,,,,,,,,,,,,,,,,,,,,,,7.0,,,,,,,,,,,1.08,,,,,,,,,,,,,,,,,,,,,,,,6.75,,,,,,,,,,,0.03,,,,,,,,,,,,,,,,,,,,,,,,0.25,,,,,,,,,
5848540,Voetbal,1021874423,FOOTBALL,Ecuador vs Bolivia,Beide Teams Scoren,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.08,,,,,,,,,,,,,,,,1.65,,,,,,,,,,,,,,,,,,,2.32,,,,,,,,,,,,,,,,1.54,,,,,,,,,,,,,,,,,,,0.24,,,,,,,,,,,,,,,,0.11,,,,,,,,,,
5850619,Voetbal,1021874421,FOOTBALL,Uruguay vs Colombia,Beide Teams Scoren,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.17,,,,,,,,,,,,,,,,1.6,,,,,,,,,,,,,,,,,,,2.12,,,,,,,,,,,,,,,,1.64,,,,,,,,,,,,,,,,,,,0.05,,,,,,,,,,,,,,,,0.04,,,,,,,,,,
5850658,Voetbal,1021874447,FOOTBALL,Bolivia vs Paraguay,Beide Teams Scoren,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.14,,,,,,,,,,,,,,,,1.65,,,,,,,,,,,,,,,,,,,2.1,,,,,,,,,,,,,,,,1.65,,,,,,,,,,,,,,,,,,,0.04,,,,,,,,,,,,,,,,0.0,,,,,,,,,,
5850661,Voetbal,1021874448,FOOTBALL,Colombia vs Ecuador,Beide Teams Scoren,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.63,,,,,,,,,,,,,,,,1.44,,,,,,,,,,,,,,,,,,,2.47,,,,,,,,,,,,,,,,1.48,,,,,,,,,,,,,,,,,,,0.16,,,,,,,,,,,,,,,,0.04,,,,,,,,,,
5865420,Mixed Martial Arts,1022019255,MARTIAL_ARTS,Chris Weidman vs Eryk Anders,Wedstrijdnotering,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.18,,,1.68,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.1,,,1.67,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.08,,,0.01,,,,,,,,,,,,,,,,,,,,,,,,,,,
5880676,Mixed Martial Arts,1022019259,MARTIAL_ARTS,Jonathan Martinez vs Marcus McGhee,Wedstrijdnotering,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.1,,,,1.74,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.1,,,,1.67,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,0.07,,,,,,,,,,,,,,,


In [None]:
matched_events

{'Daniil Medvedev vs De Alex Minaur': 'Daniil Medvedev vs Alex De Minaur',
 'Jannik Sinner vs Taylor Fritz': 'Jannik Sinner vs Taylor Fritz',
 'Carlos Alcaraz vs Andrey Rublev': 'Carlos Alcaraz vs Andrey Rublev',
 'Alexander Zverev vs Casper Ruud': 'Alexander Zverev vs Casper Ruud',
 'Reece Robinson vs Sebastian Białecki': 'Reece Robinson vs Sebastian Bialecki',
 'Richie Burnett vs Adam Mould': 'Richie Burnett vs Adam Mould',
 'Tommy Morris vs Johann Brouwer': 'Tommy Morris vs Johann Brouwer',
 'Adam Mould vs Reece Robinson': 'Adam Mould vs Reece Robinson',
 'Sebastian Białecki vs Tommy Morris': 'Sebastian Bialecki vs Tommy Morris',
 'Johann Brouwer vs Richie Burnett': 'Johann Brouwer vs Richie Burnett',
 'Sebastian Białecki vs Adam Mould': 'Sebastian Bialecki vs Adam Mould',
 'Reece Robinson vs Johann Brouwer': 'Reece Robinson vs Johann Brouwer',
 'Tommy Morris vs Richie Burnett': 'Tommy Morris vs Richie Burnett',
 'Johann Brouwer vs Sebastian Białecki': 'Johann Brouwer vs Sebastian B

In [None]:
# event name is key
# event keys: Wedstrijd = Wedstrijd, outcome = 1 vs 2
#             Over/Under criterion_label + line : 'Totaal Aantal Gewonnen Games door Alcaraz, Carlos' + 125000.0 vs Market Name: 'Carlos Alcaraz Aantal Games - Over/Under 12.5'
set(kambi_filtered[kambi_filtered['sport'] == 'TENNIS'][['criterion_label',
       'criterion_english_label', 'occurrence_type', 'lifetime',
       'bet_offer_type_id', 'bet_offer_type_name',
       'bet_offer_type_english_name', 'event_id', 'outcome_id',
       'outcome_label', 'outcome_english_label', 'odds', 'line', 'participant',
       'type', 'status',
       'cash_out_status', 'home_score', 'away_score', 'event_name']].criterion_label)

{'Alex Minaur Wint een Set',
 'Alexander Zverev Wint een Set',
 'Andrey Rublev Wint een Set',
 'Carlos Alcaraz Wint een Set',
 'Casper Ruud Wint een Set',
 'Correcte Score - Set 1',
 'Daniil Medvedev Wint een Set',
 'Game Handicap',
 'Game Handicap - Set 1',
 'Jannik Sinner Wint een Set',
 'Meeste aces',
 'Set Handicap',
 'Setwedden',
 'Taylor Fritz Wint een Set',
 'Totaal Aantal Games',
 'Totaal Aantal Games - Set 1',
 'Totaal Aantal Games - Set 2',
 'Totaal Aantal Gewonnen Games door Alcaraz, Carlos',
 'Totaal Aantal Gewonnen Games door De Minaur, Alex',
 'Totaal Aantal Gewonnen Games door Fritz, Taylor',
 'Totaal Aantal Gewonnen Games door Medvedev, Daniil',
 'Totaal Aantal Gewonnen Games door Rublev, Andrey',
 'Totaal Aantal Gewonnen Games door Ruud, Casper',
 'Totaal Aantal Gewonnen Games door Sinner, Jannik',
 'Totaal Aantal Gewonnen Games door Zverev, Alexander',
 'Totaal Aantal Sets',
 'Totaal Aantal Tiebreaks',
 'Totaal aantal minuten',
 'Totaal aantal servicebreaks',
 'Wedstr

In [None]:
kambi_filtered[kambi_filtered['criterion_label']!='Wedstrijd'][['criterion_label',
       'criterion_english_label', 'occurrence_type', 'lifetime',
       'bet_offer_type_id', 'bet_offer_type_name',
       'bet_offer_type_english_name', 'event_id', 'outcome_id',
       'outcome_label', 'outcome_english_label', 'odds', 'line', 'participant',
       'type', 'status',
       'cash_out_status', 'home_score', 'away_score', 'event_name']].tail(60)

Unnamed: 0,criterion_label,criterion_english_label,occurrence_type,lifetime,bet_offer_type_id,bet_offer_type_name,bet_offer_type_english_name,event_id,outcome_id,outcome_label,outcome_english_label,odds,line,participant,type,status,cash_out_status,home_score,away_score,event_name
81408,Dubbele Kans - 1e Helft,Double Chance - 1st Half,GOALS,,12,Dubbele kans,Double Chance,1021152946,3565547609,12,12,1640.0,,,OT_ONE_OR_TWO,OPEN,ENABLED,,,Port Vale vs Wrexham
81409,Dubbele Kans - 1e Helft,Double Chance - 1st Half,GOALS,,12,Dubbele kans,Double Chance,1021152946,3565547615,X2,X2,1230.0,,,OT_CROSS_OR_TWO,OPEN,ENABLED,,,Port Vale vs Wrexham
81410,3-Way Handicap,3-Way Handicap,GOALS,FULL_TIME,11,3-Way handicap,3-Way Handicap,1021152946,3565547608,1,1,6750.0,-1000.0,Port Vale,1,OPEN,ENABLED,,,Port Vale vs Wrexham
81411,3-Way Handicap,3-Way Handicap,GOALS,FULL_TIME,11,3-Way handicap,3-Way Handicap,1021152946,3565547614,X,X,4700.0,-1000.0,,OT_CROSS,OPEN,ENABLED,,,Port Vale vs Wrexham
81412,3-Way Handicap,3-Way Handicap,GOALS,FULL_TIME,11,3-Way handicap,3-Way Handicap,1021152946,3565547622,2,2,1290.0,-1000.0,Wrexham,2,OPEN,ENABLED,,,Port Vale vs Wrexham
81413,Beide Teams Scoren in Beide Helften,Both Teams to Score in Both Halves,,,18,Ja/Nee,Yes/No,1021152946,3565547638,Ja,Yes,15000.0,,,OT_YES,OPEN,ENABLED,,,Port Vale vs Wrexham
81414,Beide Teams Scoren in Beide Helften,Both Teams to Score in Both Halves,,,18,Ja/Nee,Yes/No,1021152946,3565547667,Nee,No,1020.0,,,OT_NO,OPEN,ENABLED,,,Port Vale vs Wrexham
81415,Correcte Score,Correct Score,GOALS,FULL_TIME,3,Correcte Score,Correct Score,1021152946,3565547619,0-0,0-0,9500.0,,,OT_UNTYPED,OPEN,ENABLED,0.0,0.0,Port Vale vs Wrexham
81416,Correcte Score,Correct Score,GOALS,FULL_TIME,3,Correcte Score,Correct Score,1021152946,3565547627,0-1,0-1,7500.0,,,OT_UNTYPED,OPEN,ENABLED,0.0,1.0,Port Vale vs Wrexham
81417,Correcte Score,Correct Score,GOALS,FULL_TIME,3,Correcte Score,Correct Score,1021152946,3565547635,0-2,0-2,10500.0,,,OT_UNTYPED,OPEN,ENABLED,0.0,2.0,Port Vale vs Wrexham


In [None]:
toto_filtered[(toto_filtered['sport']=='Tennis') & (toto_filtered['event_id']==5943214)].drop_duplicates().head(50)

Unnamed: 0,event_id,Event Name,Market Name,Outcome Name,Odds (Decimal),Price Numerator,Price Denominator,Outcome Type,Outcome SubType,sport,competition
0,5943214,Daniil Medvedev vs Alex De Minaur,Alex De Minaur Wint een Set,Ja,1.35,7,20,--,,Tennis,"ATP Finals, Enkelspel"
3,5943214,Daniil Medvedev vs Alex De Minaur,Alex De Minaur Wint een Set,Nee,2.95,39,20,--,,Tennis,"ATP Finals, Enkelspel"
6,5943214,Daniil Medvedev vs Alex De Minaur,Wedstrijd,Alex De Minaur,1.93,93,100,HH,2,Tennis,"ATP Finals, Enkelspel"
9,5943214,Daniil Medvedev vs Alex De Minaur,Wedstrijd,Daniil Medvedev,1.92,23,25,HH,1,Tennis,"ATP Finals, Enkelspel"
12,5943214,Daniil Medvedev vs Alex De Minaur,Aantal Games - Odd/Even,Odd,1.87,87,100,OE,1,Tennis,"ATP Finals, Enkelspel"
15,5943214,Daniil Medvedev vs Alex De Minaur,Aantal Games - Odd/Even,Even,1.87,87,100,OE,2,Tennis,"ATP Finals, Enkelspel"
18,5943214,Daniil Medvedev vs Alex De Minaur,Set - Handicap 1.5,Alex De Minaur,2.95,39,20,WH,2,Tennis,"ATP Finals, Enkelspel"
21,5943214,Daniil Medvedev vs Alex De Minaur,Set - Handicap 1.5,Daniil Medvedev,1.35,7,20,WH,1,Tennis,"ATP Finals, Enkelspel"
24,5943214,Daniil Medvedev vs Alex De Minaur,Exact Sets,2 Sets,1.59,59,100,--,,Tennis,"ATP Finals, Enkelspel"
27,5943214,Daniil Medvedev vs Alex De Minaur,Exact Sets,3 Sets,2.27,127,100,--,,Tennis,"ATP Finals, Enkelspel"


In [None]:
import pandas as pd
import re
from fuzzywuzzy import fuzz, process

# Function to normalize names
def normalize_name(name):
    name = re.sub(r'\b(de|het|een)\b', '', name, flags=re.IGNORECASE)  # Remove articles
    name = re.sub(r'\s+', ' ', name).strip()  # Remove extra spaces
    return name

# Function to match names using fuzzy matching
def match_names(kambi_names, toto_names):
    normalized_toto = {name: normalize_name(name) for name in toto_names}
    normalized_kambi = {name: normalize_name(name) for name in kambi_names}
    matched_names = {}
    for kambi_name, normalized_kambi_name in normalized_kambi.items():
        match, score = process.extractOne(normalized_kambi_name, normalized_toto.values(), scorer=fuzz.token_sort_ratio)
        if score > 85:  # Adjust threshold
            matched_names[kambi_name] = [k for k, v in normalized_toto.items() if v == match][0]
        else:
            matched_names[kambi_name] = kambi_name
    return matched_names

# Normalize and map names in Kambi and Toto
kambi_names = kambi_filtered['bet_offer_type_name'].unique()
toto_names = toto_filtered['Market Name'].unique()
matched_names = match_names(kambi_names, toto_names)
kambi_filtered['normalized_bet_offer_type'] = kambi_filtered['bet_offer_type_name'].map(matched_names)
toto_filtered['normalized_market_name'] = toto_filtered['Market Name'].apply(normalize_name)

# Merge DataFrames using normalized columns
merged_data = pd.merge(
    kambi_filtered,
    toto_filtered,
    left_on=['standard_event_name', 'normalized_bet_offer_type', 'standard_outcome', 'Line'],
    right_on=['standard_event_name', 'normalized_market_name', 'standard_outcome', 'Line'],
    suffixes=('_kambi', '_toto')
)

# Output merged results
print(merged_data[['event_name', 'Market Name', 'criterion_label', 'outcome_label', 'Line']].drop_duplicates())

                          event_name Market Name             criterion_label  \
0      Jannik Sinner vs Taylor Fritz   Wedstrijd           Wedstrijdnotering   
1      Jannik Sinner vs Taylor Fritz   Wedstrijd           Wedstrijdnotering   
2    Carlos Alcaraz vs Andrey Rublev   Wedstrijd           Wedstrijdnotering   
3    Carlos Alcaraz vs Andrey Rublev   Wedstrijd           Wedstrijdnotering   
4    Alexander Zverev vs Casper Ruud   Wedstrijd           Wedstrijdnotering   
..                               ...         ...                         ...   
236       Mark Selby vs Shaun Murphy   Wedstrijd                     Frame 1   
238       Mark Selby vs Shaun Murphy   Wedstrijd       Meeste Century Breaks   
240       Mark Selby vs Shaun Murphy   Wedstrijd       Meeste Century Breaks   
242       Mark Selby vs Shaun Murphy   Wedstrijd  Meeste Half-Century Breaks   
244       Mark Selby vs Shaun Murphy   Wedstrijd  Meeste Half-Century Breaks   

         outcome_label  Line  
0       

In [None]:
merged_data[['event_name', 'Market Name', 'criterion_label', 'outcome_label', 'Line', 'odds', 'Odds (Decimal)','sport_toto']].drop_duplicates()

Unnamed: 0,event_name,Market Name,criterion_label,outcome_label,Line,odds,Odds (Decimal),sport_toto
0,Jannik Sinner vs Taylor Fritz,Wedstrijd,Wedstrijdnotering,"Sinner, Jannik",,1150.0,1.15,Tennis
1,Jannik Sinner vs Taylor Fritz,Wedstrijd,Wedstrijdnotering,"Fritz, Taylor",,5800.0,6.00,Tennis
2,Carlos Alcaraz vs Andrey Rublev,Wedstrijd,Wedstrijdnotering,"Alcaraz, Carlos",,1340.0,1.30,Tennis
3,Carlos Alcaraz vs Andrey Rublev,Wedstrijd,Wedstrijdnotering,"Rublev, Andrey",,3350.0,3.65,Tennis
4,Alexander Zverev vs Casper Ruud,Wedstrijd,Wedstrijdnotering,"Zverev, Alexander",,1140.0,1.15,Tennis
...,...,...,...,...,...,...,...,...
236,Mark Selby vs Shaun Murphy,Wedstrijd,Frame 1,"Murphy, Shaun",,1910.0,2.10,Snooker
238,Mark Selby vs Shaun Murphy,Wedstrijd,Meeste Century Breaks,"Selby, Mark",,3000.0,1.73,Snooker
240,Mark Selby vs Shaun Murphy,Wedstrijd,Meeste Century Breaks,"Murphy, Shaun",,3250.0,2.10,Snooker
242,Mark Selby vs Shaun Murphy,Wedstrijd,Meeste Half-Century Breaks,"Selby, Mark",,2000.0,1.73,Snooker


In [None]:
    left_on=['standard_event_name', 'normalized_bet_offer_type', 'standard_outcome', 'Line'],
    right_on['standard_event_name', 'normalized_market_name', 'standard_outcome', 'Line']=,

In [None]:
toto_filtered[(toto_filtered['sport']=='Voetbal') & (toto_filtered['Event Name'].str.lower().str.contains('peru')) & (toto_filtered['normalized_market_name'].str.contains('Over/Under'))][['standard_event_name', 'normalized_market_name', 'standard_outcome', 'Line']]

Unnamed: 0,standard_event_name,normalized_market_name,standard_outcome,Line
14439,Peru vs Chile,Aantal Goals - Over/Under 2.5,Over,2.5
14441,Peru vs Chile,Aantal Goals - Over/Under 2.5,Under,2.5
14443,Peru vs Chile,Aantal Goals - Over/Under 3.5,Over,3.5
14445,Peru vs Chile,Aantal Goals - Over/Under 3.5,Under,3.5
14447,Peru vs Chile,Aantal Goals - Over/Under 0.5,Under,0.5
14449,Peru vs Chile,Aantal Goals - Over/Under 0.5,Over,0.5
14451,Peru vs Chile,Aantal Goals - Over/Under 1.5,Over,1.5
14453,Peru vs Chile,Aantal Goals - Over/Under 1.5,Under,1.5
14455,Peru vs Chile,Aantal Goals - Over/Under 4.5,Under,4.5
14457,Peru vs Chile,Aantal Goals - Over/Under 4.5,Over,4.5


In [None]:
kambi_filterd[(kambi_filtered['sport']=='FOOTBALL') & kambi_filtered['event_name'].str.lower().str.contains('peru') & (kambi_filtered['normalized_bet_offer_type']=='Over/Onder')].head(60)

Unnamed: 0,bet_offer_id,criterion_id,criterion_label,criterion_english_label,occurrence_type,lifetime,bet_offer_type_id,bet_offer_type_name,bet_offer_type_english_name,event_id,outcome_id,outcome_label,outcome_english_label,odds,line,participant,type,changed_date,odds_fractional,odds_american,status,cash_out_status,home_score,away_score,event_name,sport,group_name,standard_event_name,Line,standard_bet_type,standard_outcome,normalized_bet_offer_type
67964,2470331702,1001159633,Totaal Aantal Doelpunten door Chili,Total Goals by Chile,,FULL_TIME,6,Over/Onder,Over/Under,1021874459,3560403762,Meer dan,Over,3550.0,1500.0,,OT_OVER,2024-11-11T12:45:13Z,5/2,255.0,OPEN,ENABLED,,,Peru vs Chile,FOOTBALL,,Peru vs Chili,0.15,Totaal Aantal Doelpunten door Chili,Meer dan,Over/Onder
67965,2470331702,1001159633,Totaal Aantal Doelpunten door Chili,Total Goals by Chile,,FULL_TIME,6,Over/Onder,Over/Under,1021874459,3560403766,Minder dan,Under,1250.0,1500.0,,OT_UNDER,2024-11-11T12:45:13Z,1/4,-400.0,OPEN,ENABLED,,,Peru vs Chile,FOOTBALL,,Peru vs Chili,0.15,Totaal Aantal Doelpunten door Chili,Minder dan,Over/Onder
67986,2470331715,1001159926,Totaal Aantal Doelpunten,Total Goals,GOALS,FULL_TIME,6,Over/Onder,Over/Under,1021874459,3560403804,Meer dan,Over,6000.0,3500.0,,OT_OVER,2024-11-11T12:45:13Z,5/1,500.0,OPEN,ENABLED,,,Peru vs Chile,FOOTBALL,,Peru vs Chili,0.35,Totaal Aantal Doelpunten,Meer dan,Over/Onder
67987,2470331715,1001159926,Totaal Aantal Doelpunten,Total Goals,GOALS,FULL_TIME,6,Over/Onder,Over/Under,1021874459,3560403806,Minder dan,Under,1130.0,3500.0,,OT_UNDER,2024-11-11T12:45:13Z,1/8,-770.0,OPEN,ENABLED,,,Peru vs Chile,FOOTBALL,,Peru vs Chili,0.35,Totaal Aantal Doelpunten,Minder dan,Over/Onder
67991,2470331718,1001159967,Totaal Aantal Doelpunten door Peru,Total Goals by Peru,,FULL_TIME,6,Over/Onder,Over/Under,1021874459,3560403809,Meer dan,Over,2800.0,1500.0,,OT_OVER,2024-11-11T12:45:13Z,9/5,180.0,OPEN,ENABLED,,,Peru vs Chile,FOOTBALL,,Peru vs Chili,0.15,Totaal Aantal Doelpunten door Peru,Meer dan,Over/Onder
67992,2470331718,1001159967,Totaal Aantal Doelpunten door Peru,Total Goals by Peru,,FULL_TIME,6,Over/Onder,Over/Under,1021874459,3560403812,Minder dan,Under,1380.0,1500.0,,OT_UNDER,2024-11-11T12:45:13Z,4/11,-265.0,OPEN,ENABLED,,,Peru vs Chile,FOOTBALL,,Peru vs Chili,0.15,Totaal Aantal Doelpunten door Peru,Minder dan,Over/Onder
68006,2470331724,1001159532,Totaal Aantal Doelpunten - 1e Helft,Total Goals - 1st Half,GOALS,,6,Over/Onder,Over/Under,1021874459,3560403827,Meer dan,Over,1560.0,500.0,,OT_OVER,2024-11-11T12:45:13Z,11/20,-180.0,OPEN,ENABLED,,,Peru vs Chile,FOOTBALL,,Peru vs Chili,0.05,Totaal Aantal Doelpunten - 1e Helft,Meer dan,Over/Onder
68007,2470331724,1001159532,Totaal Aantal Doelpunten - 1e Helft,Total Goals - 1st Half,GOALS,,6,Over/Onder,Over/Under,1021874459,3560403833,Minder dan,Under,2230.0,500.0,,OT_UNDER,2024-11-11T12:45:13Z,6/5,123.0,OPEN,ENABLED,,,Peru vs Chile,FOOTBALL,,Peru vs Chili,0.05,Totaal Aantal Doelpunten - 1e Helft,Minder dan,Over/Onder
68008,2470331725,1001159926,Totaal Aantal Doelpunten,Total Goals,GOALS,FULL_TIME,6,Over/Onder,Over/Under,1021874459,3560403835,Meer dan,Over,2750.0,2500.0,,OT_OVER,2024-11-11T12:45:13Z,7/4,175.0,OPEN,ENABLED,,,Peru vs Chile,FOOTBALL,,Peru vs Chili,0.25,Totaal Aantal Doelpunten,Meer dan,Over/Onder
68009,2470331725,1001159926,Totaal Aantal Doelpunten,Total Goals,GOALS,FULL_TIME,6,Over/Onder,Over/Under,1021874459,3560403842,Minder dan,Under,1450.0,2500.0,,OT_UNDER,2024-11-11T12:45:13Z,4/9,-225.0,OPEN,ENABLED,,,Peru vs Chile,FOOTBALL,,Peru vs Chili,0.25,Totaal Aantal Doelpunten,Minder dan,Over/Onder


In [None]:
# toto: kambi
voetbal_map = {'Resultaat': 'Wedstrijd',
               'Aantal Goals - Over/Under 0.5': 'Totaal Aantal Doelpunten' #icm line & kambi['outcome_english_label'] = 'Over' &
               } 