In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import plot_tree
from sklearn import tree
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from utils.updateStats import getStats, updateStats, createStats
pd.set_option('display.max_columns', None)
import json, hashlib

In [2]:
# Load the cleaned historical matches and replay them, one row at a time and in
# file order, through `updateStats`. After the loop `previous_stats` holds the
# state returned for the last row; the prediction cells pass it to `getStats`.
clean_data = pd.read_csv(filepath_or_buffer="../data/cleanedDataset.csv")

# presumably an empty stats container — see utils.updateStats to confirm
previous_stats = createStats()

n_matches = len(clean_data)
for _, match_row in tqdm(clean_data.iterrows(), total=n_matches):
    # each call receives the previous state and returns the updated one
    previous_stats = updateStats(match_row, previous_stats)

100%|██████████| 95375/95375 [00:07<00:00, 13621.28it/s]


In [26]:
# Restore the trained classifier from its saved JSON file
xgb_model = XGBClassifier()
xgb_model.load_model("../models/final_xgb_model.json")


def _label_outcome(pred):
    """Turn one predicted class into a readable label (0 means player 2 wins)."""
    return "Player 1 Wins" if pred != 0 else "Player 2 Wins"


# Element-wise version of the labeller, applied to the array returned by predict()
mapper = np.vectorize(_label_outcome)

In [36]:
# Single-match prediction: Carlos Alcaraz (player 1) vs Jannik Sinner (player 2).
# NOTE(review): ATP_POINTS is 0 for both players, so the points-difference
# feature comes out as 0 — confirm this is intentional and not a placeholder.
player1 = {
    "Name": "Carlos Alcaraz",   # label only; marked "not necessary" by the author
    "ID": 207989,               # player ID
    "ATP_POINTS": 0,            # ATP points
    "ATP_RANK": 2,              # ATP rank
    "AGE": 22,                  # age
    "HEIGHT": 183,              # height
}

player2 = {
    "Name": "Jannik Sinner",
    "ID": 206173,
    "ATP_POINTS": 0,
    "ATP_RANK": 1,
    "AGE": 23,
    "HEIGHT": 191,
}

match = {
    "BEST_OF": 5,               # 3 or 5 (grand slams)
    "DRAW_SIZE": 128,
    "SURFACE": "Clay",          # one of: Hard, Clay, Grass, Carpet
}

# Build the feature dict for this pairing from the accumulated stats
output = getStats(player1, player2, match, previous_stats)

# Order the features alphabetically by name, then wrap them in a one-row frame
feature_row = dict(sorted(output.items()))
match_data = pd.DataFrame([feature_row])

# Predict and show the readable label as the cell output
features = np.array(match_data, dtype=object)
mapper(xgb_model.predict(features))

{'BEST_OF': 5, 'DRAW_SIZE': 128, 'AGE_DIFF': -1, 'HEIGHT_DIFF': -8, 'ATP_RANK_DIFF': 1, 'ATP_POINTS_DIFF': 0, 'ELO_DIFF': -153.16437404770863, 'ELO_SURFACE_DIFF': 108.21368656080813, 'N_GAMES_DIFF': -77, 'H2H_DIFF': 2, 'H2H_SURFACE_DIFF': 0, 'WIN_LAST_3_DIFF': 0, 'ELO_GRAD_LAST_3_DIFF': np.float64(1.7327271277320402), 'P_ACE_LAST_3_DIFF': -0.9046534524275529, 'P_DF_LAST_3_DIFF': 0.26887536623844954, 'P_1STIN_LAST_3_DIFF': -6.015610357875765, 'P_1STWON_LAST_3_DIFF': 10.43896452683994, 'P_2NDWON_LAST_3_DIFF': 14.050224466891144, 'P_BPSAVED_LAST_3_DIFF': -22.222222222222214, 'WIN_LAST_5_DIFF': -1, 'ELO_GRAD_LAST_5_DIFF': np.float64(2.04631826295353), 'P_ACE_LAST_5_DIFF': -0.6334950193023374, 'P_DF_LAST_5_DIFF': 1.6146338159088085, 'P_1STIN_LAST_5_DIFF': -1.3387003726310454, 'P_1STWON_LAST_5_DIFF': 3.171083080704875, 'P_2NDWON_LAST_5_DIFF': 4.582410795259101, 'P_BPSAVED_LAST_5_DIFF': -15.833333333333329, 'WIN_LAST_10_DIFF': -3, 'ELO_GRAD_LAST_10_DIFF': np.float64(-8.11266189248028), 'P_ACE

array(['Player 2 Wins'], dtype='<U13')

In [5]:
# Single-match prediction: Fritz (player 1) vs Ruud (player 2), best of three on clay.
match = {
    "BEST_OF": 3,               # 3 or 5 (grand slams)
    "DRAW_SIZE": 128,
    "SURFACE": "Clay",          # one of: Hard, Clay, Grass, Carpet
}

player1 = {
    "Name": "Fritz",            # label only; marked "not necessary" by the author
    "ID": 126203,               # player ID
    "ATP_POINTS": 4815,         # ATP points
    "ATP_RANK": 4,              # ATP rank
    "AGE": 28,                  # age
    "HEIGHT": 196,              # height
}

player2 = {
    "Name": "Ruud",
    "ID": 134770,
    "ATP_POINTS": 2915,
    "ATP_RANK": 14,
    "AGE": 27,
    "HEIGHT": 183,
}

# Build the feature dict for this pairing from the accumulated stats
output = getStats(player1, player2, match, previous_stats)

# Order the features alphabetically by name, then wrap them in a one-row frame
feature_row = dict(sorted(output.items()))
match_data = pd.DataFrame([feature_row])

# Predict and show the readable label as the cell output
features = np.array(match_data, dtype=object)
mapper(xgb_model.predict(features))

array(['Player 2 Wins'], dtype='<U13')

In [6]:
# Inspect the feature frame currently bound to `match_data` (one row per predicted match)
match_data

Unnamed: 0,AGE_DIFF,ATP_POINTS_DIFF,ATP_RANK_DIFF,BEST_OF,DRAW_SIZE,ELO_DIFF,ELO_GRAD_LAST_100_DIFF,ELO_GRAD_LAST_10_DIFF,ELO_GRAD_LAST_200_DIFF,ELO_GRAD_LAST_25_DIFF,ELO_GRAD_LAST_3_DIFF,ELO_GRAD_LAST_50_DIFF,ELO_GRAD_LAST_5_DIFF,ELO_SURFACE_DIFF,H2H_DIFF,H2H_SURFACE_DIFF,HEIGHT_DIFF,N_GAMES_DIFF,P_1STIN_LAST_100_DIFF,P_1STIN_LAST_10_DIFF,P_1STIN_LAST_200_DIFF,P_1STIN_LAST_25_DIFF,P_1STIN_LAST_3_DIFF,P_1STIN_LAST_50_DIFF,P_1STIN_LAST_5_DIFF,P_1STWON_LAST_100_DIFF,P_1STWON_LAST_10_DIFF,P_1STWON_LAST_200_DIFF,P_1STWON_LAST_25_DIFF,P_1STWON_LAST_3_DIFF,P_1STWON_LAST_50_DIFF,P_1STWON_LAST_5_DIFF,P_2NDWON_LAST_100_DIFF,P_2NDWON_LAST_10_DIFF,P_2NDWON_LAST_200_DIFF,P_2NDWON_LAST_25_DIFF,P_2NDWON_LAST_3_DIFF,P_2NDWON_LAST_50_DIFF,P_2NDWON_LAST_5_DIFF,P_ACE_LAST_100_DIFF,P_ACE_LAST_10_DIFF,P_ACE_LAST_200_DIFF,P_ACE_LAST_25_DIFF,P_ACE_LAST_3_DIFF,P_ACE_LAST_50_DIFF,P_ACE_LAST_5_DIFF,P_BPSAVED_LAST_100_DIFF,P_BPSAVED_LAST_10_DIFF,P_BPSAVED_LAST_200_DIFF,P_BPSAVED_LAST_25_DIFF,P_BPSAVED_LAST_3_DIFF,P_BPSAVED_LAST_50_DIFF,P_BPSAVED_LAST_5_DIFF,P_DF_LAST_100_DIFF,P_DF_LAST_10_DIFF,P_DF_LAST_200_DIFF,P_DF_LAST_25_DIFF,P_DF_LAST_3_DIFF,P_DF_LAST_50_DIFF,P_DF_LAST_5_DIFF,WIN_LAST_100_DIFF,WIN_LAST_10_DIFF,WIN_LAST_200_DIFF,WIN_LAST_25_DIFF,WIN_LAST_3_DIFF,WIN_LAST_50_DIFF,WIN_LAST_5_DIFF
0,1,1900,-10,3,128,152.524575,0.36581,6.712093,0.562577,8.732341,-2.265762,3.497649,0.005462,-119.995234,-1,-1,13,81,-2.954803,-2.953234,-3.496765,-4.302408,-10.89653,-2.890145,-8.223294,6.476002,4.654115,5.357632,5.571129,2.546896,7.958218,6.923672,2.13115,-2.187207,0.270346,1.109648,0.31746,1.091473,1.56074,6.716255,3.484558,5.78068,4.073385,2.616184,7.069818,5.276409,7.23054,28.785714,5.540188,21.949533,19.444444,19.476543,24.428571,-0.322109,-0.932926,-0.137076,-0.256273,2.173708,-0.191981,1.304225,6,3,7,6,0,3,1


In [37]:
# How confident is the model? Ask for class probabilities on the current `match_data`.
probs = xgb_model.predict_proba(np.array(match_data, dtype=object))

# First (only) sample: index 0 is class 0 (player 2 wins), index 1 is class 1 (player 1 wins)
first_sample = probs[0]
prob_player1_wins = first_sample[1]
prob_player2_wins = first_sample[0]

# Report player 1 first, then player 2
for player, win_prob in ((player1, prob_player1_wins), (player2, prob_player2_wins)):
    print(f"Probability of {player['Name']} winning: {win_prob:.2%}")

Probability of Carlos Alcaraz winning: 48.40%
Probability of Jannik Sinner winning: 51.60%


In [22]:
# Build a mirrored copy of every match (player 1 and player 2 swapped, RESULT
# flipped) and stack it under the original rows.
df = pd.read_csv("../data/aus_open_2025.csv")

# Pair each p1_* column with its p2_* counterpart
p1_cols = [c for c in df.columns if c.startswith("p1_")]
column_pairs = [(p1_col, "p2_" + p1_col[3:]) for p1_col in p1_cols]

reversed_df = df.copy()
for p1_col, p2_col in column_pairs:
    # values are always read from the untouched `df`, so the swap is exact
    reversed_df[p1_col], reversed_df[p2_col] = df[p2_col], df[p1_col]

# With the players swapped, the outcome label has to be inverted as well
if "RESULT" in df.columns:
    reversed_df["RESULT"] = 1 - df["RESULT"]

# Original rows first, mirrored rows after, with a fresh 0..n-1 index
augmented = pd.concat([df, reversed_df], ignore_index=True)
aus_open_data = augmented
aus_open_data

Unnamed: 0,tourney_id,tourney_name,surface,draw_size,tourney_level,tourney_date,match_num,p1_id,p1_seed,p1_entry,p1_name,p1_hand,p1_ht,p1_ioc,p1_age,p2_id,p2_seed,p2_entry,p2_name,p2_hand,p2_ht,p2_ioc,p2_age,score,best_of,round,minutes,p1_ace,p1_df,p1_svpt,p1_1stIn,p1_1stWon,p1_2ndWon,p1_SvGms,p1_bpSaved,p1_bpFaced,p2_ace,p2_df,p2_svpt,p2_1stIn,p2_1stWon,p2_2ndWon,p2_SvGms,p2_bpSaved,p2_bpFaced,p1_rank,p1_rank_points,p2_rank,p2_rank_points,RESULT
0,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,117357,0.0,0.0,0.0,0.0,183.0,0.0,27.873374,200384,0.0,0.0,0.0,0.0,173.0,0.0,24.514716,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179,323,81,703,0
1,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,202261,0.0,0.0,0.0,0.0,193.0,0.0,23.780287,209950,0.0,0.0,0.0,0.0,185.0,0.0,20.804928,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,93,627,21,2280,0
2,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,105453,0.0,0.0,0.0,0.0,178.0,0.0,35.257358,106329,0.0,0.0,0.0,0.0,183.0,0.0,30.837782,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,76,743,105,566,1
3,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,134770,0.0,0.0,0.0,0.0,183.0,0.0,26.276523,144719,0.0,0.0,0.0,0.0,183.0,0.0,27.908966,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6,4210,62,922,1
4,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,207352,0.0,0.0,0.0,0.0,185.0,0.0,23.199863,200273,0.0,0.0,0.0,0.0,188.0,0.0,26.613279,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67,784,219,264,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
227,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,104925,0.0,0.0,0.0,0.0,188.0,0.0,37.862423,207989,0.0,0.0,0.0,0.0,183.0,0.0,21.908966,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7,3900,3,7010,1
228,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,132283,0.0,0.0,0.0,0.0,191.0,0.0,29.892539,210097,0.0,0.0,0.0,0.0,193.0,0.0,22.479124,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,55,1026,20,2280,0
229,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,200282,0.0,0.0,0.0,0.0,183.0,0.0,26.120465,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,11830,8,3535,1
230,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,210097,0.0,0.0,0.0,0.0,193.0,0.0,22.479124,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,11830,20,2280,1


In [4]:
# Load the Australian Open 2025 matches straight from disk.
# NOTE(review): when the notebook runs top to bottom this rebinds `aus_open_data`,
# replacing the augmented (original + mirrored) frame built by the preceding
# cell — confirm which version the evaluation below is meant to use.
aus_open_data = pd.read_csv(filepath_or_buffer="../data/aus_open_2025.csv")
aus_open_data

Unnamed: 0,tourney_id,tourney_name,surface,draw_size,tourney_level,tourney_date,match_num,p1_id,p1_seed,p1_entry,p1_name,p1_hand,p1_ht,p1_ioc,p1_age,p2_id,p2_seed,p2_entry,p2_name,p2_hand,p2_ht,p2_ioc,p2_age,score,best_of,round,minutes,p1_ace,p1_df,p1_svpt,p1_1stIn,p1_1stWon,p1_2ndWon,p1_SvGms,p1_bpSaved,p1_bpFaced,p2_ace,p2_df,p2_svpt,p2_1stIn,p2_1stWon,p2_2ndWon,p2_SvGms,p2_bpSaved,p2_bpFaced,p1_rank,p1_rank_points,p2_rank,p2_rank_points,RESULT
0,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,117357,0.0,0.0,0.0,0.0,183.0,0.0,27.873374,200384,0.0,0.0,0.0,0.0,173.0,0.0,24.514716,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179,323,81,703,0
1,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,202261,0.0,0.0,0.0,0.0,193.0,0.0,23.780287,209950,0.0,0.0,0.0,0.0,185.0,0.0,20.804928,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,93,627,21,2280,0
2,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,105453,0.0,0.0,0.0,0.0,178.0,0.0,35.257358,106329,0.0,0.0,0.0,0.0,183.0,0.0,30.837782,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,76,743,105,566,1
3,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,134770,0.0,0.0,0.0,0.0,183.0,0.0,26.276523,144719,0.0,0.0,0.0,0.0,183.0,0.0,27.908966,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6,4210,62,922,1
4,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,207352,0.0,0.0,0.0,0.0,185.0,0.0,23.199863,200273,0.0,0.0,0.0,0.0,188.0,0.0,26.613279,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67,784,219,264,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,207989,0.0,0.0,0.0,0.0,183.0,0.0,21.908966,104925,0.0,0.0,0.0,0.0,188.0,0.0,37.862423,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,7010,7,3900,0
112,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,210097,0.0,0.0,0.0,0.0,193.0,0.0,22.479124,132283,0.0,0.0,0.0,0.0,191.0,0.0,29.892539,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20,2280,55,1026,1
113,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,200282,0.0,0.0,0.0,0.0,183.0,0.0,26.120465,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8,3535,1,11830,0
114,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,210097,0.0,0.0,0.0,0.0,193.0,0.0,22.479124,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20,2280,1,11830,0


In [23]:
# Score every match in `aus_open_data` with the XGBoost model.
#
# Work on a copy: a plain assignment only aliases the input frame, so the
# PREDICTION / probability columns added at the end would also appear on
# `aus_open_data` and the cell would not be safe to re-run.
aus_open_predict = aus_open_data.copy()
predictions = []
probs_p1 = []
probs_p2 = []

for _, row in tqdm(aus_open_predict.iterrows(), total=len(aus_open_predict)):
    player1 = {
        "ID": row["p1_id"],
        "ATP_POINTS": row["p1_rank_points"],
        "ATP_RANK": row["p1_rank"],
        "AGE": row["p1_age"],
        "HEIGHT": row["p1_ht"],
    }

    player2 = {
        "ID": row["p2_id"],
        "ATP_POINTS": row["p2_rank_points"],
        "ATP_RANK": row["p2_rank"],
        "AGE": row["p2_age"],
        "HEIGHT": row["p2_ht"],
    }

    match = {
        "BEST_OF": row["best_of"],
        "DRAW_SIZE": row["draw_size"],
        "SURFACE": row["surface"],
    }

    # Build the feature dict for this pairing from the accumulated stats
    output = getStats(player1, player2, match, previous_stats)

    # Features ordered alphabetically by name, as in the single-match cells
    match_data = pd.DataFrame([dict(sorted(output.items()))])

    # Convert once and reuse the same array for both model calls
    features = np.array(match_data, dtype=object)

    # Predicted class for this match (1 = player 1 wins, 0 = player 2 wins)
    prediction = xgb_model.predict(features)
    predictions.append(prediction[0])

    # Class probabilities: index 0 -> player 2 wins, index 1 -> player 1 wins
    probs = xgb_model.predict_proba(features)
    prob_player1_wins = probs[0][1]
    prob_player2_wins = probs[0][0]

    probs_p1.append(prob_player1_wins)
    probs_p2.append(prob_player2_wins)


# Attach the model output to the (copied) match table
aus_open_predict["PREDICTION"] = predictions
aus_open_predict["% Player 1 Wins"] = probs_p1
aus_open_predict["% Player 2 Wins"] = probs_p2

 12%|█▎        | 29/232 [00:00<00:00, 281.74it/s]

HASH: c792e38d391773bf004193b8dddf288b
{'AGE_DIFF': 3.358658453114302, 'ATP_POINTS_DIFF': -380, 'ATP_RANK_DIFF': 98, 'BEST_OF': 5, 'DRAW_SIZE': 128, 'ELO_DIFF': -62.15804406073471, 'ELO_GRAD_LAST_100_DIFF': 0.6146704661162781, 'ELO_GRAD_LAST_10_DIFF': 0.6146704661162781, 'ELO_GRAD_LAST_200_DIFF': 0.6146704661162781, 'ELO_GRAD_LAST_25_DIFF': 0.6146704661162781, 'ELO_GRAD_LAST_3_DIFF': -2.1983976111736983, 'ELO_GRAD_LAST_50_DIFF': 0.6146704661162781, 'ELO_GRAD_LAST_5_DIFF': -3.063589658597922, 'ELO_SURFACE_DIFF': 14.044006231705907, 'H2H_DIFF': 0, 'H2H_SURFACE_DIFF': 0, 'HEIGHT_DIFF': 10.0, 'N_GAMES_DIFF': -91, 'P_1STIN_LAST_100_DIFF': 10.052860266118053, 'P_1STIN_LAST_10_DIFF': 8.735535381011488, 'P_1STIN_LAST_200_DIFF': 10.052860266118053, 'P_1STIN_LAST_25_DIFF': 9.362823259187032, 'P_1STIN_LAST_3_DIFF': 7.286214319709238, 'P_1STIN_LAST_50_DIFF': 9.62795829761832, 'P_1STIN_LAST_5_DIFF': 5.9904626080750845, 'P_1STWON_LAST_100_DIFF': -2.5715293731756645, 'P_1STWON_LAST_10_DIFF': -3.43235

 38%|███▊      | 87/232 [00:00<00:00, 279.07it/s]

HASH: 2d9193c5ac32bbfa70bb52a79120ca80
{'AGE_DIFF': -8.835728952772072, 'ATP_POINTS_DIFF': -3000, 'ATP_RANK_DIFF': 103, 'BEST_OF': 5, 'DRAW_SIZE': 128, 'ELO_DIFF': -154.45588984319443, 'ELO_GRAD_LAST_100_DIFF': 5.102350315399597, 'ELO_GRAD_LAST_10_DIFF': 12.696176735571559, 'ELO_GRAD_LAST_200_DIFF': 5.102350315399597, 'ELO_GRAD_LAST_25_DIFF': 5.102350315399597, 'ELO_GRAD_LAST_3_DIFF': 12.627925361374908, 'ELO_GRAD_LAST_50_DIFF': 5.102350315399597, 'ELO_GRAD_LAST_5_DIFF': 10.811820341741068, 'ELO_SURFACE_DIFF': -162.03902545118422, 'H2H_DIFF': 0, 'H2H_SURFACE_DIFF': 0, 'HEIGHT_DIFF': -3.0, 'N_GAMES_DIFF': -490, 'P_1STIN_LAST_100_DIFF': 3.2402820847445284, 'P_1STIN_LAST_10_DIFF': 5.377644478334389, 'P_1STIN_LAST_200_DIFF': 3.7431429965963687, 'P_1STIN_LAST_25_DIFF': 3.8006295640581413, 'P_1STIN_LAST_3_DIFF': 6.7255706724299955, 'P_1STIN_LAST_50_DIFF': 4.275780814506106, 'P_1STIN_LAST_5_DIFF': 7.531160561783224, 'P_1STWON_LAST_100_DIFF': -6.092382946341843, 'P_1STWON_LAST_10_DIFF': -3.700

 62%|██████▎   | 145/232 [00:00<00:00, 268.39it/s]

HASH: bbfad2fe6de208d19c1eea41c83d21df
{'AGE_DIFF': 0.07392197125256672, 'ATP_POINTS_DIFF': 4440, 'ATP_RANK_DIFF': -9, 'BEST_OF': 5, 'DRAW_SIZE': 128, 'ELO_DIFF': 144.1518983996848, 'ELO_GRAD_LAST_100_DIFF': -0.6128580575425211, 'ELO_GRAD_LAST_10_DIFF': 2.7590727777424364, 'ELO_GRAD_LAST_200_DIFF': -0.5562372186020335, 'ELO_GRAD_LAST_25_DIFF': 0.601334002272697, 'ELO_GRAD_LAST_3_DIFF': 24.765201195664137, 'ELO_GRAD_LAST_50_DIFF': -1.772559063948611, 'ELO_GRAD_LAST_5_DIFF': 11.601056893223888, 'ELO_SURFACE_DIFF': 157.85825526664485, 'H2H_DIFF': -2, 'H2H_SURFACE_DIFF': -2, 'HEIGHT_DIFF': 13.0, 'N_GAMES_DIFF': 351, 'P_1STIN_LAST_100_DIFF': 10.445255946921726, 'P_1STIN_LAST_10_DIFF': 9.702110442068594, 'P_1STIN_LAST_200_DIFF': 9.08868419942224, 'P_1STIN_LAST_25_DIFF': 5.346546105897836, 'P_1STIN_LAST_3_DIFF': 18.66408404060128, 'P_1STIN_LAST_50_DIFF': 7.171734765746798, 'P_1STIN_LAST_5_DIFF': 14.962803012242759, 'P_1STWON_LAST_100_DIFF': 4.1651113815011485, 'P_1STWON_LAST_10_DIFF': 9.92622

 88%|████████▊ | 203/232 [00:00<00:00, 253.38it/s]

HASH: 1c1b3435459b0f6187d1cea4a89edc95
{'AGE_DIFF': 11.002737850787131, 'ATP_POINTS_DIFF': 61, 'ATP_RANK_DIFF': -3, 'BEST_OF': 5, 'DRAW_SIZE': 128, 'ELO_DIFF': -15.014706159187881, 'ELO_GRAD_LAST_100_DIFF': -0.7261739265210783, 'ELO_GRAD_LAST_10_DIFF': -3.8156344155803508, 'ELO_GRAD_LAST_200_DIFF': -0.26574081153708184, 'ELO_GRAD_LAST_25_DIFF': -0.9418662286166182, 'ELO_GRAD_LAST_3_DIFF': 16.882167029925327, 'ELO_GRAD_LAST_50_DIFF': 0.12350193010333765, 'ELO_GRAD_LAST_5_DIFF': 2.2521163298460296, 'ELO_SURFACE_DIFF': -37.039781211644595, 'H2H_DIFF': -3, 'H2H_SURFACE_DIFF': -2, 'HEIGHT_DIFF': -2.0, 'N_GAMES_DIFF': 298, 'P_1STIN_LAST_100_DIFF': 6.792336848395053, 'P_1STIN_LAST_10_DIFF': 5.902456418504897, 'P_1STIN_LAST_200_DIFF': 6.3801413912090865, 'P_1STIN_LAST_25_DIFF': 4.291715995012979, 'P_1STIN_LAST_3_DIFF': 6.703531936714924, 'P_1STIN_LAST_50_DIFF': 7.544752485678515, 'P_1STIN_LAST_5_DIFF': 9.080157044496168, 'P_1STWON_LAST_100_DIFF': -8.073852086820807, 'P_1STWON_LAST_10_DIFF': -1

100%|██████████| 232/232 [00:00<00:00, 237.59it/s]

HASH: 13c0082e1e7b7cfc7141a86693da458c
{'AGE_DIFF': 6.206023271731691, 'ATP_POINTS_DIFF': -5565, 'ATP_RANK_DIFF': 30, 'BEST_OF': 5, 'DRAW_SIZE': 128, 'ELO_DIFF': -418.216074681568, 'ELO_GRAD_LAST_100_DIFF': -0.3902054461802056, 'ELO_GRAD_LAST_10_DIFF': 3.754967361159097, 'ELO_GRAD_LAST_200_DIFF': -0.3902054461802056, 'ELO_GRAD_LAST_25_DIFF': 4.838400814713545, 'ELO_GRAD_LAST_3_DIFF': -16.936920910728407, 'ELO_GRAD_LAST_50_DIFF': 2.090413906134561, 'ELO_GRAD_LAST_5_DIFF': -15.494485952498277, 'ELO_SURFACE_DIFF': -388.2801263600386, 'H2H_DIFF': -1, 'H2H_SURFACE_DIFF': 0, 'HEIGHT_DIFF': 2.0, 'N_GAMES_DIFF': -178, 'P_1STIN_LAST_100_DIFF': -1.519379961549859, 'P_1STIN_LAST_10_DIFF': -2.6211025008167113, 'P_1STIN_LAST_200_DIFF': -1.9556427067182227, 'P_1STIN_LAST_25_DIFF': -0.4859883308749602, 'P_1STIN_LAST_3_DIFF': 11.013556933728168, 'P_1STIN_LAST_50_DIFF': -2.7234247845938953, 'P_1STIN_LAST_5_DIFF': -1.605430053327325, 'P_1STWON_LAST_100_DIFF': -4.727865740829714, 'P_1STWON_LAST_10_DIFF':




In [24]:
# Model accuracy on the scored matches; accuracy_score expects (y_true, y_pred),
# so the ground truth goes first, matching the baseline cells further down.
accuracy_score(aus_open_predict["RESULT"], aus_open_predict["PREDICTION"])

0.7370689655172413

In [7]:
# Every match with player ID 206173 (Jannik Sinner) on either side
sinner_id = 206173
involves_sinner = aus_open_predict[["p1_id", "p2_id"]].eq(sinner_id).any(axis=1)
aus_open_predict[involves_sinner]

Unnamed: 0,tourney_id,tourney_name,surface,draw_size,tourney_level,tourney_date,match_num,p1_id,p1_seed,p1_entry,p1_name,p1_hand,p1_ht,p1_ioc,p1_age,p2_id,p2_seed,p2_entry,p2_name,p2_hand,p2_ht,p2_ioc,p2_age,score,best_of,round,minutes,p1_ace,p1_df,p1_svpt,p1_1stIn,p1_1stWon,p1_2ndWon,p1_SvGms,p1_bpSaved,p1_bpFaced,p2_ace,p2_df,p2_svpt,p2_1stIn,p2_1stWon,p2_2ndWon,p2_SvGms,p2_bpSaved,p2_bpFaced,p1_rank,p1_rank_points,p2_rank,p2_rank_points,RESULT,PREDICTION,% Player 1 Wins,% Player 2 Wins
19,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,111797,0.0,0.0,0.0,0.0,201.0,0.0,29.473648,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,11830,36,1340,1,1,0.916674,0.083326
87,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,209262,0.0,0.0,0.0,0.0,183.0,0.0,24.095825,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,11830,173,336,1,1,0.935713,0.064287
102,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,106218,0.0,0.0,0.0,0.0,180.0,0.0,31.689938,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,11830,46,1175,1,1,0.937351,0.062649
107,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,208029,0.0,0.0,0.0,0.0,188.0,0.0,21.925394,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,11830,13,2910,1,1,0.884937,0.115063
113,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,200282,0.0,0.0,0.0,0.0,183.0,0.0,26.120465,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8,3535,1,11830,0,0,0.122388,0.877612
114,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,210097,0.0,0.0,0.0,0.0,193.0,0.0,22.479124,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20,2280,1,11830,0,0,0.117114,0.882886
115,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,100644,0.0,0.0,0.0,0.0,198.0,0.0,27.950034,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,7635,1,11830,0,0,0.258674,0.741326


In [8]:
# Every match with player ID 207989 (Carlos Alcaraz) on either side
alcaraz_id = 207989
involves_alcaraz = aus_open_predict[["p1_id", "p2_id"]].eq(alcaraz_id).any(axis=1)
aus_open_predict[involves_alcaraz]

Unnamed: 0,tourney_id,tourney_name,surface,draw_size,tourney_level,tourney_date,match_num,p1_id,p1_seed,p1_entry,p1_name,p1_hand,p1_ht,p1_ioc,p1_age,p2_id,p2_seed,p2_entry,p2_name,p2_hand,p2_ht,p2_ioc,p2_age,score,best_of,round,minutes,p1_ace,p1_df,p1_svpt,p1_1stIn,p1_1stWon,p1_2ndWon,p1_SvGms,p1_bpSaved,p1_bpFaced,p2_ace,p2_df,p2_svpt,p2_1stIn,p2_1stWon,p2_2ndWon,p2_SvGms,p2_bpSaved,p2_bpFaced,p1_rank,p1_rank_points,p2_rank,p2_rank_points,RESULT,PREDICTION,% Player 1 Wins,% Player 2 Wins
30,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,207686,0.0,0.0,0.0,0.0,185.0,0.0,24.339493,207989,0.0,0.0,0.0,0.0,183.0,0.0,21.908966,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,77,743,3,7010,0,0,0.071731,0.928269
60,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,207989,0.0,0.0,0.0,0.0,183.0,0.0,21.908966,106415,0.0,0.0,0.0,0.0,170.0,0.0,29.511978,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,7010,65,807,1,1,0.907844,0.092156
90,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,207989,0.0,0.0,0.0,0.0,183.0,0.0,21.908966,132686,0.0,0.0,0.0,0.0,185.0,0.0,28.11499,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,7010,33,1445,1,1,0.905995,0.094005
111,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,207989,0.0,0.0,0.0,0.0,183.0,0.0,21.908966,104925,0.0,0.0,0.0,0.0,188.0,0.0,37.862423,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,7010,7,3900,0,1,0.556514,0.443486


In [9]:
# Correct Results
aus_open_predict[((aus_open_predict["RESULT"] == 1) & (aus_open_predict["PREDICTION"] == 1)) 
                 | ((aus_open_predict["RESULT"] == 0) & (aus_open_predict["PREDICTION"] == 0))]

Unnamed: 0,tourney_id,tourney_name,surface,draw_size,tourney_level,tourney_date,match_num,p1_id,p1_seed,p1_entry,p1_name,p1_hand,p1_ht,p1_ioc,p1_age,p2_id,p2_seed,p2_entry,p2_name,p2_hand,p2_ht,p2_ioc,p2_age,score,best_of,round,minutes,p1_ace,p1_df,p1_svpt,p1_1stIn,p1_1stWon,p1_2ndWon,p1_SvGms,p1_bpSaved,p1_bpFaced,p2_ace,p2_df,p2_svpt,p2_1stIn,p2_1stWon,p2_2ndWon,p2_SvGms,p2_bpSaved,p2_bpFaced,p1_rank,p1_rank_points,p2_rank,p2_rank_points,RESULT,PREDICTION,% Player 1 Wins,% Player 2 Wins
0,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,117357,0.0,0.0,0.0,0.0,183.0,0.0,27.873374,200384,0.0,0.0,0.0,0.0,173.0,0.0,24.514716,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179,323,81,703,0,0,0.426625,0.573375
1,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,202261,0.0,0.0,0.0,0.0,193.0,0.0,23.780287,209950,0.0,0.0,0.0,0.0,185.0,0.0,20.804928,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,93,627,21,2280,0,0,0.266226,0.733774
2,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,105453,0.0,0.0,0.0,0.0,178.0,0.0,35.257358,106329,0.0,0.0,0.0,0.0,183.0,0.0,30.837782,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,76,743,105,566,1,1,0.711803,0.288197
3,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,134770,0.0,0.0,0.0,0.0,183.0,0.0,26.276523,144719,0.0,0.0,0.0,0.0,183.0,0.0,27.908966,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6,4210,62,922,1,1,0.858474,0.141526
5,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,208103,0.0,0.0,0.0,0.0,185.0,0.0,23.396988,111454,0.0,0.0,0.0,0.0,183.0,0.0,28.848734,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,29,1660,168,342,1,1,0.804986,0.195014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,100644,0.0,0.0,0.0,0.0,198.0,0.0,27.950034,126205,0.0,0.0,0.0,0.0,185.0,0.0,27.876112,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,7635,11,3195,1,1,0.773293,0.226707
112,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,210097,0.0,0.0,0.0,0.0,193.0,0.0,22.479124,132283,0.0,0.0,0.0,0.0,191.0,0.0,29.892539,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20,2280,55,1026,1,1,0.794378,0.205622
113,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,200282,0.0,0.0,0.0,0.0,183.0,0.0,26.120465,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8,3535,1,11830,0,0,0.122388,0.877612
114,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,210097,0.0,0.0,0.0,0.0,193.0,0.0,22.479124,206173,0.0,0.0,0.0,0.0,191.0,0.0,23.626968,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20,2280,1,11830,0,0,0.117114,0.882886


In [14]:
# Wrong Results
aus_open_predict[((aus_open_predict["RESULT"] == 0) & (aus_open_predict["PREDICTION"] == 1)) 
                 | ((aus_open_predict["RESULT"] == 1) & (aus_open_predict["PREDICTION"] == 0))]

Unnamed: 0,tourney_id,tourney_name,surface,draw_size,tourney_level,tourney_date,match_num,p1_id,p1_seed,p1_entry,p1_name,p1_hand,p1_ht,p1_ioc,p1_age,p2_id,p2_seed,p2_entry,p2_name,p2_hand,p2_ht,p2_ioc,p2_age,score,best_of,round,minutes,p1_ace,p1_df,p1_svpt,p1_1stIn,p1_1stWon,p1_2ndWon,p1_SvGms,p1_bpSaved,p1_bpFaced,p2_ace,p2_df,p2_svpt,p2_1stIn,p2_1stWon,p2_2ndWon,p2_SvGms,p2_bpSaved,p2_bpFaced,p1_rank,p1_rank_points,p2_rank,p2_rank_points,RESULT,PREDICTION,% Player 1 Wins,% Player 2 Wins
4,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,207352,0.0,0.0,0.0,0.0,185.0,0.0,23.199863,200273,0.0,0.0,0.0,0.0,188.0,0.0,26.613279,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67,784,219,264,0,1,0.672703,0.327297
12,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,106148,0.0,0.0,0.0,0.0,183.0,0.0,32.027379,126214,0.0,0.0,0.0,0.0,188.0,0.0,27.832307,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,57,981,26,1705,1,0,0.460849,0.539151
14,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,210506,0.0,0.0,0.0,0.0,193.0,0.0,20.602327,126774,0.0,0.0,0.0,0.0,193.0,0.0,26.637919,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,42,1270,12,3195,1,0,0.364411,0.635589
17,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,105902,0.0,0.0,0.0,0.0,183.0,0.0,33.194387,208502,0.0,0.0,0.0,0.0,183.0,0.0,22.626968,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,89,637,304,173,1,0,0.401684,0.598316
18,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,211776,0.0,0.0,0.0,0.0,191.0,0.0,19.229979,210317,0.0,0.0,0.0,0.0,188.0,0.0,21.741958,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,142,414,258,216,0,1,0.515862,0.484138
39,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,106432,0.0,0.0,0.0,0.0,188.0,0.0,28.380561,106426,0.0,0.0,0.0,0.0,185.0,0.0,28.84052,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,87,639,150,382,0,1,0.563495,0.436505
42,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,208659,0.0,0.0,0.0,0.0,203.0,0.0,21.733744,104792,0.0,0.0,0.0,0.0,193.0,0.0,38.583162,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30,1651,41,1280,0,1,0.554583,0.445417
44,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,200267,0.0,0.0,0.0,0.0,185.0,0.0,25.829569,207680,0.0,0.0,0.0,0.0,183.0,0.0,24.295688,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60,948,73,758,0,1,0.682218,0.317782
46,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,207681,0.0,0.0,0.0,0.0,178.0,0.0,24.487337,127157,0.0,0.0,0.0,0.0,188.0,0.0,26.553046,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,85,662,101,582,0,1,0.564593,0.435407
50,AUSTRALIAN_OPEN_2025,0.0,Hard,128,0.0,0.0,0.0,207729,0.0,0.0,0.0,0.0,188.0,0.0,23.947296,105948,0.0,0.0,0.0,0.0,180.0,0.0,33.065708,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,136,448,96,617,1,0,0.414628,0.585372


In [28]:
# Accuracy test based on Elo: pick a winner from the sign of ELO_DIFF alone
# and compare against RESULT, as a baseline for the XGBoost model.
# (assumes a positive ELO_DIFF favours player 1 — TODO confirm in getStats)

elo_preds = []

for _, row in tqdm(aus_open_predict.iterrows(), total=len(aus_open_predict)):
    player1 = {
        "ID":         row["p1_id"],
        "ATP_POINTS": row["p1_rank_points"],
        "ATP_RANK":   row["p1_rank"],
        "AGE":        row["p1_age"],
        "HEIGHT":     row["p1_ht"],
    }
    player2 = {
        "ID":         row["p2_id"],
        "ATP_POINTS": row["p2_rank_points"],
        "ATP_RANK":   row["p2_rank"],
        "AGE":        row["p2_age"],
        "HEIGHT":     row["p2_ht"],
    }
    match = {
        "BEST_OF":   row["best_of"],
        "DRAW_SIZE": row["draw_size"],
        "SURFACE":   row["surface"],
    }

    # Only the ELO_DIFF entry of the feature dict is needed here
    output = getStats(player1, player2, match, previous_stats)
    diff = output.get("ELO_DIFF", np.nan)

    # The per-match debug print was removed: it emitted one line per row and
    # interleaved with the tqdm progress bar.
    if pd.isna(diff) or diff == 0:
        # missing or tied Elo gives no basis for a pick; excluded from the score below
        elo_preds.append(np.nan)
    else:
        elo_preds.append(1 if diff > 0 else 0)

aus_open_predict["ELO_PRED"] = elo_preds

# Score only the rows where Elo produced a pick
mask_elo = aus_open_predict["ELO_PRED"].notna()
elo_acc = accuracy_score(
    aus_open_predict.loc[mask_elo, "RESULT"],
    aus_open_predict.loc[mask_elo, "ELO_PRED"]
)

print(f"Elo-only baseline accuracy: {elo_acc:.2%} "
      f"on {mask_elo.sum()} of {len(aus_open_predict)} matches")


 88%|████████▊ | 203/232 [00:00<00:00, 1013.75it/s]

HASH: c792e38d391773bf004193b8dddf288b
{'AGE_DIFF': 3.358658453114302, 'ATP_POINTS_DIFF': -380, 'ATP_RANK_DIFF': 98, 'BEST_OF': 5, 'DRAW_SIZE': 128, 'ELO_DIFF': -62.15804406073471, 'ELO_GRAD_LAST_100_DIFF': 0.6146704661162781, 'ELO_GRAD_LAST_10_DIFF': 0.6146704661162781, 'ELO_GRAD_LAST_200_DIFF': 0.6146704661162781, 'ELO_GRAD_LAST_25_DIFF': 0.6146704661162781, 'ELO_GRAD_LAST_3_DIFF': -2.1983976111736983, 'ELO_GRAD_LAST_50_DIFF': 0.6146704661162781, 'ELO_GRAD_LAST_5_DIFF': -3.063589658597922, 'ELO_SURFACE_DIFF': 14.044006231705907, 'H2H_DIFF': 0, 'H2H_SURFACE_DIFF': 0, 'HEIGHT_DIFF': 10.0, 'N_GAMES_DIFF': -91, 'P_1STIN_LAST_100_DIFF': 10.052860266118053, 'P_1STIN_LAST_10_DIFF': 8.735535381011488, 'P_1STIN_LAST_200_DIFF': 10.052860266118053, 'P_1STIN_LAST_25_DIFF': 9.362823259187032, 'P_1STIN_LAST_3_DIFF': 7.286214319709238, 'P_1STIN_LAST_50_DIFF': 9.62795829761832, 'P_1STIN_LAST_5_DIFF': 5.9904626080750845, 'P_1STWON_LAST_100_DIFF': -2.5715293731756645, 'P_1STWON_LAST_10_DIFF': -3.43235

100%|██████████| 232/232 [00:00<00:00, 973.92it/s] 

HASH: 4faeaa29f12eb5feebad267834a3153b
{'AGE_DIFF': 0.4688569472963735, 'ATP_POINTS_DIFF': -11494, 'ATP_RANK_DIFF': 172, 'BEST_OF': 5, 'DRAW_SIZE': 128, 'ELO_DIFF': -677.0250931684106, 'ELO_GRAD_LAST_100_DIFF': -3.4891798058411787, 'ELO_GRAD_LAST_10_DIFF': -3.4891798058411787, 'ELO_GRAD_LAST_200_DIFF': -3.4891798058411787, 'ELO_GRAD_LAST_25_DIFF': -3.4891798058411787, 'ELO_GRAD_LAST_3_DIFF': -0.01717488281838664, 'ELO_GRAD_LAST_50_DIFF': -3.4891798058411787, 'ELO_GRAD_LAST_5_DIFF': -4.445329398296652, 'ELO_SURFACE_DIFF': -651.402494384567, 'H2H_DIFF': 0, 'H2H_SURFACE_DIFF': 0, 'HEIGHT_DIFF': -8.0, 'N_GAMES_DIFF': -335, 'P_1STIN_LAST_100_DIFF': -4.565738274689913, 'P_1STIN_LAST_10_DIFF': -7.134578272098267, 'P_1STIN_LAST_200_DIFF': -2.1365434490268242, 'P_1STIN_LAST_25_DIFF': -4.559032308255034, 'P_1STIN_LAST_3_DIFF': -5.2410772917388755, 'P_1STIN_LAST_50_DIFF': -3.8941317081364417, 'P_1STIN_LAST_5_DIFF': -5.305338561321861, 'P_1STWON_LAST_100_DIFF': -3.177229067868538, 'P_1STWON_LAST_1




In [15]:
# ATP rank wins
def rank_points_baseline(row):
    """Pick a winner for one match from ATP rank, falling back to ATP points.

    `row` is indexed by 'p1_rank', 'p2_rank', 'p1_rank_points' and
    'p2_rank_points'. Returns 1 when player 1 is favoured, 0 when player 2 is
    favoured, and NaN when nothing separates them.

    Order of precedence:
      1. both ranks known and different -> the lower rank number wins
      2. both points known and different -> the higher points total wins
      3. only one player has a rank -> that player wins
      4. only one player has points -> that player wins
    """
    rank1, rank2 = row['p1_rank'], row['p2_rank']
    points1, points2 = row['p1_rank_points'], row['p2_rank_points']

    has_rank1, has_rank2 = pd.notna(rank1), pd.notna(rank2)
    has_points1, has_points2 = pd.notna(points1), pd.notna(points2)

    if has_rank1 and has_rank2 and rank1 != rank2:
        return int(rank1 < rank2)

    if has_points1 and has_points2 and points1 != points2:
        return int(points1 > points2)

    # Exactly one side has a value: favour whoever has it (rank checked before points)
    if has_rank1 != has_rank2:
        return 1 if has_rank1 else 0
    if has_points1 != has_points2:
        return 1 if has_points1 else 0

    return np.nan          # no basis to decide

# Apply the rank→points heuristic to every match and store the pick as a new column
aus_open_predict['RANK_POINTS_PRED'] = aus_open_predict.apply(rank_points_baseline, axis=1)

# Matches the heuristic could not decide (NaN) are left out of the score
mask = aus_open_predict['RANK_POINTS_PRED'].notna()
decided = aus_open_predict.loc[mask, ['RESULT', 'RANK_POINTS_PRED']]
baseline_acc = accuracy_score(decided['RESULT'], decided['RANK_POINTS_PRED'])

n_scored = mask.sum()
n_total = len(aus_open_predict)
print(f"Baseline (rank→points) accuracy: {baseline_acc:.2%} "
      f"on {n_scored} of {n_total} matches")


Baseline (rank→points) accuracy: 68.10% on 116 of 116 matches
