<a href="https://colab.research.google.com/github/ansonkwokth/TableTennisPrediction/blob/main/inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Fetch the project repository and switch the working directory into it so the
# `utils` / `model` imports in the next cell can resolve (presumably those
# packages live at the repository root — confirm).
# NOTE(review): not idempotent — a second run in the same session starts from
# inside the clone, so restart the runtime before re-running this cell.
!git clone https://github.com/ansonkwokth/TableTennisPrediction.git
%cd TableTennisPrediction

Cloning into 'TableTennisPrediction'...
remote: Enumerating objects: 151, done.[K
remote: Counting objects: 100% (151/151), done.[K
remote: Compressing objects: 100% (127/127), done.[K
remote: Total 151 (delta 68), reused 42 (delta 17), pack-reused 0 (from 0)[K
Receiving objects: 100% (151/151), 477.82 KiB | 1.82 MiB/s, done.
Resolving deltas: 100% (68/68), done.
/content/TableTennisPrediction


In [2]:
import pandas as pd
from utils import data_loader as dl

import numpy as np
from model.Elo import Elo
from model.ModifiedElo import ModifiedElo
from model.ensemble import BaggingRatingSystem

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm

import copy

import warnings
# NOTE(review): with no category/module arguments this suppresses every warning
# for the rest of the session; consider narrowing the filter to the specific
# warning that is known to be noisy.
warnings.filterwarnings('ignore')

In [3]:
# Dataset selector: leave exactly one GAME assignment uncommented. The value is
# checked in the data-loading cell, which raises ValueError for any name other
# than the five listed here.
# GAME = 'TTStar'
# GAME = 'TTCup'
# GAME = 'SetkaCup'
GAME = 'SetkaCupWomen'
# GAME = 'LigaPro'


In [4]:
# Seasons to load for each supported league.
YEARS_BY_GAME = {
    'TTStar': (2020, 2021, 2022, 2023, 2024),
    'TTCup': (2020, 2021, 2022, 2023, 2024),
    'SetkaCup': (2020, 2021, 2022, 2023, 2024),
    'SetkaCupWomen': (2020, 2021, 2022, 2023, 2024),
    'LigaPro': (2022, 2023, 2024),
}

if GAME not in YEARS_BY_GAME:
    raise ValueError("Invalid game selected.")
years = list(YEARS_BY_GAME[GAME])

# '../' is the location handed to the loader; its log output shows files named
# <GAME><year>.txt being read from there (i.e. the parent of the repo directory).
text_data_game = dl.load_game_data(GAME, years, '../')

# Keep only the requested seasons, keyed by year, then build the combined table.
text_data = dict((season, text_data_game[season]) for season in years)
df = dl.create_game_dfs(GAME, years, text_data)

Loading ..//SetkaCupWomen2020.txt
Loading ..//SetkaCupWomen2021.txt
Loading ..//SetkaCupWomen2022.txt
Loading ..//SetkaCupWomen2023.txt
Loading ..//SetkaCupWomen2024.txt


In [5]:
# Rows are labelled in consecutive pairs: rows 0-1 get ID 0, rows 2-3 get ID 1,
# and so on (presumably one pair per match, one row per player — confirm
# against the loader). The list has 2 * (len(df) // 2) entries.
idx_lt = [row // 2 for row in range(2 * (len(df) // 2))]
df['ID'] = idx_lt

# Replace whatever index the loader produced with a plain 0..n-1 range.
df.reset_index(drop=True, inplace=True)

# Distinct values of the 'Player' column.
player_lt = df['Player'].unique()



In [6]:
def format_to_array(df: pd.DataFrame) -> np.ndarray:
    """Pack a two-rows-per-pair DataFrame into a ``(n_pairs, 2, n_features)`` array.

    The bookkeeping columns 'Round', 'Datetime', 'Game', 'Date' and 'Time' are
    dropped; every other column (including 'ID', if present) is kept as a
    feature. Columns whose name contains "Set" are converted to float.

    Unlike the previous version, the conversion is done on a copy, so the
    caller's DataFrame is left untouched.

    Args:
        df: Table whose consecutive row pairs belong together. Must have an
            even number of rows.

    Returns:
        Array of shape ``(len(df) // 2, 2, n_features)``; feature order follows
        the column order of ``df``.

    Raises:
        ValueError: If ``df`` has an odd number of rows, or a "Set" column
            cannot be converted to float.
    """
    info_col = ['Round', 'Datetime', 'Game', 'Date', 'Time']
    col = [item for item in df.columns if item not in info_col]
    set_col = [c for c in col if "Set" in c]

    if len(df) % 2 != 0:
        # Fail with a clear message instead of numpy's generic reshape error.
        raise ValueError(
            f"Expected an even number of rows (two per pair), got {len(df)}."
        )

    # astype with a mapping returns a new frame, so `df` itself is not modified.
    features = df[col].astype({c: float for c in set_col})
    return features.values.reshape(-1, 2, len(col))

In [7]:
# Convert the whole table into the paired (n_pairs, 2, n_features) array that
# the model is fitted on below.
X_all = format_to_array(df)

In [8]:
# Fit a ModifiedElo model (default constructor arguments) on all loaded data.
# The fitted instance is kept as a notebook-level global and queried by the
# lookup / prediction cells that follow.
modelMElo = ModifiedElo()
modelMElo.fit(X_all)


Training model: 100%|██████████| 34579/34579 [00:01<00:00, 18490.25it/s]


In [17]:
def find_player(name, model=None):
    """Print and return every key of ``model.params`` that contains ``name``.

    Args:
        name: Substring to look for; the test is a plain, case-sensitive
            ``name in key``.
        model: Object exposing a ``params`` mapping keyed by player name.
            Defaults to the notebook-level ``modelMElo``, so existing
            ``find_player(name)`` calls behave exactly as before.

    Returns:
        list: The matching keys, in the mapping's iteration order. Each match
        is also printed on its own line.
    """
    if model is None:
        model = modelMElo  # fall back to the fitted notebook-level model

    p_lt = []
    for player, _ in model.params.items():
        if name not in player:
            continue
        print(player)
        p_lt.append(player)
    return p_lt

In [106]:
def find_OU(player1, player2, line=None, model=None):
    """Split the combined set-configuration probabilities at an over/under line.

    The last element returned by ``model.predict_set_config`` is read as a
    mapping ``{key: probability}`` for each ordering of the two players
    (presumably keyed by total points in a set — confirm against the model).
    The two mappings are added key by key, then the mass is split into keys
    strictly above ``line`` ("over") and the rest ("under").

    Args:
        player1: First player's key in the model.
        player2: Second player's key in the model.
        line: Over/under threshold. If omitted, the notebook-level global
            ``line`` is used, which keeps old ``find_OU(p1, p2)`` calls working.
        model: Object providing ``predict_set_config``. Defaults to the
            notebook-level ``modelMElo``.

    Returns:
        tuple: ``(p_O, p_U)`` — summed probability above the line, and at or
        below it.

    Raises:
        NameError: If ``line`` is omitted and no global ``line`` exists.
        KeyError: If a key of the (player1, player2) mapping is missing from
            the (player2, player1) mapping.
    """
    if model is None:
        model = modelMElo  # fall back to the fitted notebook-level model
    if line is None:
        # Backward compatibility: the original read the threshold from a
        # global that is only assigned in a later cell.
        try:
            line = globals()["line"]
        except KeyError:
            raise NameError(
                "name 'line' is not defined; pass line=... to find_OU"
            ) from None

    set1_dt = model.predict_set_config(player1, player2)[-1]
    set2_dt = model.predict_set_config(player2, player1)[-1]

    # Add the two mappings over the keys of the first one.
    set_sum_dt = {k: v1 + set2_dt[k] for k, v1 in set1_dt.items()}

    p_O = 0
    p_U = 0
    for k, v in set_sum_dt.items():
        if k > line:
            p_O += v
        else:
            p_U += v
    return p_O, p_U

In [187]:
# Resolve each player's exact stored name from a substring. find_player prints
# every match; the bare print() puts a blank line between the two result lists.
p1_lt = find_player("Ivch")
print()
p2_lt = find_player("Gord")

Ivchenko O.

Gordeets M.


In [188]:
# Use the first hit of each search.
# NOTE(review): raises IndexError when a search matched nothing, and silently
# picks the first name when it matched several — check the printed matches.
player1 = p1_lt[0]
player2 = p2_lt[0]
# Over/under threshold; find_OU reads this notebook-level variable.
line = 18.5

# Labels that are only echoed in the printed summary.
date = "Jan 25"
time = "20:05"


# Each predict_* call returns (found_p1, found_p2, value); the value is treated
# below as player1's probability. The found flags from predict_game are
# overwritten by those from predict_set.
found_p1, found_p2, p_game = modelMElo.predict_game(player1, player2)
found_p1, found_p2, p_set = modelMElo.predict_set(player1, player2)
p_O, p_U = find_OU(player1, player2)

In [190]:

# Report only when both found flags are set (presumably: both players are
# known to the model — confirm against predict_set).
if found_p1 and found_p2:
    # Quote game and set probabilities from the favoured side's point of view.
    if p_game > 0.5:
        winner = player1
        p_game_win = p_game
    else:
        winner = player2
        p_game_win = 1-p_game

    if p_set > 0.5:
        p_set_win = p_set
    else:
        p_set_win = 1-p_set

    # Pick the more likely side of the over/under line.
    if p_O > p_U:
        OU = "O"
        p_OU = p_O
    else:
        OU = "U"
        p_OU = p_U

    # Three-row, tab-aligned summary; adjacent f-strings are joined as written.
    st = (
        f"\t{date} | {time} |  {player1:<15} v.s. {player2:>15}  |  winner: {winner}\t\t\t\t|\n "
        f"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t|  p(set)={p_set_win*100:.1f}%, p(game)={p_game_win*100:.1f}%\t\t|\n"
        f"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t|  p({OU} {line})={p_OU*100:.1f}%\t\t\t\t\t|\n\n"
    )
    print(st)

	Jan 25 | 20:05 |  Ivchenko O.     v.s.     Gordeets M.  |  winner: Ivchenko O.				|
 															|  p(set)=51.5%, p(game)=52.8%		|
															|  p(O 18.5)=51.9%					|




In [184]:
"""
	Jan 25 | 17:00 |  Lifanova O.     v.s.      Volgina A.  |  winner: Lifanova O.				|
 															|  p(set)=53.8%, p(game)=57.1%		|
															|  p(O 18.5)=51.8%					|

	Jan 25 | 20:05 |  Ivchenko O.     v.s.     Gordeets M.  |  winner: Ivchenko O.				|
 															|  p(set)=51.5%, p(game)=52.8%		|
															|  p(O 18.5)=51.9%					|


""";