<a href="https://colab.research.google.com/github/ansonkwokth/TableTennisPrediction/blob/main/inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Fetch the project sources and make the repository root the working directory;
# the later `utils` / `model` imports presumably resolve against this checkout.
!git clone https://github.com/ansonkwokth/TableTennisPrediction.git
%cd TableTennisPrediction

Cloning into 'TableTennisPrediction'...
remote: Enumerating objects: 175, done.[K
remote: Counting objects: 100% (175/175), done.[K
remote: Compressing objects: 100% (151/151), done.[K
remote: Total 175 (delta 84), reused 42 (delta 17), pack-reused 0 (from 0)[K
Receiving objects: 100% (175/175), 682.30 KiB | 3.36 MiB/s, done.
Resolving deltas: 100% (84/84), done.
/content/TableTennisPrediction


In [2]:
import pandas as pd
from tabulate import tabulate
from utils import data_loader as dl

import numpy as np
from model.Elo import Elo
from model.ModifiedElo import ModifiedElo
from model.ensemble import BaggingRatingSystem

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm

import copy

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Competition whose results are loaded and modelled below.
# Exactly one of these assignments should be left uncommented.
# GAME = 'TTStar'
# GAME = 'TTCup'
# GAME = 'SetkaCup'
GAME = 'SetkaCupWomen'
# GAME = 'LigaPro'


In [4]:
# Seasons to load for each supported competition.
_years_by_game = {
    'TTStar': (2020, 2021, 2022, 2023, 2024),
    'TTCup': (2020, 2021, 2022, 2023, 2024),
    'SetkaCup': (2020, 2021, 2022, 2023, 2024),
    'SetkaCupWomen': (2020, 2021, 2022, 2023, 2024),
    'LigaPro': (2022, 2023, 2024),
}

# Reject anything that is not one of the known competitions.
if GAME not in _years_by_game:
    raise ValueError("Invalid game selected.")
years = list(_years_by_game[GAME])


# Read the raw per-season data (looked up relative to the parent directory),
# keep only the selected seasons, and build the combined match table.
text_data_game = dl.load_game_data(GAME, years, '../')
text_data = dict((season, text_data_game[season]) for season in years)
df = dl.create_game_dfs(GAME, years, text_data)

Loading ..//SetkaCupWomen2020.txt
Loading ..//SetkaCupWomen2021.txt
Loading ..//SetkaCupWomen2022.txt
Loading ..//SetkaCupWomen2023.txt
Loading ..//SetkaCupWomen2024.txt


In [5]:
# Consecutive rows are treated as pairs (presumably the two players of one
# match), so both rows of a pair receive the same ID: 0, 0, 1, 1, 2, 2, ...
n_pairs = len(df) // 2
idx_lt = [row // 2 for row in range(2 * n_pairs)]
df['ID'] = idx_lt

# Make the row labels a clean 0..n-1 sequence.
df.reset_index(drop=True, inplace=True)

# Distinct player names, in order of first appearance.
player_lt = pd.unique(df['Player'])



In [6]:
def format_to_array(df: pd.DataFrame) -> np.ndarray:
    """Pack a two-rows-per-match table into a 3-D array.

    Bookkeeping columns ('Round', 'Datetime', 'Game', 'Date', 'Time') are
    left out; every remaining column whose name contains "Set" is cast to
    float. Note that this cast is written back into ``df`` itself.

    Args:
        df: Table whose rows come in consecutive pairs.

    Returns:
        Array of shape ``(len(df) // 2, 2, n_kept_columns)`` holding the
        kept columns in their original order.
    """
    skipped = ('Round', 'Datetime', 'Game', 'Date', 'Time')
    kept = [name for name in df.columns if name not in skipped]

    # Score columns may arrive as text; make them numeric (in place on df).
    score_cols = [name for name in kept if "Set" in name]
    df[score_cols] = df[score_cols].astype(float)

    # Group every two consecutive rows into one entry along the first axis.
    flat = df[kept].values
    return flat.reshape(-1, 2, len(kept))

In [7]:
# Convert the match table into the paired-row array that the model is fitted on.
X_all = format_to_array(df)

In [8]:
# Fit the modified Elo model on every loaded match.
modelMElo = ModifiedElo()
modelMElo.fit(X_all)


Training model: 100%|██████████| 34579/34579 [00:04<00:00, 8591.04it/s]


# Print match predictions

In [9]:
def find_player(name):
    """Print and return every key of ``modelMElo.params`` that contains ``name``.

    Args:
        name: Fragment to look for (plain, case-sensitive ``in`` test).

    Returns:
        List of the matching keys, in the mapping's iteration order.
    """
    matches = []
    for full_name, _ in modelMElo.params.items():
        if name not in full_name:
            continue
        print(full_name)
        matches.append(full_name)
    return matches

In [10]:
def find_OU(player1, player2, ou_line=None):
    """Return the model's (over, under) probabilities for a totals line.

    The last element returned by ``modelMElo.predict_set_config`` is read as
    a mapping; it is fetched once per player order and the two values stored
    under each key are added together. Keys strictly greater than the line
    count towards "over", all other keys towards "under".
    NOTE(review): the keys are presumably total points in a set and the
    values probabilities -- confirm against ``ModifiedElo``.

    Args:
        player1: First player's name as known to the model.
        player2: Second player's name as known to the model.
        ou_line: Over/under threshold. When omitted, the notebook-level
            variable ``line`` is used, so existing two-argument calls behave
            exactly as before.

    Returns:
        Tuple ``(p_O, p_U)``.

    Raises:
        KeyError: If a key of the first mapping is missing from the second.
        NameError: If ``ou_line`` is omitted and no global ``line`` exists.
    """
    set1_dt = modelMElo.predict_set_config(player1, player2)[-1]
    set2_dt = modelMElo.predict_set_config(player2, player1)[-1]

    # An explicit argument removes the hidden dependency on a global that is
    # only assigned in a later cell; the fallback keeps old callers working.
    threshold = line if ou_line is None else ou_line

    p_O = 0
    p_U = 0
    for total, v1 in set1_dt.items():
        combined = v1 + set2_dt[total]
        if total > threshold:
            p_O += combined
        else:
            p_U += combined
    return p_O, p_U

In [11]:
# Collects one single-row prediction frame per analysed match.
df_lt = []

In [115]:
# Substring search for each player's name as stored in the model; the matches
# are printed (separated by a dashed rule) and kept for selection.
p1_lt = find_player("Gord")
print("-"*30)
p2_lt = find_player("Volg")

Gordeets M.
------------------------------
Volgina A.


In [116]:
# Use the first search hit for each side and display the chosen pair.
player1 = p1_lt[0]
player2 = p2_lt[0]
player1, player2

('Gordeets M.', 'Volgina A.')

In [117]:
# Fixture details used as labels in the printed table.
time = "21:35"
date = "Jan 31"

# Over/under threshold; read by find_OU and shown in the "O/U" column.
line = 18.5

# Model outputs, all expressed from player1's point of view.
found_p1, found_p2, p_game = modelMElo.predict_game(player1, player2)
found_p1, found_p2, p_set = modelMElo.predict_set(player1, player2)
p_O, p_U = find_OU(player1, player2)

# The predicted winner is whoever has the larger game probability.
if p_game > 0.5:
    winner = player1
else:
    winner = player2

# Re-express both probabilities from the predicted winner's side.
if winner == player1:
    p_set_win, p_game_win = p_set, p_game
else:
    p_set_win, p_game_win = 1 - p_set, 1 - p_game

# Report whichever side of the line is more likely.
if p_O > p_U:
    p_OU, OU = p_O, "O"
else:
    p_OU, OU = p_U, "U"

In [118]:
# Build and print the one-row summary only when both `found_*` flags are set
# (presumably meaning the model has a rating for each player).
if found_p1 and found_p2:
    df_row = pd.DataFrame([{
        "Time": time,
        "Player1": player1,
        "Player2": player2,
        "Winner": winner,
        "Set": f"{p_set_win*100:.1f}%",
        "Game": f"{p_game_win*100:.1f}%",
        "O/U": f"{OU} {line}: {p_OU*100:.1f}%",
    }])

    print(tabulate(df_row, headers='keys', tablefmt='psql', showindex=False))


+--------+-------------+------------+-------------+-------+--------+---------------+
| Time   | Player1     | Player2    | Winner      | Set   | Game   | O/U           |
|--------+-------------+------------+-------------+-------+--------+---------------|
| 21:35  | Gordeets M. | Volgina A. | Gordeets M. | 53.5% | 56.6%  | O 18.5: 51.8% |
+--------+-------------+------------+-------------+-------+--------+---------------+


In [119]:
# Display the raw probabilities behind the O/U column (note: under first, then over).
p_U, p_O

(0.4820232229268789, 0.5179767770731206)

In [120]:
# Queue this match's row for the summary table.
# NOTE(review): re-running this cell appends the same row again, and if the
# row-building cell was skipped by its `found_p1 and found_p2` guard, the
# `df_row` appended here is a stale one from an earlier match.
df_lt.append(df_row)

In [121]:
# Stack every collected row and relabel them 0..n-1 (one label per collected frame).
df_print = pd.concat(df_lt).set_axis(range(len(df_lt)), axis=0)

# The index is hidden in the printout; drop `showindex=False` to display it.
print(tabulate(df_print, headers='keys', tablefmt='psql', showindex=False))

+--------+---------------------+-------------+-------------+-------+--------+---------------+
| Time   | Player1             | Player2     | Winner      | Set   | Game   | O/U           |
|--------+---------------------+-------------+-------------+-------+--------+---------------|
| 14:35  | Hordynska-Sheiko A. | Ivchenko O. | Ivchenko O. | 69.4% | 82.9%  | U 18.5: 52.3% |
| 15:05  | Lapa H.             | Volgina A.  | Volgina A.  | 55.3% | 59.8%  | O 18.5: 51.6% |
| 15:35  | Lifanova O.         | Gordeets M. | Lifanova O. | 50.3% | 50.6%  | O 18.5: 51.9% |
| 16:05  | Hordynska-Sheiko A. | Volgina A.  | Volgina A.  | 65.7% | 77.6%  | U 18.5: 50.8% |
| 16:35  | Gordeets M.         | Ivchenko O. | Ivchenko O. | 51.5% | 52.8%  | O 18.5: 51.9% |
| 17:05  | Lapa H.             | Lifanova O. | Lifanova O. | 58.9% | 66.3%  | O 18.5: 51.1% |
| 17:35  | Hordynska-Sheiko A. | Gordeets M. | Gordeets M. | 68.7% | 81.9%  | U 18.5: 51.9% |
| 18:05  | Lifanova O.         | Volgina A.  | Lifanova O. |