In [79]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, recall_score, f1_score, precision_score
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict

In [94]:
def find_players_many_points(df, min_train=3650, min_test=100):
    """
    Find servers with enough points in both the train and the test split.

    Parameters
    ----------
    df : pandas.DataFrame
        Point-level data with a 'server' column and a 'data_split_2024'
        column whose values include 'train' and 'test'.
    min_train : int, default 3650
        A server qualifies only with strictly more than this many 'train' rows.
    min_test : int, default 100
        A server qualifies only with strictly more than this many 'test' rows.

    Returns
    -------
    array-like
        Unique qualifying server identifiers, in order of first appearance
        in `df` (the result of `Series.unique()`).
    """
    servers = df['server']
    split = df['data_split_2024']

    # Per-server row counts for each split, computed with vectorised group
    # sums rather than a Python lambda evaluated once per server.
    n_train = (split == 'train').groupby(servers).sum()
    n_test = (split == 'test').groupby(servers).sum()
    qualified = n_train.index[(n_train > min_train) & (n_test > min_test)]

    return servers[servers.isin(qualified)].unique()

In [81]:
# Location of the point-level ATP feature table.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
DATA_PATH = '/Users/anika/Desktop/BEM:EC 120/generated_datasets/atp_features_0518.csv'

df = pd.read_csv(DATA_PATH)
df = df.drop(['server_bp_saved_recent_matches', 'returner_depth_recent_matches_1st/2nd', 'returner_depth_recent_matches_wide', 'returner_depth_recent_matches_T', 'returner_depth_recent_matches_body'], axis=1)
df = df.dropna()

# Discard points whose serve / first-shot annotations are unknown.
df = df[df['first_shot_loc'] != 'unknown']
df = df[df['first_serve_attempt'] != 'unknown']
df = df[df['serve_loc'] != 'unknown']
print(len(df))

# If any values have a space in them, make it a _
df = df.replace(' ', '_', regex=True)

# make everything lowercase
# (Series.map per column is the version-independent equivalent of the
# deprecated DataFrame.applymap.)
df = df.apply(lambda col: col.map(lambda x: x.lower() if isinstance(x, str) else x))

# .copy() so the column assignments below write to an independent frame
# instead of a filtered view (avoids SettingWithCopyWarning).
df = df[df['first_shot_loc'] != 'no_first_shot'].copy()
print(len(df))

# check if first/second serve by seeing if 'first_serve_attempt' is 'made_first_serve'
df['serve_type'] = np.where(df['first_serve_attempt'] == 'made_first_serve', 1, 2)

# Combined categorical keys: serve location, first-shot location, court side
# and serve number in the combinations used by the later feature steps.
df['s+1'] = df['serve_loc'] + '_' + df['first_shot_loc']
df['side_s+1'] = df['serve_loc'] + '_' + df['first_shot_loc'] + '_' + df['court_side']
df['1st/2nd_s+1'] = df['serve_loc'] + '_' + df['first_shot_loc'] + '_' + df['serve_type'].astype(str) + '_' + df['court_side']
df['side_serve_loc'] = df['serve_loc'] + '_on_' + df['court_side']
df['side_first_shot_loc'] = df['first_shot_loc'] + '_on_' + df['court_side']
df['1st/2nd_serve_loc'] = df['serve_loc'] + '_on_' + df['court_side'] + '_' + df['serve_type'].astype(str)
df['1st/2nd_first_shot_loc'] = df['first_shot_loc'] + '_on_' + df['court_side'] + '_' + df['serve_type'].astype(str)

# The part of match_id before the first '-' is parsed as a YYYYMMDD date;
# values that do not parse become NaT (errors='coerce').
df['date'] = pd.to_datetime(df['match_id'].str.split('-').str[0], format='%Y%m%d', errors='coerce')

541611
358650


In [83]:
def add_rolling_win_rates(df, n=5):
    """
    Add rolling win rates based on s+1, serve_loc, and first_shot_loc.

    For each of the key columns '1st/2nd_s+1', '1st/2nd_serve_loc' and
    '1st/2nd_first_shot_loc', points are aggregated per
    (server, category, match_id). The win rate for a match is then
    won / total points over up to `n` *previous* matches of the same
    (server, category) pair; the current match is excluded via a shift.

    One column per category is merged back onto every row of the matching
    (server, match_id), named 'win_pct_s1_<cat>', 'win_pct_serve_<cat>' and
    'win_pct_first_shot_<cat>'. Missing values (no earlier match, or category
    absent from that match) are filled with 0.5.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'server', 'date', 'match_id', 'won_pt' and the three key columns.
    n : int, default 5
        Maximum number of previous matches in the rolling window.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` sorted by ['server', 'date'] with the win_pct_* columns
        added; the index is reset by the merges.
    """
    df = df.sort_values(by=['server', 'date']).copy()

    def compute_rolling_win_pct(df, group_col, prefix):
        keys = ['server', group_col]

        # Points played / won per (server, category, match). groupby sorts by
        # its keys, so matches are ordered by match_id inside each pair.
        summary = (
            df.groupby(keys + ['match_id'])
              .agg(total_pts=('won_pt', 'count'), won_pts=('won_pt', 'sum'))
              .reset_index()
        )

        # Shift inside each (server, category) pair so a match only sees
        # earlier matches, then sum the last n shifted rows of that pair.
        # groupby.shift / groupby.rolling avoid groupby.apply, whose result
        # changes shape when there is only a single group.
        lagged = summary.groupby(keys)[['won_pts', 'total_pts']].shift()
        windowed = (
            summary[keys].join(lagged)
                   .groupby(keys)[['won_pts', 'total_pts']]
                   .rolling(n, min_periods=1)
                   .sum()
                   .reset_index(level=[0, 1], drop=True)  # back to summary's row labels
        )
        summary['rolling_win_pct'] = windowed['won_pts'] / windowed['total_pts']

        # One column per category, one row per (server, match).
        pivoted = (
            summary.pivot(index=['server', 'match_id'],
                          columns=group_col,
                          values='rolling_win_pct')
                   .reset_index()
        )
        pivoted.columns = ['server', 'match_id'] + [f'{prefix}_{c}' for c in pivoted.columns[2:]]
        return pivoted

    # Compute for each feature
    s1_pivot = compute_rolling_win_pct(df, '1st/2nd_s+1', 'win_pct_s1')
    print('done1')
    serve_loc_pivot = compute_rolling_win_pct(df, '1st/2nd_serve_loc', 'win_pct_serve')
    print('done2')
    shot_loc_pivot = compute_rolling_win_pct(df, '1st/2nd_first_shot_loc', 'win_pct_first_shot')
    print('done3')

    # Drop old win_pct_* columns if any (keeps the function re-runnable)
    df = df.drop(columns=[col for col in df.columns if col.startswith('win_pct_')], errors='ignore')

    # Merge all
    for pivot in (s1_pivot, serve_loc_pivot, shot_loc_pivot):
        df = df.merge(pivot, on=['server', 'match_id'], how='left')

    # Fill NaNs with a neutral 50% win rate
    win_pct_cols = [col for col in df.columns if col.startswith('win_pct_')]
    df[win_pct_cols] = df[win_pct_cols].fillna(0.5)

    return df

In [84]:
# Attach the previous-match rolling win-rate columns (default window n=5) to every point.
df = add_rolling_win_rates(df)

done1
done2
done3


In [85]:
def _lookup_prefixed_column(df, key_col, prefix):
    """
    For every row, pick the value of the column named f'{prefix}{row[key_col]}'.

    Vectorised replacement for a row-wise `df.apply(..., axis=1)` lookup.
    Raises KeyError if any required column is missing, like the row-wise form.
    """
    codes, uniques = pd.factorize(df[key_col])
    wanted_cols = [f'{prefix}{value}' for value in uniques]
    values = df[wanted_cols].to_numpy()  # KeyError here if a column is absent
    picked = values[np.arange(len(df)), codes]
    return pd.Series(picked, index=df.index)


def add_interaction_features(df):
    """
    Add features based on interaction between categorical variables for locations
    for the serve and first shot.

    Steps, in order:
      1. one-hot encode the categorical columns listed in `one_hot_cols`;
      2. standardise the numerical columns listed in `numerical_to_scale`;
      3. build serve / return / first-shot interaction features by multiplying
         one-hot indicators with the matching rate columns;
      4. look up, per row, the rolling win rate of the row's own category from
         the 'win_pct_*' columns;
      5. label-encode the identifier-like columns (for embeddings).

    NOTE(review): the StandardScaler is fitted on every row passed in, so if
    `df` holds both train and test rows the scaling statistics include test data.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned point-level frame that already has the 'win_pct_*' columns.

    Returns
    -------
    (pandas.DataFrame, dict)
        The transformed copy of `df`, and a dict mapping each label-encoded
        column name to its fitted LabelEncoder.
    """

    df = df.copy()

    # Columns to one-hot encode
    one_hot_cols = [
        'surface', 'hand_combo', 'court_side', 'first_serve_attempt',
        'serve_loc', 'first_shot_loc', 'is_break_point', 'is_game_point',
    ]

    # One-hot encode categorical variables
    df = pd.get_dummies(df, columns=one_hot_cols)

    # Scale numerical variables
    scaler = StandardScaler()
    numerical_to_scale = ['game_advantage', 'set_advantage', 'point_advantage', 'pts_in_game', 'pts_in_match',
                          'height', 'height_difference', 'rally_lengths_won_pts',
                          'rally_lengths_lost_pts', 'win_streak_server', 'win_streak_opponent']

    df[numerical_to_scale] = scaler.fit_transform(df[numerical_to_scale])

    # --- Returner strength against the serve direction actually hit ---------
    mean_return = df[['wide_returner_win_recent_matches',
                      'body_returner_win_recent_matches',
                      'T_returner_win_recent_matches']].mean(axis=1)

    df['wide_return_advantage'] = df['serve_loc_wide'] * (df['wide_returner_win_recent_matches'] - mean_return)
    df['body_return_advantage'] = df['serve_loc_body'] * (df['body_returner_win_recent_matches'] - mean_return)
    df['T_return_advantage'] = df['serve_loc_t'] * (df['T_returner_win_recent_matches'] - mean_return)

    # Fix: the T term previously multiplied by body_return_advantage, which is
    # zero whenever serve_loc_t is set, so T serves always contributed 0.
    df['returner_success_on_this_serve_recent_matches'] = (
        df['serve_loc_wide'] * df['wide_return_advantage'] +
        df['serve_loc_body'] * df['body_return_advantage'] +
        df['serve_loc_t'] * df['T_return_advantage']
    )

    df['interaction_returns_wide'] = df['serve_loc_wide'] * df['wide_returner_win_recent_matches']
    df['interaction_returns_body'] = df['serve_loc_body'] * df['body_returner_win_recent_matches']
    df['interaction_returns_T'] = df['serve_loc_t'] * df['T_returner_win_recent_matches']

    # --- Opponent error rates from the first-shot (+1) location -------------
    df['opponent_error_rate_from_+1_loc'] = (
        df['first_shot_loc_deuce_court'] * df['op_pct_errors_deuce_side'] +
        df['first_shot_loc_ad_court'] * df['op_pct_errors_ad_side'] +
        df['first_shot_loc_middle'] * df['op_pct_errors_middle']
    )

    df['interaction_+1_deuce'] = df['first_shot_loc_deuce_court'] * df['op_pct_errors_deuce_side']
    df['interaction_+1_ad'] = df['first_shot_loc_ad_court'] * df['op_pct_errors_ad_side']
    df['interaction_+1_middle'] = df['first_shot_loc_middle'] * df['op_pct_errors_middle']

    df['opponent_pct_shots_are_errors_from_+1_loc'] = (
        df['first_shot_loc_deuce_court'] * df['op_pct_shots_errors_deuce_side'] +
        df['first_shot_loc_ad_court'] * df['op_pct_shots_errors_ad_side'] +
        df['first_shot_loc_middle'] * df['op_pct_shots_errors_middle']
    )

    df['interaction_+1_deuce_shots'] = df['first_shot_loc_deuce_court'] * df['op_pct_shots_errors_deuce_side']
    df['interaction_+1_ad_shots'] = df['first_shot_loc_ad_court'] * df['op_pct_shots_errors_ad_side']
    df['interaction_+1_middle_shots'] = df['first_shot_loc_middle'] * df['op_pct_shots_errors_middle']

    # --- Server win rates for the serve direction actually hit --------------
    df['rolling_server_success_on_this_serve'] = (
        df['serve_loc_wide'] * df['wide_server_win_rate'] +
        df['serve_loc_body'] * df['body_server_win_rate'] +
        df['serve_loc_t'] * df['T_server_win_rate']
    )

    mean_serve = df[['wide_server_win_rate', 'body_server_win_rate', 'T_server_win_rate']].mean(axis=1)

    df['interaction_serve_wide'] = df['serve_loc_wide'] * df['wide_server_win_rate']
    df['interaction_serve_body'] = df['serve_loc_body'] * df['body_server_win_rate']
    df['interaction_serve_T'] = df['serve_loc_t'] * df['T_server_win_rate']

    df['wide_server_advantage'] = df['serve_loc_wide'] * (df['wide_server_win_rate'] - mean_serve)
    df['body_server_advantage'] = df['serve_loc_body'] * (df['body_server_win_rate'] - mean_serve)
    df['T_server_advantage'] = df['serve_loc_t'] * (df['T_server_win_rate'] - mean_serve)

    df['gp_won_recent_matches'] = df['is_game_point_True'] * df['server_gp_won_recent_matches']

    # Per-row lookup of the rolling win rate belonging to the row's own
    # category (must happen before the key columns are label-encoded below).
    df['s+1_success_rate_recent_matches'] = _lookup_prefixed_column(df, '1st/2nd_s+1', 'win_pct_s1_')
    df['serve_success_rate_recent_matches'] = _lookup_prefixed_column(df, '1st/2nd_serve_loc', 'win_pct_serve_')
    df['first_shot_success_rate_recent_matches'] = _lookup_prefixed_column(df, '1st/2nd_first_shot_loc', 'win_pct_first_shot_')

    # Label encode server and opponent (for embeddings)
    label_encoders = {}
    for col in ['server', 'opponent', 'side_serve_loc', 'side_first_shot_loc', 'side_s+1', '1st/2nd_first_shot_loc',
                '1st/2nd_s+1', '1st/2nd_serve_loc']:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

    return df, label_encoders

In [95]:
# Restrict the data to servers that pass the train/test point-count thresholds,
# then build the model-ready frame and the fitted label encoders.
players_lst = find_players_many_points(df)
print(len(players_lst))
# NOTE: `df` is rebound here, so re-running this cell operates on the already-filtered frame.
df = df[df['server'].isin(players_lst)]
df_preprocessed, label_encoders = add_interaction_features(df)

15


In [129]:
# Features and target
# Label-encoded columns that are fed to the embedding layers.
categorical_cols = ['side_serve_loc', 'side_first_shot_loc']

# After one-hot encoding, collect all new columns
# (every column whose name starts with one of these prefixes is included).
one_hot_prefixes = ['surface_']

numerical_cols = [
    name for name in df_preprocessed.columns
    if name.startswith(tuple(one_hot_prefixes))
]

# Hand-picked numerical / indicator inputs, appended after the one-hot columns.
numerical_cols += [
    # point context indicators
    'is_break_point_True',
    'is_game_point_True',
    'first_serve_attempt_made_first_serve',
    'court_side_deuce',
    # score state and match progress
    'point_advantage',
    'set_advantage',
    'game_advantage',
    'pts_in_match',
    'win_streak_server',
    # player attributes
    'height',
    'height_difference',
    # recent-form rates and pattern features
    '1st/2nd_server_win_rate',
    'returner_win_1st/2nd_recent_matches',
    'same_serve_last',
    'same_shot_last',
    's1_success_rate_whole_match',
    'good_pts_server_minus_opp',
    # interaction features
    'wide_return_advantage', 'body_return_advantage', 'T_return_advantage',
    'interaction_+1_deuce', 'interaction_+1_ad', 'opponent_error_rate_from_middle',
    'interaction_serve_wide', 'interaction_serve_body', 'interaction_serve_T',
    'gp_won_recent_matches',
    's+1_success_rate_recent_matches',
    'serve_success_rate_recent_matches',
]

# Binary prediction target.
target_cols = ['won_pt']


In [130]:
class TennisDataset(Dataset):
    """
    Torch dataset that serves one row of a feature frame per item.

    Each item is a tuple ``(x_cat, x_num, y)``:
      * x_cat -- int64 tensor built from `categorical_cols`
      * x_num -- float32 tensor built from `numerical_cols`
      * y     -- float32 tensor built from `target_cols`
    """

    def __init__(self, df, categorical_cols, numerical_cols, target_cols):
        # Materialise each column group once as a typed NumPy array.
        def as_array(columns, dtype):
            return df[columns].values.astype(dtype)

        self.categorical_data = as_array(categorical_cols, np.int64)
        self.numerical_data = as_array(numerical_cols, np.float32)
        self.targets = as_array(target_cols, np.float32)

    def __len__(self):
        # One item per row of the target array.
        return self.targets.shape[0]

    def __getitem__(self, idx):
        # torch.tensor copies, so returned items do not alias the stored arrays.
        return (
            torch.tensor(self.categorical_data[idx], dtype=torch.long),
            torch.tensor(self.numerical_data[idx], dtype=torch.float32),
            torch.tensor(self.targets[idx], dtype=torch.float32),  # shape: [num_targets]
        )


In [131]:
class MultiTaskServePlusOneModel(nn.Module):
    """
    Feed-forward network over embedded categorical inputs plus numerical inputs.

    Every categorical column gets its own embedding table. The concatenated
    embeddings and the numerical features pass through a shared two-layer MLP
    (dropout after the first layer) and a single linear head that emits one
    logit per sample. Despite the class name, only this one head is defined.

    Parameters
    ----------
    embedding_sizes : iterable of (num_categories, embedding_dim)
        One pair per categorical column, in column order.
    num_numerical : int
        Number of numerical input features.
    hidden_units : sequence
        Widths of the two shared hidden layers (items 0 and 1 are used).
    """

    def __init__(self, embedding_sizes, num_numerical, hidden_units):
        super().__init__()

        # Each table is wrapped in nn.Sequential, so parameter names stay of
        # the form 'embeddings.<i>.0.weight'.
        embedding_layers = []
        for n_categories, emb_size in embedding_sizes:
            embedding_layers.append(nn.Sequential(nn.Embedding(n_categories, emb_size)))
        self.embeddings = nn.ModuleList(embedding_layers)
        self.emb_dim = sum(wrapper[0].embedding_dim for wrapper in self.embeddings)

        first_width, second_width = hidden_units[0], hidden_units[1]
        self.shared_fc = nn.Sequential(
            nn.Linear(self.emb_dim + num_numerical, first_width),
            nn.ReLU(),
            nn.Dropout(0.2),  # Add dropout
            nn.Linear(first_width, second_width),
            nn.ReLU(),
        )

        # Single output head: logit for winning the point.
        self.output_won = nn.Linear(second_width, 1)

    def forward(self, x_cat, x_num):
        if x_cat is None:
            # No categorical input: use a zero block of the embedding width.
            emb_cat = torch.zeros(x_num.size(0), self.emb_dim, device=x_num.device)
        else:
            per_column = [layer(x_cat[:, col]) for col, layer in enumerate(self.embeddings)]
            emb_cat = torch.cat(per_column, dim=1)

        features = torch.cat([emb_cat, x_num], dim=1)
        return self.output_won(self.shared_fc(features))

In [132]:
def train_model(model, dataloader, optimizer, criterion_won, device='cpu'):
    """
    Run one training epoch and return the average loss per sample.

    Parameters
    ----------
    model : nn.Module
        Called as ``model(x_cat, x_num)``; ``x_cat`` is passed as ``None``
        when the batch has no categorical columns.
    dataloader : iterable
        Yields ``(x_cat, x_num, y)`` batches; the loss uses column 0 of ``y``.
    optimizer : torch.optim.Optimizer
    criterion_won : callable
        Loss taking ``(logits, targets)``, both of shape ``(batch,)``.
    device : str or torch.device, default 'cpu'

    Returns
    -------
    float
        Sum of per-batch losses weighted by batch size, divided by the number
        of samples seen.
    """
    model.train().to(device)  # Ensure model is on correct device
    total_loss = 0
    total_samples = 0

    for x_cat, x_num, y in dataloader:
        x_cat, x_num, y = x_cat.to(device), x_num.to(device), y.to(device)
        optimizer.zero_grad()

        # Forward pass (an x_cat with zero columns means "no categorical input")
        if x_cat.size(1) == 0:
            out_won = model(None, x_num)
        else:
            out_won = model(x_cat, x_num)

        # Flatten to (batch,). A bare .squeeze() would turn a batch of one into
        # a 0-d tensor, which the loss rejects against the (1,)-shaped target.
        loss = criterion_won(out_won.reshape(-1), y[:, 0])

        # Backward pass
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()

        # Accumulate loss, weighted by batch size
        batch_size = x_num.size(0)
        total_loss += loss.item() * batch_size
        total_samples += batch_size

    return total_loss / total_samples  # Return average loss per sample


def evaluate_model(model, dataloader, criterion_won, device='cpu'):
    """
    Score `model` on every batch of `dataloader` without gradient tracking.

    Returns a dict with:
      * 'loss'        -- batch-size-weighted average of `criterion_won`
      * 'won_metrics' -- output of `calculate_binary_metrics` on the sigmoid
                         probabilities at a 0.5 threshold
    """
    model.eval().to(device)

    loss_sum = 0
    n_seen = 0
    probabilities = []
    labels = []

    with torch.no_grad():
        for x_cat, x_num, y in dataloader:
            x_cat = x_cat.to(device)
            x_num = x_num.to(device)
            y = y.to(device)

            # An x_cat with zero columns means "no categorical input".
            cat_input = None if x_cat.size(1) == 0 else x_cat
            out_won = model(cat_input, x_num)

            # Ensure shapes are compatible: a 0-d output gains a batch axis,
            # anything else has its unit dimensions removed.
            out_won = out_won.unsqueeze(0) if out_won.dim() == 0 else out_won.squeeze()
            y_target = y[:, 0].squeeze()

            batch_loss = criterion_won(out_won, y_target)
            batch_n = x_cat.size(0)
            loss_sum += batch_loss.item() * batch_n
            n_seen += batch_n

            probabilities += torch.sigmoid(out_won).view(-1).cpu().tolist()
            labels += y_target.view(-1).cpu().tolist()

    avg_loss = loss_sum / n_seen

    won_metrics = calculate_binary_metrics(
        np.array(probabilities),
        np.array(labels),
        threshold=0.5
    )

    return {'loss': avg_loss, 'won_metrics': won_metrics}

def calculate_binary_metrics(preds, targets, threshold=0.5):
    """
    Compute binary classification metrics from predicted probabilities.

    Parameters
    ----------
    preds : numpy.ndarray
        Predicted probabilities of the positive class.
    targets : numpy.ndarray
        Ground-truth labels.
    threshold : float, default 0.5
        Probabilities >= threshold are counted as positive predictions.

    Returns
    -------
    dict
        'accuracy', 'precision', 'recall', 'f1', 'roc_auc', the raw
        'pred_probs' and 'targets', and 'ratio' (the fraction of samples
        predicted positive). 'roc_auc' is NaN when `targets` holds fewer than
        two distinct classes, because ROC AUC is undefined in that case.
    """
    # Apply threshold to get binary predictions
    binary_preds = (preds >= threshold).astype(int)

    # Calculate metrics
    accuracy = accuracy_score(targets, binary_preds) # Accuracy is the ratio of correct predictions to total predictions
    precision = precision_score(targets, binary_preds, zero_division=0) # Precision (combos that were predicted to win actually won)
    recall = recall_score(targets, binary_preds, zero_division=0) # Recall (combos that won were actually predicted to win)
    f1 = f1_score(targets, binary_preds, zero_division=0) # F1 score = 2 * (precision * recall) / (precision + recall)

    # ROC AUC (ability to distinguish between classes) needs both classes
    # present; report NaN instead of letting a small subset abort the run.
    if np.unique(targets).size > 1:
        roc_auc = roc_auc_score(targets, preds)
    else:
        roc_auc = float('nan')

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'roc_auc': roc_auc,
        'pred_probs': preds,   # Raw probabilities for further analysis
        'targets': targets,    # Ground truth labels
        'ratio': np.mean(binary_preds)
    }

In [133]:
def train_model_full(categorical_cols, numerical_cols, target_cols, embedding_size, lr, batch_size, epochs, hidden_units = [64, 32], pos_weight = True, notes = ''):
    """
    Train one shared model on the pooled 'train' rows of `df_preprocessed`.

    Reads the notebook globals `df_preprocessed` and `label_encoders`.

    Parameters
    ----------
    categorical_cols, numerical_cols, target_cols : list of str
        Column groups passed to TennisDataset.
    embedding_size : int
        Upper bound on each embedding width; the width actually used is
        min(embedding_size, (n_classes + 1) // 2).
    lr : float
        Adam learning rate (weight decay is fixed at 1e-5).
    batch_size : int
    epochs : int
    hidden_units : list, default [64, 32]
        Widths of the two shared hidden layers.
    pos_weight : bool, default True
        If true, weight the positive class by (negative share / positive
        share) of the training target.
    notes : str
        Unused.

    Returns
    -------
    MultiTaskServePlusOneModel
        The trained model.

    NOTE(review): the per-epoch validation loss, which also drives the
    learning-rate scheduler, is computed on the rows labelled 'test'.
    """
    embedding_sizes = [(len(label_encoders[col].classes_), min(embedding_size, (len(label_encoders[col].classes_) + 1) // 2))
                    for col in categorical_cols]

    # Create dataset and dataloader
    df_train = df_preprocessed[df_preprocessed['data_split_2024'] == 'train'].drop('data_split_2024', axis=1)
    df_fine_tune = df_preprocessed[df_preprocessed['data_split_2024'] == 'test'].drop('data_split_2024', axis=1)

    dataset_train = TennisDataset(df_train, categorical_cols, numerical_cols, target_cols)
    dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)

    dataset_fine_tune = TennisDataset(df_fine_tune, categorical_cols, numerical_cols, target_cols)
    dataloader_fine_tune = DataLoader(dataset_fine_tune, batch_size=batch_size, shuffle=True)

    model = MultiTaskServePlusOneModel(embedding_sizes, num_numerical=len(numerical_cols), hidden_units=hidden_units)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)  # L2 regularization
    # `verbose` is deprecated on torch LR schedulers, so it is not passed;
    # learning-rate reductions are printed explicitly in the loop below.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5)

    def init_weights(m):
        # Kaiming-uniform weights and zero biases for every linear layer.
        if isinstance(m, nn.Linear):
            nn.init.kaiming_uniform_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)

    # In-place re-initialisation: the optimizer keeps pointing at the same tensors.
    model.apply(init_weights)

    criterion_won = nn.BCEWithLogitsLoss()

    # Because we have a slightly unbalanced dataset
    if pos_weight:
        pos_ratio = df_train[target_cols[0]].mean()
        neg_ratio = 1 - pos_ratio
        # float32 to match the model's outputs; kept under a separate name so
        # the boolean flag is not overwritten.
        pos_weight_tensor = torch.tensor(neg_ratio / pos_ratio, dtype=torch.float32)
        criterion_won = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)

    for epoch in range(epochs):
        train_loss = train_model(model, dataloader_train, optimizer, criterion_won)
        val_results = evaluate_model(model, dataloader_fine_tune, criterion_won)
        val_loss = val_results['loss']
        val_metrics = val_results['won_metrics']

        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(val_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after != lr_before:
            print(f"Epoch {epoch+1}: reducing learning rate from {lr_before:.4e} to {lr_after:.4e}.")

        print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}, Val Accuracy = {val_metrics['accuracy']:.4f}, Val Precision = {val_metrics['precision']:.4f}, Val Recall = {val_metrics['recall']:.4f}, Val F1 = {val_metrics['f1']:.4f}, Val Ratio = {val_metrics['ratio']:.4f}")

    return model

In [134]:
def train_and_evaluate(player, categorical_cols, numerical_cols, target_cols, embedding_size, lr, batch_size, epochs, hidden_units = [64, 32], pos_weight = True, notes = '', results_dict = None, model = None):
    """
    Evaluate a model on one player's test rows, training it first if needed.

    Reads the notebook globals `df_preprocessed` and `label_encoders`.

    If `model` is None, a fresh model is trained on this player's 'train' rows
    for `epochs` epochs. Otherwise the given model is only evaluated, and the
    training arguments (embedding_size, lr, epochs, hidden_units, pos_weight)
    have no effect.

    Test metrics are stored under
    ``results_dict[player]['test_metrics'][<'all' | 'fs' | 'ss'>]`` for all
    test points, first-serve points and second-serve points respectively.

    Parameters
    ----------
    player
        Value of the (label-encoded) 'server' column to select.
    results_dict : dict or None
        Mapping that receives the metrics. A new dict is created when None;
        plain dicts and nested defaultdicts are both supported.
    model : nn.Module or None
        Pre-trained model to evaluate.
    notes : str
        Unused.

    Returns
    -------
    dict
        `results_dict`, so metrics are reachable when none was passed in.

    NOTE(review): when training here, the per-epoch "validation" numbers are
    computed on the same test rows used for the final evaluation.
    """
    def _child(container, key):
        # container[key], created as an empty dict when missing. A defaultdict
        # keeps using its own factory because plain indexing is tried first.
        try:
            return container[key]
        except KeyError:
            container[key] = {}
            return container[key]

    def _make_loader(frame, shuffle=False):
        dataset = TennisDataset(frame, categorical_cols, numerical_cols, target_cols)
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    # A fresh dict per call instead of a shared mutable default argument.
    if results_dict is None:
        results_dict = {}

    df_player = df_preprocessed[df_preprocessed['server'] == player]
    print(len(df_player))

    df_train = df_player[df_player['data_split_2024'] == 'train'].drop('data_split_2024', axis=1)
    df_test = df_player[df_player['data_split_2024'] == 'test'].drop('data_split_2024', axis=1)
    df_test_fs = df_test[df_test['first_serve_attempt_made_first_serve'] == 1]
    df_test_ss = df_test[df_test['first_serve_attempt_made_first_serve'] == 0]

    dataloader_test = _make_loader(df_test)
    dataloader_test_fs = _make_loader(df_test_fs)
    dataloader_test_ss = _make_loader(df_test_ss)

    criterion_won = nn.BCEWithLogitsLoss()

    if model is None:
        embedding_sizes = [(len(label_encoders[col].classes_), min(embedding_size, (len(label_encoders[col].classes_) + 1) // 2))
                        for col in categorical_cols]
        dataloader_train = _make_loader(df_train, shuffle=True)

        model = MultiTaskServePlusOneModel(embedding_sizes, num_numerical=len(numerical_cols), hidden_units=hidden_units)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # Because we have a slightly unbalanced dataset
        if pos_weight:
            pos_ratio = df_train[target_cols[0]].mean()
            neg_ratio = 1 - pos_ratio
            pos_weight_tensor = torch.tensor(neg_ratio / pos_ratio, dtype=torch.float32)
            criterion_won = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)

        for epoch in range(epochs):
            train_loss = train_model(model, dataloader_train, optimizer, criterion_won)
            # Per-epoch monitoring on the player's test rows (see NOTE above).
            val_results = evaluate_model(model, dataloader_test, criterion_won)
            val_loss = val_results['loss']
            val_metrics = val_results['won_metrics']

            print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}, Val Accuracy = {val_metrics['accuracy']:.4f}, Val Precision = {val_metrics['precision']:.4f}, Val Recall = {val_metrics['recall']:.4f}, Val F1 = {val_metrics['f1']:.4f}, Val Ratio = {val_metrics['ratio']:.4f}")

    test_results_all = evaluate_model(model, dataloader_test, criterion_won)
    test_results_fs = evaluate_model(model, dataloader_test_fs, criterion_won)
    test_results_ss = evaluate_model(model, dataloader_test_ss, criterion_won)
    test_loss = test_results_all['loss']
    test_metrics = test_results_all['won_metrics']

    print(f"Test Loss = {test_loss:.4f}, Test Accuracy = {test_metrics['accuracy']:.4f}, Test Precision = {test_metrics['precision']:.4f}, Test Recall = {test_metrics['recall']:.4f}, Test F1 = {test_metrics['f1']:.4f}, Test Ratio = {test_metrics['ratio']:.4f}")

    player_metrics = _child(_child(results_dict, player), 'test_metrics')
    player_metrics['all'] = test_results_all['won_metrics']
    player_metrics['fs'] = test_results_fs['won_metrics']
    player_metrics['ss'] = test_results_ss['won_metrics']

    return results_dict


In [135]:
# Model Training
# Seed the RNGs so weight initialisation, dropout and DataLoader shuffling
# are repeatable when the notebook is re-run from a fresh kernel.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

# player -> 'test_metrics' -> subset ('all' / 'fs' / 'ss') -> metrics
results_dict = defaultdict(lambda : defaultdict(lambda: defaultdict(list))) # defaultdict
# `server` is label-encoded in df_preprocessed, so these are encoded player ids.
players_lst = find_players_many_points(df_preprocessed)

# One shared model trained on the pooled training rows of all selected players.
model = train_model_full(categorical_cols, numerical_cols, target_cols, embedding_size=32, lr=1e-4,
                         batch_size=32, epochs=10, hidden_units=[128, 16], pos_weight=False, notes = '')

Epoch 1: Train Loss = 0.6933, Val Loss = 0.6777, Val Accuracy = 0.5763, Val Precision = 0.5811, Val Recall = 0.9432, Val F1 = 0.7191, Val Ratio = 0.9335
Epoch 2: Train Loss = 0.6833, Val Loss = 0.6753, Val Accuracy = 0.5791, Val Precision = 0.5833, Val Recall = 0.9387, Val F1 = 0.7195, Val Ratio = 0.9255
Epoch 3: Train Loss = 0.6817, Val Loss = 0.6740, Val Accuracy = 0.5838, Val Precision = 0.5878, Val Recall = 0.9254, Val F1 = 0.7189, Val Ratio = 0.9055
Epoch 4: Train Loss = 0.6810, Val Loss = 0.6734, Val Accuracy = 0.5836, Val Precision = 0.5912, Val Recall = 0.8950, Val F1 = 0.7120, Val Ratio = 0.8707
Epoch 5: Train Loss = 0.6802, Val Loss = 0.6734, Val Accuracy = 0.5841, Val Precision = 0.5933, Val Recall = 0.8801, Val F1 = 0.7088, Val Ratio = 0.8532
Epoch 6: Train Loss = 0.6796, Val Loss = 0.6722, Val Accuracy = 0.5854, Val Precision = 0.5931, Val Recall = 0.8893, Val F1 = 0.7116, Val Ratio = 0.8624
Epoch 7: Train Loss = 0.6796, Val Loss = 0.6725, Val Accuracy = 0.5855, Val Precis

In [136]:
# Evaluate the model for each player
# A model is passed in, so train_and_evaluate skips its `if model is None`
# training branch and only scores the shared model on each player's test rows.
# NOTE(review): the training hyperparameters given here (lr, epochs,
# hidden_units, pos_weight) are therefore not used by this loop.
for player in players_lst:
    train_and_evaluate(player, categorical_cols, numerical_cols, target_cols, embedding_size=32, lr=1e-4, batch_size=32, epochs=10, hidden_units=[128, 16],
                       pos_weight=False, results_dict = results_dict, model = model)

6320
Test Loss = 0.6678, Test Accuracy = 0.6001, Test Precision = 0.6098, Test Recall = 0.8941, Test F1 = 0.7251, Test Ratio = 0.8647
5987
Test Loss = 0.6609, Test Accuracy = 0.6084, Test Precision = 0.6180, Test Recall = 0.8800, Test F1 = 0.7261, Test Ratio = 0.8399
6064
Test Loss = 0.6839, Test Accuracy = 0.5673, Test Precision = 0.5682, Test Recall = 0.7576, Test F1 = 0.6494, Test Ratio = 0.7051
4534
Test Loss = 0.6822, Test Accuracy = 0.5390, Test Precision = 0.5400, Test Recall = 0.7687, Test F1 = 0.6344, Test Ratio = 0.7407
7374
Test Loss = 0.6696, Test Accuracy = 0.5881, Test Precision = 0.6067, Test Recall = 0.8708, Test F1 = 0.7151, Test Ratio = 0.8522
5524
Test Loss = 0.6697, Test Accuracy = 0.5817, Test Precision = 0.5947, Test Recall = 0.8666, Test F1 = 0.7054, Test Ratio = 0.8420
10227
Test Loss = 0.6769, Test Accuracy = 0.5754, Test Precision = 0.5856, Test Recall = 0.8030, Test F1 = 0.6773, Test Ratio = 0.7610
8297
Test Loss = 0.6909, Test Accuracy = 0.5640, Test Precisi

In [137]:
# Evaluation Results
def _per_player(subset, metric):
    """Collect one stored test metric for every player, in players_lst order."""
    return [results_dict[p]['test_metrics'][subset][metric] for p in players_lst]

# Subsets: 'all' = every test point, 'fs' = first-serve points, 'ss' = second-serve points.
f1_scores_all, f1_scores_fs, f1_scores_ss = (
    _per_player(subset, 'f1') for subset in ('all', 'fs', 'ss')
)
acc_scores_all, acc_scores_fs, acc_scores_ss = (
    _per_player(subset, 'accuracy') for subset in ('all', 'fs', 'ss')
)

# Mean and (population) standard deviation across players, per subset.
print(f"Mean F1 score (all): {np.mean(f1_scores_all):.4f}, Std F1 score (all): {np.std(f1_scores_all):.4f}")
print(f"Mean Accuracy score (all): {np.mean(acc_scores_all):.4f}, Std Accuracy score (all): {np.std(acc_scores_all):.4f}")

print(f"Mean F1 score (fs): {np.mean(f1_scores_fs):.4f}, Std F1 score (fs): {np.std(f1_scores_fs):.4f}")
print(f"Mean Accuracy score (fs): {np.mean(acc_scores_fs):.4f}, Std Accuracy score (fs): {np.std(acc_scores_fs):.4f}")

print(f"Mean F1 score (ss): {np.mean(f1_scores_ss):.4f}, Std F1 score (ss): {np.std(f1_scores_ss):.4f}")
print(f"Mean Accuracy score (ss): {np.mean(acc_scores_ss):.4f}, Std Accuracy score (ss): {np.std(acc_scores_ss):.4f}")


Mean F1 score (all): 0.6952, Std F1 score (all): 0.0359
Mean Accuracy score (all): 0.5808, Std Accuracy score (all): 0.0266
Mean F1 score (fs): 0.7423, Std F1 score (fs): 0.0282
Mean Accuracy score (fs): 0.6068, Std Accuracy score (fs): 0.0330
Mean F1 score (ss): 0.6039, Std F1 score (ss): 0.0719
Mean Accuracy score (ss): 0.5427, Std Accuracy score (ss): 0.0327
