In [1]:
import numpy as np
import pandas as pd
import random
import itertools
from scipy.optimize import linear_sum_assignment
from joblib import Parallel, delayed
import time
import multiprocessing
import sys
import os
sys.path.append(os.path.abspath("../scripts"))
from PickerDefinitions import Picker, BestOddsPicker, MaxOddsPicker, MaxOddsWithDecayPicker, SlidingWindowPicker, TopKOddsPicker

# Average Weeks Survived

In this notebook I will test the various picking strategies to see the average number of weeks that each one survives across real and simulated seasons. We will first run tests on relatively small numbers of simulations while we refine our hyperparameters and check our pickers, and then run the final strategies on many simulations to get our final results.

In [2]:
# Read the real odds history and the two simulated datasets from ../data.
real_historical_seasons = pd.read_csv("../data/cleaned_odds.csv")
simulated_nfl_histories = pd.read_csv("../data/simulated_nfl_histories")
simulated_upcoming_season = pd.read_csv("../data/simulated_upcoming_season")

# The 2024 rows of the real history are also scored as a dataset of their own.
season_2024 = real_historical_seasons.loc[real_historical_seasons['Season'].eq(2024)]

# Display name -> DataFrame for every dataset the pickers are evaluated on.
# Insertion order is the order in which results are printed.
dfs = {
    '2024 Season': season_2024,
    'Real History': real_historical_seasons,
    'Simulated Histories': simulated_nfl_histories,
    'Simulations of 2025 Season': simulated_upcoming_season,
}

In [3]:
def evaluate_picker_over_history(picker_class, historical_data, **kwargs):
    """
    Evaluates a picker's performance over multiple seasons of historical data.

    A fresh picker is built for every distinct value in the 'Season' column,
    asked to make its picks, and then scored; the per-season scores are averaged.

    Parameters:
        picker_class (class): A picker class whose constructor accepts one season's
            DataFrame plus **kwargs, and which implements make_season_picks() and
            evaluate_performance().
        historical_data (pd.DataFrame): DataFrame containing a 'Season' column and
            whatever other columns the picker needs.
        **kwargs: Additional keyword arguments passed to the picker class constructor.

    Returns:
        float: The average of evaluate_performance() across all seasons, rounded to
            3 decimal places. NaN if historical_data contains no seasons.
    """
    weeks_survived = []
    # A single groupby pass splits the frame by season (in ascending season order)
    # instead of re-scanning the whole frame with a boolean mask for every season.
    # Rows whose 'Season' is missing are skipped (pandas groupby default).
    for _, season_games in historical_data.groupby('Season'):
        season_picker = picker_class(season_games, **kwargs)
        season_picker.make_season_picks()
        weeks_survived.append(season_picker.evaluate_performance())

    if not weeks_survived:
        # Nothing to average: return NaN explicitly rather than letting
        # np.mean([]) emit a RuntimeWarning on its way to the same value.
        return np.float64(np.nan)

    return np.round(np.mean(weeks_survived), 3)

def EvaluateAverageWeeksSurvived(picker_class, df_names_to_df_dict, **kwargs):
    """
    Scores one picker on several named datasets and reports each result.

    Prints a header line with the picker's class name, then one
    "<dataset name>: <score>" line per dataset, in the dict's iteration order.

    Parameters:
        picker_class (class): The picker class to be tested.
        df_names_to_df_dict (dict): Maps a display name to the DataFrame to evaluate on.
        **kwargs: Forwarded unchanged to evaluate_picker_over_history (and from
            there to the picker's constructor).

    Returns:
        dict: Dataset name -> average weeks survived on that dataset.
    """
    print(picker_class.__name__, 'performance across different test dfs')
    scores_by_dataset = {}
    for label in df_names_to_df_dict:
        scores_by_dataset[label] = evaluate_picker_over_history(
            picker_class, df_names_to_df_dict[label], **kwargs
        )
        print(f'{label}: {scores_by_dataset[label]}')
    return scores_by_dataset


def test_hyperparameters(picker, hp_names_to_options_dict, dfs=dfs):
    """
    Grid-searches a picker's hyperparameters and reports the winner per dataset.

    Every combination in the Cartesian product of the option lists is evaluated
    with EvaluateAverageWeeksSurvived; the combination and its scores are printed
    as the search progresses.

    Parameters:
        picker (class): The picker class to test.
        hp_names_to_options_dict (dict): Maps each hyperparameter name to the list
            of values to try for it.
        dfs (dict): Maps dataset names to DataFrames. The default is the
            module-level `dfs` as it existed when this function was defined.

    Returns:
        tuple:
            - dict: Tuple of hyperparameter values (in the order of the names in
              hp_names_to_options_dict) -> {dataset name: score}.
            - dict: Dataset name -> {"performance": best score,
              "hyperparameters": {name: value}}. Ties keep the combination that
              was evaluated first, because only a strictly greater score replaces
              the current best.
    """
    hp_names = list(hp_names_to_options_dict)
    option_lists = [hp_names_to_options_dict[name] for name in hp_names]
    grid = [dict(zip(hp_names, choice)) for choice in itertools.product(*option_lists)]

    performances_across_hps = {}
    for hp_setting in grid:
        print(hp_setting)
        scores = EvaluateAverageWeeksSurvived(picker, dfs, **hp_setting)
        performances_across_hps[tuple(hp_setting.values())] = scores
        print('')

    # Start every dataset below any achievable score so the first result always wins.
    best_performers = {}
    for df_name in dfs:
        best_performers[df_name] = {"performance": -float("inf"), "hyperparameters": None}

    for hp_values, scores in performances_across_hps.items():
        for df_name, score in scores.items():
            if score > best_performers[df_name]["performance"]:
                best_performers[df_name] = {
                    "performance": score,
                    "hyperparameters": dict(zip(hp_names, hp_values)),
                }

    return performances_across_hps, best_performers

## Random Picker

We will first test our random picker. It serves as a performance baseline and will also provide randomness in our head-to-head simulations.

In [4]:
# Baseline: score the random picker on every dataset in dfs. The function prints each
# score and the returned dict is echoed as the cell output, so the numbers appear twice.
EvaluateAverageWeeksSurvived(Picker, dfs)

Picker performance across different test dfs
2024 Season: 0.0
Real History: 0.474
Simulated Histories: 0.978
Simulations of 2025 Season: 1.003


{'2024 Season': 0.0,
 'Real History': 0.474,
 'Simulated Histories': 0.978,
 'Simulations of 2025 Season': 1.003}

Our random picker survives roughly one week on average, and this poor performance is to be expected. We will now test a more intelligent picker that picks the team with the best odds each week out of those that it hasn't picked yet.

## Best Odds Picker

In [5]:
# Score BestOddsPicker on the same datasets so it can be compared with the random baseline.
EvaluateAverageWeeksSurvived(BestOddsPicker, dfs)

BestOddsPicker performance across different test dfs
2024 Season: 0.0
Real History: 4.421
Simulated Histories: 4.373
Simulations of 2025 Season: 3.346


{'2024 Season': 0.0,
 'Real History': 4.421,
 'Simulated Histories': 4.373,
 'Simulations of 2025 Season': 3.346}

As expected, picking the best odds each week performs much better than random picking. From here we will look to improve on this basic strategy. 

## TopKOddsPicker

In [6]:
# No k is passed here, so TopKOddsPicker runs with whatever default its constructor defines.
EvaluateAverageWeeksSurvived(TopKOddsPicker, dfs)

TopKOddsPicker performance across different test dfs
2024 Season: 0.0
Real History: 5.316
Simulated Histories: 3.517
Simulations of 2025 Season: 2.764


{'2024 Season': 0.0,
 'Real History': 5.316,
 'Simulated Histories': 3.517,
 'Simulations of 2025 Season': 2.764}

While this picker may perform worse in terms of average weeks survived, my thinking is that it may outperform some other pickers when put head-to-head: because of its randomness, it will often avoid picking the same winner that a lot of the others pick, resulting in more outright victories and more winnings.

We will test different values of k to determine the value that maximizes survival length.

In [7]:
# Candidate values of k to sweep for TopKOddsPicker.
ks = {'k': [2, 3, 4, 5]}
# Collects the per-dataset winners for each tuned picker; later cells add their own
# entries and the dict is eventually written to disk.
# NOTE(review): it is created here, so re-running this cell discards entries added by later cells.
best_hps_dict= {}
# Only the per-dataset best results are kept; the full score table is discarded.
_, best_ks = test_hyperparameters(TopKOddsPicker, ks, dfs=dfs)
best_hps_dict['TopKOddsPicker'] = best_ks
best_ks

{'k': 2}
TopKOddsPicker performance across different test dfs
2024 Season: 1.0
Real History: 3.842
Simulated Histories: 3.867
Simulations of 2025 Season: 3.036

{'k': 3}
TopKOddsPicker performance across different test dfs
2024 Season: 1.0
Real History: 2.895
Simulated Histories: 3.471
Simulations of 2025 Season: 2.774

{'k': 4}
TopKOddsPicker performance across different test dfs
2024 Season: 0.0
Real History: 2.632
Simulated Histories: 3.141
Simulations of 2025 Season: 2.585

{'k': 5}
TopKOddsPicker performance across different test dfs
2024 Season: 1.0
Real History: 2.947
Simulated Histories: 2.986
Simulations of 2025 Season: 2.442



{'2024 Season': {'performance': 1.0, 'hyperparameters': {'k': 2}},
 'Real History': {'performance': 3.842, 'hyperparameters': {'k': 2}},
 'Simulated Histories': {'performance': 3.867, 'hyperparameters': {'k': 2}},
 'Simulations of 2025 Season': {'performance': 3.036,
  'hyperparameters': {'k': 2}}}

## MaxOdds Picker

This picker uses the Hungarian algorithm to calculate the order of picks that maximizes the joint probability of 'surviving' the entire season and picks each week based on that order.

In [8]:
# Score MaxOddsPicker, built with its constructor defaults, on every dataset in dfs.
EvaluateAverageWeeksSurvived(MaxOddsPicker, dfs)

MaxOddsPicker performance across different test dfs
2024 Season: 0.0
Real History: 4.737
Simulated Histories: 4.358
Simulations of 2025 Season: 2.906


{'2024 Season': 0.0,
 'Real History': 4.737,
 'Simulated Histories': 4.358,
 'Simulations of 2025 Season': 2.906}

## MaxOddsWithDecayPicker

We will introduce a decay factor to our MaxOddsPicker. The goal of this is to decrease the value of later weeks, meaning that choosing higher odds in earlier weeks would be favored over later weeks. This is important because, while increasing the probability of surviving the whole season is good, none of that matters if you are eliminated early on while another player in the league is still alive.

We will test different hyperparameters and choose the best performing decay factor as our default in our class.

In [9]:
# Decay factors to sweep. No dfs argument is given, so test_hyperparameters uses its default datasets.
decay_factors = {'decay_factor': [0.8, 0.85, 0.9, 0.95, 0.98]}
# NOTE(review): in the output saved with this notebook every decay factor produces identical
# scores on every dataset, so the reported "best" value (0.8) is simply the first one tried
# (ties keep the earliest combination). Confirm that the picker actually applies decay_factor.
_, best_decay = test_hyperparameters(MaxOddsWithDecayPicker, decay_factors)
best_hps_dict['MaxOddsWithDecayPicker'] = best_decay

best_decay

{'decay_factor': 0.8}
MaxOddsWithDecayPicker performance across different test dfs
2024 Season: 0.0
Real History: 4.737
Simulated Histories: 4.358
Simulations of 2025 Season: 2.906

{'decay_factor': 0.85}
MaxOddsWithDecayPicker performance across different test dfs
2024 Season: 0.0
Real History: 4.737
Simulated Histories: 4.358
Simulations of 2025 Season: 2.906

{'decay_factor': 0.9}
MaxOddsWithDecayPicker performance across different test dfs
2024 Season: 0.0
Real History: 4.737
Simulated Histories: 4.358
Simulations of 2025 Season: 2.906

{'decay_factor': 0.95}
MaxOddsWithDecayPicker performance across different test dfs
2024 Season: 0.0
Real History: 4.737
Simulated Histories: 4.358
Simulations of 2025 Season: 2.906

{'decay_factor': 0.98}
MaxOddsWithDecayPicker performance across different test dfs
2024 Season: 0.0
Real History: 4.737
Simulated Histories: 4.358
Simulations of 2025 Season: 2.906



{'2024 Season': {'performance': 0.0, 'hyperparameters': {'decay_factor': 0.8}},
 'Real History': {'performance': 4.737,
  'hyperparameters': {'decay_factor': 0.8}},
 'Simulated Histories': {'performance': 4.358,
  'hyperparameters': {'decay_factor': 0.8}},
 'Simulations of 2025 Season': {'performance': 2.906,
  'hyperparameters': {'decay_factor': 0.8}}}

## SlidingWindowPicker

This picker uses the same underlying method as the prior two Max Odds pickers to maximize the joint probability of survival, but instead of doing it over the whole season, it maximizes the probability over a predefined window size and chooses the next week's pick based on that.

We will test different window size/decay factor combinations to find the pair that maximizes performance.

In [10]:
# Full grid of decay_factor x window_size: 5 * 9 = 45 combinations, each scored on every dataset.
hps = {'decay_factor': [0.8, 0.85, 0.9, 0.95, 0.98], 'window_size': [2, 3, 4, 5, 6, 7, 8, 9, 10]}
_, best_hps = test_hyperparameters(SlidingWindowPicker, hps)
best_hps_dict['SlidingWindowPicker'] = best_hps
best_hps

{'decay_factor': 0.8, 'window_size': 2}
SlidingWindowPicker performance across different test dfs
2024 Season: 0.0
Real History: 3.895
Simulated Histories: 4.452
Simulations of 2025 Season: 3.312

{'decay_factor': 0.8, 'window_size': 3}
SlidingWindowPicker performance across different test dfs
2024 Season: 1.0
Real History: 5.211
Simulated Histories: 4.521
Simulations of 2025 Season: 3.385

{'decay_factor': 0.8, 'window_size': 4}
SlidingWindowPicker performance across different test dfs
2024 Season: 1.0
Real History: 5.211
Simulated Histories: 4.531
Simulations of 2025 Season: 3.379

{'decay_factor': 0.8, 'window_size': 5}
SlidingWindowPicker performance across different test dfs
2024 Season: 1.0
Real History: 5.316
Simulated Histories: 4.492
Simulations of 2025 Season: 3.381

{'decay_factor': 0.8, 'window_size': 6}
SlidingWindowPicker performance across different test dfs
2024 Season: 1.0
Real History: 5.211
Simulated Histories: 4.338
Simulations of 2025 Season: 3.361

{'decay_factor'

{'2024 Season': {'performance': 1.0,
  'hyperparameters': {'decay_factor': 0.8, 'window_size': 3}},
 'Real History': {'performance': 5.316,
  'hyperparameters': {'decay_factor': 0.8, 'window_size': 5}},
 'Simulated Histories': {'performance': 4.556,
  'hyperparameters': {'decay_factor': 0.95, 'window_size': 4}},
 'Simulations of 2025 Season': {'performance': 3.434,
  'hyperparameters': {'decay_factor': 0.9, 'window_size': 6}}}

In [16]:
def _to_builtin(value):
    """Recursively replace NumPy scalars inside nested dicts with built-in Python values."""
    if isinstance(value, dict):
        return {key: _to_builtin(item) for key, item in value.items()}
    if isinstance(value, np.generic):
        return value.item()
    return value


# The scores stored in best_hps_dict come from np.round and are NumPy scalars. Under
# NumPy >= 2 their repr is e.g. "np.float64(4.737)", so str() of the raw dict would make
# the file contents depend on the installed NumPy version. Converting to built-ins first
# keeps the file as plain Python literals (and is byte-identical under NumPy 1.x).
# The encoding is pinned so the result does not depend on the platform default.
with open("../outputs/best_hyperparameters.txt", "w", encoding="utf-8") as f:
    f.write(str(_to_builtin(best_hps_dict)))

## Final Results

In [17]:
# Score every strategy, built with its constructor defaults, on the simulated histories.
pickers = [Picker, BestOddsPicker, TopKOddsPicker, MaxOddsPicker, MaxOddsWithDecayPicker, SlidingWindowPicker]
# Despite the name, this is a list of row dicts at this point (one per picker).
performance_comparison_df = [
    {
        'Picker': picker_cls.__name__,
        'Average Number of Weeks Survived': evaluate_picker_over_history(picker_cls, simulated_nfl_histories),
    }
    for picker_cls in pickers
]


In [25]:
# Build the results table and rank it from the longest-surviving picker to the shortest.
performance_comparison_df = (
    pd.DataFrame(performance_comparison_df)
    .sort_values(by='Average Number of Weeks Survived', ascending=False)
)
performance_comparison_df

Unnamed: 0,Picker,Average Number of Weeks Survived
5,SlidingWindowPicker,4.477
1,BestOddsPicker,4.373
3,MaxOddsPicker,4.358
4,MaxOddsWithDecayPicker,4.355
2,TopKOddsPicker,3.481
0,Picker,1.005


In [26]:
# NOTE(review): to_csv writes the row index by default, so the post-sort index
# (5, 1, 3, ...) is saved as an unnamed first column. The output path has no .csv extension.
performance_comparison_df.to_csv("../outputs/picker_performance_comparisons")