
### NCAA Tournament Simulation using historical probabilities

- https://en.wikipedia.org/wiki/NCAA_Division_I_Men%27s_Basketball_Tournament
- https://www.betfirm.com/seeds-national-championship-odds/

In [1]:
import json
import random
import pandas as pd

In [2]:
# Display names of the tournament stages, in the order they are played.
rounds = ['1st Round', '2nd Round', 'Sweet Sixteen', 'Regional Finals', 'Final Four']

# Nested lookup table read from JSON. The code below indexes it as
# probabilities[seed][round][opponent_seed], with seeds as string keys.
probabilities = {}
with open('../../data/ncaa/historical_tourny_win_perc.json', 'rb') as tourny_percs:
    probabilities = json.load(tourny_percs)

In [3]:
def was_match_played(tournament_round: str, s1: str, s2: str) -> bool:
    """Tell whether `probabilities` holds an entry for `s1` against `s2`.

    The lookup table is nested as probabilities[s1][tournament_round][s2];
    the result is True only when all three levels of keys are present.
    """
    # Short-circuit evaluation keeps each deeper lookup safe: a level is only
    # indexed after its key has been confirmed to exist.
    return (
        s1 in probabilities
        and tournament_round in probabilities[s1]
        and s2 in probabilities[s1][tournament_round]
    )

def get_probability(tournament_round: str, s1: str, s2: str) -> float:
    """Return the sampling weight for seed `s1` facing seed `s2` in a round.

    When the lookup table has an entry for the matchup, the weight is the
    stored value plus a small random jitter drawn from [0, 0.0001).
    When there is no entry, or the jittered value is exactly 0, the weight
    falls back to 1 (an int, despite the float annotation).
    """
    if was_match_played(tournament_round, s1, s2):
        # The random draw happens only for matchups present in the table.
        jitter = random.random() / 10000
        weight = jitter + probabilities[s1][tournament_round][s2]
        if weight != 0:
            return weight

    # No entry for the matchup, or a weight of exactly zero.
    return 1

def play_game(tournament_round: str, s1: int, s2: int) -> int:
    """Simulate one game and return the winning seed (`s1` or `s2`).

    Each seed gets a weight from `get_probability` (seeds are passed to it as
    strings), and the winner is drawn at random in proportion to the weights.
    """
    # Weight for s1 is looked up first, then s2, so any random draws made
    # inside get_probability happen in that order.
    first_weight = get_probability(tournament_round, str(s1), str(s2))
    second_weight = get_probability(tournament_round, str(s2), str(s1))

    (winner,) = random.choices(
        [s1, s2],
        weights=[first_weight, second_weight],
        k=1,
    )
    return winner

In [4]:
from typing import List, Tuple


def chunk(items: List[int], chunk_size: int) -> List[Tuple[int, ...]]:
    """Split `items` into consecutive groups and return each as a sorted tuple.

    Args:
        items: Values to group, taken in their given order.
        chunk_size: Number of values per group; must be at least 1.

    Returns:
        A list of tuples. Every tuple holds up to `chunk_size` values sorted
        ascending; the last tuple is shorter when `len(items)` is not a
        multiple of `chunk_size`. An empty `items` yields an empty list.

    Raises:
        ValueError: If `chunk_size` is smaller than 1.
    """
    # A zero step already made range() raise ValueError; a negative step used
    # to produce an empty range and silently discard every item.
    if chunk_size < 1:
        raise ValueError('chunk_size must be at least 1')

    return [
        tuple(sorted(items[start:start + chunk_size]))
        for start in range(0, len(items), chunk_size)
    ]


def play_regional() -> List[List[Tuple[int, int]]]:
    """Simulate one 16-seed regional and return the matchups of every stage.

    The result has one entry per name in `rounds`: entry 0 is the fixed
    opening bracket, and each later entry pairs up the winners of the stage
    before it. The last entry contains a single 1-tuple holding the seed that
    won the regional.
    """
    opening_bracket = [
        (1, 16),
        (8, 9),
        (5, 12),
        (4, 13),
        (6, 11),
        (3, 14),
        (7, 10),
        (2, 15),
    ]
    bracket = [opening_bracket]

    # The final name in `rounds` has no game played here; it only labels the
    # winner produced by the stage before it.
    for round_name in rounds[:-1]:
        advancing = []
        for seed_a, seed_b in bracket[-1]:
            advancing.append(play_game(round_name, seed_a, seed_b))

        # Adjacent winners meet in the next stage.
        bracket.append(chunk(advancing, 2))

    return bracket

In [5]:
# Number of regionals to simulate.
n = 100000
# Stage indices, matching the positions of the names in `rounds`.
rds = [0, 1, 2, 3, 4]
# output[stage][seed] counts the simulations in which `seed` took part in
# that stage; every seed from 1 to 16 starts at zero.
output = {rd: dict.fromkeys(range(1, 17), 0) for rd in rds}

for _ in range(n):
    for stage, matchups in enumerate(play_regional()):
        for matchup in matchups:
            # A matchup holds two seeds, except the final stage, which holds
            # only the regional winner.
            for seed in matchup:
                output[stage][seed] += 1

In [6]:
import pandas as pd

# Rows are seeds and columns are stages; dividing the raw counts by the number
# of simulations turns them into the fraction of runs reaching each stage.
df = pd.DataFrame(output) / n

# Replace the numeric stage indices with the readable round names.
df.columns = rounds
df

Unnamed: 0,1st Round,2nd Round,Sweet Sixteen,Regional Finals,Final Four
1,1.0,0.98703,0.84329,0.6627,0.40212
2,1.0,0.92738,0.62555,0.44236,0.20529
3,1.0,0.85634,0.52659,0.25502,0.11566
4,1.0,0.78733,0.47507,0.1479,0.08809
5,1.0,0.65019,0.34263,0.07374,0.04974
6,1.0,0.62133,0.28376,0.10824,0.02232
7,1.0,0.61239,0.18957,0.05867,0.00893
8,1.0,0.48639,0.10622,0.06835,0.03383
9,1.0,0.51361,0.04761,0.02725,0.01867
10,1.0,0.38761,0.15521,0.05458,0.00888


### Simulation of a Regional

In [7]:
# Run a single regional and print the matchups of each stage under its name.
regional_results = play_regional()

for regional_round, results in zip(rounds, regional_results):
    print(regional_round)
    print(results)
    print()


1st Round
[(1, 16), (8, 9), (5, 12), (4, 13), (6, 11), (3, 14), (7, 10), (2, 15)]

2nd Round
[(1, 8), (4, 5), (3, 6), (2, 10)]

Sweet Sixteen
[(1, 5), (3, 10)]

Regional Finals
[(1, 10)]

Final Four
[(10,)]



### Simulate Final Four Teams

In [8]:
regions = ['North', 'South', 'East', 'West']

# Simulate an independent regional for every region and show only its last
# two stages: the regional final matchup and the seed that won it.
for region in regions:
    print(region)
    regional_results = play_regional()

    # Slicing from index 3 skips the first three stages.
    for regional_round, results in zip(rounds[3:], regional_results[3:]):
        print(regional_round)
        print(results)
        print()

North
Regional Finals
[(2, 4)]

Final Four
[(4,)]

South
Regional Finals
[(1, 3)]

Final Four
[(3,)]

East
Regional Finals
[(2, 5)]

Final Four
[(2,)]

West
Regional Finals
[(3, 5)]

Final Four
[(3,)]

