
### NCAA Tournament Simulation using historical probabilities

- https://en.wikipedia.org/wiki/NCAA_Division_I_Men%27s_Basketball_Tournament
- https://www.betfirm.com/seeds-national-championship-odds/

In [6]:
import json
import random
import pandas as pd

In [7]:
# Labels for the five stages of a regional bracket, in playing order.
# The same strings are passed as the round key when looking up historical
# values in `probabilities`, and they become the column names of the
# summary table built further down.
rounds = ['1st Round', '2nd Round', 'Sweet Sixteen', 'Regional Finals', 'Final Four']

# Historical lookup table, indexed as probabilities[seed][round][opponent_seed]
# (seeds are string keys). Starts out empty so the name exists even if the
# file below cannot be opened.
probabilities = {}
with open('../../data/ncaa/historical_tourny_win_perc.json', 'rb') as tourny_percs:
    # json.load reads the whole stream and parses it in one call.
    probabilities = json.load(tourny_percs)

In [8]:
def was_match_played(tournament_round: str, s1: str, s2: str) -> bool:
    """Tell whether the loaded table has an entry for ``s1`` facing ``s2``.

    The table is walked as ``probabilities[s1][tournament_round][s2]``; the
    result is True only when every level of that path exists.

    Args:
        tournament_round: Round label used as the second-level key.
        s1: Seed (as a string) used as the first-level key.
        s2: Opponent seed (as a string) used as the third-level key.

    Returns:
        True if all three keys are present, otherwise False.
    """
    # Short-circuiting keeps each lookup safe: a deeper level is only
    # indexed after the level above it has been confirmed to exist.
    return (
        s1 in probabilities
        and tournament_round in probabilities[s1]
        and s2 in probabilities[s1][tournament_round]
    )

def get_probability(tournament_round: str, s1: str, s2: str) -> float:
    """Return the sampling weight for ``s1`` against ``s2`` in a given round.

    Args:
        tournament_round: Round label used to index the historical table.
        s1: Seed (as a string) whose weight is being computed.
        s2: Opponent seed (as a string).

    Returns:
        ``1`` when the table has no entry for this pairing. Otherwise the
        stored value plus a random offset in ``[0, 0.0001)``; if that sum is
        exactly ``0`` the result is ``1`` instead.

    NOTE(review): the stored values are presumably historical win fractions
    for ``s1`` -- confirm against the JSON file. Assuming they are
    non-negative, the ``== 0`` fallback can only trigger when the stored
    value is 0 and ``random.random()`` returns exactly 0.0.
    """
    if was_match_played(tournament_round, s1, s2):
        # Tiny jitter so a stored value of 0 still yields a positive weight.
        jitter = random.random() / 10000
        weight = jitter + probabilities[s1][tournament_round][s2]
        return weight if weight != 0 else 1

    # No history for this pairing: fall back to a neutral weight.
    return 1

def play_game(tournament_round: str, s1: int, s2: int) -> int:
    """Simulate one game and return the winning seed.

    Each seed's weight comes from ``get_probability`` evaluated from its own
    side of the matchup; the winner is then drawn with ``random.choices``,
    which treats the two values as relative weights.

    Args:
        tournament_round: Round label forwarded to ``get_probability``.
        s1: First seed.
        s2: Second seed.

    Returns:
        Either ``s1`` or ``s2``.
    """
    # The lookup table is keyed by strings, so convert the seeds once.
    first, second = str(s1), str(s2)

    weight_first = get_probability(tournament_round, first, second)
    weight_second = get_probability(tournament_round, second, first)

    (winner,) = random.choices([s1, s2], weights=[weight_first, weight_second], k=1)
    return winner

In [9]:
from typing import List, Tuple


def chunk(items: List[int], chunk_size: int) -> List[Tuple[int, ...]]:
    """Group ``items`` into consecutive slices and return each as a sorted tuple.

    Args:
        items: Values to group, taken in their existing order.
        chunk_size: Number of values per group. The last group is shorter
            when ``len(items)`` is not a multiple of ``chunk_size``.

    Returns:
        A list of tuples, one per group. Values inside each tuple are in
        ascending order; the order of the groups follows ``items``. An empty
        ``items`` gives an empty list.

    Raises:
        ValueError: If ``chunk_size`` is 0 (raised by ``range``).
    """
    return [
        tuple(sorted(items[start:start + chunk_size]))
        for start in range(0, len(items), chunk_size)
    ]


def play_regional() -> List[List[Tuple[int, ...]]]:
    """Simulate one 16-seed regional bracket from the opening games onward.

    Starting from the fixed opening pairings, every game of a stage is
    decided with ``play_game``; adjacent winners are then paired up with
    ``chunk`` to form the next stage. One stage is played for each label in
    ``rounds`` except the last.

    Returns:
        A list with one entry per stage. Entry 0 is the opening bracket and
        each later entry holds the surviving seeds grouped into matchups
        (lower seed number first). With the five labels in ``rounds`` the
        last entry contains a single 1-tuple: the seed that won the
        regional.
    """
    # Opening pairings in bracket order, so that adjacent winners meet next.
    regional = [
        [
            (1, 16),
            (8, 9),
            (5, 12),
            (4, 13),
            (6, 11),
            (3, 14),
            (7, 10),
            (2, 15),
        ]
    ]

    # The last label names the stage the winner advances to; no game is
    # played under it here, hence rounds[:-1].
    for tournament_round in rounds[:-1]:
        winners = [
            play_game(tournament_round, s1, s2)
            for s1, s2 in regional[-1]
        ]
        regional.append(chunk(winners, 2))

    return regional

In [16]:
# Monte Carlo tally: simulate `n` regionals and count, for every stage index,
# how many times each seed is present at that stage.
n = 100000
rds = [0, 1, 2, 3, 4]

# output[stage][seed] -> number of simulated regionals in which `seed`
# appears at `stage`; all 16 seeds start at zero.
output = {rd: dict.fromkeys(range(1, 17), 0) for rd in rds}

for _ in range(n):
    regional = play_regional()
    for i, games in enumerate(regional):
        tally = output[i]
        for game in games:
            # A game holds two seeds, except the last stage which holds one.
            for seed in game:
                tally[seed] += 1

In [20]:
import pandas as pd

# Turn the raw counts into fractions of the `n` simulated regionals
# (rows: seeds, columns: stages), then label the columns with round names.
df = pd.DataFrame(output) / n
df.columns = rounds
df

Unnamed: 0,1st Round,2nd Round,Sweet Sixteen,Regional Finals,Final Four
1,1.0,0.9935,0.8548,0.6968,0.4112
2,1.0,0.9368,0.632,0.4489,0.1951
3,1.0,0.8412,0.5272,0.2551,0.1199
4,1.0,0.7901,0.4731,0.1419,0.0924
5,1.0,0.6392,0.3381,0.058,0.0432
6,1.0,0.6243,0.2829,0.0977,0.0214
7,1.0,0.6089,0.1936,0.0637,0.0088
8,1.0,0.4896,0.0972,0.0614,0.0303
9,1.0,0.5104,0.0463,0.0237,0.014
10,1.0,0.3911,0.1552,0.0524,0.0119


### Simulation of a Regional

In [12]:
# Play one regional and print the matchups held at each stage.
regional_results = play_regional()

for regional_round, results in zip(rounds, regional_results):
    # Round label, that round's matchups, then a blank separator line.
    print(f'{regional_round}\n{results}\n')


1st Round
[(1, 16), (8, 9), (5, 12), (4, 13), (6, 11), (3, 14), (7, 10), (2, 15)]

2nd Round
[(1, 8), (4, 12), (6, 14), (2, 7)]

Sweet Sixteen
[(1, 4), (6, 7)]

Regional Finals
[(1, 7)]

Final Four
[(1,)]



### Simulate Final Four Teams

In [13]:
regions = ['North', 'South', 'East', 'West']

# Simulate one regional per region and show only its last two stages:
# the regional final matchup and the seed that comes out of it.
for region in regions:
    print(region)
    regional_results = play_regional()

    # Stages 0-2 are skipped; the slice starts at 'Regional Finals'.
    for regional_round, results in list(zip(rounds, regional_results))[3:]:
        print(f'{regional_round}\n{results}\n')

North
Regional Finals
[(1, 3)]

Final Four
[(1,)]

South
Regional Finals
[(1, 2)]

Final Four
[(1,)]

East
Regional Finals
[(3, 8)]

Final Four
[(3,)]

West
Regional Finals
[(1, 11)]

Final Four
[(1,)]

