
### NCAA Tournament Simulation using historical probabilities

- https://en.wikipedia.org/wiki/NCAA_Division_I_Men%27s_Basketball_Tournament
- https://www.betfirm.com/seeds-national-championship-odds/

In [1]:
import json
import random
import pandas as pd

In [2]:
# Display names of the tournament stages, in bracket order. The last entry
# names the stage the regional winner advances to.
rounds = [
    '1st Round',
    '2nd Round',
    'Sweet Sixteen',
    'Regional Finals',
    'Final Four',
]

# Nested lookup consumed by play_game: seed (str) -> round name -> opponent
# seed (str) -> numeric weight. Deliberately NOT pre-initialised to {}: if the
# file cannot be read, later cells should fail loudly with a NameError rather
# than silently simulating with no historical data.
with open('../../data/ncaa/historical_tourny_win_perc.json', 'rb') as win_perc_file:
    historical_probabilities = json.load(win_perc_file)

In [3]:
def chunk(items, chunk_size):
    """Split ``items`` into consecutive groups of ``chunk_size`` elements.

    Each group is returned as a tuple sorted in ascending order. Groups keep
    the order in which they appear in ``items``; the last group is shorter
    when ``len(items)`` is not a multiple of ``chunk_size``.

    Returns:
        list[tuple]: one sorted tuple per group (empty list for empty input).
    """
    starts = range(0, len(items), chunk_size)
    return [tuple(sorted(items[start:start + chunk_size])) for start in starts]

def play_game(tournament_round, team1, team2, win_probabilities=None):
    """Simulate a single game between two seeds and return the winner.

    Each side's weight is looked up as
    ``win_probabilities[str(seed)][tournament_round][str(opponent)]``; a
    missing entry at any level contributes 0. A tiny random jitter
    (at most 1/10000) is added to every weight so that a matchup with no
    recorded data still has a positive weight on both sides and is decided
    by an (almost) fair coin flip. The two weights are then normalised and
    the winner is drawn with ``random.choices``.

    Args:
        tournament_round: round name used as the middle lookup key
            (e.g. ``'1st Round'``).
        team1: seed of the first team (converted with ``str`` for lookup).
        team2: seed of the second team (converted with ``str`` for lookup).
        win_probabilities: optional nested dict with the layout described
            above. Defaults to the notebook-level ``historical_probabilities``.
            Presumably the values are historical win percentages; confirm
            against the data file.

    Returns:
        Either ``team1`` or ``team2``.
    """
    if win_probabilities is None:
        win_probabilities = historical_probabilities

    def get_probability(t1, t2):
        # Jitter is drawn first, unconditionally, so every call consumes
        # exactly one random number regardless of whether data exists.
        jitter = random.random() / 10000
        historical = (
            win_probabilities
            .get(str(t1), {})
            .get(tournament_round, {})
            .get(str(t2), 0)
        )
        return jitter + historical

    team1_weight = get_probability(team1, team2)
    team2_weight = get_probability(team2, team1)
    total_weight = team1_weight + team2_weight

    team1_probability = team1_weight / total_weight
    team2_probability = team2_weight / total_weight

    # A tight tolerance is required here: round(x) == 1 would accept any sum
    # between roughly 0.5 and 1.5 and therefore never catch a bad normalisation.
    assert abs(team1_probability + team2_probability - 1) < 1e-9, \
        'sum of probabilities does not equal 1.'

    return random.choices(
        [team1, team2],
        weights=[team1_probability, team2_probability],
        k=1,
    )[0]

def play_regional():
    """Simulate one 16-seed regional bracket.

    Starts from the fixed first-round pairings and, for every round name in
    ``rounds`` except the last, plays each pairing with ``play_game`` and
    groups the winners (in bracket order) into the next round's pairings via
    ``chunk(..., 2)``.

    Returns:
        list: one entry per stage. Entry 0 is the list of first-round
        pairings; each later entry is the list of sorted seed tuples that
        reached that stage. With the five rounds defined in this notebook
        the final entry is a single 1-tuple holding the regional winner.
    """
    opening_matchups = [
        (1, 16),
        (8, 9),
        (5, 12),
        (4, 13),
        (6, 11),
        (3, 14),
        (7, 10),
        (2, 15),
    ]
    bracket = [opening_matchups]

    for round_name in rounds[:-1]:
        advancing = []
        for seed_a, seed_b in bracket[-1]:
            advancing.append(play_game(round_name, seed_a, seed_b))

        # Adjacent winners meet in the next round.
        bracket.append(chunk(advancing, 2))

    return bracket


In [4]:
# Number of regionals to simulate.
n = 10000
# Stage indices, matching the positions in the list returned by play_regional().
rds = [0, 1, 2, 3, 4]

# output[stage][seed] counts the simulations in which `seed` appeared in `stage`.
output = {rd: dict.fromkeys(range(1, 17), 0) for rd in rds}

for _ in range(n):
    regional = play_regional()
    for i, games in enumerate(regional):
        for game in games:
            # A game tuple holds two seeds, or one for the final winner entry.
            for seed in game:
                output[i][seed] += 1

In [5]:
# Turn raw appearance counts into the fraction of the `n` simulations in which
# each seed (row) appeared in each stage (column). pandas is already imported
# in the notebook's first cell, so it is not re-imported here.
df = pd.DataFrame(output) / n

# Replace the numeric stage indices with their display names.
df.columns = rounds
df

Unnamed: 0,1st Round,2nd Round,Sweet Sixteen,Regional Finals,Final Four
1,1.0,0.9931,0.8566,0.693,0.421
2,1.0,0.9374,0.6283,0.4446,0.1922
3,1.0,0.8482,0.5191,0.249,0.1148
4,1.0,0.7789,0.4573,0.1423,0.0924
5,1.0,0.6477,0.348,0.0595,0.0464
6,1.0,0.6268,0.2931,0.1073,0.0198
7,1.0,0.6149,0.2041,0.0663,0.0095
8,1.0,0.499,0.0988,0.0643,0.0334
9,1.0,0.501,0.0431,0.0219,0.0144
10,1.0,0.3851,0.1508,0.05,0.0084


### Simulation of a Regional

In [6]:
regional_results = play_regional()

# Print every stage of one simulated regional, pairing each round name with
# the matchups (or final winner) recorded for it.
for regional_round, results in zip(rounds, regional_results):
    print(regional_round)
    print(results)
    print()


1st Round
[(1, 16), (8, 9), (5, 12), (4, 13), (6, 11), (3, 14), (7, 10), (2, 15)]

2nd Round
[(1, 9), (4, 5), (3, 11), (10, 15)]

Sweet Sixteen
[(1, 4), (3, 10)]

Regional Finals
[(1, 3)]

Final Four
[(3,)]



### Simulate Final Four Teams

In [7]:
regions = ['North', 'South', 'East', 'West']

for region in regions:
    print(region)
    regional_results = play_regional()

    # Only the stages from index 3 onward are shown for each region.
    for regional_round, results in zip(rounds[3:], regional_results[3:]):
        print(regional_round)
        print(results)
        print()

North
Regional Finals
[(1, 2)]

Final Four
[(2,)]

South
Regional Finals
[(3, 9)]

Final Four
[(3,)]

East
Regional Finals
[(2, 5)]

Final Four
[(5,)]

West
Regional Finals
[(1, 7)]

Final Four
[(1,)]

