
### NCAA Tournament Simulation using historical probabilities

- https://en.wikipedia.org/wiki/NCAA_Division_I_Men%27s_Basketball_Tournament
- https://www.betfirm.com/seeds-national-championship-odds/

In [1]:
import json
import random
import pandas as pd

In [2]:
# Stage labels in bracket order. play_regional() returns one list of
# matchups per label, so this list also fixes the number of bracket stages.
rounds = ['1st Round', '2nd Round', 'Sweet Sixteen', 'Regional Finals', 'Final Four']

# Historical win percentages. play_game() reads this mapping as
# historical_probabilities[str(seed)][round name][str(opponent seed)].
historical_probabilities = {}
with open('../../data/ncaa/historical_tourny_win_perc.json', 'rb') as tourny_percs:
    # json.load reads the whole file object and parses it in one step.
    historical_probabilities = json.load(tourny_percs)

In [3]:
def chunk(items, chunk_size):
    """Group ``items`` into consecutive, individually sorted tuples.

    Args:
        items: A sliceable sequence of mutually comparable values.
        chunk_size: Maximum number of elements per group.

    Returns:
        A list of tuples. Each tuple holds up to ``chunk_size`` consecutive
        elements of ``items`` in ascending order; the final tuple is shorter
        when ``len(items)`` is not a multiple of ``chunk_size``.
    """
    starts = range(0, len(items), chunk_size)
    return [tuple(sorted(items[start:start + chunk_size])) for start in starts]

def play_game(tournament_round, team1, team2):
    """Randomly pick the winner of a single game between two seeds.

    Each team's weight is its historical win percentage against the other
    seed in ``tournament_round`` (looked up in the module-level
    ``historical_probabilities`` mapping) plus a tiny random jitter. The two
    weights are normalised to sum to 1 and the winner is drawn accordingly.

    Args:
        tournament_round: Round name used as the second-level lookup key.
        team1: Seed of the first team (converted to ``str`` for the lookup).
        team2: Seed of the second team (converted to ``str`` for the lookup).

    Returns:
        Either ``team1`` or ``team2``.
    """
    def get_probability(tournament_round, t1, t2):
        """Return t1's un-normalised win weight against t2 in this round."""
        # The jitter (< 1e-4) keeps the weight non-zero in practice when no
        # historical entry exists for this matchup.
        probability = random.random() / 10000

        by_round = historical_probabilities.get(str(t1), {})
        by_opponent = by_round.get(tournament_round, {})
        probability += by_opponent.get(str(t2), 0)

        return probability

    team1_probability = get_probability(tournament_round, team1, team2)
    team2_probability = get_probability(tournament_round, team2, team1)

    total_probability = team1_probability + team2_probability

    if total_probability == 0:
        # No historical data and both jitter draws were exactly 0.0:
        # fall back to even odds instead of dividing by zero.
        team1_probability = team2_probability = 0.5
    else:
        team1_probability /= total_probability
        team2_probability /= total_probability

    # Tolerance check: round(x) == 1 would accept any sum in roughly
    # [0.5, 1.5) and therefore verify nothing.
    assert abs(team1_probability + team2_probability - 1) < 1e-9, 'sum of probabilities does not equal 1.'

    return random.choices(
        [team1, team2],
        [team1_probability, team2_probability],
        k=1
    )[0]

def play_regional():
    """Simulate one 16-seed regional bracket, stage by stage.

    Returns:
        A list with one entry per name in ``rounds``. The first entry is the
        fixed list of opening matchups as ``(seed, seed)`` tuples. Every
        later entry holds the winners of the previous entry's games, paired
        up by ``chunk``. The final entry is a single 1-tuple containing the
        regional winner.
    """
    opening_matchups = [
        (1, 16),
        (8, 9),
        (5, 12),
        (4, 13),
        (6, 11),
        (3, 14),
        (7, 10),
        (2, 15),
    ]
    bracket = [opening_matchups]

    # The last name in `rounds` labels the regional winner, so no games are
    # played under it.
    for round_name in rounds[:-1]:
        advancing = []
        for seed_a, seed_b in bracket[-1]:
            advancing.append(play_game(round_name, seed_a, seed_b))

        # Adjacent winners meet in the next stage.
        bracket.append(chunk(advancing, 2))

    return bracket


In [4]:
# Seed the RNG so a fresh Restart & Run All reproduces the same tables and
# sample brackets.
SEED = 2024
random.seed(SEED)

n = 10000  # number of simulated regionals

# output[stage_index][seed] counts how many times `seed` appeared in that
# stage of the bracket across all simulations. One counter per entry in
# `rounds`, matching the list returned by play_regional().
output = {
    stage_index: {seed: 0 for seed in range(1, 17)}
    for stage_index in range(len(rounds))
}

for _ in range(n):
    regional = play_regional()
    for i, games in enumerate(regional):
        for game in games:
            # A game is a 2-tuple of seeds, or a 1-tuple for the regional
            # winner in the final stage.
            for team in game:
                output[i][team] += 1

In [5]:
import pandas as pd

# Convert raw appearance counts into the fraction of the n simulated
# regionals in which each seed (row) appeared in each stage (column).
df = pd.DataFrame(output) / n

# Replace the integer stage indices with the readable round names.
df.columns = rounds
df

Unnamed: 0,1st Round,2nd Round,Sweet Sixteen,Regional Finals,Final Four
1,1.0,0.991,0.8558,0.693,0.419
2,1.0,0.9385,0.6326,0.4526,0.1977
3,1.0,0.8479,0.5152,0.2427,0.111
4,1.0,0.7837,0.4535,0.1423,0.0898
5,1.0,0.6549,0.357,0.0605,0.047
6,1.0,0.6254,0.2953,0.108,0.0225
7,1.0,0.6166,0.2006,0.0667,0.0092
8,1.0,0.4949,0.0971,0.0624,0.0313
9,1.0,0.5051,0.0452,0.0225,0.0135
10,1.0,0.3834,0.1462,0.0476,0.0083


### Simulation of a Regional

In [6]:
regional_results = play_regional()

# play_regional() returns one list of matchups per round name, so the two
# sequences can be walked in lockstep.
for regional_round, results in zip(rounds, regional_results):
    print(regional_round)
    print(results)
    print()


1st Round
[(1, 16), (8, 9), (5, 12), (4, 13), (6, 11), (3, 14), (7, 10), (2, 15)]

2nd Round
[(1, 9), (4, 12), (3, 6), (2, 10)]

Sweet Sixteen
[(1, 4), (3, 10)]

Regional Finals
[(3, 4)]

Final Four
[(4,)]



### Simulate Final Four Teams

In [7]:
regions = ['North', 'South', 'East', 'West']

for region in regions:
    print(region)
    regional_results = play_regional()

    # Only show the stages from index 3 onward: the regional final matchup
    # and the team that advances to the Final Four.
    for i in range(3, len(rounds)):
        regional_round = rounds[i]
        results = regional_results[i]

        print(regional_round)
        print(results)
        print()

North
Regional Finals
[(1, 7)]

Final Four
[(1,)]

South
Regional Finals
[(1, 2)]

Final Four
[(2,)]

East
Regional Finals
[(2, 4)]

Final Four
[(2,)]

West
Regional Finals
[(1, 6)]

Final Four
[(1,)]

