
### NCAA Tournament Simulation using historical probabilities

- https://en.wikipedia.org/wiki/NCAA_Division_I_Men%27s_Basketball_Tournament
- https://www.betfirm.com/seeds-national-championship-odds/

In [160]:
import random
import pandas as pd

In [161]:
rounds = [
  '1st Round',
  '2nd Round',
  'Sweet Sixteen',
  'Regional Finals'
]

historical_probabilities = {
  '1st Round': {
    1: {
      16: 143 / (143 + 1),
    },
    2: {
      15: 135 / (135 + 9),
    },
    3: {
      14: 122 / (122 + 22),
    },
    4: {
      13: 113 / (113 + 31),
    },
    5: {
      12: 93 / (93 + 51),
    },
    6: {
      11: 90 / (90 + 54),
    },
    7: {
      10: 87 / (87 + 57),
    },
    8: {
      9: 71 / (71 + 73),
    },
    9: {
      8: 73 / (73 + 71),
    },
    10: {
      7: 57 / (57 + 87),
    },
    11: {
      6: 54 / (54 + 90),
    },
    12: {
      5: 51 / (51 + 93),
    },
    13: {
      4: 31 / (31 + 113),
    },
    14: {
      3: 22 / (22 + 122),
    },
    15: {
      2: 9 / (9 + 135),
    },
    16: {
      1: 1 / (1 + 143),
    },
  },
  '2nd Round': {
    1: {
      8: 57 / (57 + 14),
      9: 66 / (66 + 6),
    },
    2: {
      7: 57 / (57 + 26),
      10: 34 / (34 + 18),
    },
    3: {
      6: 45 / (45 + 29),
      11: 30 / (30 + 18),
    },
    4: {
      5: 41 / (41 + 33),
      12: 26 / (26 + 13),
    },
    5: {
      4: 33 / (33 + 41),
      13: 16 / (16 + 3),
    },
    6: {
      3: 29 / (45 + 29),
      14: 14 / (14 + 2),
    },
    7: {
      2: 26 / (26 + 57),
      15: 2 / (2 + 2),
    },
    8: {
      1: 14 / (14 + 57),
    },
    9: {
      1: 6 / (6 + 66),
      16: 1 / (1 + 0),
    },
    10: {
      2: 18 / (18 + 34),
      15: 5 / (5 + 0),
    },
    11: {
      3: 18 / (18 + 30),
      14: 6 / (6 + 0),
    },
    12: {
      4: 13 / (13 + 26),
      13: 9 / (9 + 3),
    },
    13: {
      5: 3 / (3 + 16),
      12: 3 / (3 + 9),
    },
    14: {
      6: 2 / (2 + 14),
      11: 0 / (0 + 6),
    },
    15: {
      7: 2 / (2 + 2),
      10: 0 / (0 + 5),
    },
    16: {
      9: 0 / (0 + 1),
    }
  },
  'Sweet Sixteen': {
    1: {
      4: 40 / (40 + 15),
      5: 36 / (36 + 8),
      12: 20 / (20 + 0),
      13: 4 / (4 + 0),
    },
    2: {
      3: 28 / (28 + 17),
      6: 23 / (23 + 6),
      11: 14 / (14 + 3),
    },
    3: {
      2: 17 / (17 + 28),
      7: 9 / (9 + 6),
      10: 9 / (9 + 4),
      15: 2 / (2 + 0),
    },
    4: {
      1: 15 / (15 + 40),
      8: 4 / (4 + 5),
      9: 2 / (2 + 1),
    },
    5: {
      1: 8 / (8 + 36),
      8: 0 / (0 + 2),
      9: 1 / (1 + 2),
    },
    6: {
      1: 2 / (2 + 8),
      4: 1 / (1 + 2),
      5: 0 / (0 + 1),
      8: 0 / (0 + 1),
    },
    7: {
      1: 0 / (0 + 4),
      4: 3 / (3 + 2),
      8: 0 / (0 + 1),
    },
    8: {
      4: 5 / (5 + 4),
      5: 2 / (2 + 0),
      12: 0 / (0 + 2),
      13: 1 / (1 + 0),
    },
    9: {
      4: 1 / (1 + 2),
      5: 2 / (2 + 1),
      13: 1 / (1 + 0),
    },
    10: {
      1: 1 / (1 + 4),
      4: 0 / (0 + 2),
      5: 0 / (0 + 1),
    },
    11: {
      1: 4 / (4 + 4),
    },
    12: {
      1: 0 / (0 + 20),
      8: 0 / (0 + 2),
    },
    13: {
      1: 0 / (0 + 4),
      8: 0 / (0 + 1),
      9: 0 / (0 + 1),
    }
  },
  'Regional Finals': {
    1: {
      2: 23 / (23 + 24),
      3: 16 / (16 + 10),
      6: 8 / (8 + 2),
      7: 4 / (4 + 0),
      10: 4 / (4 + 1),
      11: 4 / (4 + 4),
    },
    2: {
      1: 24 / (24 + 23),
      4: 2 / (2 + 4),
      5: 0 / (0 + 4),
      8: 2 / (2 + 3),
      9: 0 / (0 + 1),
      12: 0 / (0 + 2),
    },
    3: {
      1: 10 / (16 + 10),
      4: 2 / (2 + 3),
      5: 2 / (2 + 1),
      8: 0 / (0 + 1),
      9: 0 / (0 + 2),
    },
    4: {
      2: 4 / (4 + 2),
      3: 3 / (3 + 2),
      6: 2 / (2 + 1),
      7: 2 / (2 + 3),
      10: 2 / (2 + 0),
    },
    5: {
      2: 4 / (4 + 0),
      3: 1 / (1 + 2),
      6: 1 / (1 + 0),
      10: 1 / (1 + 0),
    },
    6: {
      1: 2 / (2 + 8),
      4: 1 / (1 + 2),
      5: 0 / (0 + 1),
      8: 0 / (0 + 1),
    },
    7: {
      1: 0 / (0 + 4),
      4: 3 / (3 + 2),
      8: 0 / (0 + 1),
    },
    8: {
      2: 3 / (3 + 2),
      3: 0 / (0 + 1),
      6: 1 / (1 + 0),
      7: 1 / (1 + 0),
    },
    9: {
      2: 1 / (1 + 0),
      3: 0 / (0 + 2),
      11: 0 / (0 + 1),
    },
    10: {
      1: 1 / (1 + 4),
      4: 0 / (0 + 2),
      5: 0 / (0 + 1),
    },
    11: {
      1: 4 / (4 + 4),
      9: 0 / (0 + 1),
    },
    12: {
      2: 0 / (0 + 2),
    },
  },
}

In [162]:
def chunk(items, chunk_size):
    array = []
    for i in range(0, len(items), chunk_size):
        array.append(
            tuple(sorted(items[i:i + chunk_size]))
        )

    return array

def play_game(tournament_round, team1, team2):
    def get_probability(tournament_round, t1, t2):
        probability = random.random() / 10000
        if t1 in historical_probabilities[tournament_round]:
            if t2 in historical_probabilities[tournament_round][t1]:
                probability += historical_probabilities[tournament_round][t1][t2]

        return probability

    team1_probability = get_probability(tournament_round, team1, team2)
    team2_probability = get_probability(tournament_round, team2, team1)

    total_probability = team1_probability + team2_probability

    team1_probability /= total_probability
    team2_probability /= total_probability

    assert round(team1_probability + team2_probability) == 1, 'sum of probabilities does not equal 1.'

    sequence = [team1, team2]
    probabilities = [team1_probability, team2_probability]

    return random.choices(
      sequence,
      probabilities,
      k=1
    )[0]

def play_regional():
    regional = [
      [
        (1, 16),
        (8, 9),
        (5, 12),
        (4, 13),
        (6, 11),
        (3, 14),
        (7, 10),
        (2, 15),
      ]
    ]

    for tournament_round in rounds:
        winners = chunk(
          [
            play_game(tournament_round, t1, t2)
            for t1, t2
            in regional[-1]
          ],
          2
        )

        regional.append(winners)

    return regional


In [163]:
n = 10000
output = {
    0: { i:0 for i in range(1, 17) },
    1: { i:0 for i in range(1, 17) },
    2: { i:0 for i in range(1, 17) },
    3: { i:0 for i in range(1, 17) },
    4: { i:0 for i in range(1, 17) },
}

for _ in range(n):
    regional = play_regional()
    for i, games in enumerate(regional):
        for game in games:
            t1 = game[0]
            output[i][t1] += 1

            if len(game) > 1:
              t2 = game[1]
              output[i][t2] += 1

In [164]:
import pandas as pd

df = pd.DataFrame(output)
for i in [0, 1, 2, 3, 4]:
    df[i] = df[i] / n

df.columns = ['1st Round', '2nd Round', 'Sweet Sixteen', 'Regional Finals', 'Final Four']
df

Unnamed: 0,1st Round,2nd Round,Sweet Sixteen,Regional Finals,Final Four
1,1.0,0.9936,0.8508,0.6845,0.4087
2,1.0,0.9331,0.6202,0.4967,0.2095
3,1.0,0.8482,0.5183,0.3169,0.1293
4,1.0,0.7839,0.4668,0.1431,0.0874
5,1.0,0.6466,0.3484,0.0639,0.0482
6,1.0,0.6219,0.2926,0.0554,0.0106
7,1.0,0.6035,0.2002,0.0491,0.0084
8,1.0,0.4986,0.1032,0.0751,0.0448
9,1.0,0.5014,0.0443,0.0215,0.017
10,1.0,0.3965,0.1591,0.0389,0.007
