In [None]:
import math # math is used to perform some computations like factorial or binomial coefficient
from scipy import stats  # library for stats stuff, for example random variables
import numpy as np # package for scientific computing (dealing with arrays)

In [None]:
# for this exercise we need to know how to compute the mean of a vector
x = np.array((1, 2, 3))
mean_x =
print(mean_x)

# The birthday problem
There are $k$ people in a room. Assume each birthday is equally likely to be any of the 365 days of the year and that people's birthdays are independent.

(a) What is the probability that at least one pair of people in the group have the same birthday?

(b) What is the probability that a given pair has the same birthday?

(c) If $k = 50$, what is the expected number of pairs with the same birthday?

In [None]:
# set number of repetition of the experiment and a possible value for k
num_experiment = 100
k = 50

# define the random variable from which we will sample the birthdays: a Discrete Uniform over 1...365.
# In python, this is randint(a, b) (which can be found in scipy stats https://docs.scipy.org/doc/scipy/tutorial/stats/discrete_randint.html).
# Careful: the upper bound b is exclusive, i.e. randint(a, b) takes the values a, a+1, ..., b-1.
X =

## a)  
What is the probability that at least one pair of people in the group have the same birthday?

In [None]:
# let's run the experiment. The idea is similar to the last weeks: repeat the experiment many times (for loop)
# and each time count the successes ('at least one pair with same birthday')
# this will mean to iterate over pairs, so we'll have to introduce the concept of nested for loops!

success = 0 # initialize the variable that counts how many successes we observe (i.e., how many times we observe at least one pair with same bday)

for i in  # iterate over the repetitions of the experiments
  x =  # sample the vector of birthdays
  count_pairs_i =  # initialize the variable which will save the success of this single experiment i
  # iterate over the first member of the pair
  for j in
    # for each individual j, we need to iterate over the second member of the pair.
    # To avoid repetitions and to avoid the pair (j, j), this person needs to start from j+1 and go up until k
    for l in
      same_bday =  # check if j and l have the same bday
      count_pairs_i =  # increment result of experiment i
  # at the end of each experiment, we need to check if we had a success or not:
  # if experiment i contains at least 1 pair with the same birthday, then increment success:
  if


PS: there's a nice pythonic way to increment a variable. Instead of
`success = success + 1` you can use `success += 1`

In [None]:
empirical =
print('empirical result', empirical)

theory = 1 - math.comb(365, k) * math.factorial(k) / (365**k)
print('theoretical result', theory)

## b)
What is the probability that a given pair has the same birthday?

In [None]:
# Here, we want to count the proportion of pairs in the room with the same birthday

# we initialize a vector of zeros of dimension num_experiment. For each experiment we will store the count of pairs with the same birthday
count_pairs = np.zeros(num_experiment)
# we initialize a vector of zeros of dimension num_experiment. For each experiment we will save the proportion of pairs with the same birthday
proportion_pairs =

# same idea: repeat an experiment many times. This time, we don't have to save a success, but the actual number of pairs with the same birthday
for i in range(num_experiment):
  x = X.rvs(k) # sample bdays
  # iterate over pairs of people
  for j in range(k):
    for l in range(j+1, k):
      same_bday = (x[j] == x[l])
      count_pairs[i] =  # increment the count if the pair has the same birthday
  # for each experiment i we compute the proportion of pairs with the same bday as the counts divided by the total number of pairs
  proportion_pairs[i] =

# to get the empirical result we will simply take the mean of the vector proportion_pairs:
print('empirical', )
print('theory', 1/365)

## c)
If $k = 50$, what is the expected number of pairs with the same birthday?

In [None]:
# everything we need was already computed in b)
# we simply need to take the average of the counts of pairs with the same birthday
print('empirical', )
print('theory', k*(k-1)/(2*365))

## Extra 1)
#### Condensed way of solving all points together: run the set of experiments only once, saving and reusing the right variables at the right time

In [None]:
# Storage, one entry per experiment:
#   count_pairs[i]      = number of pairs with the same birthday in experiment i
#   proportion_pairs[i] = that number divided by the total number of pairs
count_pairs = np.zeros(num_experiment)
proportion_pairs = np.zeros(num_experiment)
# the total number of pairs among k people (k choose 2) is the same in every experiment, so we compute it only once
num_pairs = math.comb(k, 2)
for i in range(num_experiment):
  x = X.rvs(k) # sample bdays
  # visit every pair (j, l) with j < l exactly once
  for j in range(k):
    for l in range(j+1, k):
      same_bday = (x[j] == x[l])
      count_pairs[i] += same_bday # increment the count if the pair has the same birthday (True adds 1, False adds 0)
  # for each experiment i we compute the proportion of pairs with the same bday as the counts divided by the total number of pairs
  proportion_pairs[i] = count_pairs[i] / num_pairs

# a)
# an experiment is a success if it contains at least one pair with the same birthday, i.e. if count_pairs[i] > 0.
# `count_pairs > 0` is a vector of True/False values (one per experiment) and count_nonzero counts how many are True.
success = np.count_nonzero(count_pairs > 0)
print('empirical', success / num_experiment)
# theory: 1 - P(all k birthdays are different)
theory = 1 - math.comb(365, k) * math.factorial(k) / (365**k)
print('theory', theory)

# b)
# to get the empirical result we will simply take the mean of the vector proportion_pairs:
print('empirical', np.mean(proportion_pairs))
print('theory', 1/365)

# c)
# we simply need to take the average of the counts of pairs with the same birthday
print('empirical', np.mean(count_pairs))
print('theory', k*(k-1)/(2*365))

## Extra 2)
#### More advanced: another, more efficient, solution for a)
If you need to solve only a), you can do it more efficiently by using the `break` command which stops the for loop as soon as a condition is satisfied (indeed, since we are looking for 'at least one pair has the same birthday', as soon as we find one pair which has the same birthday we don't need to check all the other pairs).

In [None]:
# Count in how many experiments at least one pair of people has the same birthday.
count_at_least_onepair = 0
for i in range(num_experiment):
  x = X.rvs(k) # sample
  same_bday = 0 # becomes 1 as soon as we find one pair with the same birthday in this experiment
  for j in range(k):
    for l in range(j+1, k):
      if x[j] == x[l]: # check if the pair has the same birthday
        same_bday = 1 # if yes, set same_bday to 1
        break # the break command says that you can exit the last for loop you are in (in this case, the one with index l)
    # break only exits the innermost loop: if you have broken the l for loop you also want to exit the j for loop
    # (not the i loop: the next experiment still has to run). So, check if same_bday == 1 and if yes break the j for loop.
    if same_bday == 1:
      break
  # same_bday is 1 if this experiment was a success and 0 otherwise, so we can add it directly to the counter
  count_at_least_onepair += same_bday # equivalent to count_at_least_onepair = count_at_least_onepair + same_bday

In [None]:
# empirical estimate: fraction of experiments in which at least one pair had the same birthday
empirical = count_at_least_onepair / num_experiment
print('empirical result', empirical)

# exact value: 1 - P(all k birthdays are different) = 1 - 365! / ((365 - k)! * 365^k)
theory = 1 - math.comb(365, k) * math.factorial(k) / (365**k)
print('theoretical result', theory)