In [None]:
import math # math is used to perform some computations like factorial or binomial coefficient
from scipy import stats  # library for stats stuff, for example random variables
import numpy as np # package for scientific computing (dealing with arrays)
import matplotlib.pyplot as plt # to plot

# 3) Minimum and maximum of a set of iid random variables
Let $X_1,\dots,X_n$ be independent discrete uniform random variables on $\{1,\dots,N\}$.

a) Find the pmf of $\min(X_1,\dots,X_n)$

b) Find the pmf of $\max(X_1,\dots,X_n)$

In [None]:
n = 20                # number of iid variables X_1, ..., X_n in each experiment
N = 10                # the variables are uniform on {1, ..., N}
num_experiment = 100  # number of repetitions used to estimate the pmf

# discrete uniform distribution on {1, ..., N}
# (stats.randint(low, high) excludes `high`, hence the N + 1)
X = stats.randint(1, N + 1)

In [None]:
# we need to save the empirical pmf of the minimum, so we initialise a vector of suitable size to store the counts we will generate:
# count_min[k-1] is the number of experiments whose minimum equals k (the values start at 1, numpy indices start at 0)
count_min = np.zeros(N)

for i in range(num_experiment):
  x = X.rvs(n) # sample n iid draws from the rv
  k = np.min(x) # compute the minimum of the sample
  count_min[k-1] += 1 # increment by 1 count_min in the position of the observed minimum

In [None]:
# empirical pmf of the minimum: relative frequency of each value over all the experiments
empirical_pdf = count_min / num_experiment
print('empirical', empirical_pdf)

# theoretical pmf of the minimum:
# P(min = k) = P(min >= k) - P(min >= k+1) = ((N-k+1)^n - (N-k)^n) / N^n
theoretical_pdf = np.zeros(N)
for k in range(1, N+1):
  theoretical_pdf[k-1] = ((N-k+1)**n - (N-k)**n) / (N**n)
print('theory', theoretical_pdf)

print('absolute difference of the empirical and theoretical pmf', np.sum(np.abs(empirical_pdf - theoretical_pdf)))

### Our first plot in python: bar plot

Printing the vectors is not so clear, so we will use a visual way to check the difference between the empirical and the theoretical pmf.

The bar plot in Python (https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.bar.html#matplotlib.pyplot.bar) is a possible visualisation tool.
To use it, you need to `import matplotlib.pyplot as plt` and then call `plt.bar(x, y)`, where `x` holds the bar positions and `y` the bar heights. Other useful arguments are `alpha`, which specifies the opacity (between 0 and 1; the default 1 means fully opaque), and `label`, which identifies each bar plot in the legend. Since we draw the two bar plots one after the other, matplotlib automatically chooses different colours; if we want to choose a colour ourselves, we can do so with the argument `color`. If we specify a label, we need to call `plt.legend()` so that the legend shows on the plot. Finally, we use `plt.show()` to make the plot appear.

In [None]:
# np.arange(1, N+1) is the pythonic way of defining a vector of natural numbers from 1 to N
plt.bar(np.arange(1, N+1), empirical_pdf, alpha=0.5, label='empirical')
# we want to specify alpha<1 otherwise we would not distinguish the two colors where the bars overlap.
plt.bar(np.arange(1, N+1), theoretical_pdf, alpha=0.5, label='theoretical')
plt.xlabel('x')
plt.ylabel('$p_X(x)$') # the $ $ specifies that you are writing a formula according to the latex syntax
plt.legend() # needed to display the labels given to the two bar plots
plt.show()

In [None]:
# Repeat the experiment for the maximum: count_max[k-1] tallies how many
# experiments produced a sample whose largest value is k.

count_max = np.zeros(N)

for _ in range(num_experiment):
  sample = X.rvs(n)  # n iid draws from X
  largest = sample.max()
  count_max[largest - 1] += 1  # values start at 1, indices at 0

In [None]:
# Empirical pmf of the maximum: relative frequency of each observed value.
empirical_pdf = count_max / num_experiment
print('empirical', empirical_pdf)

# Theoretical pmf of the maximum:
# P(max = k) = P(max <= k) - P(max <= k-1) = (k/N)^n - ((k-1)/N)^n
theoretical_pdf = np.array([(k/N)**n - ((k-1)/N)**n for k in range(1, N+1)])
print('theory', theoretical_pdf)

# Sum of the entry-wise absolute gaps between the two pmfs.
total_abs_gap = np.abs(empirical_pdf - theoretical_pdf).sum()
print('absolute difference of the empirical and theoretical pmf', total_abs_gap)

In [None]:
# Overlay the empirical and theoretical pmf of the maximum as semi-transparent bars.
support = np.arange(1, N+1)  # possible values 1, ..., N
for heights, name in ((empirical_pdf, 'empirical'), (theoretical_pdf, 'theoretical')):
  plt.bar(support, heights, alpha=0.5, label=name)
plt.legend()
plt.xlabel('x')
plt.ylabel('$p_X(x)$')  # text between $ $ is rendered as a LaTeX formula
plt.show()

# 4) Independence of random variables computing joint pmf and marginals
You have a coin with probability $p$ of showing H. You flip the coin a random number $N$ of times, where $N\sim Pois(\lambda)$ is independent of the outcomes of the flips. Find the distributions of $X$, the number of H, and $Y$, the number of T, and show that $X$ and $Y$ are independent.

In [None]:
num_experiment = 10000
p = 0.4 # probability of H in a single flip
l = 5 # parameter lambda of the Poisson number of tosses
# if I need to store the joint pmf I need a matrix instead of a vector. The issue is that a priori I don't know which values
# X and Y can take, so for now the (imperfect) solution that we will take is to define a matrix bigger than what we might empirically observe
# (how big depends on the parameter of the Poisson)
max_n = 50
count_XY = np.zeros((max_n, max_n))
count_X = np.zeros(max_n) # we use the same size for the marginals of X and Y, which will be vectors
count_Y = np.zeros(max_n)

N = stats.poisson(l) # number of tosses are poisson distributed (note: this rebinds the name N used earlier for the integer 10)
# draw all the experiments at once: first the number of tosses of each experiment, then the number of H among them.
# Given num_tosses flips, the number of H is Binomial(num_tosses, p) and the T are the remaining flips.
num_tosses = N.rvs(num_experiment)
if num_tosses.max() >= max_n:
  # X and Y are at most num_tosses, so this guarantees that every index below fits in the count arrays
  raise ValueError('max_n is too small for the sampled number of tosses; increase max_n')
num_heads = stats.binom.rvs(num_tosses, p)
num_tails = num_tosses - num_heads

for i in range(num_experiment):
  # save the joint and marginal counts of experiment i
  x, y = num_heads[i], num_tails[i]
  count_XY[x, y] += 1
  count_X[x] += 1
  count_Y[y] += 1

# let's transform counts into probabilities
p_XY = count_XY / num_experiment
p_X = count_X / num_experiment
p_Y = count_Y / num_experiment

# to verify that p_XY factorises in p_X * p_Y we need to compute the outer product of the marginals. In python, this is done with np.outer( , )
p_Xp_Y = np.outer(p_X, p_Y)
# you can verify that the shape of the result is the desired one (max_n, max_n):
print(p_Xp_Y.shape)

In [None]:
# Check how far the joint pmf is from the product of the marginals:
# subtract the two matrices entry by entry, take the absolute value of every entry,
# and add everything up into a single scalar (small value = distributions are close).
np.abs(p_XY - p_Xp_Y).sum()

How could you represent the joint pmf and the marginal pmfs with a plot?