# Ex sheet 2: convergence of random variables

In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt
import numpy.random as rd

## The coupon collector problem

In [None]:
# Coupon collector with K coupon types: T counts the draws needed to see them all
def coupons(K=1):
    """Simulate one coupon-collector run and return the number of draws.

    Each draw picks an index in [0, K) with rd.randint; drawing stops as soon
    as every index has been picked at least once. With K=0 there is nothing
    to collect and 0 is returned.
    """
    seen = np.zeros(K, dtype=bool)
    missing = K  # number of coupon types not yet collected
    T = 0
    while missing > 0:
        T += 1
        pick = rd.randint(0, K)
        if not seen[pick]:
            seen[pick] = True
            missing -= 1
    return T

# Repeat the coupon-collector experiment N times
def sample_coupons(K=1, N=1):
    """Return a length-N float array whose entries are successive coupons(K) results."""
    times = np.zeros(N)
    # Fill the whole array in one slice assignment; runs are drawn in index order.
    times[:] = [coupons(K) for _ in range(N)]
    return times

In [None]:
# Run a single coupon-collector experiment with K coupon types
K = 1000
T = coupons(K)
T  # bare expression so that a notebook displays the value

In [None]:
# Leading-order prediction for the collection time: T ~ K*log K
theory = np.log(K) * K
theory

In [None]:
# Empirical mean of the collection time over N independent experiments.
# Warning: slow, since it runs N full simulations with K coupons each.
N = 5000
S = sample_coupons(K, N)
S.mean()

In [None]:
# Collection time T of each of the N experiments, with K*log K as a reference line
fig, ax = plt.subplots(figsize=(15,5))
ax.plot(S, label="sample")
ax.axhline(y=theory, color='r', linestyle='dashed', label="theoretical value = %i" %theory)
ax.set_title("Coupon collector with %i coupons" %K)
ax.set_ylabel("number of trials")
ax.set_xlabel("experiments")
ax.legend()
plt.show()

In [None]:
# Centre and rescale the collection times: renormS = (T - K*log K) / K
renormS = (S - theory) / K

plt.figure(figsize=(15,5))
plt.plot(renormS, label="normalised sample")
plt.axhline(y = 0, color = 'r', linestyle = 'dashed', label = "theoretical value = 0")
plt.title("Normalised coupon collector with %i coupons" %K)
# The plotted quantity is the centred and rescaled time (it can be negative),
# not a raw count of trials, so the axis is labelled accordingly.
plt.ylabel("(T - K log K) / K")
plt.xlabel("experiments")
plt.legend()
plt.show()

In [None]:
# Gumbel density: x -> exp(-(x + exp(-x)))
def Gumbel_density(x):
    """Evaluate exp(-(x + exp(-x))) at x (scalar or NumPy array, elementwise)."""
    exponent = x + np.exp(-x)
    return np.exp(-exponent)

# Overlay the Gumbel density on the histogram of the normalised collection times
xs = np.linspace(-5, 10, 1000)
ys = Gumbel_density(xs)

fig, ax = plt.subplots(figsize=(16,6))
ax.set_title("Histogram of the normalised coupon collector with %i coupons and the Gumbel distribution" %K)
ax.plot(xs, ys, label="Gumbel density : exp(-(x+exp(-x)))")
ax.fill_between(xs, ys, 0)
ax.hist(renormS, bins=50, density=True, label="normalised sample")
ax.legend()
plt.show()

## Estimators of the right edge of a uniform law

In [None]:
# Ntrial independent copies of two estimators of theta, each built from
# nsample i.i.d. uniform(0,theta) draws
def estim(theta, nsample=1, Ntrial=1):
    """Return a (2, Ntrial) array of estimates of theta.

    Row 0 is twice the sample mean of each trial; row 1 is the sample maximum
    of each trial multiplied by (nsample+1)/nsample. All draws are generated in
    a single (Ntrial, nsample) array, so memory use grows with Ntrial*nsample.
    """
    draws = rd.uniform(low=0, high=theta, size=(Ntrial, nsample))
    from_sum = 2*draws.sum(axis=1)/nsample
    from_max = (nsample+1)*draws.max(axis=1)/nsample
    return np.vstack((from_sum, from_max))

In [None]:
# Simulation parameters and one run of both estimators
theta = 1
Ntrial = 50000
nsample = 5000
est = estim(theta, nsample, Ntrial)
# Row 0 holds the sum-based estimates, row 1 the max-based estimates
estsum, estmax = est

In [None]:
# Histogram of the two estimators
plt.figure(figsize=(14,6))
plt.title("Histogram of the two estimators")
# Both histograms are normalised to densities so that they share one y-axis;
# mixing raw counts with a density makes the bar heights incomparable.
plt.hist(estsum, bins=100, density=True, label="Sum estimator")
plt.hist(estmax, bins=10, density=True, label="Max estimator")
plt.legend();

In [None]:
# Raw values of both estimators across the trials, with a reference line at 1
fig, ax = plt.subplots(figsize=(14,8))
ax.set_title("Values of the two estimators")
ax.plot(estsum, label='Sum estimator')
ax.plot(estmax, label='Max estimator')
ax.axhline(y=1, color='r', linestyle='dashed', label="estimated parameter = 1")
ax.legend()
ax.set_ylabel("value")
ax.set_xlabel("trials")
plt.show()

In [None]:
# Centre each estimator at theta and rescale:
# factor sqrt(3n) for the sum estimator, sqrt(n(n+2)) for the max estimator
renormestsum = np.sqrt(3 * nsample) * (estsum - theta) / theta
renormestmax = np.sqrt(nsample * (nsample + 2)) * (estmax - theta) / theta

In [None]:
# Rescaled values of both estimators across the trials, with a reference line at 0
fig, ax = plt.subplots(figsize=(14,8))
ax.set_title("Normalised values of the two estimators")
ax.plot(renormestsum, label='Sum estimator')
ax.plot(renormestmax, label='Max estimator')
ax.axhline(y=0, color='r', linestyle='dashed')
ax.legend()
ax.set_ylabel("value")
ax.set_xlabel("trials")
plt.show()

In [None]:
# Density-normalised histograms of the two rescaled estimators on shared axes
fig, ax = plt.subplots(figsize=(14,8))
ax.hist(renormestsum, bins=100, density=True, label="Sum estimator")
ax.hist(renormestmax, bins=100, density=True, label="Max estimator")
ax.axvline(x=0, color='r', linestyle='dashed', label="Mean value")
ax.legend()
plt.show()

In [None]:
# Standard Gaussian density, the reference curve for the rescaled sum estimator
def gaussian(x):
    """Evaluate exp(-x**2/2)/sqrt(2*pi) at x (scalar or NumPy array, elementwise)."""
    norm_const = 1/math.sqrt(2*math.pi)
    return norm_const*np.exp(-x**2/2)

# Histogram of the rescaled sum estimator against the Gaussian density
xs = np.linspace(-5, 5, 1000)
ys = gaussian(xs)

fig, ax = plt.subplots(figsize=(12,6))
ax.hist(renormestsum, bins=100, density=True, label="Sum estimator")
ax.plot(xs, ys, label="Gaussian density")

ax.legend()
plt.show()

In [None]:
# Compare the max estimator with 1-X where X is exponentially distributed.
# The density of 1-X at x <= 1 is exp(-(1-x)), evaluated below on [-8, 1].
# (A copy-pasted, unused redefinition of gaussian() was removed from this cell.)
xs = np.linspace(-8, 1, 1000)
ys = np.exp(-(1-xs))

plt.figure(figsize=(12,6))
plt.plot(xs, ys, label="1 - Exponential")
plt.hist(renormestmax, bins=100, density=True, label="Max estimator")

plt.legend()
plt.show()

## Exponential of Gaussian r.v.'s

In [None]:
# N independent trajectories k -> a*(X_1+...+X_k) - b*k for k = 1..n,
# with X_i drawn from rd.normal(0,1)
def Gauss(a,b,N,n):
    """Return an (N, n) array; each row is the running sum of a*X_j - b along j."""
    noise = rd.normal(0, 1, size=(N, n))
    increments = a*noise - b
    return increments.cumsum(axis=1)

In [None]:
# Simulate N trajectories of length n with b > 0 and plot them
N = 20
n = 5000
a = 1
b = 0.1
Y = Gauss(a, b, N, n)

fig, ax = plt.subplots(figsize=(15,5))
ax.plot(Y.T)  # transposed so that each trajectory is drawn as one line
ax.set_title("Trajectories of sum of Gaussian r.v.'s with b>0")

ax.set_ylabel("a*S_n-bn")
ax.set_xlabel("n")
plt.show()

In [None]:
# Exponentiate the trajectories computed above (b > 0) and plot them
Z = np.exp(Y)

fig, ax = plt.subplots(figsize=(15,5))
ax.plot(Z.T)
ax.set_title("Trajectories of the exponential of the sum of Gaussian r.v.'s with b>0")

ax.set_ylabel("exp(a*S_n-bn)")
ax.set_xlabel("n")
plt.show()

In [None]:
# Simulate N trajectories of length n with b < 0 and plot them
N = 20
n = 5000
a = 1
b = -0.1
Y = Gauss(a, b, N, n)

fig, ax = plt.subplots(figsize=(15,5))
ax.plot(Y.T)  # transposed so that each trajectory is drawn as one line
ax.set_title("Trajectories of sum of Gaussian r.v.'s with b<0")

ax.set_ylabel("a*S_n-bn")
ax.set_xlabel("n")
plt.show()

In [None]:
# Exponentiate the trajectories computed above (b < 0) and plot them
Z = np.exp(Y)

fig, ax = plt.subplots(figsize=(15,5))
ax.plot(Z.T)
ax.set_title("Trajectories of the exponential of the sum of Gaussian r.v.'s with b<0")

ax.set_ylabel("exp(a*S_n-bn)")
ax.set_xlabel("n")
plt.show()

In [None]:
# Plot the trajectories in the driftless case b = 0
N=20
n=5000
a=1
b=0
Y=Gauss(a,b,N,n)

plt.figure(figsize=(15,5))
plt.plot(Y.T)
# Title corrected: this cell uses b = 0, not b < 0
plt.title("Trajectories of sum of Gaussian r.v.'s with b=0")

plt.ylabel("a*S_n-bn")
plt.xlabel("n")
plt.show()

In [None]:
# Plot the exponential of the trajectories Y computed in the preceding cell,
# which sets b = 0
Z=np.exp(Y)

plt.figure(figsize=(15,5))
plt.plot(Z.T)
# Title corrected: the preceding cell sets b = 0, not b < 0
plt.title("Trajectories of the exponential of the sum of Gaussian r.v.'s with b=0")

plt.ylabel("exp(a*S_n-bn)")
plt.xlabel("n")
plt.show()

## Product of r.v.'s

In [None]:
# N independent trajectories k -> X_1*...*X_k for k = 1..n,
# where X_i = a or X_i = b, each with probability 1/2
def prod_coin(a,b,N,n):
    """Return an (N, n) array of running products of coin-flip factors.

    Each factor is (b-a)*flip + a with flip drawn from rd.binomial(1, 1/2),
    i.e. a when the flip is 0 and (b-a)+a when it is 1.
    """
    flips = rd.binomial(1, 1/2, size=(N, n))
    factors = (b-a)*flips + a
    return factors.cumprod(axis=1)

In [None]:
# Simulate and plot N product trajectories of length n with factors a and b
N = 20
n = 1000
a = 0.8
b = 1.2
X = prod_coin(a, b, N, n)

fig, ax = plt.subplots(figsize=(15,5))
ax.plot(X.T)  # transposed so that each trajectory is drawn as one line

plt.show()

In [None]:
# Logarithm of the product trajectories, plotted one line per trajectory
Y = np.log(X)

fig, ax = plt.subplots(figsize=(15,5))
ax.plot(Y.T)

plt.show()

In [None]:
# Same experiment with a larger upper factor b
N = 20
n = 1000
a = 0.8
b = 1.4
X = prod_coin(a, b, N, n)

fig, ax = plt.subplots(figsize=(15,5))
ax.plot(X.T)  # transposed so that each trajectory is drawn as one line

plt.show()

In [None]:
# Logarithm of the product trajectories, plotted one line per trajectory
Y = np.log(X)

fig, ax = plt.subplots(figsize=(15,5))
ax.plot(Y.T)

plt.show()