# Solutions to sheet 5

Import the necessary libraries: `numpy`, `matplotlib` and `scipy.stats`. We can use the `as` keyword to define shortcuts for accessing the libraries (for example, `import numpy as np` lets us call numpy functions with the `np.` prefix).

In [None]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy.stats as stats

In [None]:
## - - - - - - - - - - - - - - - - - - - - - - - - - - -
##      plotting properties
## - - - - - - - - - - - - - - - - - - - - - - - - - - -

# Apply one global matplotlib style to every figure created after this
# cell runs. All values are given as strings and left to matplotlib to
# interpret.
mpl.rcParams.update({
    # --- axes: colours, label sizes, margins, number formatting ---
    "axes.autolimit_mode":"round_numbers",
    "axes.axisbelow":"False",
    "axes.edgecolor":"000000",
    "axes.facecolor":"FFFFFF",
    "axes.formatter.limits":"-2, 4",
    "axes.formatter.use_mathtext":"True",
    "axes.grid":"False",
    "axes.labelcolor":"000000",
    "axes.labelsize":"12",
    "axes.linewidth":"1",
    "axes.titlesize":"12",
    "axes.xmargin":"0.",
    "axes.ymargin":"0.",
    "errorbar.capsize":"1",
    # --- figure: layout, resolution and size ---
    "figure.autolayout":"True",
    "figure.dpi":"96",
    "figure.edgecolor":"0.50",
    "figure.facecolor":"FFFFFF",
    "figure.figsize":"6, 4.5",
    # --- fonts, legend and markers ---
    "font.family":"monospace",
    "font.size":"12",
    "legend.fancybox":"True",
    "legend.framealpha":"0.25",
    "legend.frameon":"True",
    "lines.markersize":"6",
    # --- saved figures (8-digit hex: presumably RGBA with zero alpha) ---
    "savefig.dpi":"150",
    "savefig.facecolor":"FFFFFF00",
    # --- ticks: enabled on all four sides, direction "in" ---
    "xtick.bottom":"True",
    "xtick.color":"000000",
    "xtick.direction":"in",
    "xtick.major.size":"10",
    "xtick.minor.size":"5",
    "xtick.top":"True",
    "ytick.color":"000000",
    "ytick.direction":"in",
    "ytick.left":"True",
    "ytick.major.size":"10",
    "ytick.minor.size":"5",
    "ytick.right":"True",
})

In [None]:
# Central value and standard deviation of the Gaussian; these two globals
# are passed to the plotting, sampling, chi2 and t cells below.
mu = 2
sigma = 0.5

## Implement a Gaussian distribution

First implement a Gaussian distribution and show that we can plot it.

In [None]:
def Gauss(x, mu=0, sigma=1):
    """Evaluate the normalised Gaussian (normal) probability density.

    Computes ``exp(-(x - mu)**2 / (2 * sigma**2)) / (sigma * sqrt(2 * pi))``.

    Args:
      x: the point(s) at which to evaluate; a scalar or any array-like
        (list, tuple, NumPy array)
      mu: the central value of the distribution
      sigma: the standard deviation of the distribution

    Returns: the density evaluated at x (element-wise for array-like x)

    """
    pref = 1 / sigma / np.sqrt(2 * np.pi)
    # np.subtract (unlike the ``-`` operator) also accepts plain Python
    # sequences, so a list of points works just like an array.
    distance = np.subtract(x, mu)
    return pref * np.exp(- np.power(distance, 2) / 2 / sigma / sigma)

In [None]:
# Number of grid points passed to np.linspace when building curves to plot.
n_points = 200

In [None]:
# (lower, upper) x-range for the Gaussian plots. NOTE: `interval` is rebound
# by later cells, so re-run this cell before re-running the Gaussian plots.
interval = (0, 4)

In [None]:
# Evaluate the Gaussian on an evenly spaced grid of n_points values
# spanning `interval`; (xfunc, yfunc) is the curve drawn in the plots.
xfunc = np.linspace(*interval, n_points)
yfunc = Gauss(xfunc, mu, sigma)

In [None]:
# Draw the analytic Gaussian curve.
# The label is a *raw* f-string: "\m" and "\s" are not valid Python escape
# sequences, so a non-raw literal triggers an invalid-escape warning. The
# resulting text (mathtext "$\mu$", "$\sigma$") is unchanged.
plt.plot(xfunc, yfunc, label=rf"Gauss($\mu$={mu}, $\sigma$={sigma})", lw=4)
plt.xlim(*interval)
plt.ylim((0, 1.0))
plt.legend()

plt.show()

## Sample from a Gaussian distribution

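Now sample from a Gaussian distribution using the inversion method: uniformly distributed random numbers between 0 and 1 are mapped through the inverse of the Gaussian cumulative distribution function.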

In [None]:
# Number of data points drawn per experiment; also used below as the
# degrees of freedom of the chi2 pdf (and, minus one, of the t pdf).
n_samples = 6

In [None]:
# Notebook-wide random number generator. No seed is passed, so the sampled
# values differ from run to run.
gen = np.random.default_rng()

In [None]:
def sample_from_Gaussian(n_samples, mu=0, sigma=1, rng=None):
    """Function to sample from a Gaussian distribution.

    This function returns a set of n_samples data points, sampled
    from a Gaussian distribution with central value mu and standard
    deviation sigma, using the inversion method.

    Args:
      n_samples: The number of samples to draw.
      mu: The central value of the Gaussian distribution.
      sigma: The standard deviation of the Gaussian distribution.
      rng: Optional ``numpy.random.Generator`` to draw from. If omitted,
        the notebook-wide generator ``gen`` is used. Pass a seeded
        generator to obtain reproducible samples.

    Returns: Tuple ``(x, y)`` of two NumPy arrays of length n_samples.
      ``x`` holds the sampled values; ``y`` holds a uniformly random
      fraction of the density at each ``x``, so that the points
      ``(x, y)`` lie under the Gaussian curve when plotted.

    """
    if rng is None:
        rng = gen
    # Inversion method: push uniform random numbers through the inverse
    # CDF (``ppf``) of the target distribution.
    x = stats.norm.ppf(rng.uniform(0, 1, n_samples), loc=mu, scale=sigma)
    # Scale the density at each x by an independent uniform number.
    y = Gauss(x, mu, sigma) * rng.uniform(0, 1, n_samples)
    return x, y

In [None]:
# Draw one sample of n_samples points; y is only used for the scatter plot.
x, y = sample_from_Gaussian(n_samples, mu, sigma)

In [None]:
# Overlay the sampled points on the analytic Gaussian curve.
# The label is a *raw* f-string: "\m" and "\s" are not valid Python escape
# sequences, so a non-raw literal triggers an invalid-escape warning. The
# resulting text (mathtext "$\mu$", "$\sigma$") is unchanged.
plt.plot(xfunc, yfunc, label=rf"Gauss($\mu$={mu}, $\sigma$={sigma})", lw=4)
plt.scatter(x, y, marker=".", label="Sampled data points")
plt.xlim(*interval)
plt.ylim((0, 1))
plt.legend()

plt.show()

## chi2 calculation

In [None]:
def calculate_chi2(x, mu, sigma):
    """Calculate the chi2 parameter from a list of data points.

    The chi2 is the sum over all data points of ``((x_i - mu) / sigma)**2``.

    Args:
      x: the data points; any array-like (list, tuple or NumPy array)
      mu: the expectation value of the distribution
      sigma: the standard deviation of the distribution

    Returns:
      Overall chi2 parameter value of the data points
    """
    # Convert first: ``list - number`` raises TypeError, although the
    # documented input is a plain list of data points.
    difference = (np.asarray(x) - mu) / sigma
    return np.sum(np.power(difference, 2))

In [None]:
# chi2 of the current sample `x` with respect to the global mu and sigma.
calculate_chi2(x, mu, sigma)

## Now repeat the experiment multiple times

We now repeat the same experiment 5000 times and collect all chi2 values of the experiments to fill them into a histogram.

In [None]:
# Number of independent repetitions of the sampling experiment.
n_experiments = 5000

In [None]:
# Range for the chi2 histogram and curve. NOTE: this rebinds the global
# `interval` that the Gaussian plot cells above also read.
interval = (0, 20)

In [None]:
# Repeat the experiment n_experiments times, storing one chi2 value per
# freshly drawn sample. The y values are not needed here and are discarded.
# After the loop the global `x` holds the last sample drawn.
chi2_experiments = []
for i in range(n_experiments):
    x, _ = sample_from_Gaussian(n_samples, mu, sigma)
    chi2_experiments.append(calculate_chi2(x, mu, sigma))

In [None]:
# Grid of n_points chi2 values spanning `interval` for the analytic curve.
chi2_x = np.linspace(*interval, n_points)

In [None]:
# Analytic chi2 pdf with n_samples degrees of freedom: each experiment sums
# n_samples squared terms and mu and sigma are fixed rather than fitted.
chi2_y = stats.chi2.pdf(chi2_x, n_samples)

In [None]:
# Normalised histogram of the simulated chi2 values ...
plt.hist(
    chi2_experiments,
    bins=20,
    range=interval,
    density=True,
    alpha=0.5,
    label="Sampled chi2 values",
)
# ... compared with the analytic chi2 probability density.
plt.plot(
    chi2_x,
    chi2_y,
    lw=4,
    label=f"chi2 distribution (k={n_samples})",
)
plt.legend()

plt.show()

## Student's t-distribution

In [None]:
def calculate_t(x, mu):
    """Compute Student's t statistic of a sample against a known mean.

    The statistic is ``(mean(x) - mu) / s * sqrt(n)``, where ``s`` is the
    sample standard deviation with Bessel's correction (``ddof=1``) and
    ``n`` is the number of values in the sample.

    Args:
      x: a list of sampled values
      mu: the (true) population mean

    Returns:
      The t value of the sample
    """
    n_values = len(x)
    mean_offset = np.mean(x) - mu
    # ddof=1 divides by n - 1 instead of n (Bessel's correction).
    sample_std = np.std(x, ddof=1)
    return mean_offset / sample_std * np.sqrt(n_values)

In [None]:
# t value of the current sample `x`; when the cells are run in order this
# is the last sample drawn in the chi2 experiment loop.
calculate_t(x, mu)

In [None]:
# Repeat the experiment n_experiments times, storing one t value per sample.
# Samples are drawn with the inversion method directly (uniform numbers
# pushed through the normal ppf), the same recipe sample_from_Gaussian uses
# for its x values.
t_values = []
for i in range(n_experiments):
    x = stats.norm.ppf(gen.uniform(0, 1, n_samples), loc=mu, scale=sigma)
    t_values.append(calculate_t(x, mu))

In [None]:
# Range for the t histogram and curve. NOTE: this rebinds the global
# `interval` that the earlier plot cells also read.
interval = (-5, 5)

In [None]:
# Grid of t values spanning `interval` for the analytic curve. Uses the
# shared n_points setting (as the chi2 grid does) instead of a literal 200.
t_x = np.linspace(*interval, n_points)

In [None]:
# Analytic Student's t pdf with n_samples - 1 degrees of freedom, matching
# the ddof=1 standard deviation estimated from each sample in calculate_t.
t_y = stats.t.pdf(t_x, n_samples - 1)

In [None]:
# Normalised histogram of the simulated t values ...
plt.hist(
    t_values,
    bins=20,
    range=interval,
    density=True,
    alpha=0.5,
    label="Sampled t values",
)
# ... compared with the analytic Student's t probability density.
plt.plot(
    t_x,
    t_y,
    lw=4,
    label=f"t distribution (k={n_samples - 1})",
)
plt.legend()

plt.show()