# Exercises: Data Analysis with Python

In [None]:
import numpy as np
import matplotlib.pyplot as plt

## Decay Numbers
To investigate the random fluctuations in the decay counts of a radioactive source, the number of events detected within a 5 s interval was recorded 2500 times. The data are stored in a CSV file (`counts.csv`).

In [None]:
# Load the recorded counts. The file is a CSV, so fields are separated by
# commas (the previous delimiter 'c' would split on the letter "c" instead).
counts = np.genfromtxt('counts.csv', delimiter=',')
n = counts.size
print(n) # print number of entries

Calculate the mean value and the standard deviation.

In [None]:
# Summary statistics of the recorded counts.
# np.std's default (ddof=0) gives the population standard deviation.
counts_mean, counts_std = np.mean(counts), np.std(counts)

summary = f'mean: {counts_mean:.2f}, standard deviation: {counts_std:.2f}'
print(summary)

Calculate the fraction of measurements where the number of counts is within one standard deviation from the mean. What is the fraction within two or three standard deviations?

In [None]:
def n_sigma(n):
    """Count the measurements lying strictly within n standard deviations of the mean.

    Reads the notebook-level `counts`, `counts_mean` and `counts_std`.
    Values exactly on either boundary are excluded.

    Parameters
    ----------
    n : number
        Half-width of the interval in units of the standard deviation.

    Returns
    -------
    Number of entries c with counts_mean - n*counts_std < c < counts_mean + n*counts_std.
    """
    half_width = n * counts_std
    above_lower = counts > counts_mean - half_width
    below_upper = counts < counts_mean + half_width
    return np.sum(above_lower & below_upper)

# Report the percentage of measurements within 1, 2 and 3 standard deviations.
# The printed label uses the same multiple that is passed to n_sigma
# (previously the label ran 0, 1, 2 while the values were for 1, 2, 3).
for k in (1, 2, 3):
    print(f'fraction within {k} std dev: {n_sigma(k)/n*100:.1f} %')

Make a histogram for the number of counts.

In [None]:
# Unit-width bins with edges at the integers counts_min, ..., counts_max + 1,
# so that every observed count value gets its own bin.
counts_min = int(min(counts))
counts_max = int(max(counts))
bins = range(counts_min, counts_max+2)

fig, ax = plt.subplots()
# align='left' centres each bar on its left bin edge, i.e. on the count value.
ax.hist(counts, bins=bins, align='left')
ax.set_xlabel('# of counts')
ax.set_ylabel('frequency')
ax.set_xticks(range(counts_max+1))  # one tick per integer from 0 to counts_max
plt.show()

Compare the measured distribution to a normal distribution: $f(c) = f_0 e^{-\frac{(c-c_0)^2}{2\sigma^2}}$ \
Are the fit parameters as expected?

In [None]:
from scipy.optimize import curve_fit

def f(c, f0, c0, si):
    """Unnormalised Gaussian f0 * exp(-(c - c0)^2 / (2 * si^2)).

    Parameters
    ----------
    c : number or array
        Point(s) at which to evaluate the curve.
    f0 : number
        Peak height (value at c == c0).
    c0 : number
        Peak position.
    si : number
        Width parameter (standard deviation of the Gaussian).
    """
    deviation = c - c0
    exponent = -deviation**2 / (2 * si**2)
    return f0 * np.exp(exponent)

# Histogram the data with the bins defined for the plot and fit the Gaussian
# model to the bin contents, using each bin's left edge as its x position.
hist, bin_edges = np.histogram(counts, bins)
left_edges = bin_edges[:-1]
coeff, pcov = curve_fit(f, left_edges, hist, p0=[120, 12, 3])  # p0: initial guess for (f0, c0, si)
f0, c0, si = coeff

# Locate the tallest bin; argmax returns the first one if several tie.
peak_index = np.argmax(hist)
hist_peak = hist[peak_index]
hist_peak_at = bin_edges[peak_index]

# Compare the empirical values with the fitted parameters.
print(f'peak value: data -> {hist_peak}, normal distribution -> {f0:.1f}')
print(f'peak position: data -> {hist_peak_at}, normal distribution -> {c0:.1f}')
print(f'standard deviation: data -> {counts_std:.2f}, normal distribution -> {si:.2f}')

In [None]:
# Integer bin edges counts_min, ..., counts_max + 1 (one bin per count value).
counts_min = int(min(counts))
counts_max = int(max(counts))
bins = range(counts_min, counts_max+2)

# Evaluate the fitted curve on a fine grid from 0 to the largest count.
f0, c0, si = coeff
x = np.linspace(0, counts_max, 100)
normal = f(x, f0, c0, si)

# Draw the histogram of the data and overlay the fitted curve.
fig, ax = plt.subplots()
ax.hist(counts, bins=bins, align='left', label='data')
ax.plot(x, normal, color='red', label='normal distribution')
ax.set_xlabel('# of counts')
ax.set_ylabel('frequency')
ax.set_xticks(range(counts_max+1))
ax.legend()
plt.show()