# Introduction to NumPy for Analysis of Lab Data

### Import Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt

## Example: Measuring density
Six students measure the mass and the side length of the same brass cube. They end up with slightly different values. The measured data are listed below. How can we efficiently calculate the densities resulting from these data?

#### Define arrays for measured data

In [None]:
# Measured values for the brass cube; both arrays hold six entries.
m = np.array([
    69.3, 69.1, 69.0,
    69.0, 69.2, 69.2,
])  # mass in g
a = np.array([
    2.05, 2.00, 2.05,
    1.95, 2.00, 2.00,
])  # side in cm

#### Find some characteristic values

In [None]:
# Largest, smallest and average of the measured masses.
m_max = m.max()
m_min = m.min()
m_mean = m.mean()
print(f'max: {m_max} g, min: {m_min} g, mean: {m_mean:.2f} g')

Many more functions can be found in the NumPy documentation. In particular, most standard functions can be applied to the elements of an array.

#### Calculate volumes and densities

In [None]:
# Element-wise: cube every side length, then divide each mass by its volume.
V = np.power(a, 3)
rh = np.divide(m, V)
print(f'densities: {rh}')

#### Mean value and deviations

In [None]:
# Average density and the distance of the two extreme values from it.
rh_mean = rh.mean()
rh_dev_max = rh.max() - rh_mean
rh_dev_min = rh_mean - rh.min()

print(f'mean density: {rh_mean:.1f} g/cm^3')
print(f'deviation max-mean: {rh_dev_max:.1f} g/cm^3')
print(f'deviation min-mean: {rh_dev_min:.1f} g/cm^3')

### Error calculation

All students measured the same quantities on the same cube. In this case we make an estimate for the measurement errors for mass and side length based on the deviations between the different students.

In [None]:
# Estimated measurement errors: mass in g, side length in cm.
dm, da = 0.2, 0.05

The error for the density can be found using the "worst case method".

In [None]:
# Best estimate of the density from the mean mass and the mean side length.
rh = m_mean / a.mean()**3

# Worst case: mass shifted up by its error, side length shifted down by its
# error, which gives the largest density compatible with the error ranges.
rh_max = (m_mean + dm) / (a.mean() - da)**3

# The distance between worst case and best estimate is taken as the error.
drh = rh_max - rh

In [None]:
# Report the density together with its error, both rounded to one decimal place.
print(f'density: ({rh:.1f} ± {drh:.1f}) g/cm^3')

## Example: Investigating uniformly accelerated motion
A low-friction cart slides down a slightly inclined track. Its position is measured at regular time intervals. We assume errors of 0.05 s for the time and 0.2 cm for the position measurements.

#### Define arrays for measured data

In [None]:
# Measured data: eight readings taken every 0.5 s.
time = np.array([
    0.00, 0.50, 1.00, 1.50,
    2.00, 2.50, 3.00, 3.50,
])  # time in s
pos = np.array([
    12.3, 15.2, 24.3, 39.4,
    60.5, 87.1, 120.3, 159.2,
])  # position in cm

# Assumed measurement errors: time in s, position in cm.
dt, dx = 0.05, 0.2

#### Visualise data with a scatter plot

In [None]:
# Measured positions over time, with error bars in both directions.
plt.errorbar(time, pos, xerr=dt, yerr=dx, color='blue', fmt='.')

# Axis labels carry the units of the plotted quantities.
plt.ylabel('position (cm)')
plt.xlabel('time (s)')
plt.show()

The error bars are so small that they can barely be seen. In order to visually confirm the quality of a fit function later on, we will have to find a solution to this problem.

#### Fit a model to the data

In [None]:
from scipy.optimize import curve_fit

We expect the motion to have a constant acceleration, i.e. the position should be a quadratic function of time.

In [None]:
def f(t, a, v0, x0):
    """
    Position of a body moving with constant acceleration.

    Parameters
    ----------
    t : time
    a : acceleration
    v0 : initial velocity
    x0 : initial position

    Returns
    -------
    The position at time t, i.e. a/2 * t**2 + v0 * t + x0.
    """
    quadratic_term = a / 2 * t**2
    linear_term = v0 * t
    return quadratic_term + linear_term + x0

In [None]:
# Fit the model f to the measured data. coeff holds the optimal parameters,
# pcov the covariance matrix returned alongside them.
coeff, pcov = curve_fit(f, xdata=time, ydata=pos)

# The parameters come back in the order in which f declares them after t.
a = coeff[0]
v0 = coeff[1]
x0 = coeff[2]

#### Visualise data and model

In [None]:
# Evaluate the fitted model on 100 evenly spaced times from 0 to the last
# measured time, so that it is drawn as a smooth curve.
t = np.linspace(0, time.max(), 100)
x = f(t, a, v0, x0)

# Model curve first, measured points with error bars second.
plt.plot(t, x, label='square fit', color='red')
plt.errorbar(time, pos, xerr=dt, yerr=dx, label='data', color='blue', fmt='.')

plt.ylabel('position (cm)')
plt.xlabel('time (s)')
plt.legend()
plt.show()

#### Residual plot

The model seems to describe the measured data quite accurately. To see even better if the fit function passes through the error ranges of the data points, we can create a _residual plot_, i.e. a plot of the deviation between the data points and the fit function.

In [None]:
# Residuals: measured position minus the model evaluated at the measured times.
res = pos - f(time, a, v0, x0)

# The horizontal axis is the dimensionless data-point number, so only the
# vertical (position) error bars are drawn. The time error dt is given in
# seconds and has no meaning on this axis, hence no xerr here.
plt.errorbar(range(1, len(time)+1), res, fmt='.', color='blue', yerr=dx, capsize=2)
plt.xlabel('data point')
plt.ylabel('residuals (cm)')
plt.grid()  # grid line at 0 makes it easy to see which error bars reach the axis
plt.show()

The horizontal axis is within the error ranges for all but one data point (no. 6). This is quite convincing evidence that the theoretical model is a good description for the measured data.

#### Errors of fit parameters
The `curve_fit` function returns the _covariance matrix_ as its second return value. Its diagonal elements are the variances of the fit parameters, i.e. the squares of their errors.

In [None]:
# The diagonal of the covariance matrix holds the squared parameter errors.
sq_errors = pcov.diagonal()

# Taking the square root yields the errors, in the same order as the parameters.
da, dv0, dx0 = np.sqrt(sq_errors)

print(f'acceleration:      a = ({a:.2f} ± {da:.2f}) cm/s^2')
print(f'initial velocity:  v_0 = ({v0:.2f} ± {dv0:.2f}) cm/s')
print(f'initial position:  x_0 = ({x0:.2f} ± {dx0:.2f}) cm')