# Chapter 3. Describing Data

## Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from typing import List

## Table 3.1

In [None]:
# Table 3.1: paired observations of age and pi_max, one row per subject.
ages = [
    7, 7, 8, 8, 8, 9, 11, 12, 12, 13, 13, 14, 14,
    15, 16, 17, 17, 17, 17, 19, 19, 20, 23, 23, 23,
]
pi_max_values = [
    80, 85, 110, 95, 95, 100, 45, 95, 130, 75, 80, 70, 80,
    100, 120, 110, 125, 75, 100, 40, 75, 110, 150, 75, 95,
]
tab31: pd.DataFrame = pd.DataFrame({"age": ages, "pi_max": pi_max_values})
# Show the first five rows as the cell output.
tab31.head(5)

## Figure 3.15

In [None]:
# figure 3.15: scatter diagram of pi_max against age, using the rows of tab31
sns.scatterplot(data=tab31, x="age", y="pi_max", s=100, color="lightblue", legend=False)
plt.xlim((5, 25))
plt.title("Fig. 3.15. Scatter diagram of PImax by age")
plt.xlabel("Age [years]")
# The unit is cm H2O: the mathtext subscript applies to "2" only and is
# followed by the letter O (the previous label ended in the digit zero).
plt.ylabel("PImax [cm $H_2O$]")
plt.show()

## Table 3.2

In [None]:
# Table 3.2: IgM level and the number of children observed at that level.
# The first 18 levels form a regular 0.1-step grid (0.1 ... 1.8); the remaining
# six levels are listed explicitly.
igm_regular = np.arange(0.1, 1.9, 0.1)
igm_sparse = np.asarray([2.0, 2.1, 2.2, 2.5, 2.7, 4.5])
child_counts = np.asarray(
    [3, 7, 19, 27, 32, 35, 38, 38, 22, 16, 16, 6, 7, 9, 6, 2, 3, 3, 3, 2, 1, 1, 1, 1]
)
tab32 = pd.DataFrame(
    {
        "igm": np.concatenate((igm_regular, igm_sparse)),
        "num_of_children": child_counts,
    }
)
# Show the first five rows as the cell output.
tab32.head(5)

## Figure 3.3

In [None]:
# figure 3.3
# The igm values in tab32 are multiples of 0.1, so each 0.1-wide class is
# centred on its value (edges 0.05, 0.15, ..., 4.55). Placing the edges on the
# values themselves (np.arange(0.1, 4.6, 0.1)) is fragile: float rounding puts
# some edges just above the value (0.1 + 24 * 0.1 == 2.5000000000000004), which
# moves that observation into the neighbouring bar.
igm_bin_edges = np.arange(0.05, 4.6, 0.1)
sns.histplot(data=tab32, x="igm", bins=igm_bin_edges, weights="num_of_children")
# NOTE(review): the title says "Fig. 3.7. (a)" while this cell is labelled
# figure 3.3 - confirm which figure number is intended.
plt.title("Fig. 3.7. (a) Concentration of IgM in\n298 children aged 6 months to 6 years")
plt.xlabel("IgM [g/l]")
plt.ylabel("Number of children")
plt.show()

In [None]:
# figure 3.10 (a): cumulative relative frequency of IgM, drawn as bars
# The igm values in tab32 are multiples of 0.1, so the bin edges are offset by
# half a step (0.05, 0.15, ..., 4.55). Edges placed on the values themselves
# (np.arange(0.1, 4.6, 0.1)) can land just above a value because of float
# rounding (0.1 + 24 * 0.1 == 2.5000000000000004) and shift it one bin down.
igm_bin_edges = np.arange(0.05, 4.6, 0.1)
sns.histplot(
    data=tab32,
    x="igm",
    bins=igm_bin_edges,
    weights="num_of_children",  # each row stands for that many children
    cumulative=True,
    stat="probability",  # normalise so the final bar reaches 1
)
plt.title("Fig. 3.10 (a). Concentration of IgM in\n298 children aged 6 months to 6 years")
plt.xlabel("IgM [g/l]")
plt.ylabel("Cumulative frequency")
plt.show()

In [None]:
# figure 3.10 (b): cumulative relative frequency of IgM, drawn as a polygon
# The igm values in tab32 are multiples of 0.1, so the bin edges are offset by
# half a step (0.05, 0.15, ..., 4.55). Edges placed on the values themselves
# (np.arange(0.1, 4.6, 0.1)) can land just above a value because of float
# rounding (0.1 + 24 * 0.1 == 2.5000000000000004) and shift it one bin down.
igm_bin_edges = np.arange(0.05, 4.6, 0.1)
sns.histplot(
    data=tab32,
    x="igm",
    bins=igm_bin_edges,
    weights="num_of_children",  # each row stands for that many children
    cumulative=True,
    stat="probability",  # normalise so the curve ends at 1
    element="poly",  # line through the bin values instead of bars
    fill=False,
)
plt.title("Fig. 3.10 (b). Concentration of IgM in\n298 children aged 6 months to 6 years")
plt.xlabel("IgM [g/l]")
plt.ylabel("Cumulative frequency")
plt.show()

In [None]:
# Table 3.3: the age groups have different widths (e.g. 0-4, 25-59, > 60).
# Each pair is (upper age of the group, frequency); the open-ended last group
# is given an assumed upper limit of 80.
age_group_rows = [
    (4, 28),
    (9, 46),
    (15, 58),
    (16, 20),
    (17, 31),
    (19, 64),
    (24, 149),
    (59, 316),
    (80, 103),
]
table33 = pd.DataFrame(age_group_rows, columns=["age", "frequency"])
# Show the whole table as the cell output.
table33

## Figure 3.6

In [None]:
# figure 3.6
# table33["age"] holds the inclusive upper age of each group and the groups have
# unequal widths, so equal 5-year bins with raw frequencies merge the narrow
# groups (15, 16 and 17 share one bar) and exaggerate the wide ones. A correct
# histogram uses the real group boundaries and plots frequency per year of age,
# so that the bar areas are proportional to the frequencies.
upper_ages = table33["age"].to_numpy()
# Every group starts one year after the previous group's upper age; the first starts at 0.
lower_edges = np.concatenate(([0], upper_ages[:-1] + 1))
# Close the last group at its (assumed) upper limit, the last value of table33["age"].
bin_edges = np.append(lower_edges, upper_ages[-1])
group_widths = np.diff(bin_edges)
frequency_per_year = table33["frequency"].to_numpy() / group_widths
# Each lower edge falls inside its own bin, so every weight lands in exactly one bar.
sns.histplot(x=lower_edges, bins=bin_edges, weights=frequency_per_year)
plt.title("Figure 3.6. Correct histogram of road accident data of Table 3.3")
plt.xlabel("Age [years]")
plt.ylabel("Frequency per year")
plt.show()