# Set up notebook

In [None]:
from pyprojroot import here
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import ticker as mtick
import matplotlib.dates as mdates
import seaborn as sns

In [None]:
# Render figures with the interactive widget backend (provided by ipympl).
%matplotlib widget

# Load data

In [None]:
# Raw data directory, located relative to whatever `here()` returns.
data_dir = here().joinpath("data", "raw")

In [None]:
# Read the raw CSV file into a DataFrame.
raw_file_name = "440c0a18-9219-4add-9bb4-ebcdfe1ccadf.csv"
file_path = data_dir.joinpath(raw_file_name)
data = pd.read_csv(file_path)

In [None]:
# Parse both timestamp columns into pandas datetimes.
for timestamp_column in ("Start", "End"):
    data[timestamp_column] = pd.to_datetime(data[timestamp_column])

In [None]:
# Overwrite the duration column: computing it from the parsed timestamps
# is easier than converting the exported string to a timedelta.
data["Duration"] = data["End"] - data["Start"]
# Use total_seconds() rather than .dt.seconds. The latter returns only the
# seconds *component* of each timedelta (0-86399), so it silently drops whole
# days for entries lasting 24 h or more and wraps negative durations around
# to a large positive value.
data["Duration (h)"] = data["Duration"].dt.total_seconds() / 60 / 60

In [None]:
# Preview the first rows, including the parsed timestamps and computed durations.
data.head()

# Sleep data

In [None]:
# Keep only the sleep entries and derive the helper columns used below.
# `assign` returns a new frame, so `data` itself is left untouched.
is_sleep = data["Type"] == "Sleep"
sleep_data = data.loc[is_sleep].assign(
    **{
        # Clock time at which the session started (date discarded).
        "Start Time": lambda df: df["Start"].dt.time,
        # Start shifted back by 12 h, so that daily resampling on this column
        # groups sessions into noon-to-noon "days".
        "Adjusted Start": lambda df: df["Start"] - pd.Timedelta("12h"),
        # Lower-cased list of locations (the raw value is a ", "-separated string).
        "Start Location": lambda df: df["Start Location"].str.lower().str.split(", "),
        # Constant flag used as the cell value when pivoting later on.
        "Bool": True,
    }
)
sleep_data.head()

In [None]:
# Scatter of sleep duration against the time of day the session started.
fig, ax = plt.subplots(1, 1, figsize=[8, 6])

# Index the durations by start time of day; the times are converted to
# datetimes so they can be drawn on a date axis and formatted as HH:MM.
duration_by_start_time = sleep_data.set_index("Start Time")["Duration (h)"]
duration_by_start_time.index = pd.to_datetime(
    duration_by_start_time.index, format="%H:%M:%S"
)

# Markers only (no connecting line); translucent so overlapping dots show density.
dot_style = {
    "marker": ".",
    "markersize": 10,
    "linestyle": "",
    "color": "tab:blue",
    "alpha": 0.3,
}
duration_by_start_time.plot(ax=ax, **dot_style)

ax.set(ylabel="Sleep duration (h)", title="One dot per sleep session")
ax.yaxis.set_major_locator(mtick.MultipleLocator(1))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M"))
plt.xticks(rotation=90, ha="center")
ax.grid(color="black", alpha=0.2, axis="both")
fig.tight_layout()

## Aggregate by day

In [None]:
# Per-day summary of the sleep sessions. Days are bucketed on "Adjusted Start"
# (start time minus 12 h), so each bucket runs from noon to the following noon.
daily_stats = {
    "Duration (h)": ["max", "min", "mean", "median"],
    "End": ["count"],  # number of sessions in the bucket
}
sleep_by_adjusted_start = sleep_data.set_index("Adjusted Start")
sleep_data_agg = sleep_by_adjusted_start.resample("1D").agg(daily_stats)

# Replace the timestamp index with plain dates.
sleep_data_agg.index = sleep_data_agg.index.date
sleep_data_agg.sort_index(axis=1).head()

In [None]:
# Line chart of the longest sleep session in each daily bucket.
fig, ax = plt.subplots(1, 1, figsize=[12, 6])
sleep_data_agg[("Duration (h)", "max")].plot(
    ax=ax,
    color="tab:blue",
    marker=".",
    markersize=10,
    linestyle="-",
    linewidth=1,
    alpha=0.7,
)
ax.set_ylim(0)
ax.set_ylabel("Sleep duration (h)")
ax.set_xlabel("Date")
ax.set_title("Longest sleep session")
# The former `for container in ax.containers: ax.bar_label(...)` loop was
# removed: a line plot creates no containers, so the loop never ran.
ax.yaxis.set_major_locator(mtick.MultipleLocator(1))
ax.yaxis.set_minor_locator(mtick.MultipleLocator(0.5))
# Major tick every 7 x-units and minor tick every unit; presumably one unit is
# one day because the index holds dates -- TODO confirm on the rendered figure.
ax.xaxis.set_major_locator(mtick.MultipleLocator(7))
ax.xaxis.set_minor_locator(mtick.MultipleLocator(1))
plt.xticks(rotation=90)
ax.grid(color="black", alpha=0.2)
ax.grid(color="black", alpha=0.05, which="minor")
fig.tight_layout()

In [None]:
# Line chart of how many sleep sessions fall into each daily bucket.
fig, ax = plt.subplots(1, 1, figsize=[12, 6])
sleep_data_agg[("End", "count")].plot(
    ax=ax,
    color="tab:blue",
    marker=".",
    markersize=10,
    linestyle="-",
    linewidth=1,
    alpha=0.7,
)
ax.set_ylim(0)
ax.set_ylabel("Number of sleep sessions")
# Title typo fixed ("session" -> "sessions") to agree with the y-axis label.
ax.set_title("Number of sleep sessions")
ax.set_xlabel("Date")
# The former `for container in ax.containers: ax.bar_label(...)` loop was
# removed: a line plot creates no containers, so the loop never ran.
ax.yaxis.set_major_locator(mtick.MultipleLocator(1))
# Major tick every 7 x-units and minor tick every unit; presumably one unit is
# one day because the index holds dates -- TODO confirm on the rendered figure.
ax.xaxis.set_major_locator(mtick.MultipleLocator(7))
ax.xaxis.set_minor_locator(mtick.MultipleLocator(1))
plt.xticks(rotation=90)
ax.grid(color="black", alpha=0.2)
ax.grid(color="black", alpha=0.05, which="minor")
fig.tight_layout()

## Aggregate by time of day

In [None]:
# Count the sleep sessions that start in each four-hour window of the day.
#
# Every clock time is shifted forward by `time_offset` before comparing, so
# that the first window (22:00-02:00), which straddles midnight, becomes the
# plain interval 00:00-04:00.
time_offset = pd.Timedelta(2, unit="h")
time_ranges = [
    ("22:00", "2:00"),
    ("2:00", "6:00"),
    ("6:00", "10:00"),
    ("10:00", "14:00"),
    ("14:00", "18:00"),
    ("18:00", "22:00"),
]
# The shifted start times do not depend on the window, so compute them once.
shifted_start_times = (sleep_data["Start"] + time_offset).dt.time
for tr in time_ranges:
    min_time = (pd.to_datetime(tr[0]) + time_offset).time()
    max_time = (pd.to_datetime(tr[1]) + time_offset).time()
    if min_time < max_time:
        mask = (shifted_start_times >= min_time) & (shifted_start_times < max_time)
    else:
        # The window wraps past midnight after shifting (18:00-22:00 becomes
        # 20:00-00:00). With a plain `&` the condition `< 00:00` can never be
        # true, which made this window always report 0 sessions.
        mask = (shifted_start_times >= min_time) | (shifted_start_times < max_time)
    print(mask.sum())

## Resample by start location

In [None]:
# (rows, columns) of the sleep table before it is exploded by start location.
sleep_data.shape

In [None]:
# Weekly count of sleep sessions per start location.
# One row per (session, location): sessions listing several locations are duplicated.
sessions_by_location = sleep_data.explode("Start Location")
# Start timestamp x location matrix, True where the session used that location.
location_flags = sessions_by_location.pivot(
    index="Start", columns="Start Location", values="Bool"
)
weekly_counts = location_flags.resample("W").sum().astype(int)
weekly_counts.index = weekly_counts.index.date

# Drop the last weekly row (presumably an incomplete week -- confirm) and the
# "nursing" column (reason not visible in this notebook).
sleep_start_location = weekly_counts.iloc[:-1].drop(columns="nursing")
sleep_start_location.shape

In [None]:
# Convert the weekly counts into percentages of that week's total.
weekly_totals = sleep_start_location.sum(axis=1)
sleep_start_location_pc = sleep_start_location.div(weekly_totals, axis=0) * 100

# Fixed column order for plotting. Alternative kept for reference: order by
# the share in the most recent week.
# sorted_columns = sleep_start_location_pc.iloc[-1, :].sort_values(ascending=False).index
sorted_columns = [
    "worn or held",
    "bottle",
    "on own in bed",
    "next to carer",
    "co sleep",
    "stroller",
    "car",
    np.nan,  # sessions with no recorded start location
]
sleep_start_location_pc = sleep_start_location_pc[sorted_columns]
sleep_start_location_pc.shape

In [None]:
# Stacked bar chart: share of each start location per week.
fig, ax = plt.subplots(1, 1, figsize=[8, 5])

# Full-width bars with white edges, so adjacent weeks touch but stay separated.
stacked_bar_style = {
    "kind": "bar",
    "stacked": True,
    "width": 1,
    "alpha": 1,
    "linewidth": 2,
    "edgecolor": "white",
    "cmap": "Pastel2",
}
sleep_start_location_pc.plot(ax=ax, **stacked_bar_style)

ax.set(
    title="Sleep start location",
    ylabel="Percentage of sleep sessions (day and night)",
    xlabel="Week ending in",
)
ax.legend(bbox_to_anchor=[1, 1])

# Bars sit at integer positions 0..n-1; trim the x-range to their outer edges.
n_weeks = sleep_start_location_pc.shape[0]
ax.set_ylim(0, 100)
ax.set_xlim(-0.5, n_weeks - 0.5)

y_axis = ax.yaxis
y_axis.set_major_formatter(mtick.PercentFormatter())
y_axis.set_major_locator(mtick.MultipleLocator(10))
y_axis.set_minor_locator(mtick.MultipleLocator(5))
ax.grid(color="k", alpha=0.15, axis="y", which="both")
fig.tight_layout()

In [None]:
# Pie chart of the overall share of each start location across all weeks.
fig, ax = plt.subplots(1, 1, figsize=[5, 5])

# Total sessions per location, largest first.
location_totals = sleep_start_location.sum().sort_values(ascending=False)
pie_style = {
    "kind": "pie",
    "cmap": "Blues",
    "autopct": "%.0f%%",
    "startangle": 0,
}
location_totals.plot(ax=ax, **pie_style)

# Blank out the y-label so only the title describes the chart.
ax.set(ylabel="", title="Sleep start location (day and night)")
fig.tight_layout()

# Solids data

In [None]:
# One row per (meal, food): the "Start Condition" string of each solids entry
# is lower-cased, split on ", " and exploded into a "Food" column.
is_solids = data["Type"] == "Solids"
solids_data = (
    data.loc[is_solids, ["Start", "Start Condition"]]
    .assign(Food=lambda df: df["Start Condition"].str.lower().str.split(", "))
    .explode("Food")
    # Constant flag used as the cell value when pivoting later on.
    .assign(Bool=True)
)
solids_data.head()

## Timeline of foods

In [None]:
# Food x start-timestamp matrix: True where the food was part of the meal
# logged at that timestamp, missing otherwise.
solids_heatmap = solids_data.pivot(index="Food", columns="Start", values="Bool")
# Sort the rows on every timestamp column, in descending order. This looks
# intended to list foods by when they were first eaten (the bar chart further
# down titles this ordering "Order of introduction") -- confirm on the output.
solids_heatmap = solids_heatmap.sort_values(
    list(solids_heatmap.columns), ascending=False
)
# Collapse the timestamps to one column per calendar day (transpose so the
# timestamps form the index, resample daily, transpose back), then turn the
# flags into floats with missing values filled with 0.
solids_heatmap = solids_heatmap.T.resample("D").max().T.astype(float).fillna(0)
# Replace the timestamp column labels with plain dates.
solids_heatmap.columns = solids_heatmap.columns.date
solids_heatmap.shape

In [None]:
# Heatmap of which foods were eaten on which day.
fig, ax = plt.subplots(1, 1, figsize=[8, 5])

# The colour scale spans 0 to 1.5 while the matrix is filled with 0/1 flags,
# so a filled cell lands two thirds of the way up the "Greens" colormap.
heatmap_style = {
    "cmap": "Greens",
    "vmin": 0,
    "vmax": 1.5,
    "cbar": False,
    "square": True,
    "linewidth": 1,
}
sns.heatmap(solids_heatmap, **heatmap_style)

ax.set(title="Weaning Chart", xlabel="Date")
fig.tight_layout()

## Number of times each food has been tried

In [None]:
# Row sums of the daily heatmap: the number of days on which each food appears.
solids_counts = solids_heatmap.sum(axis=1).rename("Times tried")

In [None]:
# Three horizontal bar charts of the same counts, each in a different order.
fig, axes = plt.subplots(1, 3, figsize=[8, 4])
kwargs = {
    "kind": "barh",
    "color": "forestgreen",
    "width": 0.85,
    "alpha": 0.7,
}

# Panel 1: rows in the order they appear in `solids_heatmap`; the y-axis is
# inverted so that the first row is drawn at the top.
ax = axes[0]
solids_counts.plot(ax=ax, **kwargs)
ax.invert_yaxis()
ax.set_title("Order of introduction", fontsize=10)

# Panel 2: ascending count, which puts the most-tried food at the top.
ax = axes[1]
solids_counts.sort_values().plot(ax=ax, **kwargs)
ax.set_title("By number of times tried", fontsize=10)

# Panel 3: reverse-sorted index, so the names read A-Z from top to bottom.
ax = axes[2]
solids_counts.sort_index(ascending=False).plot(ax=ax, **kwargs)
ax.set_title("Alphabetical order", fontsize=10)

# Keep the original 0-10 range while every bar fits, but widen it once a food
# has been tried 10 or more times. A hard-coded limit of 10 would clip those
# bars and push their value labels out of the axes.
x_max = max(10, solids_counts.max() + 1)
for ax in axes:
    ax.set_ylabel("")
    ax.set_xlabel("Times tried")
    ax.grid(color="k", alpha=0.2)
    ax.set_xlim(0, x_max)
    # Write the count at the end of each bar.
    for container in ax.containers:
        ax.bar_label(container, fmt="%.0f", padding=3, fontsize=8, alpha=0.8)
fig.tight_layout()

In [None]:
# Save the per-food counts, sorted by food name, as a processed CSV.
file_path = here() / "data" / "processed" / "solids.csv"
# Create the output directory if needed, so that the export also works when
# data/processed does not exist yet (for example on a fresh checkout).
file_path.parent.mkdir(parents=True, exist_ok=True)
solids_counts.sort_index().to_csv(file_path)