# Baby times
## Data prep
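This notebook parses a raw text log of baby feed, wake, and sleep times into a table with one timestamped row per event, and exports the result to CSV.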

In [42]:
import pandas as pd
import re
from functools import reduce
import datetime as dt

In [43]:
# Today's date as an MMDDYY string; it is later embedded in the export filename.
date = format(dt.datetime.now(), "%m%d%y")
date

'080819'

In [44]:
# Render matplotlib figures inline in the notebook output.
# NOTE(review): no plotting call is visible in this notebook — confirm this magic is still needed.
%matplotlib inline

In [45]:
# Read the whole raw log into memory as a single string.
with open("./baby_times_080319.txt") as log_file:
    times = log_file.read()

In [46]:
# One record per newline-separated piece of the log; later cells add fields to these dicts.
time_lines = [dict(raw=piece) for piece in times.split("\n")]

In [47]:
# Sanity check: number of records built from the raw log (every split piece counts, typed or not).
len(time_lines)

313

In [48]:
# Patterns used to classify each raw log line.
# A date header is a line made of exactly six digits.
line_types = [{"type": "date", "regex": "^[0-9]{6}$"}]

# Event lines start with the event name, a space, then a clock time such as
# "3:05P" (1-2 digit hour, 1-2 digit minute, A or P). The pattern is anchored
# at the start only, so trailing text after the time is allowed.
for event_name in ["Feed", "Wake", "Sleep"]:
    line_types.append({
        "type": event_name.lower(),
        "regex": "^" + event_name + " [0-9]{1,2}:[0-9]{1,2}(A|P)",
    })

In [49]:
def line_type(l):
    """Classify one log-line record against the patterns in ``line_types``.

    Parameters
    ----------
    l : dict
        Line record; only its ``"raw"`` string is read.

    Returns
    -------
    str
        The name of the single matching type, ``"no type"`` when no pattern
        matches, or ``"multiple types"`` when more than one pattern matches.
    """
    hits = []
    for candidate in line_types:
        if re.search(candidate["regex"], l["raw"]):
            hits.append(candidate["type"])

    if not hits:
        return "no type"
    if len(hits) > 1:
        return "multiple types"
    return hits[0]

In [50]:
# Stamp every record with its position in the file and its classified type.
for position, record in enumerate(time_lines):
    record["line_num"] = position
    record["type"] = line_type(record)

In [51]:
# One row per log line, with columns taken from the record keys.
times_df = pd.DataFrame(data=time_lines)

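Add the date: number each line by the most recent date header above it, then join that date onto every feed/wake/sleep event.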

In [52]:
# Flag date-header rows with 1 and every other row with 0.
times_df["date_ind"] = times_df["type"].eq("date").astype("int64")

In [53]:
# Running count of date headers in file order: each line gets the number of
# the date section it falls under (0 for lines before the first header).
flags_in_file_order = times_df.sort_values("line_num")["date_ind"]
times_df["date_num"] = flags_in_file_order.cumsum()

In [54]:
# Event rows only (feed / wake / sleep).
event_rows = times_df[times_df["type"].isin(["feed", "wake", "sleep"])]

# Lookup from date-section number to the raw date string of its header line.
date_lookup = times_df.loc[times_df["type"] == "date", ["raw", "date_num"]].rename(
    columns={"raw": "date"}
)

# Left join keeps every event; events with no matching header get a missing date.
rest_times_df = event_rows.merge(
    right=date_lookup,
    on=["date_num"],
    how="left",
).sort_values("line_num")

In [55]:
# The clock token is the second space-separated word of the raw line
# (e.g. "3:05P"); split it once on ":" and reuse the pieces below.
clock_parts = rest_times_df["raw"].map(lambda text: text.split(" ")[1].split(":"))

# Hour: text before the colon, zero-padded to two characters.
rest_times_df["hour"] = clock_parts.map(lambda parts: parts[0].rjust(2, "0"))
# Minute: text after the colon minus its final character, zero-padded to two characters.
rest_times_df["min"] = clock_parts.map(lambda parts: parts[1][:-1].rjust(2, "0"))
# Meridiem: final character after the colon ("A" or "P") with "M" appended.
rest_times_df["am_pm"] = clock_parts.map(lambda parts: parts[1][-1] + "M")

In [56]:
# Concatenate the string pieces in parse order: date, hour, minute, AM/PM.
rest_times_df["date_and_time"] = (
    rest_times_df["date"]
    + rest_times_df["hour"]
    + rest_times_df["min"]
    + rest_times_df["am_pm"]
)

In [57]:
# Parse the concatenated MMDDYY + 12-hour HHMM + AM/PM string into a timestamp.
rest_times_df["dt"] = pd.to_datetime(
    rest_times_df["date_and_time"],
    format="%m%d%y%I%M%p",
)

Get next-line info: for each event, record the type and timestamp of the event that follows it.

In [58]:
# Pull the following row's type and timestamp up onto the current row;
# the last row has no successor and gets missing values.
rest_times_df["next_type"] = rest_times_df["type"].shift(-1)
rest_times_df["next_dt"] = rest_times_df["dt"].shift(-1)

In [59]:
# Elapsed time from each event to the one that follows it.
# Use total_seconds(): `.dt.seconds` is only the seconds component of the
# timedelta (0-86399) and drops whole days, so a 25-hour gap would read as
# 1 hour, and a negative gap (out-of-order entry) would wrap to a positive value.
gap_to_next_sec = (rest_times_df["next_dt"] - rest_times_df["dt"]).dt.total_seconds()
rest_times_df["time_to_next_min"] = gap_to_next_sec / 60
rest_times_df["time_to_next_hr"] = gap_to_next_sec / 3600

# A gap is usable only when the next event is not earlier than this one and is
# less than 8 hours away. The last row has no next event (NaN) and compares False.
rest_times_df["next_time_valid"] = (
    (rest_times_df["time_to_next_hr"] >= 0) & (rest_times_df["time_to_next_hr"] < 8)
)

In [60]:
# Write the prepared events to a CSV whose name carries the run date.
export_columns = ["line_num", "type", "dt", "next_dt", "next_time_valid"]
export_path = "baby_times_prep_ver1_{}.csv".format(date)
rest_times_df[export_columns].to_csv(export_path, index=False)

In [61]:
# Number of feed/wake/sleep events recorded under each date header.
rest_times_df["date"].value_counts()

080619    31
080319    25
072819    25
080419    22
080219    20
080519    20
080119    20
072919    16
072719    15
073019    15
080819    14
080719    12
073119     7
072319     6
072219     5
072519     4
072419     4
072619     3
Name: date, dtype: int64

In [41]:
# Count events that did not pick up a date in the left join; anything above 0
# means some event had no matching date header.
# NOTE(review): this cell's execution counter (41) is older than the cells above
# it (up to 61), so the displayed result may be stale — re-run from a fresh kernel.
rest_times_df["date"].isna().sum()

0