# Baby times
## Data prep
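This notebook parses the raw baby-times log (six-digit date lines followed by `Feed` / `Wake` / `Sleep` entries) into a table of timestamped events and writes it to CSV.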

In [3]:
import pandas as pd
import re
from functools import reduce
import datetime as dt

In [9]:
# Today's date rendered as a six-digit MMDDYY string; the bare name below displays it.
date = format(dt.datetime.now(), "%m%d%y")
date

'080519'

In [10]:
%matplotlib inline

In [11]:
# Read the whole raw log into one string. The encoding is pinned so the text is
# decoded the same way on every machine instead of following the platform locale.
# NOTE(review): assumes the export is UTF-8 (plain ASCIl is a subset) -- confirm.
with open("./baby_times_080319.txt", encoding="utf-8") as f:
    times = f.read()

In [4]:
# One record per newline-delimited line of the log, keeping the untouched text under "raw".
time_lines = [{"raw": raw_text} for raw_text in times.split("\n")]

In [5]:
# Sanity check: number of raw lines obtained from the log.
len(time_lines)

217

In [6]:
# Patterns used to classify a raw log line:
#   * "date"  -- a line that is exactly six digits;
#   * "feed" / "wake" / "sleep" -- the event word, one space, then H:MM followed by
#     "A" or "P" (anything after that marker is still accepted by the pattern).
line_types = [
    {"type": "date", "regex": "^[0-9]{6}$"},
    *(
        {
            "type": event_word.lower(),
            "regex": "^" + event_word + " [0-9]{1,2}:[0-9]{1,2}(A|P)",
        }
        for event_word in ("Feed", "Wake", "Sleep")
    ),
]

In [7]:
def line_type(l):
    """Classify one log record against the patterns in ``line_types``.

    Parameters
    ----------
    l : dict
        Record whose ``"raw"`` entry holds the text of the line.

    Returns
    -------
    str
        The ``"type"`` of the single matching pattern, ``"no type"`` when nothing
        matches, or ``"multiple types"`` when more than one pattern matches.
    """
    hits = []
    for candidate in line_types:
        if re.search(candidate["regex"], l["raw"]):
            hits.append(candidate["type"])

    if not hits:
        return "no type"
    if len(hits) > 1:
        return "multiple types"
    return hits[0]

In [8]:
# Tag every record with its position in the file and its classified line type.
for position, record in enumerate(time_lines):
    record["line_num"], record["type"] = position, line_type(record)

In [9]:
# One DataFrame row per log line; columns come from the record keys (raw, line_num, type).
times_df = pd.DataFrame(time_lines)

Add the date: attach to each event the most recent date line above it.

In [10]:
# 1 on date lines, 0 everywhere else.
times_df["date_ind"] = times_df["type"].eq("date").astype("int64")

In [11]:
# Running count of date lines in file order. The assignment aligns on the index, so
# each row receives the ordinal of the latest date line at or above it.
times_df["date_num"] = times_df.sort_values(by="line_num")["date_ind"].cumsum()

In [12]:
# Keep only feed / wake / sleep rows and look up, through date_num, the text of the
# date line they fall under (stored in a new "date" column), then restore file order.
rest_times_df = (
    times_df.loc[times_df["type"].isin(["feed", "wake", "sleep"])]
    .merge(
        times_df.loc[times_df["type"] == "date", ["raw", "date_num"]].rename(
            columns={"raw": "date"}
        ),
        how="left",
        on="date_num",
    )
    .sort_values(by="line_num")
)

In [13]:
# Pull hour, minute and the A/P marker out of each entry with the same
# "<H>:<M><A|P>" shape the line classifier matches. Slicing the second
# space-separated token by position breaks as soon as anything follows the marker
# (e.g. "3:15PM" or "3:15P,"): the extra characters end up in the minute and
# am_pm fields. Capturing the three parts explicitly avoids that.
time_parts = rest_times_df["raw"].str.extract(r"^\S+ ([0-9]{1,2}):([0-9]{1,2})([AP])")
rest_times_df["hour"] = time_parts[0].str.rjust(2, "0")  # zero-padded to two digits
rest_times_df["min"] = time_parts[1].str.rjust(2, "0")  # zero-padded to two digits
rest_times_df["am_pm"] = time_parts[2] + "M"  # "A" / "P" -> "AM" / "PM"

In [14]:
# Concatenate the string pieces, left to right, into one parseable timestamp string.
rest_times_df["date_and_time"] = (
    rest_times_df["date"]
    + rest_times_df["hour"]
    + rest_times_df["min"]
    + rest_times_df["am_pm"]
)

In [15]:
# Parse the combined string (MMDDYY, 12-hour clock hour, minute, AM/PM) into timestamps.
rest_times_df["dt"] = pd.to_datetime(
    rest_times_df["date_and_time"],
    format="%m%d%y%I%M%p",
)

Get the next event's type and timestamp for each row.

In [16]:
# Shift up by one row so each event also carries the type and timestamp of the
# event that follows it (the last row gets missing values).
rest_times_df["next_type"] = rest_times_df["type"].shift(-1)
rest_times_df["next_dt"] = rest_times_df["dt"].shift(-1)

In [17]:
# Gap from each event to the following one.
# NOTE(review): `.dt.seconds` is only the seconds component of the timedelta (whole
# days are dropped and negative gaps wrap into 0-86399), not the full duration --
# confirm that this is intended rather than `.dt.total_seconds()`.
seconds_to_next = (rest_times_df["next_dt"] - rest_times_df["dt"]).dt.seconds
rest_times_df["time_to_next_min"] = seconds_to_next / 60
rest_times_df["time_to_next_hr"] = seconds_to_next / 3600
# The following event is flagged valid only when it is less than 8 hours away.
rest_times_df["next_time_valid"] = rest_times_df["time_to_next_hr"] < 8

In [20]:
# Write the prepared event columns to CSV, leaving the index out.
export_columns = ["line_num", "type", "dt", "next_dt", "next_time_valid"]
rest_times_df.to_csv(
    "baby_times_prep_ver1_080419.csv",
    columns=export_columns,
    index=False,
)

In [19]:
# Number of events recorded under each date line.
rest_times_df["date"].value_counts()

072819    25
080319    25
080219    20
080119    20
072919    16
073019    15
072719    15
080419     8
073119     7
072319     6
072219     5
072419     4
072519     4
072619     3
Name: date, dtype: int64

In [21]:
# Count events that ended up without a date after the merge.
rest_times_df["date"].isna().sum()

0