In [None]:
import json
import os
from pathlib import Path
import pandas as pd


# base directory for data files, relative to the working directory
DATA_DIR = Path("./data")
# CSV file to convert; the TOGGL_DATA environment variable overrides the sample file
DATA_SOURCE = Path(os.getenv("TOGGL_DATA", DATA_DIR / "toggl-sample.csv"))

# path of the JSON user info file; None when TOGGL_USER_INFO is not set
USER_INFO_FILE = os.getenv("TOGGL_USER_INFO")

# value used for the Client column; None when HARVEST_CLIENT_NAME is not set
CLIENT_NAME = os.getenv("HARVEST_CLIENT_NAME")


def str_timedelta(td):
    """
    Convert a string formatted duration (e.g. 01:30:00) to a timedelta.

    The value is parsed as an elapsed time, not as a time of day, so hour
    counts of 24 and above (e.g. 25:10:00) are valid.

    Parameters
    ----------
    td : str or list-like of str
        Duration(s) written as ``HH:MM:SS``. Anything else that
        ``pandas.to_timedelta`` understands is accepted as well.

    Returns
    -------
    pandas.Timedelta for a single string; the matching pandas timedelta
    container for list-like input.

    Raises
    ------
    ValueError
        If pandas cannot interpret the input as a duration.
    """
    # parse directly: a detour through a "%H:%M:%S" datetime would reject
    # any duration of 24 hours or more, because %H only covers 00-23
    return pd.to_timedelta(td)


def read_user_info():
    """
    Return the parsed JSON content of the file named by USER_INFO_FILE.
    """
    with open(USER_INFO_FILE, "r") as handle:
        info = json.load(handle)
    return info


def write_user_info(info):
    """
    Serialize info as JSON (2-space indent) into the file named by
    USER_INFO_FILE, replacing any previous content.
    """
    with open(USER_INFO_FILE, "w") as handle:
        json.dump(info, handle, indent=2)

In [None]:
# assign category dtype for efficiency on repeating text columns
dtypes = {
    "Email": "category",
    "Task": "category",
    "Client": "category",
}
# skip reading the columns we don't care about for Harvest
cols = list(dtypes) + [
    # free-text column, kept as plain strings; it is renamed to "Notes" further
    # down, and rename() silently skips labels that were never loaded
    "Description",
    "Start date",
    "Start time",
    "Duration",
]
# read CSV file, parsing dates and times
source = pd.read_csv(DATA_SOURCE, dtype=dtypes, usecols=cols, parse_dates=["Start date"], cache_dates=True)
# both time columns arrive as text and are converted to timedeltas
source["Start time"] = source["Start time"].apply(str_timedelta)
source["Duration"] = source["Duration"].apply(str_timedelta)
# order entries chronologically, then by user; rebind instead of sorting in place
source = source.sort_values(["Start date", "Start time", "Email"])
source.dtypes

In [None]:
# columns whose values can be imported as-is only need a new header;
# labels that are not present in the frame are left out silently by rename()
source = source.rename(
    columns={
        "Task": "Project",
        "Description": "Notes",
        "Start date": "Date",
    }
)
source.dtypes

In [None]:
# fill the columns that hold one constant value for every row,
# then store them as categories since the text repeats on each row
source = source.assign(
    Client=CLIENT_NAME,
    Task="Project Consulting",
).astype({"Client": "category", "Task": "category"})