The goal of this notebook is to develop a procedure to review the tasks compiled by the TODO-tracker and capture:
- when the task is completed
- who completed the task
- what label(s) the task had [NOTE: this may require identifying whether the same task appears in any other todo.yml file]
- any notes or supporting materials related to the task completion

In [1]:
# dependencies
from os import listdir
from pathlib import Path
import yaml
import hashlib
import pandas as pd

In [2]:
# support methods
def read_yaml(fname):
    with open(fname, 'r') as f_handle:
        out = yaml.safe_load(f_handle)
    return out


def get_hash(task_str):
    enc_task = str(task_str).encode()
    hash_obj = hashlib.sha1(enc_task)
    return str(hash_obj.hexdigest())


def collect_task_fs(task_dir):
    return [
        (tag, f"{task_dir}{tag}/{f}") for tag in listdir(task_dir) if '.log' not in tag
        for f in listdir(task_dir+tag) if '.yml' in f
    ]


def fillin_tasks(task_dir):
    assert Path(task_dir).exists()
    task_lib = collect_task_fs(task_dir)
    task_dfs = []
    for (tag, f) in task_lib:
        tag_tasks = read_yaml(f)
        df = pd.DataFrame(tag_tasks, columns=['task'])
        df[tag] = 1
        task_dfs.append(df)
    out = pd.concat(task_dfs).fillna(0)
    out['task_id'] = out.task.apply(get_hash)
    return out.reset_index().drop(columns='index')


def find_mult_tags(task_df):
    tag_cols = [col for col in task_df.columns if 'task' not in col]
    if (any(task_df[tag_cols].sum(axis=1) > 1)) | (any(task_df.duplicated(subset='task_id'))):
        print("tasks with multiple labels found")
    else:
        print("no tasks found with multiple labels assigned")
    task_is = task_df.loc[task_df[tag_cols].sum(axis=1) > 1].index.values
    task_ids = task_df.loc[task_df.index.isin(task_is), 'task_id'].values
    return task_ids

In [3]:
# main
task_dir = "../output/"

task_df = fillin_tasks(task_dir)
mult_tags = find_mult_tags(task_df)

task_df.to_parquet("../output/active.parquet")

NotADirectoryError: [Errno 20] Not a directory: '../output/active.parquet'

In [None]:
task_df

### how many tasks per tag?

In [None]:
task_df.sum()

### how many tags per task?

In [None]:
task_df[tag_cols].sum(axis=1).describe()