The goal of this notebook is to develop a procedure to review the tasks compiled by the TODO-tracker and capture:
- when the task is completed
- who completed the task
- what label(s) the task had [NOTE: this may require identifying whether the same task appears in any other todo.yml file]
- any notes or supporting materials related to the task completion

In [1]:
# dependencies
from os import listdir
from pathlib import Path
import yaml
import hashlib
import pandas as pd

In [2]:
# support methods
def read_yaml(fname):
    with open(fname, 'r') as f_handle:
        out = yaml.safe_load(f_handle)
    return out


def get_hash(task_str):
    enc_task = str(task_str).encode()
    hash_obj = hashlib.sha1(enc_task)
    return str(hash_obj.hexdigest())


def collect_task_fs(task_dir):
    return [
        (tag, f"{task_dir}{tag}/{f}") for tag in listdir(task_dir) if '.' not in tag
        for f in listdir(task_dir+tag) if ('.yml' in f)
    ]


def fillin_tasks(task_dir):
    assert Path(task_dir).exists()
    task_lib = collect_task_fs(task_dir)
    task_dfs = []
    for (tag, f) in task_lib:
        tag_tasks = read_yaml(f)
        df = pd.DataFrame(tag_tasks, columns=['task'])
        df[tag] = 1
        task_dfs.append(df)
    out = pd.concat(task_dfs).fillna(0)
    out['task_id'] = out.task.apply(get_hash)
    return out.reset_index().drop(columns='index')


def find_mult_tags(task_df):
    tag_cols = [col for col in task_df.columns if 'task' not in col]
    if (any(task_df[tag_cols].sum(axis=1) > 1)) | (any(task_df.duplicated(subset='task_id'))):
        print("tasks with multiple labels found")
    else:
        print("no tasks found with multiple labels assigned")
    task_is = task_df.loc[task_df[tag_cols].sum(axis=1) > 1].index.values
    task_ids = task_df.loc[task_df.index.isin(task_is), 'task_id'].values
    return task_ids

In [3]:
# main
task_dir = "../output/"

task_df = fillin_tasks(task_dir)
mult_tags = find_mult_tags(task_df)
tag_cols = [col for col in task_df.columns if 'task' not in col]

task_df.to_parquet("../output/active.parquet")

no tasks found with multiple labels assigned


In [4]:
task_df

Unnamed: 0,task,reading,project_2,untagged,longterm,task_id
0,review ts email about time results for other o...,1.0,0.0,0.0,0.0,2c43d4909dc6b5ffbd59eedb23553397db2cd677
1,import makefile targets need better names,0.0,1.0,0.0,0.0,6ce5d2bf56fa0aca3feea3610cb05c587680b1d3
2,import.r initial_asserts() needs improvement,0.0,1.0,0.0,0.0,d90429289e27a6cbc9dc1a13b7cf39c10a196546
3,import.r apply ts advice about short functions,0.0,1.0,0.0,0.0,9adce4399b8df9bcf534a23bfb78783f87a444f6
4,sample_state.r apply ts advice about short fun...,0.0,1.0,0.0,0.0,df0236a6047af6934b36dd1a1faa1bba797e5d1d
5,clean logical_missing.yaml deprecated?,0.0,1.0,0.0,0.0,632e0f748058389800fde57573945e53d8a8d86f
6,clean.py initial_asserts() needs improvement,0.0,1.0,0.0,0.0,e22c26fdc843dad07b91a4e8108025759df6723b
7,clean.py final_asserts() needs improvement,0.0,1.0,0.0,0.0,55d9043c2460c9f3146bc92d19cf6ca7ed4e76ba
8,clean.py apply ts advice about short functions,0.0,1.0,0.0,0.0,d2e38e8ea5e9bb60a400d881a27b985159cf302b
9,clean.py format_str() deprecated?,0.0,1.0,0.0,0.0,946b49655ae5d59a89b2bbaaa7732f7d755c1107


### how many tasks per tag?

In [5]:
task_df.sum()

task         review ts email about time results for other o...
reading                                                    1.0
project_2                                                 16.0
untagged                                                   1.0
longterm                                                   1.0
task_id      2c43d4909dc6b5ffbd59eedb23553397db2cd6776ce5d2...
dtype: object

### how many tags per task?

In [6]:
task_df[tag_cols].sum(axis=1).describe()

count    19.0
mean      1.0
std       0.0
min       1.0
25%       1.0
50%       1.0
75%       1.0
max       1.0
dtype: float64