# Activity Sampling

Parameters:

In [None]:
# Relative path of the CSV activity log that project_activities() reads by
# default.
LOG_FILE = "../desktop-app/data/activity-log.csv"

Read the activity log and aggregate its entries into one activity per client, project and task:

In [None]:
import collections
import csv
import datetime
import math

import isodate

# TODO add working hours
# Aggregated record for one (client, project, task) combination of the log.
Activity = collections.namedtuple(
    "Activity",
    "start finish client project task hours cycle_time",
)


def project_activities(log_file=LOG_FILE):
    """Aggregate the rows of the activity log into one Activity per task.

    Rows are grouped by their (Client, Project, Task) columns. Within a
    group, the earliest and latest calendar date of the Timestamp column
    become ``start`` and ``finish``, the Duration values (parsed with
    ``isodate.parse_duration``) are summed into ``hours``, and
    ``cycle_time`` is the span from start to finish counted inclusively,
    i.e. an activity logged on a single day has a cycle time of one day.

    Args:
        log_file: Path of the CSV activity log. The file needs a header row
            with the columns Timestamp, Client, Project, Task and Duration.

    Returns:
        A list of Activity tuples, ordered by the first appearance of each
        (client, project, task) combination in the log.
    """
    one_day = datetime.timedelta(days=1)
    activities = {}
    # newline="" lets the csv module handle line endings itself, which is
    # required for quoted fields that contain line breaks. The explicit
    # encoding keeps parsing independent of the platform's locale default.
    # NOTE(review): assumes the log is written as UTF-8 - confirm.
    with open(log_file, mode="r", newline="", encoding="utf-8") as file:
        for row in csv.DictReader(file):
            key = (row["Client"], row["Project"], row["Task"])
            date = datetime.datetime.fromisoformat(row["Timestamp"]).date()
            duration = isodate.parse_duration(row["Duration"])

            previous = activities.get(key)
            if previous is None:
                start, finish, hours = date, date, duration
            else:
                start = min(previous.start, date)
                finish = max(previous.finish, date)
                hours = previous.hours + duration

            # Activity is immutable, so every row replaces the stored record.
            activities[key] = Activity(
                start=start,
                finish=finish,
                client=row["Client"],
                project=row["Project"],
                task=row["Task"],
                hours=hours,
                cycle_time=finish - start + one_day,
            )
    return list(activities.values())


# Load the activities once; the cells below reuse this list.
activities = project_activities()

## List of Activities

In [None]:
def print_activities(activities):
    """Print the given activities as a fixed-width text table.

    Args:
        activities: Iterable of records with the attributes ``start``,
            ``finish``, ``task``, ``project``, ``client``, ``hours`` and
            ``cycle_time``; the last two must provide ``total_seconds()``.

    Task names are cut to 35 characters, project and client names to 18.
    Hours are shown as decimal hours rounded to two places and the cycle
    time is rounded up to whole days.
    """
    text_columns = "{:<10}  {:<10}  {:<35}  {:<18}  {:<18}"
    title_layout = text_columns + "  {:>7}  {:<10}"
    # Same columns as the title row, but the cycle time is right-aligned.
    entry_layout = text_columns + "  {:>7}  {:>10}"

    print(title_layout.format(
        "Start", "Finish", "Task", "Project", "Client", "Hours",
        "Cycle Time"))
    print("-" * 120)

    for entry in activities:
        decimal_hours = round(entry.hours.total_seconds() / 3600, 2)
        days = math.ceil(entry.cycle_time.total_seconds() / 3600 / 24)
        print(entry_layout.format(
            str(entry.start),
            str(entry.finish),
            entry.task[:35],
            entry.project[:18],
            entry.client[:18],
            decimal_hours,
            days,
        ))


# List the activities ordered by cycle time, then by task, project and client.
print_activities(sorted(activities,
                        key=lambda x: (x.cycle_time, x.task, x.project,
                                       x.client)))

## Cycle Times

In [None]:
def print_cycle_times(activities):
    """Print a frequency table of the activities' cycle times.

    Args:
        activities: Iterable of records whose ``cycle_time`` attribute
            provides ``total_seconds()``.

    Cycle times are rounded up to whole days. Each row shows one cycle time,
    the number of activities having it, that number as a percentage of all
    activities, and the cumulative percentage of activities with this or a
    shorter cycle time. Percentages are rounded to whole numbers. Without
    activities only the header and the separator are printed.
    """
    cycle_time_counts = collections.Counter(
        math.ceil(activity.cycle_time.total_seconds() / 3600 / 24)
        for activity in activities
    )

    headers = ["Cycle Time", "Frequency", "Probability", "Cumulative"]
    # "Probability" is 11 characters long, so its column is 11 wide; a
    # narrower column pushes the following header out of line with its values.
    print(
        f"{headers[0]:<10}  {headers[1]:<9}  {headers[2]:<11}  {headers[3]:<10}")
    print("-" * 46)

    total = sum(cycle_time_counts.values())
    cumulative_frequency = 0
    for cycle_time, frequency in sorted(cycle_time_counts.items()):
        cumulative_frequency += frequency
        probability = frequency / total * 100
        # Derive the cumulative share from the running count rather than by
        # adding up float percentages, so rounding errors cannot accumulate.
        cumulative = cumulative_frequency / total * 100
        print(
            f"{cycle_time:>10}  {frequency:>9}  {round(probability):>10}%  {round(cumulative):>9}%")


# Show how the cycle times are distributed over all loaded activities.
print_cycle_times(activities)