# DBMI SET Effort Analysis - Data Exploration - Monday.com and Toggl.com Data Joins

In [None]:
import pathlib
import zipfile

import duckdb
import pandas as pd

In [None]:
# pull data from dvc remote
# (IPython shell escape: runs `dvc pull` in a subshell; presumably this is what
# materializes the ../data inputs read by the cells below - confirm against the
# repository's .dvc files)
!dvc pull

In [None]:
# Input locations used by the loading cells, relative to the notebook's
# working directory.
# Monday.com export: a single parquet file.
monday_data = "../data/monday.com/datadbmi-set-monday-acct-project-tasks.parquet"
# Toggl exports: every summary-report CSV matching the pattern below.
# Path.glob() yields matches in arbitrary, filesystem-dependent order, so the
# paths are sorted to keep the row order of anything built from this list
# reproducible from run to run.
toggl_data = sorted(
    pathlib.Path("../data/toggl.com/").glob("DB-Toggl_Track_summary_report_*.csv")
)

In [None]:
# read monday data into df
df_monday = pd.read_parquet(monday_data)
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() only appended a stray "None" line.
df_monday.info()
# last expression of the cell: the notebook renders the first rows
df_monday.head()

In [None]:
# distinct account names present in the monday data
monday_accounts = df_monday["Acct_Name"]
monday_accounts.unique()

In [None]:
# read every toggl summary report and stack them into one df
if not toggl_data:
    # pd.concat() on an empty sequence fails with an opaque
    # "No objects to concatenate"; report what is actually missing instead.
    raise FileNotFoundError(
        "No Toggl summary report CSVs were found; "
        "check that `dvc pull` fetched the files under ../data/toggl.com/"
    )
# ignore_index=True gives the stacked frame a fresh 0..n-1 index instead of
# repeating each file's own row numbers.
df_toggl = pd.concat(
    [pd.read_csv(filepath) for filepath in toggl_data], ignore_index=True
)
# DataFrame.info() prints its own summary and returns None, so no print() here.
df_toggl.info()
# last expression of the cell: the notebook renders the first rows
df_toggl.head()

In [None]:
# distinct client names present in the toggl data
toggl_clients = df_toggl["Client"]
toggl_clients.unique()

In [None]:
# Give both frames matching column names ahead of combining them:
# monday's account name and actual-time columns become "Client" and
# "Duration", and toggl's description column becomes "Task_Name".
monday_column_map = {"Acct_Name": "Client", "Task_Actual Time": "Duration"}
toggl_column_map = {"Description": "Task_Name"}

df_monday = df_monday.rename(columns=monday_column_map)
df_toggl = df_toggl.rename(columns=toggl_column_map)

In [None]:
# relabel clients from monday for dataset clarity
# (presumably so the monday labels line up with the toggl client names -
# verify against the unique-value listings above)
df_monday["Client"] = (
    df_monday["Client"]
    # Literal substring replacement. regex=False is spelled out because the
    # default of Series.str.replace differs between pandas 1.x (regex) and
    # pandas 2.x (literal); the label is meant to be matched verbatim.
    .str.replace("HealthAI: Way Lab", "Way Lab", regex=False)
    # Whole-value replacement: only cells equal to the full label change.
    .replace("HealthAI: Admin & Operations", "DBMI Administration")
)

In [None]:
# Stack the shared columns of both sources into one frame and discard any row
# that has a missing value in one of those columns.
column_subset = ["Client", "Task_Name", "Duration"]
frames = [frame[column_subset] for frame in (df_monday, df_toggl)]
df_combined = pd.concat(frames).dropna()
# last expression of the cell: the notebook renders the first rows
df_combined.head()

In [None]:
# parse duration into a timedelta and derive a separate whole-minutes column
#
# errors="raise" replaces the deprecated errors="ignore". With "ignore", a
# value that cannot be parsed made to_timedelta hand back the input unchanged,
# and the cell then failed one statement later on `.dt` with a misleading
# AttributeError. Raising here points at the unparseable value directly.
# NOTE(review): numeric inputs are interpreted as nanoseconds by default -
# confirm the monday-sourced durations are strings/timedeltas, not numbers.
df_combined["Duration"] = pd.to_timedelta(df_combined["Duration"], errors="raise")
# total seconds -> minutes; astype(int) drops the fractional part of a minute
df_combined["Duration_Minutes"] = (
    df_combined["Duration"].dt.total_seconds().div(60).astype(int)
)

In [None]:
# total recorded minutes per client, sorted ascending, as a horizontal bar chart
minutes_per_client = df_combined.groupby("Client")["Duration_Minutes"].sum()
minutes_per_client.sort_values().plot(kind="barh")

In [None]:
# export for potential later use
output_path = "../data/analysis/monday_and_toggl_task_analysis.parquet"
# to_parquet does not create missing directories; make sure the target
# directory exists so a fresh checkout can run this cell.
pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df_combined.to_parquet(output_path)

In [None]:
# track the exported parquet file with dvc (IPython shell escapes)
!dvc add ../data/analysis/monday_and_toggl_task_analysis.parquet
# stage the .dvc pointer file in git; this only stages it, no commit is made here
!git add ../data/analysis/monday_and_toggl_task_analysis.parquet.dvc
# upload dvc-tracked data to the dvc remote
!dvc push