# DBMI SET Effort Analysis - Data Exploration - Monday.com

In [None]:
import pathlib
import zipfile

import duckdb
import pandas as pd

In [None]:
# pull data from dvc remote
# (shell command run through IPython's "!" syntax; presumably this fetches the
# Monday.com export under ../data/monday.com that the cells below read)
!dvc pull

In [None]:
# Locations of the Monday.com export, relative to this notebook.
monday_data_dir = "../data/monday.com/"
# Join with pathlib's "/" operator rather than f-string concatenation so the
# result does not depend on whether monday_data_dir ends with a separator.
monday_zip_path = (
    pathlib.Path(monday_data_dir) / "account_10368903_data_1661961547.zip"
)
# The archive is unpacked into a sibling directory named after the zip file,
# i.e. the same path with the ".zip" suffix removed.
monday_dir_path = monday_zip_path.with_suffix("")
monday_dir_path

In [None]:
# Unpack the export archive into monday_dir_path. An existing directory at
# that path is taken to mean the archive was already extracted, so the
# extraction is skipped in that case.
if not monday_dir_path.is_dir():
    with zipfile.ZipFile(monday_zip_path, mode="r") as archive:
        archive.extractall(path=monday_dir_path)

In [None]:
# Print a column/dtype summary for every Excel file found (recursively) under
# the Monday.com data directory.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called as its own statement; passing it to print() would emit the report
# first and then a misleading "<path> None" line.
# sorted() keeps the listing order stable between runs.
for path in sorted(pathlib.Path("../data/monday.com").glob("**/*.xlsx")):
    print(path)
    pd.read_excel(path).info()
    print()  # blank line between files

In [None]:
# Load the "Project Tasks" board export. The first four rows of the sheet are
# skipped (presumably export header rows above the real column names -- TODO
# confirm), and every column is prefixed with "Task_" so its origin stays
# recognizable after joining with the other boards.
tasks_xlsx = "../data/monday.com/account_10368903_data_1661961547/boards/1883170887_Project Tasks.xlsx"
df_tasks = pd.read_excel(tasks_xlsx, skiprows=4).add_prefix("Task_")
df_tasks.head()

In [None]:
# Load the "Customer Projects" board export, skipping the first four rows of
# the sheet.
projects_raw = pd.read_excel(
    "../data/monday.com/account_10368903_data_1661961547/boards/1882404316_Customer Projects.xlsx",
    skiprows=4,
)
# Keep only the columns needed for the join, prefix them with "Project_", and
# drop every row that has a missing value in any of the three columns.
# NOTE(review): this also discards projects that merely lack contacts --
# confirm that is intended, since their tasks then drop out of the join.
project_columns = ["Name", "Account", "Project Contacts"]
df_projects = projects_raw[project_columns].add_prefix("Project_").dropna()
df_projects

In [None]:
# Load the "Accounts" board export, skipping the first four rows of the sheet.
accts_raw = pd.read_excel(
    "../data/monday.com/account_10368903_data_1661961547/boards/1882424009_Accounts.xlsx",
    skiprows=4,
)
# Keep the descriptive account columns and prefix them with "Acct_"; rows with
# missing values are retained here.
acct_columns = ["Name", "Type", "Contacts", "Notes"]
df_accts = accts_raw[acct_columns].add_prefix("Acct_")
df_accts

In [None]:
# join account, project, and task details
# The SQL refers to the pandas DataFrames built above by their Python variable
# names. Both joins are plain (inner) JOINs, so only accounts with a matching
# project and projects with a matching task appear in the result.
sql_stmt = """
SELECT * FROM df_accts
JOIN df_projects on
    df_projects.Project_Account = df_accts.Acct_Name
JOIN df_tasks on
    df_tasks."Task_Customer Project" = df_projects.Project_Name
"""
acct_project_tasks = duckdb.sql(sql_stmt).df()
# DataFrame.info() prints its own report and returns None, so it is not
# wrapped in print() (that would add a stray "None" line to the output).
acct_project_tasks.info()
acct_project_tasks.head()

In [None]:
# largest value in the "Task_Date Added" column of the joined data
task_dates_added = acct_project_tasks["Task_Date Added"]
task_dates_added.max()

In [None]:
# Persist the joined account/project/task table as a parquet file next to the
# raw export so it can be reused later without repeating the join.
acct_project_tasks.to_parquet(
    path="../data/monday.com/dbmi-set-monday-acct-project-tasks.parquet"
)

In [None]:
# Version the exported parquet file (shell commands via IPython's "!" syntax):
#   1. register the file with DVC,
#   2. stage the resulting .dvc pointer file in git (staging only -- no commit
#      is made here),
#   3. push the DVC-tracked data to the DVC remote.
!dvc add ../data/monday.com/dbmi-set-monday-acct-project-tasks.parquet
!git add ../data/monday.com/dbmi-set-monday-acct-project-tasks.parquet.dvc
!dvc push