In [None]:
%%capture
import os
import pandas as pd
import numpy as np
from dj_notebook import activate
from pathlib import Path

# Runtime configuration is read from environment variables; a missing
# variable raises KeyError here rather than somewhere later in the notebook.
env_file = os.environ["INTECOMM_ENV"]
analysis_folder = Path(os.environ["INTECOMM_ANALYSIS_FOLDER"])
# NOTE(review): reports_folder reads the same variable as analysis_folder, so
# both names point at the same directory — confirm whether a separate
# reports variable was intended.
reports_folder = Path(os.environ["INTECOMM_ANALYSIS_FOLDER"])
# Presumably boots the project environment described by the dotenv file so
# that project imports work in later cells — TODO confirm against dj_notebook.
plus = activate(dotenv_file=env_file)


In [None]:
from intecomm_analytics.dataframes import get_df_main_1858
from tabulate import tabulate
from edc_constants.constants import NO, YES, NOT_APPLICABLE
from intecomm_analytics.notebooks.primary.table_utils import (
    get_formatted_rows_by_country,
    get_formatted_rows_yes_no,
    get_formatted_rows_categorical_by_country,
    get_formatted_rows_by_country_single
)
from edc_constants.choices import YES_NO



In [None]:
# Main analysis dataframe; every table cell in this notebook reads from it.
# NOTE(review): the meaning of the `None` argument is not visible from this
# notebook — confirm against get_df_main_1858.
df_main = get_df_main_1858(None)


In [None]:
# Sanity check: number of rows where the `dm` indicator equals 1.
df_main.loc[df_main["dm"] == 1, "dm"].value_counts()

In [None]:
# Display labels for the 0/1 indicator columns.
# Insertion order is significant: later cells spread
# `yes_no_mapping.values()` into an ordered Categorical, so 'Yes' must come
# before 'No'.
yes_no_mapping = dict(zip((1, 0), ('Yes', 'No')))


In [None]:
# Accumulator for the per-parameter tables; the final cell concatenates it.
# The cells below append to this list, so re-running any of them without
# re-running this cell first will add duplicate rows to the output.
table_dfs: list[pd.DataFrame] = []

In [None]:
# AGE
# Label columns come first; the helper supplies the remaining columns.
# The three-element label lists assume the helper returns three rows.
table = {
    'Condition': ['All', '', ''],
    'Parameter': ['Age', '', ''],
    **get_formatted_rows_by_country_single(df_main, "age_in_years"),
}
table_df = pd.DataFrame(table)
table_dfs.append(table_df)

In [None]:
# gender
# The three-element label lists assume the helper returns an "n" row plus
# exactly two category rows for `gender`.
table = {
    'Condition': ['All', '', ''],
    'Parameter': ['Sex', '', ''],
    **get_formatted_rows_categorical_by_country(df_main, "gender"),
}
table_df = pd.DataFrame(table)
table_dfs.append(table_df)

In [None]:
# Weight
table = {
    'Condition': ['All', '', ''],
    'Parameter': ['Weight', '', ''],
    **get_formatted_rows_by_country_single(df_main, "weight"),
}
# The "Timepoint" column is removed from this table; the drop raises KeyError
# if the helper did not produce that column.
# NOTE(review): the age and height cells keep whatever columns the helper
# returns — confirm whether they should drop "Timepoint" as well.
table_df = pd.DataFrame(table).drop(columns=["Timepoint"])
table_dfs.append(table_df)

In [None]:
# height
# Label columns come first; the helper supplies the remaining columns.
# The three-element label lists assume the helper returns three rows.
table = {
    'Condition': ['All', '', ''],
    'Parameter': ['Height', '', ''],
    **get_formatted_rows_by_country_single(df_main, "height"),
}
table_df = pd.DataFrame(table)
table_dfs.append(table_df)

In [None]:
# Display the value -> label mapping for inspection (no effect on later cells).
yes_no_mapping

In [None]:
from intecomm_rando.constants import COMMUNITY_ARM, FACILITY_ARM

from intecomm_analytics.dataframes import treatment_arm_labels as treatment_arm

def get_cells_for_categorical(df: pd.DataFrame, col: str, arm: str | None = None) -> list[str]:
    if arm:
        n = len(df[(df["assignment"] == arm) & (df[col].notna())])
        counts = df[(df["assignment"] == arm) & (df[col].notna())][col].value_counts()
        percentages = (
            df[(df["assignment"] == arm) & (df[col].notna())][col].value_counts(normalize=True)
            * 100
        )
    else:
        n = len(df[(df[col].notna())])
        counts = df[(df[col].notna())][col].value_counts()
        percentages = df[(df[col].notna())][col].value_counts(normalize=True) * 100

    cells = [
        f"{counts.get(category, 0)} ({percentages.get(category, 0):.1f}%)"
        for category in df[df[col].notna()][col].unique().tolist()
    ]
    return [n, *cells]

def get_formatted_rows_categorical_by_country(
    df: pd.DataFrame, col: str, mapping: dict | None = None
):
    """Returns 5 columns"""
    rows = {}
    if mapping:
        df = df.copy()
        df[col] = df[col].apply(lambda x: mapping[x] if pd.notna(x) else x)
    func = get_cells_for_categorical

    categories = df[df[col].notna()][col].unique().tolist()

    rows.update(
        {
            "Statistics": ["n", *categories],
        }
    )
    rows.update(
        {
            f"{treatment_arm[COMMUNITY_ARM]} UG": [
                *func(df[df.country == "UG"], col, arm="a"),
            ],
            f"{treatment_arm[COMMUNITY_ARM]} TZ": [
                *func(df[df.country == "TZ"], col, arm="a"),
            ],
            f"{treatment_arm[COMMUNITY_ARM]} BOTH": [
                *func(df, col, arm="a"),
            ],
            f"{treatment_arm[FACILITY_ARM]} UG": [
                *func(df[df.country == "UG"], col, arm="b"),
            ],
            f"{treatment_arm[FACILITY_ARM]} TZ": [
                *func(df[df.country == "TZ"], col, arm="b"),
            ],
            f"{treatment_arm[FACILITY_ARM]} BOTH": [
                *func(df, col, arm="b"),
            ],
            "All": [*func(df, col)],
        }
    )
    return rows

In [None]:
# Condition indicators: one table section per 0/1 column, shown as Yes/No.
# Each entry is (dataframe column, label shown in the "Parameter" column);
# the order here is the order of the sections in the output.
condition_columns = [
    ("hiv_only", "HIV only"),
    # NOTE(review): the label says "only" but the column is "ncd" — confirm.
    ("ncd", "NCD only"),
    ("dm_only", "DM only"),
    ("htn_only", "HTN only"),
    ("hiv", "HIV"),
    ("dm", "DM"),
    ("htn", "HTN"),
    ("htn_and_dm", "HTN and DM"),
    ("hiv_and_htn_and_dm", "HTN and DM and HIV"),
]
# Row order within each section: "n" first, then the labels in mapping order.
statistics_order = ["n", *yes_no_mapping.values()]

for col, label in condition_columns:
    formatted_rows = get_formatted_rows_categorical_by_country(
        df_main, col, mapping=yes_no_mapping
    )
    # Size the label columns from the rows actually returned, so a column
    # with a single observed category does not break DataFrame construction.
    placeholders = [''] * (len(formatted_rows['Statistics']) - 1)
    # A fresh dict per section: nothing is carried over between iterations.
    table = {
        'Condition': ['Condition', *placeholders],
        'Parameter': [label, *placeholders],
        **formatted_rows,
    }
    table_df = pd.DataFrame(table)
    table_df["Statistics"] = pd.Categorical(
        table_df["Statistics"], categories=statistics_order, ordered=True
    )
    table_df = table_df.sort_values(by=["Statistics"], ascending=True)
    table_dfs.append(table_df)


In [None]:
from edc_constants.choices import SMOKER_STATUS_SIMPLE
from intecomm_subject.choices import EMPLOYMENT_STATUS, EDUCATION, MARITAL_STATUS, \
    ALCOHOL_CONSUMPTION

# Socio-demographic variables: (dataframe column, choices tuple) pairs.
# Each choices entry is read as (stored value, display label).
categorical_columns = [
    ("education", EDUCATION),
    ("employment_status", EMPLOYMENT_STATUS),
    ("marital_status", MARITAL_STATUS),
    ("smoking_status", SMOKER_STATUS_SIMPLE),
    ("alcohol_consumption", ALCOHOL_CONSUMPTION),
]

for col, choices in categorical_columns:
    mapping = {tpl[0]: tpl[1] for tpl in choices}
    # Compute the rows once and size the label columns from that same result,
    # so the placeholder count always matches the rows placed in the table.
    formatted_rows = get_formatted_rows_categorical_by_country(df_main, col, mapping=mapping)
    rows = len(formatted_rows['Statistics']) - 1
    placeholders = [''] * rows
    table = {
        'Condition': ['All', *placeholders],
        'Parameter': [col.replace("_", " ").title(), *placeholders],
        **formatted_rows,
    }
    table_df = pd.DataFrame(table)
    # Order rows as "n" followed by the labels in the order the choices
    # tuple declares them.
    table_df["Statistics"] = pd.Categorical(
        table_df["Statistics"], categories=["n", *mapping.values()], ordered=True
    )
    table_df = table_df.sort_values(by=["Statistics"], ascending=True)
    table_dfs.append(table_df)


In [None]:
# Complication columns, reported with their stored values (no label mapping).
complication_columns = [
    "stroke",
    "heart_attack",
    "renal_disease",
    "vision",
    "numbness",
    "foot_ulcers",
]

for col in complication_columns:
    # Compute the rows once; the same result sizes the label columns and
    # fills the table.
    formatted_rows = get_formatted_rows_categorical_by_country(df_main, col)
    rows = len(formatted_rows['Statistics']) - 1
    placeholders = [''] * rows
    table = {
        'Condition': ['All', *placeholders],
        'Parameter': [col.replace("_", " ").title(), *placeholders],
        **formatted_rows,
    }
    table_df = pd.DataFrame(table)
    # NOTE(review): any "Statistics" value other than "n", 'Yes' or 'No'
    # becomes NaN here — confirm these columns only hold 'Yes'/'No'.
    table_df["Statistics"] = pd.Categorical(
        table_df["Statistics"], categories=["n", 'Yes', 'No'], ordered=True
    )
    table_df = table_df.sort_values(by=["Statistics"], ascending=True)
    table_dfs.append(table_df)


In [None]:
# Stack every accumulated section in the order it was appended and write the
# result into analysis_folder, leaving the dataframe index out of the file.
demographics_path = analysis_folder.joinpath("demographics.csv")
table = pd.concat(table_dfs)
table.to_csv(demographics_path, index=False)