In [None]:
%%capture
import os
from pathlib import Path
import pandas as pd

from dj_notebook import activate

# Runtime configuration is read from the environment; a missing variable raises KeyError.
env_file = os.environ["INTECOMM_ENV"]
reports_folder, analysis_folder = (
    Path(os.environ[variable])
    for variable in ("INTECOMM_REPORTS_FOLDER", "INTECOMM_ANALYSIS_FOLDER")
)
# activate() (from dj_notebook) receives the dotenv file path.
# NOTE(review): presumably this bootstraps the project environment — confirm.
plus = activate(dotenv_file=env_file)


In [None]:
"""
Assumptions
1. fasting must be >=8hrs
2. take latest measurement if more than one at endline at same timepoint
3. two records need units to be changed

"""

In [None]:
from intecomm_analytics.dataframes import get_df_main_1858
from tabulate import tabulate
from intecomm_analytics.dataframes import treatment_arm_labels as treatment_arm
from intecomm_rando.constants import FACILITY_ARM, COMMUNITY_ARM
from intecomm_analytics.notebooks.primary.table_utils import (
    get_formatted_rows_by_country,
    get_formatted_rows_yes_no,
    get_cells_for_continuous_var,
)

In [None]:
# Sentences appended by later cells and printed together as a summary.
narrative = []

# Boundaries for the first measurement.
# NOTE(review): units are presumably days relative to baseline — confirm.
baseline_lower_bound, baseline_upper_bound = -180, 31

# Boundary for the last measurement.
endline_lower_bound = 182

# Boundary for diagnosis (days, going by the name).
days_since_dx = 180

# Boundary for fasting (hours, going by the name).
fasting_hours = 8



In [None]:
# Fetch the main dataframe through get_df_main_1858 and work on a deep copy,
# so the frame returned by the helper stays untouched.
df_main_original = get_df_main_1858(None)
df_main = df_main_original.copy(deep=True)


In [None]:
# Number of rows per country value, shown as the cell output.
df_main["country"].value_counts()

In [None]:
# Rows flagged dm_scr == 1, reported against the full frame.
# NOTE: re-running this cell appends the same sentence to `narrative` again.
screened_dm = df_main[df_main["dm_scr"] == 1]
msg = f"There are {len(screened_dm)}/{len(df_main)} subjects with DM reported at screening"
narrative.append(msg)
print(msg)


In [None]:
# Rows with dm == 1, reported against the full frame.
# NOTE(review): the "diagnosed at least N days before baseline" wording relies on how
# `dm` was derived upstream; only `dm == 1` is checked here — confirm.
# NOTE: re-running this cell appends the same sentence to `narrative` again.
dm_confirmed = df_main[df_main["dm"] == 1]
msg = (
    f"There are {len(dm_confirmed)}/{len(df_main)} subjects with DM confirmed at baseline "
    f"and diagnosed at least {days_since_dx} days before baseline"
)
narrative.append(msg)
print(msg)


In [None]:
# HIV-negative rows (hiv == 0) among the rows with dm == 1.
# The sentence previously read "subjects with All confirmed", which does not describe
# the filter; the numerator is exactly the DM rows that are HIV-negative, so it now says "DM".
# NOTE(review): the "diagnosed at least N days before baseline" wording relies on how
# `dm` was derived upstream; only `dm == 1` is checked here — confirm.
# NOTE: re-running this cell appends the same sentence to `narrative` again.
dm_confirmed = df_main[df_main["dm"] == 1]
hiv_neg_dm = dm_confirmed[dm_confirmed["hiv"] == 0]
msg = (
    f"There are {len(hiv_neg_dm)}/{len(dm_confirmed)} HIV(-) subjects with DM confirmed at baseline "
    f"and diagnosed at least {days_since_dx} days before baseline"
)
narrative.append(msg)
print(msg)



In [None]:
# Rows with htn == 0 among the HIV-negative (hiv == 0) rows with dm == 1.
# NOTE: re-running this cell appends the same sentence to `narrative` again.
hiv_neg_dm = df_main[(df_main["dm"] == 1) & (df_main["hiv"] == 0)]
hiv_neg_dm_only = hiv_neg_dm[hiv_neg_dm["htn"] == 0]
msg = (
    f"There are {len(hiv_neg_dm_only)}/{len(hiv_neg_dm)} HIV(-) subjects with DM ONLY confirmed at baseline "
    f"and diagnosed at least {days_since_dx} days before baseline"
)
narrative.append(msg)
print(msg)


In [None]:
# Print every collected narrative sentence, one per line.
print(*narrative, sep="\n")

In [None]:
# Analysis cohort: HIV-negative rows (hiv == 0) with dm == 1, regardless of htn.
# The previous mask was `dm | (dm & htn)`, which reduces to `dm` by absorption, so the
# selected rows are unchanged; the redundant term only obscured the intent.
# NOTE(review): if rows with htn == 1 but dm != 1 were meant to be included, the
# original condition never selected them either — confirm the intended cohort.
# Boolean indexing returns a new frame and reset_index(drop=True) renumbers it 0..n-1.
df_dm_htn = df_main[(df_main.hiv == 0) & (df_main.dm == 1)].reset_index(drop=True)

In [None]:
# Count rows that have a first (baseline) glucose value, a last (endline) value, and both.
# The masks are computed up front so the f-strings contain no nested double quotes:
# reusing the enclosing quote character inside an f-string is a SyntaxError before Python 3.12.
has_first = df_dm_htn.glucose_value_baseline.notna()
has_last = df_dm_htn.glucose_value_endline.notna()
# .count() tallies non-null subject identifiers among the selected rows.
subject_ids = df_dm_htn["subject_identifier"]

print(f"{subject_ids[has_first].count()} first results")
print(f"{subject_ids[has_last].count()} last results")
print(f"{subject_ids[has_first & has_last].count()} first and last results")

In [None]:
# Write the cohort frame to the analysis folder, without the index column.
path = analysis_folder.joinpath("df_htn_dm_by_country.csv")
df_dm_htn.to_csv(path_or_buf=path, index=False)


In [None]:
# Split the cohort by hypertension flag. Rows whose htn is neither 0 nor 1
# (for example missing) land in neither frame.
dm_rows = df_dm_htn.dm == 1
df_dm_and_htn = df_dm_htn[dm_rows & (df_dm_htn.htn == 1)].copy()
df_dm_only = df_dm_htn[dm_rows & (df_dm_htn.htn == 0)].copy()


In [None]:

# All: glucose values at baseline and endline for the whole cohort frame.
# The two label columns carry six entries; only the first row is labelled.
glucose_table = {
    "Condition": ["All"] + [""] * 5,
    "Parameter": ["Glucose level (mmol/L)"] + [""] * 5,
    **get_formatted_rows_by_country(
        df_dm_htn,
        "glucose_value_baseline",
        "glucose_value_endline",
    ),
}
table_dm_htn_df = pd.DataFrame(glucose_table)
table_dm_htn_df


In [None]:
# DM and HTN: glucose values at baseline and endline for the df_dm_and_htn frame.
# The two label columns carry six entries; only the first row is labelled.
glucose_table = {
    "Condition": ["DM and HTN"] + [""] * 5,
    "Parameter": ["Glucose level (mmol/L)"] + [""] * 5,
    **get_formatted_rows_by_country(
        df_dm_and_htn,
        "glucose_value_baseline",
        "glucose_value_endline",
    ),
}
table_dm_and_htn_df = pd.DataFrame(glucose_table)
table_dm_and_htn_df


In [None]:
# DM only: glucose values at baseline and endline for the df_dm_only frame.
# The two label columns carry six entries; only the first row is labelled.
glucose_table = {
    "Condition": ["DM only"] + [""] * 5,
    "Parameter": ["Glucose level (mmol/L)"] + [""] * 5,
    **get_formatted_rows_by_country(
        df_dm_only,
        "glucose_value_baseline",
        "glucose_value_endline",
    ),
}
table_dm_only_df = pd.DataFrame(glucose_table)
table_dm_only_df


In [None]:
# Glucose resulted — All.
# First frame: every cohort row. Second frame: rows with onstudy_days >= 182.
# NOTE(review): 182 equals endline_lower_bound from the config cell; presumably the
# same threshold — confirm before replacing the literal with the constant.
frame_for_baseline = df_dm_htn
frame_for_endline = df_dm_htn[df_dm_htn["onstudy_days"] >= 182]

glucose_table = {
    "Condition": ["All"] + [""] * 5,
    "Parameter": ["Glucose measured"] + [""] * 5,
    **get_formatted_rows_yes_no(
        frame_for_baseline,
        frame_for_endline,
        "glucose_resulted_baseline",
        "glucose_resulted_endline",
        missing=False,
    ),
}
table_dm_htn_available_df = pd.DataFrame(glucose_table)
table_dm_htn_available_df


In [None]:
# Glucose resulted — DM and HTN.
# First frame: every df_dm_and_htn row. Second frame: rows with onstudy_days >= 182.
# NOTE(review): 182 equals endline_lower_bound from the config cell; presumably the
# same threshold — confirm before replacing the literal with the constant.
frame_for_baseline = df_dm_and_htn
frame_for_endline = df_dm_and_htn[df_dm_and_htn["onstudy_days"] >= 182]

glucose_table = {
    "Condition": ["DM and HTN"] + [""] * 5,
    "Parameter": ["Glucose measured"] + [""] * 5,
    **get_formatted_rows_yes_no(
        frame_for_baseline,
        frame_for_endline,
        "glucose_resulted_baseline",
        "glucose_resulted_endline",
        missing=False,
    ),
}
table_dm_and_htn_available_df = pd.DataFrame(glucose_table)
table_dm_and_htn_available_df


In [None]:
# Glucose resulted — DM only.
# First frame: every df_dm_only row. Second frame: rows with onstudy_days >= 182.
# NOTE(review): 182 equals endline_lower_bound from the config cell; presumably the
# same threshold — confirm before replacing the literal with the constant.
frame_for_baseline = df_dm_only
frame_for_endline = df_dm_only[df_dm_only["onstudy_days"] >= 182]

glucose_table = {
    "Condition": ["DM only"] + [""] * 5,
    "Parameter": ["Glucose measured"] + [""] * 5,
    **get_formatted_rows_yes_no(
        frame_for_baseline,
        frame_for_endline,
        "glucose_resulted_baseline",
        "glucose_resulted_endline",
        missing=False,
    ),
}
table_dm_only_available_df = pd.DataFrame(glucose_table)
table_dm_only_available_df


In [None]:
# Glucose < 7 mmol/L — All.
# First frame: rows with a baseline glucose value.
# Second frame: rows with an endline glucose value and onstudy_days >= 182.
# NOTE(review): 182 equals endline_lower_bound from the config cell; presumably the
# same threshold — confirm before replacing the literal with the constant.
frame_for_baseline = df_dm_htn[df_dm_htn["glucose_value_baseline"].notna()]
frame_for_endline = df_dm_htn[
    df_dm_htn["glucose_value_endline"].notna() & (df_dm_htn["onstudy_days"] >= 182)
]

glucose_table = {
    "Condition": ["All"] + [""] * 5,
    "Parameter": ["Glucose < 7 mmol/L"] + [""] * 5,
    **get_formatted_rows_yes_no(
        frame_for_baseline,
        frame_for_endline,
        "glucose_controlled_baseline",
        "glucose_controlled_endline",
        missing=False,
    ),
}
table_dm_htn_controlled_df = pd.DataFrame(glucose_table)
table_dm_htn_controlled_df


In [None]:
# Glucose < 7 mmol/L — DM and HTN.
# First frame: rows with a baseline glucose value.
# Second frame: rows with an endline glucose value and onstudy_days >= 182.
# NOTE(review): 182 equals endline_lower_bound from the config cell; presumably the
# same threshold — confirm before replacing the literal with the constant.
frame_for_baseline = df_dm_and_htn[df_dm_and_htn["glucose_value_baseline"].notna()]
frame_for_endline = df_dm_and_htn[
    df_dm_and_htn["glucose_value_endline"].notna() & (df_dm_and_htn["onstudy_days"] >= 182)
]

glucose_table = {
    "Condition": ["DM and HTN"] + [""] * 5,
    "Parameter": ["Glucose < 7 mmol/L"] + [""] * 5,
    **get_formatted_rows_yes_no(
        frame_for_baseline,
        frame_for_endline,
        "glucose_controlled_baseline",
        "glucose_controlled_endline",
        missing=False,
    ),
}
table_dm_and_htn_controlled_df = pd.DataFrame(glucose_table)
table_dm_and_htn_controlled_df


In [None]:


# Glucose < 7 mmol/L — DM only.
# First frame: rows with a baseline glucose value.
# Second frame: rows with an endline glucose value and onstudy_days >= 182.
# NOTE(review): 182 equals endline_lower_bound from the config cell; presumably the
# same threshold — confirm before replacing the literal with the constant.
frame_for_baseline = df_dm_only[df_dm_only["glucose_value_baseline"].notna()]
frame_for_endline = df_dm_only[
    df_dm_only["glucose_value_endline"].notna() & (df_dm_only["onstudy_days"] >= 182)
]

glucose_table = {
    "Condition": ["DM only"] + [""] * 5,
    "Parameter": ["Glucose < 7 mmol/L"] + [""] * 5,
    **get_formatted_rows_yes_no(
        frame_for_baseline,
        frame_for_endline,
        "glucose_controlled_baseline",
        "glucose_controlled_endline",
        missing=False,
    ),
}
table_dm_only_controlled_df = pd.DataFrame(glucose_table)
table_dm_only_controlled_df


In [None]:
# Fasting duration (hours) at baseline and endline — whole cohort frame.
# The table is built but not displayed in this cell.
glucose_table = {
    "Condition": ["ALL"] + [""] * 5,
    "Parameter": ["Fasted (hrs)"] + [""] * 5,
    **get_formatted_rows_by_country(
        df_dm_htn,
        col_baseline="glucose_fasting_duration_hours_baseline",
        col_endline="glucose_fasting_duration_hours_endline",
    ),
}
table_fast_all_df = pd.DataFrame(glucose_table)

In [None]:
# Days at which glucose was measured, baseline and endline columns — whole cohort frame.
# The table is built but not displayed in this cell.
glucose_table = {
    "Condition": ["ALL"] + [""] * 5,
    "Parameter": ["Mean days measured from baseline"] + [""] * 5,
    **get_formatted_rows_by_country(
        df_dm_htn,
        col_baseline="glucose_measured_days_baseline",
        col_endline="glucose_measured_days_endline",
    ),
}
table_days_measured_from_baseline_df = pd.DataFrame(glucose_table)


In [None]:
# ALL: summary of glucose_first_to_last_days per arm within UG, within TZ, per arm
# across both countries, and for the whole cohort frame.
# This replaces six copy-pasted describe() lines whose `baseline_*` names were
# misleading (the column is the first-to-last interval, not a baseline value).
# Column order and labels are the same as before.
days_between_col = "glucose_first_to_last_days"

df_ug = df_dm_htn[df_dm_htn.country == "UG"].copy()
df_tz = df_dm_htn[df_dm_htn.country == "TZ"].copy()

# Label columns: three rows, matching the three statistics listed under 'Statistics'.
# NOTE(review): "measurments" is misspelled in the emitted label; it is left unchanged
# here because the string is written to the exported table.
glucose_table = {
    'Condition': ['ALL', '', ''],
    'Parameter': ['Mean days between measurments', '', ''],
    'Timepoint': ['', '', ''],
    'Statistics': ['n', 'Mean(sd)', 'Median(min-max)'],
}

# One column per (arm, scope); community-arm columns come first, then facility-arm.
for arm in (COMMUNITY_ARM, FACILITY_ARM):
    for scope_label, scope_df in (("UG", df_ug), ("TZ", df_tz), ("BOTH", df_dm_htn)):
        arm_days = scope_df[scope_df["assignment"] == arm][days_between_col]
        glucose_table[f"{treatment_arm[arm]} {scope_label}"] = [
            *get_cells_for_continuous_var(arm_days.describe()),
        ]

# Final column: both arms and both countries together.
glucose_table['All'] = [
    *get_cells_for_continuous_var(df_dm_htn[days_between_col].describe()),
]

table_mean_days_between_measurements_df = pd.DataFrame(glucose_table)


In [None]:
# Stack every section table into one report frame; rows follow the list order
# and the index is renumbered.
report_sections = [
    table_dm_htn_available_df,
    table_dm_htn_df,
    table_dm_htn_controlled_df,
    table_dm_and_htn_available_df,
    table_dm_and_htn_df,
    table_dm_and_htn_controlled_df,
    table_dm_only_available_df,
    table_dm_only_df,
    table_dm_only_controlled_df,
    table_fast_all_df,
    table_days_measured_from_baseline_df,
    table_mean_days_between_measurements_df,
]
table_df = pd.concat(report_sections, ignore_index=True)

# Plain-text grid rendering of the same frame, using the column names as headers.
table = tabulate(table_df, headers="keys", tablefmt="grid")


In [None]:
# Write the combined report table to the analysis folder, without the index column.
path = analysis_folder.joinpath('glucose_by_country.csv')
table_df.to_csv(path, index=False)


In [None]:
# Write the grid-formatted text table to the analysis folder.
path = analysis_folder / 'glucose_by_country.txt'
# The encoding is pinned so the file's bytes do not depend on the platform's locale
# default (open() without `encoding` uses the locale's preferred encoding).
with open(path, 'w', encoding='utf-8') as file:
    file.write(table)

In [None]:
# Print every collected narrative sentence, one per line.
print(*narrative, sep="\n")

In [None]:
# Long-format frame: one row per subject per timepoint, with a boolean
# glucose_controlled column and a "time" label.
# NOTE(review): `== 1.0` evaluates to False for missing values, so rows without a
# glucose_controlled value become False rather than missing — confirm this is intended.
id_cols = ["subject_identifier", "assignment"]

df_baseline = (
    df_dm_htn[[*id_cols, "glucose_controlled_baseline"]]
    .rename(columns={"glucose_controlled_baseline": "glucose_controlled"})
    .assign(
        glucose_controlled=lambda frame: frame["glucose_controlled"] == 1.0,
        time="baseline",
    )
)
df_endline = (
    df_dm_htn[[*id_cols, "glucose_controlled_endline"]]
    .rename(columns={"glucose_controlled_endline": "glucose_controlled"})
    .assign(
        glucose_controlled=lambda frame: frame["glucose_controlled"] == 1.0,
        time="endline",
    )
)

# Baseline rows first, then endline rows, with a fresh 0..n-1 index.
df_glu_gee = pd.concat([df_baseline, df_endline], ignore_index=True)


In [None]:
# Write the long-format frame to the analysis folder, without the index column.
path = analysis_folder.joinpath('df_glu_primary_by_country.csv')
df_glu_gee.to_csv(path_or_buf=path, index=False)


In [None]:
# Display the combined report table as the cell's output.
table_df