In [None]:
%%capture
import os
import pandas as pd
import numpy as np
from dj_notebook import activate
from pathlib import Path

# Environment-driven configuration: both variables read below must be set in the
# process environment, otherwise os.environ[...] raises KeyError.
env_file = os.environ["INTECOMM_ENV"]
analysis_folder = Path(os.environ["INTECOMM_ANALYSIS_FOLDER"])
# NOTE(review): reports_folder reads the same variable as analysis_folder, so the
# report tables and the analysis datasets land in the same directory -- confirm
# this is intended and not a copy-paste of the line above.
reports_folder = Path(os.environ["INTECOMM_ANALYSIS_FOLDER"])
# activate() comes from dj_notebook; presumably it bootstraps the Django project
# from the dotenv file so project modules can be imported below -- TODO confirm.
plus = activate(dotenv_file=env_file)


In [None]:
from intecomm_analytics.dataframes import get_df_main_1858
from tabulate import tabulate
from edc_constants.constants import NO, YES
from intecomm_analytics.notebooks.primary.table_utils import (
    get_formatted_rows_by_country,
    get_formatted_rows_yes_no,
)



In [None]:
# Load the main analysis dataframe; every subset built below is derived from it.
# NOTE(review): the meaning of the `None` argument is not visible in this notebook.
df_main = get_df_main_1858(None)

In [None]:
# Analysis population: HTN only and HTN+DM --- excludes participants with HIV and participants with DM only
# htn_and_dm_cond = (((df_all.htn==1) & (df_all.dm==1)) | ((df_all.htn==1) & (df_all.dm==0))) & (df_all.hiv==0)

In [None]:
def get_yes_no_na(value):
    """Translate a 0/1 indicator into the YES / NO constants.

    Returns YES when ``value == 1``, NO when ``value == 0`` and ``np.nan`` for
    anything else (missing values included).
    """
    if value == 1:
        return YES
    return NO if value == 0 else np.nan


# Give each 0/1 blood-pressure indicator a YES / NO / NaN companion column named
# "<indicator>_str", used when printing the tables.
for indicator in (
    "bp_controlled_baseline",
    "bp_controlled_endline",
    "bp_severe_htn_baseline",
    "bp_severe_htn_endline",
):
    df_main[f"{indicator}_str"] = df_main[indicator].apply(get_yes_no_na)

# Columns carried from df_main into every analysis subset.
columns = [
    "subject_identifier",
    "bp_sys_baseline",
    "bp_dia_baseline",
    "bp_sys_endline",
    "bp_dia_endline",
    "bp_controlled_baseline",
    "bp_controlled_baseline_str",
    "bp_controlled_endline",
    "bp_controlled_endline_str",
    "bp_severe_htn_baseline",
    "bp_severe_htn_baseline_str",
    "bp_severe_htn_endline",
    "bp_severe_htn_endline_str",
    "ncd",
    "hiv",
    "dm",
    "htn",
    "assignment",
    "country",
    "onstudy_days",
]

In [None]:
# Combined population: htn == 1 with dm recorded as 0 or 1, and hiv == 0
# (i.e. "HTN only" together with "HTN and DM").
has_htn = df_main.htn == 1
dm_is_0_or_1 = (df_main.dm == 1) | (df_main.dm == 0)
no_hiv = df_main.hiv == 0
df_all = df_main[has_htn & dm_is_0_or_1 & no_hiv][columns].copy()
df_all = df_all.reset_index(drop=True)

In [None]:
# Subset with both conditions: htn == 1 and dm == 1, and hiv == 0.
htn_dm_mask = (df_main.htn == 1) & (df_main.dm == 1) & (df_main.hiv == 0)
df_htn_dm = df_main.loc[htn_dm_mask, columns].copy()
df_htn_dm = df_htn_dm.reset_index(drop=True)
df_htn_dm

In [None]:
# Subset with hypertension only: htn == 1, dm == 0 and hiv == 0.
htn_only_mask = (df_main.htn == 1) & (df_main.dm == 0) & (df_main.hiv == 0)
df_htn_only = df_main.loc[htn_only_mask, columns].copy()
df_htn_only = df_htn_only.reset_index(drop=True)
df_htn_only

In [None]:
# Sanity check on the combined population: counts of dm (rows) by htn (columns).
pd.crosstab(index=df_all["dm"], columns=df_all["htn"])

In [None]:
# Build the systolic / diastolic section of the report at baseline and endline.
# One table is produced per (condition, measurement) pair:
#   a. all HTN-only and HTN+DM subjects
#   b. HTN+DM subjects
#   c. HTN-only subjects


def _bp_by_country_section(condition, parameter, df, baseline_col, endline_col, n_rows=6):
    """Return one report section as a DataFrame.

    Parameters
    ----------
    condition : str
        Label for the 'Condition' column (shown on the first row only).
    parameter : str
        Label for the 'Parameter' column (shown on the first row only).
    df : pandas.DataFrame
        Subset of participants passed to ``get_formatted_rows_by_country``.
    baseline_col, endline_col : str
        Names of the baseline and endline measurement columns.
    n_rows : int
        Number of rows in the section. Must match the length of the columns
        returned by ``get_formatted_rows_by_country`` (6 in this report),
        otherwise ``pd.DataFrame`` raises ``ValueError``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Condition', 'Parameter', followed by whatever columns
        ``get_formatted_rows_by_country`` returns.
    """
    # Only the first row carries the labels; the remaining rows are left blank.
    padding = [''] * (n_rows - 1)
    return pd.DataFrame({
        'Condition': [condition, *padding],
        'Parameter': [parameter, *padding],
        **get_formatted_rows_by_country(df, baseline_col, endline_col),
    })


SYS_LABEL = 'Blood pressure: systolic (mmHg)'
DIA_LABEL = 'Blood pressure: diastolic (mmHg)'

# a. all HTN only and HTN+DM subjects
sys_df = _bp_by_country_section('All', SYS_LABEL, df_all, "bp_sys_baseline", "bp_sys_endline")
dia_df = _bp_by_country_section('All', DIA_LABEL, df_all, "bp_dia_baseline", "bp_dia_endline")

# b. htn+dm subjects
sys_htn_dm_df = _bp_by_country_section(
    'HTN and DM', SYS_LABEL, df_htn_dm, "bp_sys_baseline", "bp_sys_endline"
)
dia_htn_dm_df = _bp_by_country_section(
    'HTN and DM', DIA_LABEL, df_htn_dm, "bp_dia_baseline", "bp_dia_endline"
)

# c. htn only subjects
sys_htn_only_df = _bp_by_country_section(
    'HTN only', SYS_LABEL, df_htn_only, "bp_sys_baseline", "bp_sys_endline"
)
dia_htn_only_df = _bp_by_country_section(
    'HTN only', DIA_LABEL, df_htn_only, "bp_dia_baseline", "bp_dia_endline"
)


In [None]:
# The systolic / diastolic section can be printed / exported on its own.
# Stack the six section tables in report order.
summary_df = pd.concat(
    [sys_df, dia_df, sys_htn_dm_df, dia_htn_dm_df, sys_htn_only_df, dia_htn_only_df],
    ignore_index=True,
)

# Render as a plain-text grid table.
summary_tab = tabulate(summary_df, headers='keys', tablefmt='grid')

# Export as CSV (DataFrame.to_csv writes UTF-8 unless told otherwise).
path = reports_folder / 'bp_summary_sys_dia.csv'
summary_df.to_csv(path_or_buf=path, index=False)

# Write the text table. The encoding is given explicitly so the .txt file matches
# the CSV instead of depending on the platform's locale encoding.
path = reports_folder / 'bp_summary_sys_dia.txt'
with open(path, 'w', encoding='utf-8') as file:
    file.write(summary_tab)


In [None]:
# Build the table section on BP control.
# Three subsections (All, HTN and DM, HTN only); each reports participants with
# blood pressure <140/90 and participants with blood pressure >=180/120.
#
# The YES/NO "*_str" columns used here were created on df_main and are listed in
# `columns`, so df_all, df_htn_dm and df_htn_only already carry them. They are
# therefore not recomputed for df_all alone.

# Minimum days on study for a participant to be included in the second frame
# handed to get_formatted_rows_yes_no.
# NOTE(review): 182 days is presumably ~6 months of follow-up -- confirm.
MIN_ONSTUDY_DAYS = 182

CONTROLLED_LABEL = 'Participants with blood pressure <140/90 mm Hg'
SEVERE_HTN_LABEL = 'Participants with blood pressure >=180/120 mm Hg'


def _bp_yes_no_section(condition, parameter, df, baseline_col, endline_col, n_rows=6):
    """Return one YES/NO report section as a DataFrame.

    Parameters
    ----------
    condition : str
        Label for the 'Condition' column (shown on the first row only).
    parameter : str
        Label for the 'Parameter' column (shown on the first row only).
    df : pandas.DataFrame
        Subset of participants; must contain ``onstudy_days`` and both
        indicator columns.
    baseline_col, endline_col : str
        Names of the YES/NO baseline and endline columns.
    n_rows : int
        Number of rows in the section. Must match the length of the columns
        returned by ``get_formatted_rows_yes_no`` (6 in this report),
        otherwise ``pd.DataFrame`` raises ``ValueError``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Condition', 'Parameter', followed by whatever columns
        ``get_formatted_rows_yes_no`` returns.
    """
    # Second frame: only participants on study for at least MIN_ONSTUDY_DAYS
    # (presumably the population used for the endline figures -- TODO confirm).
    df_min_followup = df[df.onstudy_days >= MIN_ONSTUDY_DAYS]
    # Only the first row carries the labels; the remaining rows are left blank.
    padding = [''] * (n_rows - 1)
    return pd.DataFrame({
        'Condition': [condition, *padding],
        'Parameter': [parameter, *padding],
        **get_formatted_rows_yes_no(df, df_min_followup, baseline_col, endline_col),
    })


# a. All controlled / severe
all_controlled_df = _bp_yes_no_section(
    'All', CONTROLLED_LABEL, df_all,
    "bp_controlled_baseline_str", "bp_controlled_endline_str",
)
all_severe_htn_df = _bp_yes_no_section(
    'All', SEVERE_HTN_LABEL, df_all,
    "bp_severe_htn_baseline_str", "bp_severe_htn_endline_str",
)

# b. HTN and DM controlled / severe
htn_dm_controlled_df = _bp_yes_no_section(
    'HTN and DM', CONTROLLED_LABEL, df_htn_dm,
    "bp_controlled_baseline_str", "bp_controlled_endline_str",
)
htn_dm_severe_htn_df = _bp_yes_no_section(
    'HTN and DM', SEVERE_HTN_LABEL, df_htn_dm,
    "bp_severe_htn_baseline_str", "bp_severe_htn_endline_str",
)

# c. HTN only controlled / severe
htn_only_controlled_df = _bp_yes_no_section(
    'HTN only', CONTROLLED_LABEL, df_htn_only,
    "bp_controlled_baseline_str", "bp_controlled_endline_str",
)
htn_only_severe_htn_df = _bp_yes_no_section(
    'HTN only', SEVERE_HTN_LABEL, df_htn_only,
    "bp_severe_htn_baseline_str", "bp_severe_htn_endline_str",
)



In [None]:
# Assemble the final report: for each condition (All, HTN and DM, HTN only) the
# systolic, diastolic, controlled and severe-HTN sections, in that order.
final_table_df = pd.concat([
    sys_df,
    dia_df,
    all_controlled_df,
    all_severe_htn_df,
    sys_htn_dm_df,
    dia_htn_dm_df,
    htn_dm_controlled_df,
    htn_dm_severe_htn_df,
    sys_htn_only_df,
    dia_htn_only_df,
    htn_only_controlled_df,
    htn_only_severe_htn_df
], ignore_index=True)
final_table = tabulate(final_table_df, headers="keys", tablefmt="grid")

# Write final_table to text. The encoding is given explicitly so the .txt file
# matches the CSV below (to_csv writes UTF-8 by default) instead of depending on
# the platform's locale encoding.
path = reports_folder / 'bp_by_country.txt'
with open(path, 'w', encoding='utf-8') as file:
    file.write(final_table)

# Write final_table_df to csv
path = reports_folder / 'bp_by_country.csv'
final_table_df.to_csv(path_or_buf=path, index=False)


In [None]:
# Reshape BP control into long format: one row per participant per time point,
# with columns subject_identifier, assignment, bp_controlled (bool) and time.
#
# NOTE(review): `== 1.0` turns missing values into False, so "not measured" is
#   indistinguishable from "not controlled" in the output -- confirm intended.
# NOTE(review): only df_htn_dm is used and no `country` column is kept, although
#   the output file is named "by_country" -- confirm intended.


def _bp_controlled_at(timepoint):
    """Return the df_htn_dm control indicator for ``timepoint`` in long format."""
    source_col = f"bp_controlled_{timepoint}"
    return (
        df_htn_dm[["subject_identifier", "assignment", source_col]]
        .reset_index(drop=True)
        .rename(columns={source_col: "bp_controlled"})
        .assign(
            bp_controlled=lambda frame: frame["bp_controlled"].eq(1.0),
            time=timepoint,
        )
    )


df_baseline = _bp_controlled_at("baseline")
df_endline = _bp_controlled_at("endline")

df_bp = pd.concat([df_baseline, df_endline], ignore_index=True)


In [None]:
# Persist the long-format BP-control dataset in the analysis folder.
path = analysis_folder.joinpath('df_bp_by_country.csv')
df_bp.to_csv(path_or_buf=path, index=False)