In [None]:
# Work 27: Cohort Summaries of Combined BMI Data: Function Creation and Analysis:
# [W27.BMI.6.Function_2Compute_Combine_BMI_Sum.ipynb]

# "This notebook loads BMI data, creates a function to compute cohort summaries, and prints statistical
#  summaries for combined BMI values."

########################################################################################################
#  Sequence list
########################################################################################################

# 1: Load the combined data
# 2: Print column names for debugging
# 3: Check if 'BMI_calculated' and 'BMI_original' columns exist
# 4: Create 'bmi_c' column: use 'BMI_calculated' where present, otherwise fall back to 'BMI_original'
# 5: Calculate 'mean_bmi' for each patient
# 6: For each patient, get the latest row (and its 'bmi_c' value) based on 'kirjaus_pvm'
# 7: Function to compute cohort summaries
# 8: Compute summaries for mean_bmi
# 9: Compute summaries for latest_bmi
# 10: Print the summaries

########################################################################################################
########################################################################################################

import pandas as pd

# 1: Load the combined data
# NOTE(review): this absolute path is tied to one environment; adjust it if the file lives elsewhere.
combined_path = "/home/work/BMI_combined.csv"
combined_data = pd.read_csv(combined_path, sep="|")

print("1: Loaded the combined data")

# 2: Print all column names for debugging
print("2: Column names in the combined data:")
print(combined_data.columns)

# 3: Check that every column used by the steps below exists, so a missing
#    column is reported once, up front, instead of failing halfway through.
required_columns = ["BMI_calculated", "BMI_original", "Potilas_ID", "kirjaus_pvm"]
missing_columns = [name for name in required_columns if name not in combined_data.columns]
if missing_columns:
    print(f"Error: required column(s) missing: {missing_columns}")
    print("Available columns:", combined_data.columns)
    raise KeyError(f"Required column(s) missing: {missing_columns}")

print("3: Verified that the required columns exist")

# 4: Create 'bmi_c': take 'BMI_calculated' where it is present and fall back
#    to 'BMI_original' where it is missing. This is computed exactly once.
combined_data["bmi_c"] = combined_data["BMI_calculated"].combine_first(
    combined_data["BMI_original"]
)

print("4: Created the 'bmi_c' column from 'BMI_calculated', falling back to 'BMI_original'")

# 5: Calculate 'mean_bmi' for each patient (missing 'bmi_c' values are skipped by mean()).
mean_bmi = combined_data.groupby("Potilas_ID")["bmi_c"].mean().reset_index()

print("5: Calculated the 'mean_bmi' for each patient")

# 6: For 'bmi_c', get the latest BMI value based on 'kirjaus_pvm'.
#    - Rows without a BMI are dropped first; otherwise a patient whose newest
#      row has no BMI would contribute NaN and vanish from the summary.
#    - na_position="first" keeps rows with a missing 'kirjaus_pvm' from being
#      sorted to the end and mistaken for the latest record.
#    - mergesort is stable, so ties on 'kirjaus_pvm' keep file order and the
#      result is reproducible.
# NOTE(review): 'kirjaus_pvm' is not parsed as a date on load; if it is stored
# as text the ordering is lexicographic, which is only chronological for
# formats such as YYYY-MM-DD — confirm the format of this column.
latest_bmi = (
    combined_data.dropna(subset=["bmi_c"])
    .sort_values("kirjaus_pvm", kind="mergesort", na_position="first")
    .groupby("Potilas_ID")
    .tail(1)
)

print("6: For 'bmi_c', the latest BMI value based on 'kirjaus_pvm' has been captured")


# 7: Function to compute cohort summaries
def compute_cohort_summaries(data, column):
    """Summarise one column of ``data`` with basic descriptive statistics.

    Args:
        data: Object indexable by ``column`` whose result provides ``min``,
            ``max``, ``mean``, ``std`` and ``count`` methods (in this file,
            a pandas DataFrame).
        column: Label of the column to summarise.

    Returns:
        dict: Keys ``"min"``, ``"max"``, ``"mean"``, ``"std"`` and ``"count"``
        (in that order), each holding the result of calling the same-named
        method on ``data[column]``.
    """
    values = data[column]
    statistic_names = ("min", "max", "mean", "std", "count")
    # Each statistic is looked up by name so the key and the method stay in sync.
    return {name: getattr(values, name)() for name in statistic_names}


print("7: Created function to compute cohort summaries")

# 8: Compute summaries for mean_bmi (the per-patient mean of 'bmi_c')
mean_bmi_summary = compute_cohort_summaries(mean_bmi, "bmi_c")

print("8: Computed summaries for 'mean_bmi'")

# 9: Compute summaries for latest_bmi (the 'bmi_c' of each patient's latest row)
latest_bmi_summary = compute_cohort_summaries(latest_bmi, "bmi_c")

print("9: Computed summaries for 'latest_bmi'")

# 10: Print the summaries.
# Both reports share one layout, so they are emitted from a single loop
# rather than two copy-pasted groups of print calls.
for heading, summary in (
    ("10a: Cohort summaries for mean BMI (mean_bmi):", mean_bmi_summary),
    ("10b: Cohort summaries for latest BMI (bmi_c):", latest_bmi_summary),
):
    print(heading)
    print(f"Min BMI: {summary['min']:.2f}")
    print(f"Max BMI: {summary['max']:.2f}")
    print(f"Mean BMI: {summary['mean']:.2f}")
    print(f"Standard Deviation BMI: {summary['std']:.2f}")
    print(f"Count of BMI records: {summary['count']}")