In [None]:
# Auto-reload frequently changed files
%load_ext autoreload
%autoreload 2
%aimport utils

import pandas as pd
import numpy as np
import altair as alt
from ipywidgets import interact
from os.path import join

from constants import COLUMNS
from utils import (
    read_latest_daily_counts_df, clean_daily_counts_df,
    read_latest_demographics_df, clean_demographics_df, 
    read_latest_diagnoses_df,
    read_latest_labs_df,
    read_loinc_df, read_icd_df,
    apply_theme
)

# Dataset

### Demographics

In [None]:
# Load the most recent demographics file and pass it through the shared cleaner.
dem_df = clean_demographics_df(read_latest_demographics_df())

# Site IDs present in the cleaned frame (captured before the row filter below).
DEM_SITE_IDS = dem_df[COLUMNS.SITE_ID].unique().tolist()

# Short aliases for the column names used in this section
siteid, sex = COLUMNS.SITE_ID, COLUMNS.SEX
total_patients = COLUMNS.TOTAL_PATIENTS
age_group, num_patients = COLUMNS.AGE_GROUP, COLUMNS.NUM_PATIENTS

# Drop the aggregated rows (sex == "All") and the total-patients column
not_all_filter = dem_df[sex] != "All"
dem_df = dem_df.loc[not_all_filter].drop(columns=[total_patients])

# Wide to long: every remaining non-id column becomes one (age_group, num_patients) row
dem_df = (
    dem_df
    .melt(id_vars=[siteid, sex])
    .rename(columns={"variable": age_group, "value": num_patients})
)

dem_df.head()

### DailyCounts

In [None]:
# Load the most recent daily-counts file and pass it through the shared cleaner.
dai_df = clean_daily_counts_df(read_latest_daily_counts_df())
DAI_SITE_IDS = dai_df[COLUMNS.SITE_ID].unique().tolist()

# Column names used in this section
siteid, date = COLUMNS.SITE_ID, COLUMNS.DATE
new_positive_cases = "New positive cases"
patients_in_icu = "Patients in ICU"
new_deaths = "New deaths"
category = "category"
num_patients = COLUMNS.NUM_PATIENTS

# Convert uncertain data (negative counts) to zero, one count column at a time
for _count_col in (new_positive_cases, patients_in_icu, new_deaths):
    dai_df.loc[dai_df[_count_col] < 0, _count_col] = 0

# Wide to long: one row per (site, date, category)
dai_df = (
    dai_df
    .melt(id_vars=[siteid, date])
    .rename(columns={"variable": category, "value": num_patients})
)

dai_df

### Lab

In [None]:
# Load the most recent labs file and record which sites it covers.
lab_df = read_latest_labs_df()
LAB_SITE_IDS = lab_df[COLUMNS.SITE_ID].unique().tolist()

# LOINC lookup table, indexed by code, with the test-name column renamed to "name".
_loinc_df = read_loinc_df().set_index('loinc').rename(columns={'labTest': 'name'})

# Code -> capitalized test name. Keep only the first entry per code so the lookup
# stays unambiguous even if the LOINC table lists a code more than once.
_loinc_names = _loinc_df.loc[~_loinc_df.index.duplicated(keep="first"), "name"].str.capitalize()

# Attach a human-readable name to every lab row. Codes that are missing from the
# lookup table (or whose name is empty) fall back to the raw LOINC code instead of
# raising, mirroring how unmapped ICD codes are handled in the diagnoses section.
lab_df["loinc_name"] = (
    lab_df[COLUMNS.LOINC]
    .map(_loinc_names)
    .fillna(lab_df[COLUMNS.LOINC])
)

# Convert uncertain data (negative counts) to zero
lab_df.loc[lab_df["num_patients"] < 0, "num_patients"] = 0

lab_df

### Diagnoses

In [None]:
# Load the most recent diagnoses file and record which sites it covers.
dia_df = read_latest_diagnoses_df()
SITE_IDS = dia_df[COLUMNS.SITE_ID].unique().tolist()

# Column names used in this section
siteid, icd_code = COLUMNS.SITE_ID, COLUMNS.ICD_CODE
icd_version, num_patients = COLUMNS.ICD_VERSION, COLUMNS.NUM_PATIENTS

# Strip the dots from the ICD codes before joining against the ICD lookup table
dia_df[icd_code] = dia_df[icd_code].map(lambda code: code.replace(".", ""))

# Left join keeps every diagnosis row, including codes absent from the lookup table
icd_df = read_icd_df()
dia_df = pd.merge(dia_df, icd_df, how="left", left_on='icd_code', right_on='ICDcode')

# For each label column: fall back to the ICD code where the lookup had no entry,
# then apply consistent capitalization.
for _label_col in ("ICDdescription", "Category"):
    dia_df[_label_col] = (
        dia_df[_label_col]
        .fillna(dia_df["icd_code"])
        .map(lambda text: text.capitalize())
    )

dia_df