# Generate Overview of Parameter-Specific Chunking by ICUSTAY_ID

## Load Data

In [None]:
import pandas as pd
import pyarrow as pa

# Load the chunk-annotated chartevents subset from its parquet file into a
# pandas data frame and expose the CHUNK_ID_FILLED column under the shorter
# name CHUNK_ID, which the rest of the analysis works with.
chunk_analysis_data = pd.read_parquet(
    './data/chartevent_subset_values_with_chunkid_65.parquet',
    engine='pyarrow',
).rename(columns={"CHUNK_ID_FILLED": "CHUNK_ID"})

## Analyze Number of CHUNK_IDs per ICUSTAY_ID

In [None]:
# Count the distinct CHUNK_IDs belonging to each ICUSTAY_ID:
# keep only the two relevant columns, collapse repeated
# (ICUSTAY_ID, CHUNK_ID) pairs, then count the remaining rows per stay.
chunk_count_by_icustay = (
    chunk_analysis_data[['ICUSTAY_ID', 'CHUNK_ID']]
    .drop_duplicates()
    .groupby(['ICUSTAY_ID'])
    .count()
    .rename(columns={'CHUNK_ID': 'CHUNK_ID_COUNT'})
    .reset_index()  # turn ICUSTAY_ID back into a regular column
)
display(chunk_count_by_icustay)

In [None]:
# Descriptive statistics of the number of chunks per ICUSTAY_ID.
# Only CHUNK_ID_COUNT is summarized: ICUSTAY_ID is an identifier, so its
# mean/quartiles carry no meaning and would only clutter the output.
# On average, we have 13 chunks per ICUSTAY_ID, 9 chunks when looking at the median
chunk_count_by_icustay[['CHUNK_ID_COUNT']].describe()


## Analyze Number of CHUNK_IDs per ICUSTAY_ID, Stratified by ITEMID

In [None]:
# Count the distinct CHUNK_IDs for every (ICUSTAY_ID, ITEMID) combination:
# keep the three relevant columns, collapse repeated rows, then count the
# remaining CHUNK_IDs within each stay/item group.
chunk_count_by_icustay_itemid = (
    chunk_analysis_data[['ICUSTAY_ID', 'ITEMID', 'CHUNK_ID']]
    .drop_duplicates()
    .groupby(['ICUSTAY_ID', 'ITEMID'])
    .count()
    .rename(columns={'CHUNK_ID': 'CHUNK_ID_COUNT'})
    .reset_index()  # turn the group keys back into regular columns
)
display(chunk_count_by_icustay_itemid)

In [None]:
# Summary statistics of CHUNK_ID_COUNT, computed separately for each ITEMID
chunk_count_by_icustay_itemid_describe = (
    chunk_count_by_icustay_itemid
    .groupby(['ITEMID'])['CHUNK_ID_COUNT']
    .describe()
)
# Finding: most chunks refer to NBP
chunk_count_by_icustay_itemid_describe

In [None]:
# Visualize the findings above as horizontal boxplots: one box per ITEMID,
# showing the distribution of the chunk count per ICUSTAY on the x-axis.

# Imports
import seaborn as sns
import matplotlib.pyplot as plt

# Convert ITEMID to string - presumably so that seaborn treats it as a
# categorical axis rather than a numeric one.
# NOTE: this changes the ITEMID column of chunk_count_by_icustay_itemid in place.
chunk_count_by_icustay_itemid['ITEMID'] = chunk_count_by_icustay_itemid['ITEMID'].astype(str)

# Set variables
title = "Chunk Analysis by ICUSTAY and ITEMID"
xlabel = "Count of Chunks per ICUSTAY"
ylabel = "ITEMID"
plotdata = chunk_count_by_icustay_itemid
xvalue = "CHUNK_ID_COUNT"
yvalue = "ITEMID"

# Config figure
sns.set_style("whitegrid")
fig, ax = plt.subplots(
    figsize=(10, 5),
    dpi=72,
)
sns.boxplot(
    data=plotdata,
    x=xvalue,
    y=yvalue,
    palette=sns.color_palette("colorblind"),
    ax=ax,  # draw explicitly on the axes created above
)
ax.set_title(title, fontweight='bold', color='black', fontsize=14, y=1.05)
ax.set_xlabel(xlabel, fontsize=12, labelpad=15)
ax.set_ylabel(ylabel, fontsize=12, labelpad=15)
# Pass the on/off flag positionally: the former keyword `b` was renamed to
# `visible` in Matplotlib, so the positional form works with old and new versions.
ax.grid(True, which='both')
ax.margins(.1)

# Plot figure
# plt.show() renders all open figures; it does not take a figure argument.
plt.show()