# Resample cleaned chartevents chunks

This script resamples the CHARTEVENTS data that was cleaned and chunked in previous steps.

The vital parameter value series and the alarm threshold value series were originally recorded at inconsistent sampling rates. Forecasting based on these data, however, requires a consistent sampling rate. Therefore, resampling is performed.

In [None]:
import pandas as pd
import pyarrow as pa

# Load the cleaned and chunked chartevents (values and thresholds) from the parquet file
# into a DataFrame, using pyarrow as the parquet engine.
chartevents_clean_values_and_thresholds_with_chunkid_65 = pd.read_parquet(
    '../data/chartevents_clean_values_and_thresholds_with_chunkid_65.parquet',
    engine='pyarrow',
)

In [None]:
# Restrict the data to the rows and columns needed for resampling:
# rows    -> relevant ITEMIDs only (vital parameter values, no alarm threshold values)
# columns -> chunk id, ICU stay id, item id, chart time and cleaned value
vital_parameter_itemids = [220045, 220179, 220277]
sort_keys = ['CHUNK_ID_FILLED_TH', 'ICUSTAY_ID', 'ITEMID', 'CHARTTIME']
relevant_columns = sort_keys + ['VALUENUM_CLEAN']

is_vital_parameter = chartevents_clean_values_and_thresholds_with_chunkid_65.ITEMID.isin(vital_parameter_itemids)
chartevents_to_be_resampled = chartevents_clean_values_and_thresholds_with_chunkid_65.loc[
    is_vital_parameter, relevant_columns
].copy()

# Order the rows by chunk, ICU stay and item, with each series in chronological order
chartevents_to_be_resampled = chartevents_to_be_resampled.sort_values(by=sort_keys)

display(chartevents_to_be_resampled)

In [None]:
# Resample the vital parameter value series to a frequency of 60 min (1 hour).
# Two variants are produced, differing in how values are aggregated when downsampling (median and mean).

# Group once by chunk, ICU stay, item and 1-hour CHARTTIME bin; both aggregations reuse this grouping.
# The lowercase alias '1h' is used because the uppercase 'H' alias is deprecated since pandas 2.2.
# NOTE(review): only hourly bins containing at least one measurement appear to yield a row, so gaps
# may remain in the resampled series - confirm whether they need to be filled before forecasting.
valuenum_clean_per_hour = chartevents_to_be_resampled.groupby(
    ['CHUNK_ID_FILLED_TH','ICUSTAY_ID','ITEMID', pd.Grouper(key='CHARTTIME', freq='1h')]
    )['VALUENUM_CLEAN']

# Resampling of VALUENUM_CLEAN using the median of the values within each hour.
chartevents_resampled_median = valuenum_clean_per_hour.median().reset_index()

# Resampling of VALUENUM_CLEAN using the mean of the values within each hour.
chartevents_resampled_mean = valuenum_clean_per_hour.mean().reset_index()

# See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Grouper.html for further info on pd.Grouper
# Maybe interesting read: https://benalexkeen.com/resampling-time-series-data-with-pandas/

In [None]:
# Show the median-downsampled result for visual inspection.
# NOTE(review): display is not imported in this file; presumably the IPython/Jupyter built-in.
display(chartevents_resampled_median)

In [None]:
# Show the mean-downsampled result for visual inspection.
# NOTE(review): display is not imported in this file; presumably the IPython/Jupyter built-in.
display(chartevents_resampled_mean)

In [None]:
import pandas as pd
import pyarrow as pa

# Persist both resampled frames as parquet files (pyarrow engine).
# The filename suffix names the aggregation used when downsampling (median or mean).
resampled_outputs = (
    (chartevents_resampled_median, '../data/chartevents_clean_values_and_thresholds_with_chunkid_65_resampled_median.parquet'),
    (chartevents_resampled_mean, '../data/chartevents_clean_values_and_thresholds_with_chunkid_65_resampled_mean.parquet'),
)
for resampled_frame, parquet_path in resampled_outputs:
    resampled_frame.to_parquet(parquet_path, engine='pyarrow')