# Investigate the Default Scaler of Darts

In [None]:
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
import pandas as pd

# Load the resampled measurements from the parquet file
resampled_data = pd.read_parquet('../../data/resampling/resample_output_bp_first2000.parquet', engine='pyarrow')

series = {}
mins, maxs = [], []

# Collect every chunk with more than 12 rows as a Darts TimeSeries, together with the
# chunk's own minimum and maximum. `series`, `mins` and `maxs` are filled in lockstep,
# so the i-th entry of `mins`/`maxs` belongs to the i-th key of `series`.
#
# A single groupby pass replaces one full boolean-mask scan of the frame per chunk.
# sort=False keeps the chunks in order of first appearance (the order pd.unique gives),
# and rows with a missing chunk id are dropped by groupby - an equality filter on a
# missing id would never match any row either.
for chunk_id, current_series in resampled_data.groupby('CHUNK_ID_FILLED_TH', sort=False):
    if len(current_series) <= 12:
        continue

    chunk_values = current_series['VITAL_PARAMTER_VALUE_MEDIAN_RESAMPLING']
    mins.append(chunk_values.min())
    maxs.append(chunk_values.max())

    series[chunk_id] = TimeSeries.from_dataframe(
        df=current_series,
        time_col='CHARTTIME',
        value_cols=['VITAL_PARAMTER_VALUE_MEDIAN_RESAMPLING'],
        freq='H')

In [None]:
def rescale_single_series(series, min, max):
    """Undo a min-max scaling of one series by hand.

    Every scaled value is mapped back with ``value * (max - min) + min``.

    Args:
        series: Object whose ``pd_dataframe()`` returns a frame with the time
            axis as index and exactly one value column (the scaled values).
        min: Minimum used as the lower bound of the original value range.
        max: Maximum used as the upper bound of the original value range.

    Returns:
        A DataFrame with the columns ``TIME`` and ``VALUE``.
    """
    # Move the time index into a regular column so the frame has exactly two columns
    frame = series.pd_dataframe().reset_index(level=0)
    frame.columns = ['TIME', 'VALUE_SCALED']

    value_range = max - min
    frame['VALUE'] = frame['VALUE_SCALED'] * value_range + min

    return frame[['TIME', 'VALUE']]

In [None]:
# Fit the default Darts scaler on every collected series and transform them in one call
scaler = Scaler()
scaled_series = scaler.fit_transform(list(series.values()))

# Let Darts undo the scaling for all series in one call
rescaled_by_darts = scaler.inverse_transform(scaled_series)

# Pick one series by position and undo its scaling manually: once with the series' own
# min/max ("local") and once with the min/max over all collected series ("global")
chunk_idx = 42
chunk_id = list(series.keys())[chunk_idx]
selected_scaled = scaled_series[chunk_idx]
rescaled_by_function_local = rescale_single_series(
    selected_scaled, mins[chunk_idx], maxs[chunk_idx])
rescaled_by_function_global = rescale_single_series(
    selected_scaled, min(mins), max(maxs))

# Put the original values next to all three rescaled variants for comparison
comparison_columns = {
    'Original': [row[0] for row in series[chunk_id].values()],
    'Rescaled_Darts': [row[0] for row in rescaled_by_darts[chunk_idx].values()],
    'Rescaled_Local': rescaled_by_function_local['VALUE'],
    'Rescaled_Global': rescaled_by_function_global['VALUE'],
}
merged = pd.DataFrame(comparison_columns)
merged

In [None]:
# Show the scaler's fitted parameters: a list of several MinMaxScalers.
# NOTE(review): `_fitted_params` is a private Darts attribute; presumably it holds one
# fitted scaler per input series - confirm against the installed Darts version.
scaler._fitted_params

In [None]:
# Print the maximum recorded in `maxs` for the selected position, followed by the
# maximum stored in the fitted parameters at the same position
print(maxs[chunk_idx])
fitted_scaler = scaler._fitted_params[chunk_idx]
print(fitted_scaler.data_max_)

In [None]:
# Print the length of the selected series, followed by the number of samples the
# fitted parameters at the same position report having seen
print(len(series[chunk_id]))
fitted_scaler = scaler._fitted_params[chunk_idx]
print(fitted_scaler.n_samples_seen_)