# In [None]:
import pandas as pd
import nbimporter
import random
from imports import *
from functions_uc import *
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

def train_test(resampled_df, prediction_length):
    """Split time series data by encounter_id into training and test sets.

    For each ``encounter_id`` group, the ``prediction_length`` rows with the
    largest ``recorded_time`` become test rows and all remaining rows of the
    group become training rows. A group with at most ``prediction_length``
    rows therefore contributes no training rows.

    Args:
        resampled_df: DataFrame with ``encounter_id`` and ``recorded_time``
            columns. Index labels are assumed to be unique within each
            encounter, because the held-out rows are removed by label.
        prediction_length: Number of latest rows per encounter to hold out.

    Returns:
        Tuple ``(train_data, test_data)`` of DataFrames, each with a fresh
        ``RangeIndex``. Encounters appear in sorted ``encounter_id`` order
        (the ``groupby`` default). Training rows keep their original relative
        order; test rows of an encounter are ordered newest first, which is
        the order ``nlargest`` returns. When the input yields no groups
        (e.g. it has no rows), both results are empty frames with the same
        columns as the input.
    """
    train_parts, test_parts = [], []
    for _, group in resampled_df.groupby('encounter_id'):
        held_out = group.nlargest(prediction_length, 'recorded_time')
        train_parts.append(group.drop(held_out.index))
        test_parts.append(held_out)

    if not train_parts:
        # pd.concat raises ValueError on an empty list, so return empty
        # frames that still carry the input's columns and dtypes.
        empty = resampled_df.iloc[0:0].reset_index(drop=True)
        return empty, empty.copy()

    train_data = pd.concat(train_parts).reset_index(drop=True)
    test_data = pd.concat(test_parts).reset_index(drop=True)
    return train_data, test_data

def train_model_ensemble(resampled_df, prediction_length, value_to_predict, resample_rate, metric):
    """Fit AutoGluon statistical baseline models on per-encounter time series.

    The last ``prediction_length`` rows of every encounter are held out of
    the training data (via ``train_test``). The returned test data is built
    from every row of the input, i.e. the history plus the held-out tail.

    Args:
        resampled_df: Long-format DataFrame with one row per encounter and
            timestamp. Columns named ``id`` / ``date`` are renamed to
            ``encounter_id`` / ``recorded_time``. ``recorded_time`` must be
            a datetime column, since the ``.dt`` accessor is used on it.
        prediction_length: Forecast horizon; also the number of rows per
            encounter withheld from training.
        value_to_predict: Name of the target column.
        resample_rate: Frequency passed to the predictor as ``freq``.
        metric: Evaluation metric passed to the predictor as ``eval_metric``.

    Returns:
        Tuple ``(predictor, train_data, test_data)``: the fitted
        ``TimeSeriesPredictor`` and the two ``TimeSeriesDataFrame`` objects
        built from the training rows and from all rows respectively.

    Note:
        ``max_ts_length`` is not a parameter; it is read from module scope
        when the function is called and must be defined there.
    """
    resampled_df = resampled_df.rename(columns={'id': 'encounter_id', 'date': 'recorded_time'})
    model_columns = ['encounter_id', 'recorded_time', value_to_predict]

    def _select_tz_naive(frame):
        # ``assign`` builds a new frame, so the timestamp column is never
        # written into a column-selection result (which would trigger
        # pandas' SettingWithCopyWarning).
        selected = frame[model_columns]
        return selected.assign(recorded_time=selected['recorded_time'].dt.tz_localize(None))

    # Split into training and testing data; the held-out rows themselves are
    # not needed because the test set is the full series.
    train_rows, _ = train_test(resampled_df, prediction_length)
    train_df = _select_tz_naive(train_rows)
    test_df = _select_tz_naive(resampled_df)

    # Convert to TimeSeriesDataFrame
    train_data = TimeSeriesDataFrame.from_data_frame(train_df, id_column="encounter_id", timestamp_column="recorded_time")
    test_data = TimeSeriesDataFrame.from_data_frame(test_df, id_column="encounter_id", timestamp_column="recorded_time")

    # Initialize predictor
    predictor = TimeSeriesPredictor(
        prediction_length=prediction_length,
        path="baseline_stats_model",
        target=value_to_predict,
        eval_metric=metric,
        freq=resample_rate,
        verbosity=3,
        quantile_levels=[0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95],
    )

    # Every baseline model gets the same settings; each receives its own
    # dict so the entries stay independent objects.
    shared_model_args = {"n_jobs": 3, "max_ts_length": max_ts_length}
    hyperparameters = {
        model_name: dict(shared_model_args)
        for model_name in ("AutoARIMA", "NaiveModel", "AverageModel")
    }

    # Train the individual models only; ensembling is switched off.
    predictor.fit(
        train_data,
        presets="best_quality",
        num_val_windows=1,
        time_limit=15000,
        hyperparameters=hyperparameters,
        enable_ensemble=False,
    )

    return predictor, train_data, test_data

# Use-case configuration. Each self-assignment below is a placeholder: replace
# the right-hand side with the value for the use case at hand. Left unchanged,
# every name must already be defined at this point (presumably by the star
# imports at the top of the file - verify).
value_to_predict = value_to_predict    # target column to forecast
resample_rate = resample_rate          # passed to the predictor as freq
prediction_length = prediction_length  # forecast horizon / rows held out per encounter
metric = metric                        # passed to the predictor as eval_metric
max_ts_length = max_ts_length          # read as a global inside train_model_ensemble

# Fit the baseline models; full_data is expected to be defined elsewhere.
predictor, train_data, test_data = train_model_ensemble(
    resampled_df=full_data,
    prediction_length=prediction_length,
    value_to_predict=value_to_predict,
    resample_rate=resample_rate,
    metric=metric,
)
