## Disclaimer
The experiment was initially conducted with both numerical and categorical variables. After discussions with supervisors, we found it reasonable to stick to only categorical variables due to the problem of RMSE under MAR. This required changes to the code, and as a result many variables may have redundant or outdated names.

# Missing value experiment MI

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import TargetEncoder
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import torch
from lifelines import CoxPHFitter
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sklearn.model_selection import train_test_split
from sksurv.metrics import (
    concordance_index_censored
)
from sksurv.util import Surv

try:
    from utils.utils import *
    from utils.encoders import MultiLabelEncoder
except:
    import sys
    sys.path.append('../')
    from utils.utils import *
    from utils.encoders import MultiLabelEncoder    
    

def accuracy(X_imp, X_true, mask, column_wise = False):
    """
    Accuracy between imputed variables and ground truth.
    Pytorch/Numpy agnostic

    Only the entries flagged in ``mask`` are compared; the accuracy is the
    fraction of those entries where ``X_imp`` equals ``X_true`` exactly.

    Parameters
    ----------
    X_imp : torch.DoubleTensor or np.ndarray, shape (n, d)
        Data with imputed variables.
    
    X_true : torch.DoubleTensor or np.ndarray, shape (n, d)
        Ground truth.
    
    mask : torch.BoolTensor or np.ndarray of booleans, shape (n, d)
        Missing value mask (missing if True)

    column_wise : bool
        If True, compute one accuracy per column instead of a single value.
        Columns without any masked entry get an accuracy of 0.

    Returns
    -------
    acc : torch.DoubleTensor or np.ndarray of floats, shape (d,) if column_wise = True, else float
        accuracy between imputed variables and ground truth.
        If ``mask`` has no True entry at all, a scalar torch tensor equal to 1
        is returned regardless of the input type and of ``column_wise``.
    """

    if not mask.any():
        return torch.tensor(1, dtype=torch.float64)

    is_tensor = torch.is_tensor(X_imp)

    if not column_wise:
        hits = X_imp[mask] == X_true[mask]
        # Boolean torch tensors have no mean(); NumPy boolean arrays do.
        return hits.double().mean() if is_tensor else hits.mean()

    acc = []
    for col in range(X_imp.shape[1]):
        col_mask = mask[:, col]
        if not col_mask.any():
            # Nothing was imputed in this column.
            acc.append(torch.tensor(0, dtype=torch.float64) if is_tensor else 0)
            continue
        hits = (X_imp[:, col] == X_true[:, col])[col_mask]
        # The NumPy path must not call .double(): that method only exists on tensors.
        acc.append(hits.double().mean() if is_tensor else hits.mean())

    return torch.stack(acc) if is_tensor else np.array(acc)
        



In [16]:
class MiceSimulationMV():
    def __init__(self, X, duration_col, event_col, cat_colnames):
        """
        Set up the simulation from a complete data matrix.

        The event and duration columns are stripped from X before any
        missing values are simulated. Each remaining column is assigned an
        encoding used later for modelling:

        * ordinal  - the value in the first row is not a string,
        * one-hot  - string column with exactly two unique values,
        * target   - any other string column.

        Finally a Cox PH model is fitted on the complete data and stored as
        the ground truth (``GT_weights`` and ``GT_concordance``).

        Parameters
        ----------
        X : pandas.DataFrame
            Complete data matrix
        duration_col : str
            Name of the column containing the durations.
        event_col : str
            Name of the column containing the events.
        cat_colnames : list of str
            List of categorical column names.
        """
        self.duration_col = duration_col
        self.event_col = event_col
        self.categorical_colnames = cat_colnames

        # Categorical columns first, duration and event last; any other column is dropped.
        X = X[cat_colnames + [duration_col, event_col]]

        # Untouched copy, used for the baseline (ground truth) evaluation
        self.X_original = X.copy()

        # Duration and event are neither imputed nor evaluated, so drop them here
        features = self._remove_event_and_duration(X).to_numpy()
        n_features = features.shape[1]

        # Decide the encoding of every column
        self.ordinal_idx = []
        self.one_hot_idx = []
        self.target_idx = []
        for col in range(n_features):
            if not isinstance(features[0, col], str):
                self.ordinal_idx.append(col)
            elif len(np.unique(features[:, col])) == 2:
                self.one_hot_idx.append(col)
            else:
                self.target_idx.append(col)

        self.not_ordinal_idx = [col for col in range(n_features) if col not in self.ordinal_idx]
        self.one_hot_cols = [self.categorical_colnames[col] for col in self.one_hot_idx]
        self.target_enc_cols = [self.categorical_colnames[col] for col in self.target_idx]

        # Strings are (temporarily) mapped to integers because torch cannot hold strings
        self.X = self._map_categorical(features)

        # Ground truth Cox PH fitted on the complete data
        ground_truth = self._fit_single_cox_PH(self.X_original, None)
        self.GT_weights, self.GT_concordance = ground_truth.values()
        
    def _remove_event_and_duration(self, X):
        """
        Remove event and duration columns from the data.
        Re-set the indices for the categorical and numerical columns.
        This is to avoid simulating missing values with these columns.

        Parameters
        ----------
        X : pandas.DataFrame
            Data matrix.
        
        Returns
        -------
        pandas.DataFrame
            Data matrix with event and duration columns removed.
        """

        X = X.drop(columns = [self.duration_col, self.event_col])

        cat_idx = [X.columns.get_loc(col) for col in self.categorical_colnames]
        self.categorical_cols = cat_idx
        return X
    
    
    def _add_event_and_duration(self, X, duration_col, event_col):
        """
        Add event and duration columns to the data.

        Parameters
        ----------
        X : pandas.DataFrame or np.ndarray
            Data matrix.

        duration_col : pandas.Series
            Duration column.

        event_col : pandas.Series
            Event column.
        
        Returns
        -------
        pandas.DataFrame
            Data matrix with event and duration columns added.
        """

        if isinstance(X, pd.DataFrame):
            # Add columns
            X[self.duration_col] = duration_col
            X[self.event_col] = event_col
        else:
            X = np.concatenate(X, [duration_col, event_col], axis = 1)
        return X

    def _fit_single_cox_PH(self, X_train, X_test, penalizer = 0):
        """ 
        Fit a Cox PH model and evaluate its concordance index.

        The method has two modes, selected by ``X_test``:

        * ``X_test is None`` - ``X_train`` is treated as the complete data. A
          scikit-survival model is fitted and scored on that same data, and a
          lifelines ``CoxPHFitter`` is additionally fitted and stored in
          ``self.complete_model`` (used by ``complete_summary``).
        * otherwise - the model is fitted on ``X_train`` and scored on both
          ``X_train`` and ``X_test``. Increasing regularization strengths are
          tried until a fit succeeds; ``penalizer`` is not used in this mode.

        In both modes the columns in ``self.one_hot_cols`` are dummy encoded
        (first level dropped), the columns in ``self.target_enc_cols`` are
        target encoded against the duration column, and all features are
        standardized.

        Parameters
        ----------
        X_train : pandas.DataFrame
            Training data, including the event and duration columns.
            Categories are mapped back to strings.

        X_test : pandas.DataFrame or None
            Test data, including the event and duration columns.
            Categories are mapped back to strings.
            None selects the complete data mode.

        penalizer : float
            Regularization strength. Only used in the complete data mode.
        
        Returns
        -------
        dict or None
            Complete data mode: keys 'weights' and 'concordance'.
            Train/test mode: keys 'weights', 'concordance_train' and
            'concordance_test'. Concordances are rounded to 3 decimals and
            'weights' maps feature name to coefficient.
            None is returned (implicitly) if the fit fails for every
            regularization strength in train/test mode.
        """

        me = TargetEncoder(smooth = "auto", target_type = "continuous", random_state=3155135)
        sc = StandardScaler()
        cph = CoxPHSurvivalAnalysis(alpha=penalizer)
        util = Surv()

        if X_test is None:
            # Prepare X and y. This is the complete data.
            # Define proper encodings
            y = X_train[[self.event_col, self.duration_col]]
            X = X_train.drop([self.event_col, self.duration_col], axis=1)
            X = pd.get_dummies(X, columns = self.one_hot_cols, drop_first=True)
            # Target encoding uses the duration as the (continuous) target
            X[self.target_enc_cols] = me.fit_transform(X[self.target_enc_cols], y[self.duration_col])
            # Record array with fields (event, duration), passed as y to the scikit-survival fit
            y = y.to_records(index=False)
            X_sc = sc.fit_transform(X)
            X_sc = pd.DataFrame(X_sc, columns = sc.feature_names_in_)
            cph.fit(X_sc, y)
            weights = cph.coef_
            weights = dict(zip(X_sc.columns, weights))
            # Concordance is computed on the same data the model was fitted on
            preds = cph.predict(X_sc)
            conc = concordance_index_censored(event_indicator = y[self.event_col], event_time = y[self.duration_col], estimate = preds)[0]
            conc = round(conc, 3)

            # Lifelines summary of complete model
            # The encoder and scaler are re-fitted here; event and duration stay in the frame
            # because lifelines expects them as columns.
            X = pd.get_dummies(X_train, columns = self.one_hot_cols, drop_first=True)
            X[self.target_enc_cols] = me.fit_transform(X[self.target_enc_cols], y[self.duration_col])
            # Temporarily remove y to not scale it
            y = X[[self.event_col, self.duration_col]]
            X = X.drop([self.event_col, self.duration_col], axis=1)
            X_sc = sc.fit_transform(X)
            X_sc = pd.DataFrame(X_sc, columns = sc.feature_names_in_)
            X_sc[[self.event_col, self.duration_col]] = y

            self.complete_model = CoxPHFitter(baseline_estimation_method='breslow', penalizer = penalizer)
            self.complete_model.fit(X_sc, 
                               duration_col = self.duration_col, 
                               event_col= self.event_col,
                               show_progress=False)

            return {'weights': weights, 
                    'concordance': conc}

        # One hot encode binary
        X_train = pd.get_dummies(X_train, columns = self.one_hot_cols, drop_first = True)
        X_test = pd.get_dummies(X_test, columns = self.one_hot_cols, drop_first = True)

        # Ensure all category levels are present
        # Dummy columns missing from the test set are added as all-zero columns; the
        # re-indexing below also drops test-only columns and aligns the column order.
        for column in X_train.columns:
            if column not in X_test.columns:
                X_test[column] = 0
        X_test  = X_test[X_train.columns]

        # Technicality of converting to structured array
        y_train = util.from_arrays(event = X_train[self.event_col], time = X_train[self.duration_col], name_event = self.event_col, name_time = self.duration_col)
        y_test = util.from_arrays(event = X_test[self.event_col], time = X_test[self.duration_col], name_event = self.event_col, name_time = self.duration_col)
        X_train = X_train.drop([self.event_col, self.duration_col], axis=1)
        X_test = X_test.drop([self.event_col, self.duration_col], axis=1)

        # Target encode train and transform test
        X_train[self.target_enc_cols] = me.fit_transform(X_train[self.target_enc_cols], y_train[self.duration_col])
        X_test[self.target_enc_cols] = me.transform(X_test[self.target_enc_cols])

        # Scale X (not event and duration)
        X_train_sc = sc.fit_transform(X_train)
        X_train_sc = pd.DataFrame(X_train_sc, columns = sc.feature_names_in_)
        X_test_sc = sc.transform(X_test)
        X_test_sc = pd.DataFrame(X_test_sc, columns = sc.feature_names_in_)
        
        # For regularizing singular matrices
        # Strengths are tried in increasing order; the first successful fit is returned.
        alpha_values = [0, 10, 100, 1000, 10000]
        for alpha in alpha_values:
            try:
                cph = CoxPHSurvivalAnalysis(alpha = alpha)
                cph.fit(X_train_sc, y_train)
                preds_train = cph.predict(X_train_sc)
                preds_test = cph.predict(X_test_sc)
                
                conc_train = concordance_index_censored(event_indicator = y_train[self.event_col], event_time = y_train[self.duration_col], estimate = preds_train)[0]
                conc_test = concordance_index_censored(event_indicator = y_test[self.event_col], event_time = y_test[self.duration_col], estimate = preds_test)[0]
                conc_train = round(conc_train, 3)
                conc_test = round(conc_test, 3)
                
                weights = cph.coef_
                weights = dict(zip(X_train.columns, weights))

                # tbd add if regularising

                return {'weights': weights, 
                'concordance_train': conc_train,
                'concordance_test': conc_test}
            
            except Exception as e:
                # Any failure (fit, predict or scoring) is printed and the next alpha is tried
                print(f"Failed with alpha={alpha}: {e}")
        # NOTE(review): if every alpha fails, control falls off the end and None is returned.

    def complete_summary(self):
        """ 
        Return a lifelines summary of cox ph of full data.
        This is used because SciKit Survival does not have statistical information such as p-values etc.
        """

        return self.complete_model.print_summary()
    
    def _map_categorical(self, X_cat):
        """
        Temporarily encode the non-ordinal categorical columns with a
        ``MultiLabelEncoder``; ordinal columns are copied unchanged.

        The fitted encoder is kept in ``self.multi_le`` so that the mapping
        can be reverted later.

        Parameters
        ----------
        X_cat : np.ndarray
            Categorical data matrix.
    
        Returns
        -------
        X_cat_enc : np.ndarray
            Object array of the same shape as X_cat holding the encoded data.
        """
        self.multi_le = MultiLabelEncoder()
        nominal, ordinal = self.not_ordinal_idx, self.ordinal_idx

        # Output buffer with the same shape as the input
        X_cat_enc = np.empty(shape=X_cat.shape, dtype = "object")

        # Only the non-ordinal columns go through the encoder
        X_cat_enc[:, nominal] = self.multi_le.fit_transform(X_cat[:, nominal])
        X_cat_enc[:, ordinal] = X_cat[:, ordinal]
        return X_cat_enc

    def _decode_categorical(self, X_cat_enc):
        """
        Decode categorical variables, excluding columns specified in self.ordinal_idx.
        self.ordinal_idx contains indices of columns that should not be decoded because they are ordered.

        Parameters
        ----------
        X_cat_enc : np.ndarray
            Encoded categorical data matrix of integers.
        
        Returns
        -------
        X_cat_dec : np.ndarray
            Decoded categorical data matrix of strings.
        """

        # Initialize an array of the same shape as X_cat_enc to hold decoded data
        X_cat_dec = np.empty(shape=X_cat_enc.shape, dtype = "object")
        decoded_columns = self.multi_le.inverse_transform(X_cat_enc[:, self.not_ordinal_idx])
        X_cat_dec[:, self.not_ordinal_idx] = decoded_columns
        X_cat_dec[:, self.ordinal_idx] = X_cat_enc[:, self.ordinal_idx]
        return X_cat_dec
     
    def _simulate_single_na_dataset(self, p_miss, mecha = "MCAR", opt = None, p_obs = None, q = None,
                                    sample_seed = 135135, column_seed = 115342):
        """
        Generate missing values for specifics missing-data mechanism and proportion of missing values,
        then split the result 70/30 into train and test. 

        Parameters
        ----------
        p_miss : float
            Proportion of missing values to generate for variables which will have missing values.

        mecha : str, 
                Indicates the missing-data mechanism to be used. "MCAR" by default, "MAR" or "MNAR".
                Any combination of mecha / opt that is not recognised falls back to MCAR.

        opt: str, 
             For mecha = "MNAR", it indicates how the missing-data mechanism is generated: using a logistic regression ("logistic"), 
             quantile censorship ("quantile") or logistic regression for generating a self-masked MNAR mechanism ("selfmasked").

        p_obs : float
                If mecha = "MAR", or mecha = "MNAR" with opt = "logistic" or "quantile", proportion of variables with *no* 
                missing values that will be used for the logistic masking model.

        q : float
            If mecha = "MNAR" and opt = "quantile", quantile level at which the cuts should occur.

        sample_seed : int
            Seed for the random number generator used to generate the missing samples.
            Also used in train test split.

        column_seed : int
            Seed for the random number generator used to generate the missing columns.

        Returns
        ----------
        A dictionary whose values are themselves dictionaries with the keys 'train' and 'test':
        'X_init': the initial data matrix (double tensors).
        'X_na': the dataset with missing values set to NaN (double tensors).
        'mask': the missing value mask (boolean tensors, True where missing).
        'event_and_duration': numpy arrays with the duration column followed by the event column.
        """

        set_sample_seed(sample_seed)
        set_column_seed(column_seed)

        # Work on a torch tensor; self.X is used directly when it already is one.
        if torch.is_tensor(self.X):
            X = self.X
        else:
            X = torch.from_numpy(self.X.astype(np.float32))
        
        if mecha == "MAR":
            mask = MAR_mask(X, p_miss, p_obs).double()
        elif mecha == "MNAR" and opt == "logistic":
            mask = MNAR_mask_logistic(X, p_miss, p_obs).double()
        elif mecha == "MNAR" and opt == "quantile":
            mask = MNAR_mask_quantiles(X, p_miss, q, 1-p_obs).double()
        elif mecha == "MNAR" and opt == "selfmasked":
            mask = MNAR_self_mask_logistic(X, p_miss).double()
        else:
            mask = MCAR_mask(X, p_miss).double()
        
        X_nas = X.clone()
        X_nas[mask.bool()] = np.nan

        # Split everything in a single call so that data, mask and outcome share the same row indices
        duration_and_event = self.X_original[[self.duration_col, self.event_col]].to_numpy()

        X_init_train, X_init_test, X_nas_train, X_nas_test, mask_train, mask_test, \
        event_and_duration_train, event_and_duration_test = train_test_split(X, X_nas, mask, duration_and_event, test_size=0.3, random_state = sample_seed)

        X_init = {'train': X_init_train.double(), 'test': X_init_test.double()}
        X_nas = {'train': X_nas_train.double(), 'test': X_nas_test.double()}
        mask = {'train': mask_train.bool(), 'test': mask_test.bool()}
        duration_and_event = {'train': event_and_duration_train, 'test': event_and_duration_test}
        
        return {'X_init': X_init, 'X_na': X_nas, 'mask': mask, 'event_and_duration': duration_and_event}
    
    def _simulate_M_na_datasets(self, M, p_miss, mecha = "MCAR", opt = None, p_obs = None, q = None,
                               vary_cols = True, save = False, sample_seed = 135135, column_seed = 115342):
        """
        Function to generate M datasets with missing values.

        Each dataset is produced by ``_simulate_single_na_dataset``; the sample
        seed is incremented after every dataset, so every dataset also gets its
        own train/test split.

        Parameters
        ----------
        M : int
            Number of datasets to generate.
        
        vary_cols: bool
            If True, the column_seed will vary.
            This yields the most variation when simulating MAR because retained columns will vary. 

        save : bool
            If True, the datasets are written to ``../../data/R/simulated_datasets``
            as ``.npy`` files (categories decoded back to their original labels)
            and None is returned. Dataset files are only written for
            mecha = "MCAR" and mecha = "MAR"; for any other mechanism only the
            column names file is written.

        See other params above (p_miss, mecha, opt, p_obs, q, sample_seed and
        column_seed are forwarded to ``_simulate_single_na_dataset``).

        Returns
        ----------
        None if save is True, otherwise a dictionary containing:
        'X_init': dictionary with train and test of shape (n_train, d, M), (n_test, d, M) respectively. 
            Contains the initial data matrix, but split into train and test.
        'X_na': dictionary with train and test of shape (n_train, d, M), (n_test, d, M) respectively.
            Contains the dataset with simulated missing values, but split into train and test.
        'mask': dictionary with train and test of shape (n_train, d, M), (n_test, d, M)
            Contains the mask, but split into train and test.
        'event_and_duration': dictionary with train and test of shape (n_train, 2, M), (n_test, 2, M)
        """

        # Work on a torch tensor; self.X is used directly when it already is one.
        if torch.is_tensor(self.X):
            X = self.X
        else:
            X = torch.from_numpy(self.X.astype(np.float32))

        # A throw-away split, only used to learn the train / test sizes
        temporary_train, temporary_test = train_test_split(X, test_size=0.3, random_state = sample_seed)
        n_train = temporary_train.shape[0]
        n_test = temporary_test.shape[0]
        d = X.shape[1]

        # Pre-allocate the containers, one slice along the last axis per dataset
        event_and_duration_train = np.empty((n_train, 2, M), dtype = object)
        event_and_duration_test = np.empty((n_test, 2, M), dtype = object)
        
        X_init_tensor_train = torch.empty((n_train, d,  M))
        X_init_tensor_test = torch.empty((n_test, d, M))

        X_na_tensor_train = torch.empty((n_train, d, M))
        X_na_tensor_test = torch.empty((n_test, d, M))

        X_mask_tensor_train = torch.empty((n_train, d, M), dtype = torch.bool)
        X_mask_tensor_test = torch.empty((n_test, d, M), dtype = torch.bool)

        for i in range(M):
            # opt and q must be forwarded, otherwise every MNAR request silently falls back to MCAR
            X_init, X_na, X_mask, event_and_duration = self._simulate_single_na_dataset(p_miss = p_miss,
                                                                    mecha = mecha,
                                                                    opt = opt,
                                                                    p_obs = p_obs,
                                                                    q = q,
                                                                    sample_seed = sample_seed,
                                                                    column_seed = column_seed).values()
            
            event_and_duration_train[:, :, i] = event_and_duration['train']
            event_and_duration_test[:, :, i] = event_and_duration['test']
            
            X_init_tensor_train[:, :, i] = X_init['train'].double()
            X_init_tensor_test[:, :, i] = X_init['test'].double()

            X_na_tensor_train[:, :, i] = X_na['train'].double()
            X_na_tensor_test[:, :, i] = X_na['test'].double()

            X_mask_tensor_train[:, :, i] = X_mask['train'].bool()
            X_mask_tensor_test[:, :, i] = X_mask['test'].bool()

            # Change seeds
            sample_seed += 1
            if vary_cols:
                column_seed += 1

        if not save:
            X_init = {'train': X_init_tensor_train, 'test': X_init_tensor_test}
            X_na = {'train': X_na_tensor_train, 'test': X_na_tensor_test}
            mask = {'train': X_mask_tensor_train, 'test': X_mask_tensor_test}
            event_and_duration = {'train': event_and_duration_train, 'test': event_and_duration_test}

            return {'X_init': X_init, 'X_na': X_na, 'mask': mask, 'event_and_duration': event_and_duration}

        np.save("../../data/R/simulated_datasets/colnames", np.array(self.categorical_colnames))

        # Object arrays are needed because decoding puts the original labels back
        X_init_train = X_init_tensor_train.numpy().astype(object)
        X_init_test = X_init_tensor_test.numpy().astype(object)

        X_na_train = X_na_tensor_train.numpy()
        X_na_test = X_na_tensor_test.numpy()

        # replace all nan in X_na_cat with np.nan. Prevent overflows bug
        X_na_train = np.where(np.isnan(X_na_train), np.nan, X_na_train).astype(object)
        X_na_test = np.where(np.isnan(X_na_test), np.nan, X_na_test).astype(object)

        mask_train = X_mask_tensor_train.numpy().astype(bool)
        mask_test = X_mask_tensor_test.numpy().astype(bool)

        for i in range(M):
            X_init_train[:, :, i] = self._decode_categorical(X_init_train[:, :, i])
            X_init_test[:, :, i] = self._decode_categorical(X_init_test[:, :, i])
            X_na_train[:, :, i] = self._decode_categorical(X_na_train[:, :, i])
            X_na_test[:, :, i] = self._decode_categorical(X_na_test[:, :, i])

        if mecha == "MCAR":
            subdir = "mcar"
            tag = f"M{M}p_miss{p_miss}"
        elif mecha == "MAR":
            p_obs = round(p_obs, 4)
            subdir = "mar"
            tag = f"M{M}p_miss{p_miss}p_obs{p_obs}"
        else:
            # No output location is defined for other mechanisms; nothing more is written.
            return None

        outputs = [("X_init_train", X_init_train),
                   ("X_init_test", X_init_test),
                   ("X_na_train", X_na_train),
                   ("X_na_test", X_na_test),
                   ("X_mask_train", mask_train),
                   ("X_mask_test", mask_test),
                   ("event_and_dur_train", event_and_duration_train),
                   ("event_and_dur_test", event_and_duration_test)]
        for name, array in outputs:
            np.save(f"../../data/R/simulated_datasets/{subdir}/{name}_{tag}", array)
        return None

    
    def _evaluate_bias_weights(self, weights):
        """
        Takes in weights of the ith dataset in M and calculates the absolute bias for each feature.

        Parameters
        ----------
        weights : dict
            Dictionary containing the weights of the cox PH model.
            Key is the feature name and value is the weight.

        Returns
        -------
        abs_bias : dict
            Dictionary containing the absolute bias for each feature.
        """

        GT_weights = self.GT_weights
        abs_bias = {}
        for key, value in weights.items():
            abs_bias[key] = (value - GT_weights[key])/abs(GT_weights[key])
        return abs_bias
    
    @staticmethod
    def convert_datatypes(arr):
        """
        Convert the data types of the numpy arrays to the correct types.
        """
        shape = arr.shape
        
        # Flatten the array to iterate over it
        flat_arr = arr.flatten()
        
        converted = []
        for item in flat_arr:
            try:
                converted.append(float(item))
            except ValueError:
                converted.append(item)
        return np.array(converted, dtype=object).reshape(shape)

    @staticmethod
    def _map_p_obs(p_obs):
        map_obs = {'0.888889': '8/9',
                '0.777778': '7/9',
                '0.666667': '6/9',
                '0.555556': '5/9',
                '0.444444': '4/9'}

        # Given p_obs is the float, convert to string representation from the dict
        p_obs = map_obs[str(p_obs)]
    
    @staticmethod
    def mean_and_std_list(input_list):
        means = [x[0] for x in input_list]
        stds = [x[1] for x in input_list]

        mean_of_means = np.mean(means)
        mean_of_stds = np.mean(stds)

        result = (mean_of_means, mean_of_stds)
        return result
    
    @staticmethod
    def mean_and_std_df(column):
        """
        Given a datframe with tuple pairs (mean, std) as rows, extract the expected
        mean and std.
        """

        means = [x[0] for x in column]
        stds = [x[1] for x in column]
        return (np.mean(means), np.mean(stds))
    
class MiceSimulationMCAR(MiceSimulationMV):
    """
    Subclass of MiceSimulationMV to simulate MCAR missing values and evaluate
    their MICE imputations.

    ``_generate_mice_datasets`` saves the simulated datasets to disk for R to
    read. ``simulate_mice`` reads the MICE-imputed tensors back from disk and
    evaluates them, and ``get_mice_results`` formats the outcome (accuracy,
    concordance index, bias of the Cox PH weights) as one row per p_miss.
    """

    # Proportions of missing values used when the caller supplies none.
    _DEFAULT_P_MISS = (0.1, 0.2, 0.3, 0.4, 0.5)

    def __init__(self, X, duration_col, event_col, cat_colnames):
        super().__init__(X, duration_col, event_col, cat_colnames)

    @staticmethod
    def _mice_format_pair(mean, std):
        """Render a (mean, std) pair as 'mean ± std' with four decimals."""
        return f"{mean:.4f} ± {std:.4f}"

    @staticmethod
    def _mice_add_imputation_axis(arr, n_imputations):
        """Insert a new axis at position 2 and repeat `arr` along it."""
        return np.repeat(arr[:, :, np.newaxis], n_imputations, axis=2)

    def _simulate_MCAR_dataset(self, M, p_miss, save, sample_seed = 135135, column_seed = 115342):
        """
        Simulate M datasets with MCAR missing values. See parent class for more details.
        """

        self._simulate_M_na_datasets(M = M,
                                     p_miss = p_miss,
                                     mecha = "MCAR",
                                     sample_seed = sample_seed,
                                     column_seed = column_seed,
                                     save = save)

    def _generate_mice_datasets(self, M, p_miss = None):
        """
        Generate datasets to save to disk for R to read.

        Parameters
        ----------
        M : int
            Number of datasets to generate.

        p_miss : list, optional
            List of proportions of missing values to generate for variables which
            will have missing values. Defaults to [0.1, 0.2, 0.3, 0.4, 0.5].
        """

        # Copy into a fresh list so that neither the default nor the caller's
        # list is shared with (and mutable through) the instance.
        self.p_miss = list(self._DEFAULT_P_MISS if p_miss is None else p_miss)
        self.M = M

        for miss in self.p_miss:
            self._simulate_MCAR_dataset(M = M, p_miss = miss, save = True)

    def _read_single_simulated_dataset(self, M, p_miss):
        """
        Read from disk the simulated, unimputed, datasets used for imputing with MICE.

        Parameters
        ----------
        M : int
            Number of simulated datasets (part of the file name).

        p_miss : float
            Proportion of missing values (part of the file name).

        Returns
        -------
        dict
            Keys 'X_init', 'X_mask' and 'event_and_duration'; each value is a
            dict with the 'train' and 'test' arrays.
        """
        # NOTE(security): allow_pickle=True unpickles object arrays, which can run
        # arbitrary code. Only load files written by this pipeline.
        X_init = {
            'train': np.load(f"../../data/R/simulated_datasets/mcar/X_init_train_M{M}p_miss{p_miss}.npy", allow_pickle=True),
            'test': np.load(f"../../data/R/simulated_datasets/mcar/X_init_test_M{M}p_miss{p_miss}.npy", allow_pickle=True),
        }
        X_mask = {
            'train': np.load(f"../../data/R/simulated_datasets/mcar/X_mask_train_M{M}p_miss{p_miss}.npy", allow_pickle=True),
            'test': np.load(f"../../data/R/simulated_datasets/mcar/X_mask_test_M{M}p_miss{p_miss}.npy", allow_pickle=True),
        }
        event_and_duration = {
            'train': np.load(f"../../data/R/simulated_datasets/mcar/event_and_dur_train_M{M}p_miss{p_miss}.npy", allow_pickle=True),
            'test': np.load(f"../../data/R/simulated_datasets/mcar/event_and_dur_test_M{M}p_miss{p_miss}.npy", allow_pickle=True),
        }

        return {'X_init': X_init, 'X_mask': X_mask, 'event_and_duration': event_and_duration}

    def _read_single_mice_tensor(self, p_miss):
        """
        Read the train and test tensors representing mice imputations.
        They are of shape (n_train, d, G, M) or (n_test, d, G, M) respectively.
        G represents the number of imputed datasets by MICE, and M is the number
        of datasets generated with missing values.

        Requires ``self.G`` to be set (done by ``simulate_mice``).
        """

        # NOTE(security): see _read_single_simulated_dataset regarding allow_pickle.
        X_imp_train = np.load(f"../../data/R/imputed_datasets/mcar/X_mice_tensor_train_G{self.G}_p_miss{p_miss}.npy", allow_pickle=True)
        X_imp_test = np.load(f"../../data/R/imputed_datasets/mcar/X_mice_tensor_test_G{self.G}_p_miss{p_miss}.npy", allow_pickle=True)
        return {"train": self.convert_datatypes(X_imp_train),
                "test": self.convert_datatypes(X_imp_test)}

    def _evaluate_single_mice_acc(self, X_imp, X_true, X_mask):
        """
        Given a jth simulated dataset, evaluate the accuracy of the MICE imputations.

        The three arrays are indexed along their third axis, one slice per
        MICE imputation.

        Returns
        -------
        dict
            ``{"categorical": {"mice": (mean, std)}}`` where mean and std are
            taken over the imputations and rounded to four decimals.
        """

        scores = np.asarray(
            [accuracy(X_imp = X_imp[:, :, g],
                      X_true = X_true[:, :, g],
                      mask = X_mask[:, :, g])
             for g in range(X_imp.shape[2])],
            dtype = float)
        return {"categorical": {"mice": (scores.mean().round(4), scores.std().round(4))}}

    def _evaluate_mice_coxPH(self, X_imp_train, X_imp_test, event_and_duration_train, event_and_duration_test):
        """
        Evaluate the multivariate cox PH model on the multiple imputed datasets.
        Data is already decoded.

        Returns
        -------
        dict
            'c_index_train' and 'c_index_test' as (mean, std) over the
            imputations, and 'bias' mapping each feature to (mean, std) of the
            weight bias over the imputations.
        """

        # Append the event and duration columns to the covariates.
        full_train = np.concatenate((X_imp_train, event_and_duration_train), axis = 1)
        full_test = np.concatenate((X_imp_test, event_and_duration_test), axis = 1)
        columns = self.categorical_colnames + [self.duration_col, self.event_col]

        c_index_train = []
        c_index_test = []
        bias = []
        for g in range(full_train.shape[2]):
            data_train = pd.DataFrame(full_train[:, :, g], columns = columns)
            data_test = pd.DataFrame(full_test[:, :, g], columns = columns)
            weights, conc_train, conc_test = self._fit_single_cox_PH(data_train, data_test).values()
            c_index_train.append(conc_train)
            c_index_test.append(conc_test)
            bias.append(self._evaluate_bias_weights(weights))

        c_index_train = np.array(c_index_train)
        c_index_test = np.array(c_index_test)
        bias_df = pd.DataFrame(bias)
        bias = dict(zip(bias_df.columns, zip(bias_df.mean().round(4), bias_df.std().round(4))))

        return {'c_index_train': (c_index_train.mean().round(3), c_index_train.std().round(3)),
                'c_index_test': (c_index_test.mean().round(3), c_index_test.std().round(3)),
                'bias': bias}

    def _run_mice_imputations(self, M, p_miss):
        """
        Run the evaluation of the MICE imputations for datasets with MCAR missing values.
        Assumes method self._generate_mice_datasets(.) has been run and that the
        imputed tensors are available on disk.

        Parameters
        ----------
        M : int
            Number of simulated datasets (used to locate the files on disk).

        p_miss : float
            Proportion of missing values to generate for variables which will have missing values.

        Returns
        ----------
        dict
            Keys 'train' and 'test'. Each value holds 'acc' (array
            [mean, std] under ["categorical"]["mice"]), 'c_index' ((mean, std))
            and 'bias' (feature -> (mean, std)).
        """

        # Read the simulated datasets once instead of once per requested key.
        simulated = self._read_single_simulated_dataset(M, p_miss)
        imputed = self._read_single_mice_tensor(p_miss)
        imputed_train, imputed_test = imputed["train"], imputed["test"]

        # Recall imputations are tensors of dimensions (n, d, G, M); the loop
        # bounds are taken from the tensor itself.
        n_datasets = imputed_train.shape[3]
        n_imputations = imputed_train.shape[2]

        # We need G copies of the initial data and the mask because we have G
        # imputed datasets, so they are expanded to (n, d, G, M).
        X_init_train = self._mice_add_imputation_axis(simulated["X_init"]["train"], n_imputations)
        X_init_test = self._mice_add_imputation_axis(simulated["X_init"]["test"], n_imputations)
        X_mask_train = self._mice_add_imputation_axis(simulated["X_mask"]["train"], n_imputations)
        X_mask_test = self._mice_add_imputation_axis(simulated["X_mask"]["test"], n_imputations)
        event_and_duration_train = self._mice_add_imputation_axis(simulated["event_and_duration"]["train"], n_imputations)
        event_and_duration_test = self._mice_add_imputation_axis(simulated["event_and_duration"]["test"], n_imputations)

        acc_pairs_train = []
        acc_pairs_test = []
        c_index_train = []
        c_index_test = []
        bias = []

        for i in range(n_datasets):
            # accuracy
            acc_dict_train = self._evaluate_single_mice_acc(imputed_train[:, :, :, i], X_init_train[:, :, :, i], X_mask_train[:, :, :, i])
            acc_dict_test = self._evaluate_single_mice_acc(imputed_test[:, :, :, i], X_init_test[:, :, :, i], X_mask_test[:, :, :, i])
            acc_pairs_train.append(acc_dict_train["categorical"]["mice"])
            acc_pairs_test.append(acc_dict_test["categorical"]["mice"])

            # Bias and c-index
            cox = self._evaluate_mice_coxPH(imputed_train[:, :, :, i], imputed_test[:, :, :, i],
                                            event_and_duration_train[:, :, :, i], event_and_duration_test[:, :, :, i])
            c_index_train.append(cox['c_index_train'])
            c_index_test.append(cox['c_index_test'])
            bias.append(cox['bias'])

        # Take average of mean and std over the simulated datasets. A plain 1-D
        # array is stored (not np.matrix) so that [0] is the mean and [1] the std.
        acc_train = {"categorical": {"mice": np.asarray(acc_pairs_train, dtype = float).mean(axis = 0).round(4)}}
        acc_test = {"categorical": {"mice": np.asarray(acc_pairs_test, dtype = float).mean(axis = 0).round(4)}}

        c_index_train = self.mean_and_std_list(c_index_train)
        c_index_test = self.mean_and_std_list(c_index_test)

        bias_df = pd.DataFrame(bias)
        bias = {col: self.mean_and_std_df(bias_df[col]) for col in bias_df.columns}

        # The same bias summary is reported under both keys.
        train = {"acc": acc_train, "c_index": c_index_train, "bias": bias}
        test = {"acc": acc_test, "c_index": c_index_test, "bias": bias}
        return {"train": train, "test": test}

    def _get_mice_acc(self, acc_in):
        """
        Get the MICE accuracy for the imputed datasets.

        Parameters
        ----------
        acc_in : list
            One accuracy dictionary per entry of self.p_miss, in the same order.

        Returns
        -------
        pd.DataFrame
            DataFrame containing the accuracy for the imputed datasets of each p_miss.
        """

        acc_cols = pd.MultiIndex.from_tuples([
                        ('Categorical', 'MICE')
                        ])
        acc_row = []

        for position in range(len(self.p_miss)):
            # ravel() accepts both a 1-D array and a 1x2 matrix.
            summary = np.asarray(acc_in[position]['categorical']['mice'], dtype = float).ravel()
            acc_row.append([self._mice_format_pair(summary[0], summary[1])])

        acc_out = pd.DataFrame(acc_row, index = self.p_miss , columns = acc_cols)
        acc_out.index.name = 'p_miss'

        return acc_out

    def _get_mice_c_index(self, c_index_in):
        """
        Get the multivariate concordance index for the imputed datasets.

        Parameters
        ----------
        c_index_in : list
            One (mean, std) pair per entry of self.p_miss, in the same order.

        Returns
        -------
        pd.DataFrame
            DataFrame containing the concordance index for the imputed datasets of each p_miss.
        """

        conc_row = []
        for position in range(len(self.p_miss)):
            conc = c_index_in[position]
            conc_row.append([self._mice_format_pair(conc[0], conc[1])])

        conc_out = pd.DataFrame(conc_row, index = self.p_miss , columns = ['mice'])
        conc_out.index.name = 'p_miss'

        return conc_out

    def _get_mice_bias(self):
        """
        Get the mice bias for the imputed datasets.

        Returns
        -------
        pd.DataFrame
            DataFrame containing the bias for the imputed datasets of each p_miss.
        """

        feature_cols = list(self.bias[0].keys())
        bias_row = []

        for position in range(len(self.p_miss)):
            bias_row.append([self._mice_format_pair(pair[0], pair[1])
                             for pair in self.bias[position].values()])

        bias = pd.DataFrame(bias_row, index = self.p_miss , columns = feature_cols)
        bias.index.name = 'p_miss'
        return bias

    def simulate_mice(self, M, G, p_miss = None):
        """
        Run a simulation with mice imputations.

        Parameters
        ----------
        M : int
            Number of simulated datasets (used to locate the files on disk).

        G : int
            Number of MICE imputations (used to locate the files on disk).

        p_miss : list, optional
            Proportions of missing values to evaluate.
            Defaults to [0.1, 0.2, 0.3, 0.4, 0.5].
        """
        self.p_miss = list(self._DEFAULT_P_MISS if p_miss is None else p_miss)
        self.G = G

        self.acc_train = []
        self.acc_test = []
        self.conc_train = []
        self.conc_test = []
        self.bias = []

        for miss in self.p_miss:
            print(f"Currently simulating for p_miss: {miss}")
            outcome = self._run_mice_imputations(M = M, p_miss = miss)
            train, test = outcome["train"], outcome["test"]

            self.acc_train.append(train['acc'])
            self.acc_test.append(test['acc'])
            self.conc_train.append(train['c_index'])
            self.conc_test.append(test['c_index'])
            self.bias.append(train['bias'])

    def get_mice_results(self):
        """
        Get the results of the mice imputation simulation.

        Returns
        -------
        dict
            A dictionary containing the accuracy, concordance index and bias for the multivariate imputation simulation.
            The dictionary values are dataframes.
        """

        return {'acc_train': self._get_mice_acc(self.acc_train),
                'acc_test': self._get_mice_acc(self.acc_test),
                'c_index_train': self._get_mice_c_index(self.conc_train),
                'c_index_test': self._get_mice_c_index(self.conc_test),
                'bias': self._get_mice_bias()}

class MiceSimulationMAR(MiceSimulationMV):
    """
    Subclass of MiceSimulationMV to simulate MAR missing values and evaluate
    their MICE imputations.

    ``_generate_mice_datasets`` saves the simulated datasets to disk for R to
    read. ``simulate_mice`` reads the MICE-imputed tensors back from disk and
    evaluates them, and ``get_mice_results`` formats the outcome (accuracy,
    concordance index, bias of the Cox PH weights) as one row per
    (p_miss, p_obs) combination.
    """

    # Values used when the caller supplies none.
    _DEFAULT_P_MISS = (0.1, 0.2, 0.3, 0.4, 0.5)
    _DEFAULT_P_OBS = (8/9, 7/9, 6/9, 5/9, 4/9)

    def __init__(self, X, duration_col, event_col, cat_colnames):
        super().__init__(X, duration_col, event_col, cat_colnames)

    @staticmethod
    def _mice_format_pair(mean, std):
        """Render a (mean, std) pair as 'mean ± std' with four decimals."""
        return f"{mean:.4f} ± {std:.4f}"

    @staticmethod
    def _mice_add_imputation_axis(arr, n_imputations):
        """Insert a new axis at position 2 and repeat `arr` along it."""
        return np.repeat(arr[:, :, np.newaxis], n_imputations, axis=2)

    def _mice_grid_index(self):
        """Build the (p_miss, p_obs) row index shared by all result tables."""
        pairs = [(miss, obs) for miss in self.p_miss for obs in self.p_obs]
        return pd.MultiIndex.from_tuples(pairs, names=['p_miss', 'p_obs'])

    def _simulate_MAR_dataset(self, M, p_miss, p_obs, save, sample_seed = 135135, column_seed = 115342):
        """
        Simulate M datasets with MAR missing values. See parent class for more details.
        """

        self._simulate_M_na_datasets(M = M,
                                     p_miss = p_miss,
                                     p_obs = p_obs,
                                     mecha = "MAR",
                                     sample_seed = sample_seed,
                                     column_seed = column_seed,
                                     save = save)

    def _generate_mice_datasets(self, M, p_miss = None, p_obs = None):
        """
        Generate datasets to save to disk for R to read.

        Parameters
        ----------
        M : int
            Number of datasets to generate.

        p_miss : list, optional
            List of proportions of missing values to generate for variables which
            will have missing values. Defaults to [0.1, 0.2, 0.3, 0.4, 0.5].

        p_obs : list, optional
            List of p_obs values forwarded to the MAR simulation.
            Defaults to [8/9, 7/9, 6/9, 5/9, 4/9].
        """

        p_miss = self._DEFAULT_P_MISS if p_miss is None else p_miss
        p_obs = self._DEFAULT_P_OBS if p_obs is None else p_obs

        # One set of simulated datasets per (p_miss, p_obs) combination.
        for miss in p_miss:
            for obs in p_obs:
                self._simulate_MAR_dataset(M = M, p_miss = miss, p_obs = obs, save = True)

    def _read_single_simulated_dataset(self, M, p_miss, p_obs):
        """
        Read from disk the simulated, unimputed, datasets used for imputing with MICE.

        Parameters
        ----------
        M : int
            Number of simulated datasets (part of the file name).

        p_miss : float
            Proportion of missing values (part of the file name).

        p_obs : float
            p_obs value of the MAR simulation (part of the file name).

        Returns
        -------
        dict
            Dictionary containing the train and test datasets, the mask and the event and duration.
        """

        # NOTE(security): allow_pickle=True unpickles object arrays, which can run
        # arbitrary code. Only load files written by this pipeline.
        X_init = {
            'train': np.load(f"../../data/R/simulated_datasets/mar/X_init_train_M{M}p_miss{p_miss}p_obs{p_obs}.npy", allow_pickle=True),
            'test': np.load(f"../../data/R/simulated_datasets/mar/X_init_test_M{M}p_miss{p_miss}p_obs{p_obs}.npy", allow_pickle=True),
        }
        X_mask = {
            'train': np.load(f"../../data/R/simulated_datasets/mar/X_mask_train_M{M}p_miss{p_miss}p_obs{p_obs}.npy", allow_pickle=True),
            'test': np.load(f"../../data/R/simulated_datasets/mar/X_mask_test_M{M}p_miss{p_miss}p_obs{p_obs}.npy", allow_pickle=True),
        }
        event_and_duration = {
            'train': np.load(f"../../data/R/simulated_datasets/mar/event_and_dur_train_M{M}p_miss{p_miss}p_obs{p_obs}.npy", allow_pickle=True),
            'test': np.load(f"../../data/R/simulated_datasets/mar/event_and_dur_test_M{M}p_miss{p_miss}p_obs{p_obs}.npy", allow_pickle=True),
        }

        return {'X_init': X_init, 'X_mask': X_mask, 'event_and_duration': event_and_duration}

    def _read_single_mice_tensor(self, p_miss, p_obs):
        """
        Read the train and test tensors representing mice imputations.
        They are of shape (n_train, d, G, M) or (n_test, d, G, M) respectively.
        G represents the number of imputed datasets by MICE, and M is the number
        of datasets generated with missing values.

        Requires ``self.G`` to be set (done by ``simulate_mice``).
        """

        # NOTE(security): see _read_single_simulated_dataset regarding allow_pickle.
        X_imp_train = np.load(f"../../data/R/imputed_datasets/mar/X_mice_tensor_train_G{self.G}_p_miss{p_miss}p_obs{p_obs}.npy", allow_pickle=True)
        X_imp_test = np.load(f"../../data/R/imputed_datasets/mar/X_mice_tensor_test_G{self.G}_p_miss{p_miss}p_obs{p_obs}.npy", allow_pickle=True)
        return {"train": self.convert_datatypes(X_imp_train),
                "test": self.convert_datatypes(X_imp_test)}

    def _evaluate_single_mice_acc(self, X_imp, X_true, X_mask):
        """
        Given a jth simulated dataset, evaluate the accuracy of the MICE imputations.

        The three arrays are indexed along their third axis, one slice per
        MICE imputation.

        Returns
        -------
        dict
            ``{"categorical": {"mice": (mean, std)}}`` where mean and std are
            taken over the imputations and rounded to four decimals.
        """

        scores = np.asarray(
            [accuracy(X_imp = X_imp[:, :, g],
                      X_true = X_true[:, :, g],
                      mask = X_mask[:, :, g])
             for g in range(X_imp.shape[2])],
            dtype = float)
        return {"categorical": {"mice": (scores.mean().round(4), scores.std().round(4))}}

    def _evaluate_mice_coxPH(self, X_imp_train, X_imp_test, event_and_duration_train, event_and_duration_test):
        """
        Evaluate the multivariate cox PH model on the multiple imputed datasets.
        Data is already decoded.

        Returns
        -------
        dict
            'c_index_train' and 'c_index_test' as (mean, std) over the
            imputations, and 'bias' mapping each feature to (mean, std) of the
            weight bias over the imputations.
        """

        # Append the event and duration columns to the covariates.
        full_train = np.concatenate((X_imp_train, event_and_duration_train), axis = 1)
        full_test = np.concatenate((X_imp_test, event_and_duration_test), axis = 1)
        columns = self.categorical_colnames + [self.duration_col, self.event_col]

        c_index_train = []
        c_index_test = []
        bias = []
        for g in range(full_train.shape[2]):
            data_train = pd.DataFrame(full_train[:, :, g], columns = columns)
            data_test = pd.DataFrame(full_test[:, :, g], columns = columns)
            weights, conc_train, conc_test = self._fit_single_cox_PH(data_train, data_test).values()
            c_index_train.append(conc_train)
            c_index_test.append(conc_test)
            bias.append(self._evaluate_bias_weights(weights))

        c_index_train = np.array(c_index_train)
        c_index_test = np.array(c_index_test)
        bias_df = pd.DataFrame(bias)
        bias = dict(zip(bias_df.columns, zip(bias_df.mean().round(4), bias_df.std().round(4))))

        return {'c_index_train': (c_index_train.mean().round(3), c_index_train.std().round(3)),
                'c_index_test': (c_index_test.mean().round(3), c_index_test.std().round(3)),
                'bias': bias}

    def _run_mice_imputations(self, M, p_miss, p_obs):
        """
        Run the evaluation of the MICE imputations for datasets with MAR missing values.
        Assumes method self._generate_mice_datasets(.) has been run and that the
        imputed tensors are available on disk.

        Parameters
        ----------
        M : int
            Number of simulated datasets (used to locate the files on disk).

        p_miss : float
            Proportion of missing values to generate for variables which will have missing values.

        p_obs : float
            p_obs value of the MAR simulation; rounded to four decimals before
            it is used in the file names.

        Returns
        ----------
        dict
            Keys 'train' and 'test'. Each value holds 'acc' (array
            [mean, std] under ["categorical"]["mice"]), 'c_index' ((mean, std))
            and 'bias' (feature -> (mean, std)).
        """
        p_obs = round(p_obs, 4)

        # Read the simulated datasets once instead of once per requested key.
        simulated = self._read_single_simulated_dataset(M, p_miss, p_obs)
        imputed = self._read_single_mice_tensor(p_miss, p_obs)
        imputed_train, imputed_test = imputed["train"], imputed["test"]

        # Recall imputations are tensors of dimensions (n, d, G, M); the loop
        # bounds are taken from the tensor itself.
        n_datasets = imputed_train.shape[3]
        n_imputations = imputed_train.shape[2]

        # We need G copies of the initial data and the mask because we have G
        # imputed datasets, so they are expanded to (n, d, G, M).
        X_init_train = self._mice_add_imputation_axis(simulated["X_init"]["train"], n_imputations)
        X_init_test = self._mice_add_imputation_axis(simulated["X_init"]["test"], n_imputations)
        X_mask_train = self._mice_add_imputation_axis(simulated["X_mask"]["train"], n_imputations)
        X_mask_test = self._mice_add_imputation_axis(simulated["X_mask"]["test"], n_imputations)
        event_and_duration_train = self._mice_add_imputation_axis(simulated["event_and_duration"]["train"], n_imputations)
        event_and_duration_test = self._mice_add_imputation_axis(simulated["event_and_duration"]["test"], n_imputations)

        acc_pairs_train = []
        acc_pairs_test = []
        c_index_train = []
        c_index_test = []
        bias = []

        for i in range(n_datasets):
            # accuracy
            acc_dict_train = self._evaluate_single_mice_acc(imputed_train[:, :, :, i], X_init_train[:, :, :, i], X_mask_train[:, :, :, i])
            acc_dict_test = self._evaluate_single_mice_acc(imputed_test[:, :, :, i], X_init_test[:, :, :, i], X_mask_test[:, :, :, i])
            acc_pairs_train.append(acc_dict_train["categorical"]["mice"])
            acc_pairs_test.append(acc_dict_test["categorical"]["mice"])

            # Bias and c-index
            cox = self._evaluate_mice_coxPH(imputed_train[:, :, :, i], imputed_test[:, :, :, i],
                                            event_and_duration_train[:, :, :, i], event_and_duration_test[:, :, :, i])
            c_index_train.append(cox['c_index_train'])
            c_index_test.append(cox['c_index_test'])
            bias.append(cox['bias'])

        # Take average of mean and std over the simulated datasets. A plain 1-D
        # array is stored (not np.matrix) so that [0] is the mean and [1] the std.
        acc_train = {"categorical": {"mice": np.asarray(acc_pairs_train, dtype = float).mean(axis = 0).round(4)}}
        acc_test = {"categorical": {"mice": np.asarray(acc_pairs_test, dtype = float).mean(axis = 0).round(4)}}

        c_index_train = self.mean_and_std_list(c_index_train)
        c_index_test = self.mean_and_std_list(c_index_test)

        bias_df = pd.DataFrame(bias)
        bias = {col: self.mean_and_std_df(bias_df[col]) for col in bias_df.columns}

        # The same bias summary is reported under both keys.
        train = {"acc": acc_train, "c_index": c_index_train, "bias": bias}
        test = {"acc": acc_test, "c_index": c_index_test, "bias": bias}
        return {"train": train, "test": test}

    def _get_mice_acc(self, acc_in):
        """
        Get the MICE accuracy for the imputed datasets.

        Parameters
        ----------
        acc_in : np.ndarray
            2-D object array of accuracy dictionaries, indexed by the position
            of p_miss (rows) and p_obs (columns).

        Returns
        -------
        pd.DataFrame
            DataFrame containing the accuracy for the imputed datasets of each
            (p_miss, p_obs) combination.
        """

        acc_cols = pd.MultiIndex.from_tuples([
                ('Categorical', 'mice')
                ])
        acc_row = []

        for i in range(len(self.p_miss)):
            for j in range(len(self.p_obs)):
                # ravel() accepts both a 1-D array and a 1x2 matrix.
                summary = np.asarray(acc_in[i, j]['categorical']['mice'], dtype = float).ravel()
                acc_row.append([self._mice_format_pair(summary[0], summary[1])])

        return pd.DataFrame(acc_row, index = self._mice_grid_index(), columns = acc_cols)

    def _get_mice_c_index(self, c_index_in):
        """
        Get the multivariate concordance index for the imputed datasets.

        Parameters
        ----------
        c_index_in : np.ndarray
            2-D object array of (mean, std) pairs, indexed by the position of
            p_miss (rows) and p_obs (columns).

        Returns
        -------
        pd.DataFrame
            DataFrame containing the concordance index for the imputed datasets
            of each (p_miss, p_obs) combination.
        """

        conc_row = []
        for i in range(len(self.p_miss)):
            for j in range(len(self.p_obs)):
                conc = c_index_in[i, j]
                conc_row.append([self._mice_format_pair(conc[0], conc[1])])

        return pd.DataFrame(conc_row, index = self._mice_grid_index(), columns = ['mice'])

    def _get_mice_bias(self):
        """
        Get the mice bias for the imputed datasets.

        Returns
        -------
        pd.DataFrame
            DataFrame containing the bias for the imputed datasets of each p_miss and p_obs.
        """

        feature_cols = list(self.bias[0][0].keys())
        bias_row = []

        for i in range(len(self.p_miss)):
            for j in range(len(self.p_obs)):
                bias_row.append([self._mice_format_pair(pair[0], pair[1])
                                 for pair in self.bias[i, j].values()])

        return pd.DataFrame(bias_row, index = self._mice_grid_index(), columns = feature_cols)

    def simulate_mice(self, M, G, p_miss = None, p_obs = None):
        """
        Run a simulation with mice imputations.

        Parameters
        ----------
        M : int
            Number of simulated datasets (used to locate the files on disk).

        G : int
            Number of MICE imputations (used to locate the files on disk).

        p_miss : list, optional
            Proportions of missing values to evaluate.
            Defaults to [0.1, 0.2, 0.3, 0.4, 0.5].

        p_obs : list, optional
            p_obs values to evaluate. Defaults to [8/9, 7/9, 6/9, 5/9, 4/9].
        """

        self.p_miss = list(self._DEFAULT_P_MISS if p_miss is None else p_miss)
        self.p_obs = list(self._DEFAULT_P_OBS if p_obs is None else p_obs)
        self.G = G

        grid_shape = (len(self.p_miss), len(self.p_obs))
        self.acc_train = np.empty(grid_shape, dtype=object)
        self.acc_test = np.empty(grid_shape, dtype=object)
        self.conc_train = np.empty(grid_shape, dtype=object)
        self.conc_test = np.empty(grid_shape, dtype=object)
        self.bias = np.empty(grid_shape, dtype=object)

        for i, miss in enumerate(self.p_miss):
            print(f"Currently simulating for p_miss: {miss}")
            for j, obs in enumerate(self.p_obs):
                outcome = self._run_mice_imputations(M = M, p_miss = miss, p_obs=obs)
                train, test = outcome["train"], outcome["test"]

                self.acc_train[i, j] = train['acc']
                self.acc_test[i, j] = test['acc']
                self.conc_train[i, j] = train['c_index']
                self.conc_test[i, j] = test['c_index']
                self.bias[i, j] = train['bias']

    def get_mice_results(self):
        """
        Get the results of the mice imputation simulation.

        Returns
        -------
        dict
            A dictionary containing the accuracy, concordance index and bias for the multivariate imputation simulation.
            The dictionary values are dataframes.
        """
        return {'acc_train': self._get_mice_acc(self.acc_train),
                'acc_test': self._get_mice_acc(self.acc_test),
                'c_index_train': self._get_mice_c_index(self.conc_train),
                'c_index_test': self._get_mice_c_index(self.conc_test),
                'bias': self._get_mice_bias()}

## Reading data

In [7]:
# Load the colon dataset; the first CSV column becomes the index.
df = pd.read_csv('../../data/colon/colon.csv', sep=',', index_col=0)

# Keep only the etype == 2 records, discard the node4 column and drop
# every row that still has a missing value.
df = df[df['etype'] == 2].drop('node4', axis=1).dropna(axis=0)

cox_ph_cols = df.columns.drop(['id', 'study', 'age', 'etype'])

# Remove the columns that are not used, then renumber the rows.
to_keep = [col for col in df.columns if col not in ['etype', 'study', 'id', 'age']]
df = df[to_keep].reset_index(drop=True)

# Recode the numeric codes as labels ('differ' keeps its numeric coding).
label_maps = {
    'sex': {1: 'M', 0: 'F'},
    'obstruct': {1: 'Y', 0: 'N'},
    'perfor': {1: 'Y', 0: 'N'},
    'adhere': {1: 'Y', 0: 'N'},
    'extent': {1: 'Submucosa', 2: 'Muscule', 3: 'Serosa', 4: 'Contiguous_structures'},
    'surg': {1: 'L', 0: 'S'},
    'status': {1: True, 0: False},
}
for column_name, mapping in label_maps.items():
    df[column_name] = df[column_name].map(mapping)

# Bin the node counts into four ordered groups: (-1, 1], (1, 3], (3, 7], (7, 100].
df["nodes"] = pd.cut(df["nodes"], bins = [-1, 1, 3, 7, 100], labels = [1.0, 2.0, 3.0, 4.0])
df.head()


Unnamed: 0,rx,sex,obstruct,perfor,adhere,nodes,status,differ,extent,surg,time
0,Lev+5FU,M,N,N,N,3.0,True,2.0,Serosa,S,1521
1,Lev+5FU,M,N,N,N,1.0,False,2.0,Serosa,S,3087
2,Obs,F,N,N,Y,3.0,True,2.0,Muscule,S,963
3,Lev+5FU,F,Y,N,N,3.0,True,2.0,Serosa,L,293
4,Obs,M,N,N,N,4.0,True,2.0,Serosa,L,659


In [6]:
# No numerical covariates are listed; every covariate below is categorical.
num_colnames = []
cat_colnames = [
    'rx',
    'sex',
    'obstruct',
    'perfor',
    'adhere',
    'nodes',
    'differ',
    'extent',
    'surg',
]

## Running simulations

### Generate data for R

In [17]:
# Values passed as the M and G arguments of the simulations.
M = G = 50

# Grids of p_miss and p_obs values handed to the simulations.
p_miss = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
p_obs = [ninths / 9 for ninths in range(8, 2, -1)]  # 8/9, 7/9, ..., 3/9

In [18]:
# Build the MCAR simulator and generate its datasets for every p_miss level
# (presumably consumed by the R workflow, per the section heading).
to_R = MiceSimulationMCAR(
    df,
    duration_col='time',
    event_col='status',
    cat_colnames=cat_colnames,
)
to_R._generate_mice_datasets(M=M, p_miss=p_miss)

In [None]:
# Build the MAR simulator and generate its datasets for every p_miss / p_obs
# level (presumably consumed by the R workflow, per the section heading).
to_R = MiceSimulationMAR(df, duration_col = 'time', event_col = 'status', cat_colnames = cat_colnames)
# Use the shared M setting rather than a hard-coded 50, so the MAR datasets stay
# in step with the MCAR datasets and the simulations whenever M is changed.
to_R._generate_mice_datasets(M = M, p_miss = p_miss, p_obs = p_obs)

### MCAR

In [None]:
# Run the MCAR simulation over the whole p_miss grid.
mice_mcar = MiceSimulationMCAR(
    df,
    duration_col='time',
    event_col='status',
    cat_colnames=cat_colnames,
)
mice_mcar.simulate_mice(M=M, G=G, p_miss=p_miss)

In [30]:
# Fetch the MCAR summaries by key and keep them available under the usual names.
mcar_results = mice_mcar.get_mice_results()
acc_train = mcar_results['acc_train']
acc_test = mcar_results['acc_test']
conc_train = mcar_results['c_index_train']
conc_test = mcar_results['c_index_test']
bias = mcar_results['bias']

# Write each summary to its own CSV file under ../results.
for metric_frame, csv_path in (
    (acc_train, f'../results/acc_train_MCAR{M}_MICE.csv'),
    (acc_test, f'../results/acc_test_MCAR{M}_MICE.csv'),
    (conc_train, f'../results/c_index_train_MCAR{M}_MICE.csv'),
    (conc_test, f'../results/c_index_test_MCAR{M}_MICE.csv'),
    (bias, f'../results/bias_MCAR{M}_MICE.csv'),
):
    metric_frame.to_csv(csv_path)

### MAR

In [31]:
# Run the MAR simulation over the whole p_miss / p_obs grid.
mice_mar = MiceSimulationMAR(
    df,
    duration_col='time',
    event_col='status',
    cat_colnames=cat_colnames,
)
mice_mar.simulate_mice(M=M, G=G, p_miss=p_miss, p_obs=p_obs)

Currently simulating for p_miss: 0.05
Currently simulating for p_miss: 0.1
Currently simulating for p_miss: 0.2
Currently simulating for p_miss: 0.3
Currently simulating for p_miss: 0.4
Currently simulating for p_miss: 0.5


In [32]:
# Fetch the MAR summaries by key and keep them available under the usual names.
mar_results = mice_mar.get_mice_results()
acc_train = mar_results['acc_train']
acc_test = mar_results['acc_test']
conc_train = mar_results['c_index_train']
conc_test = mar_results['c_index_test']
bias = mar_results['bias']

# Write each summary to its own CSV file under ../results.
for metric_frame, csv_path in (
    (acc_train, f'../results/acc_train_MAR{M}_MICE.csv'),
    (acc_test, f'../results/acc_test_MAR{M}_MICE.csv'),
    (conc_train, f'../results/c_index_train_MAR{M}_MICE.csv'),
    (conc_test, f'../results/c_index_test_MAR{M}_MICE.csv'),
    (bias, f'../results/bias_MAR{M}_MICE.csv'),
):
    metric_frame.to_csv(csv_path)