# Simulating Missing Data with `MissMechaGenerator`

This notebook demonstrates how to simulate missing data using the `MissMechaGenerator` from the `missmecha` package.

We use:
- A complete synthetic dataset (`data_num`)
- A single missingness mechanism applied globally across all features
- No per-column customization

For each configuration, we display:
- The generated missing data
- The overall missingness rate
- Little's MCAR test result (to assess if data *may* be MCAR)

## Setup
Import required packages


In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from missmecha import MissMechaGenerator
from missmecha.analysis import compute_missing_rate, MCARTest

### Generate Complete Data


In [2]:
# Create a synthetic numeric dataset
data_num = np.random.default_rng(1).normal(loc=0.0, scale=1.0, size=(1000, 10))
data_num[:5]

array([[ 0.34558419,  0.82161814,  0.33043708, -1.30315723,  0.90535587,
         0.44637457, -0.53695324,  0.5811181 ,  0.3645724 ,  0.2941325 ],
       [ 0.02842224,  0.54671299, -0.73645409, -0.16290995, -0.48211931,
         0.59884621,  0.03972211, -0.29245675, -0.78190846, -0.25719224],
       [ 0.00814218, -0.27560291,  1.29406381,  1.00672432, -2.71116248,
        -1.88901325, -0.17477209, -0.42219041,  0.213643  ,  0.21732193],
       [ 2.11783876, -1.11202076, -0.37760501,  2.04277161,  0.646703  ,
         0.66306337, -0.51400637, -1.64807517,  0.16746474,  0.10901409],
       [-1.22735205, -0.68322666, -0.07204368, -0.94475162, -0.09826997,
         0.09548303,  0.03558624, -0.50629166,  0.59374807,  0.89116695]])

### Train/Test Split

In [3]:
X_train, X_test = train_test_split(data_num, test_size=0.3, random_state=42)

### Run Simulations Across Mechanism Types

In [4]:
missing_type = "mcar"
mechanism_type_list = [1, 2, 3]
missing_rate_list = [0.3, 0.7]

for mechanism_type in mechanism_type_list:
    for missing_rate in missing_rate_list:
        print(f"Mechanism: {missing_type.upper()}-{mechanism_type} | Missing rate: {missing_rate}")
        
        # Initialize generator
        mecha = MissMechaGenerator(
            mechanism=missing_type,
            mechanism_type=mechanism_type,
            missing_rate=missing_rate,
            seed=42
        )

        # Fit and apply
        X_missing = mecha.fit_transform(X_train)

        # Report missing rate
        compute_missing_rate(X_missing)

        # Run Little's test
        pval = MCARTest(method="little")(X_missing)
        print("-----------------------------------------------------------")

Mechanism: MCAR-1 | Missing rate: 0.3


AttributeError: 'MissMechaGenerator' object has no attribute 'fit_transform'

In [None]:
missing_type = "mar"
mechanism_type_list = [1, 2, 3]
missing_rate_list = [0.3, 0.7]

for mechanism_type in mechanism_type_list:
    for missing_rate in missing_rate_list:
        print(f"Mechanism: {missing_type.upper()}-{mechanism_type} | Missing rate: {missing_rate}")
        
        # Initialize generator
        mecha = MissMechaGenerator(
            mechanism=missing_type,
            mechanism_type=mechanism_type,
            missing_rate=missing_rate,
            seed=42
        )

        # Fit and apply
        X_missing = mecha.fit_transform(X_train)

        # Report missing rate
        compute_missing_rate(X_missing)

        # Run Little's test
        pval = MCARTest(method="little")(X_missing)
        print("-----------------------------------------------------------")

In [None]:
missing_type = "mnar"
mechanism_type_list = [1, 2, 3]
missing_rate_list = [0.3, 0.7]

for mechanism_type in mechanism_type_list:
    for missing_rate in missing_rate_list:
        print(f"Mechanism: {missing_type.upper()}-{mechanism_type} | Missing rate: {missing_rate}")
        
        # Initialize generator
        mecha = MissMechaGenerator(
            mechanism=missing_type,
            mechanism_type=mechanism_type,
            missing_rate=missing_rate,
            seed=42
        )

        # Fit and apply
        X_missing = mecha.fit_transform(X_train)

        # Report missing rate
        compute_missing_rate(X_missing)

        # Run Little's test
        pval = MCARTest(method="little")(X_missing)
        print("-----------------------------------------------------------")