In [1]:
import pandas as pd
import numpy as np
from missmecha.generator import MissMechaGenerator

# Seed NumPy's global random state so the synthetic table is the same on every run.
SEED = 42
N_ROWS = 100
np.random.seed(SEED)

# Draw the columns one at a time, in the order they appear in the frame.
# Every draw advances the shared global random state, so this order fixes the values.
age_values = np.random.randint(20, 60, size=N_ROWS)  # integers, upper bound 60 excluded
income_values = np.random.normal(60000, 10000, size=N_ROWS)  # mean 60000, std 10000
gender_values = np.random.choice(["M", "F"], size=N_ROWS)

# Toy dataset used by the examples below: two numeric columns and one string column.
df = pd.DataFrame(
    {"age": age_values, "income": income_values, "gender": gender_values}
)

### Check Number of Available Mechanisms

In [2]:
from missmecha.generator import MECHANISM_LOOKUP

# MECHANISM_LOOKUP is indexed by mechanism family; report how many entries
# each of the three families holds, one line per family.
for family in ("mcar", "mar", "mnar"):
    print(f"Supported {family.upper()} types: {len(MECHANISM_LOOKUP[family])}")

Supported MCAR types: 3
Supported MAR types: 8
Supported MNAR types: 6


### Global Mechanism Simulation

In [3]:
# Run this example on its own copy of the synthetic frame.
global_df = df.copy()

# A single generator configured for the whole table: the "mcar" mechanism with
# a missing rate of 0.3, and "gender" listed in cat_cols.
gen_global = MissMechaGenerator(
    mechanism="mcar",
    missing_rate=0.3,
    cat_cols=["gender"],
)
df_missing_global = gen_global.fit_transform(global_df)

# Show the first rows of the frame with injected missing values.
df_missing_global.head()


Unnamed: 0,age,income,gender
0,58.0,58222.678787,
1,48.0,55896.166913,M
2,34.0,71797.163447,F
3,,51017.920605,F
4,40.0,68347.954192,F


### Column-wise Different Mechanisms (Using `info`)

In [4]:
# Run this example on its own copy of the synthetic frame.
columnwise_df = df.copy()

# One settings dict per column; each names a mechanism family, a type number
# within that family, and a missing rate.
age_spec = {"mechanism": "mcar", "type": 1, "rate": 0.2}

# income missing depends on age
income_spec = {"mechanism": "mar", "type": 3, "rate": 0.3, "depend_on": "age"}

# The gender entry also carries a "parameter" dict with extra settings.
# NOTE(review): the meaning of q / p / cut is defined by the library's MNAR
# type 4 implementation, not by this notebook; confirm against its docs.
gender_spec = {
    "mechanism": "mnar",
    "type": 4,
    "rate": 0.4,
    "parameter": {"q": 0.1, "p": 0.8, "cut": "low"},
}

# Column name -> settings, listed in the same order as the frame's columns.
info = {
    "age": age_spec,
    "income": income_spec,
    "gender": gender_spec,
}

# With `info` supplied, no global mechanism or missing rate is passed.
# The recorded output shows a "[MARType3] No label provided. Using synthetic
# labels instead." notice when this cell runs.
gen_info = MissMechaGenerator(info=info, cat_cols=["gender"])
df_missing_info = gen_info.fit_transform(columnwise_df)

# Show the first rows of the frame with injected missing values.
df_missing_info.head()

[MARType3] No label provided. Using synthetic labels instead.


Unnamed: 0,age,income,gender
0,,58222.678787,
1,48.0,,M
2,,71797.163447,F
3,27.0,51017.920605,F
4,40.0,,


### Access Missingness Mask

In [5]:
# Number of leading rows to print from each mask.
preview_rows = 5

# Binary mask: 1 = observed, 0 = missing
mask = gen_info.get_mask()
print(mask[:preview_rows])

# Boolean mask: True = observed, False = missing
bool_mask = gen_info.get_bool_mask()
print(bool_mask[:preview_rows])


[[0 1 0]
 [1 0 1]
 [0 1 1]
 [1 1 1]
 [1 0 0]]
[[False  True False]
 [ True False  True]
 [False  True  True]
 [ True  True  True]
 [ True False False]]
