In [1]:
import pandas as pd
import numpy as np

# Step 1: Original complete dataset
data = {
    "Age": [20, 25, 30, 35, 40, 45, 50, 55],
    "Salary": [20000, 25000, 30000, 35000, 40000, 45000, 50000, 55000],
    "Experience": [1, 2, 3, 4, 5, 6, 7, 8]
}

df = pd.DataFrame(data)
print("Original Data:\n")
print(df)

# Step 2: Introduce MCAR missing values
# MCAR = every cell is dropped with the same probability, independent of
# any observed or unobserved value.
np.random.seed(42)          # for reproducibility
missing_rate = 0.25         # each cell is independently blanked with p = 0.25

# NaN is a float, so the integer columns are cast to float *before* any NaN
# is written. Assigning NaN into an int64 column relies on silent upcasting,
# which pandas deprecated in 2.1 (FutureWarning) and intends to turn into an
# error. `astype` returns a new frame, so `df` itself is left untouched.
df_mcar = df.astype(float)

for col in df_mcar.columns:
    # One independent uniform draw per row; the column order fixes the order
    # in which the random stream is consumed.
    mask = np.random.rand(len(df_mcar)) < missing_rate
    df_mcar.loc[mask, col] = np.nan

# Step 3: Display MCAR data
print("\nData After Applying MCAR:\n")
print(df_mcar)


Original Data:

   Age  Salary  Experience
0   20   20000           1
1   25   25000           2
2   30   30000           3
3   35   35000           4
4   40   40000           5
5   45   45000           6
6   50   50000           7
7   55   55000           8

Data After Applying MCAR:

    Age   Salary  Experience
0  20.0  20000.0         1.0
1  25.0  25000.0         2.0
2  30.0      NaN         3.0
3  35.0  35000.0         4.0
4   NaN  40000.0         5.0
5   NaN      NaN         NaN
6   NaN      NaN         7.0
7  55.0      NaN         8.0


In [2]:
import pandas as pd
import numpy as np

# Step 1: Original complete dataset
data = {
    "Age": [22, 25, 28, 30, 35, 40, 45, 50],
    "Salary": [22000, 25000, 28000, 30000, 35000, 40000, 45000, 50000],
    "Experience": [1, 2, 3, 4, 5, 6, 7, 8]
}

df = pd.DataFrame(data)
print("Original Data:\n")
print(df)

# Step 2: Implement MAR
# Salary is missing depending on Age (observed variable)

# Only Salary will receive NaN, so only that column is cast to float up
# front. Writing NaN into an int64 column relies on silent upcasting, which
# pandas deprecated in 2.1 (FutureWarning) and intends to turn into an error.
# `astype` returns a new frame, so `df` itself is left untouched.
df_mar = df.astype({"Salary": float})
np.random.seed(10)

# Per-row probability of losing Salary: 0.6 for Age < 30 (younger people),
# 0.1 otherwise (older people).
missing_prob = np.where(df_mar["Age"] < 30, 0.6, 0.1)

# One uniform draw per row, taken in row order from the seeded stream.
draws = np.random.rand(len(df_mar))

df_mar.loc[draws < missing_prob, "Salary"] = np.nan

# Step 3: Display MAR data
print("\nData After Applying MAR:\n")
print(df_mar)


Original Data:

   Age  Salary  Experience
0   22   22000           1
1   25   25000           2
2   28   28000           3
3   30   30000           4
4   35   35000           5
5   40   40000           6
6   45   45000           7
7   50   50000           8

Data After Applying MAR:

   Age   Salary  Experience
0   22  22000.0           1
1   25      NaN           2
2   28  28000.0           3
3   30  30000.0           4
4   35  35000.0           5
5   40  40000.0           6
6   45  45000.0           7
7   50  50000.0           8
