# Manual Implementation of Little's MCAR Test

In [3]:
import numpy as np
import pandas as pd
from scipy.stats import chi2

# Helper function to calculate Little's MCAR test
def little_mcar_test(data):
  n = len(data)
  groups = []
  for col in data.columns:
    mask = data[col].isnull()
    if mask.any():
      groups.append(mask.astype(int).values.reshape(-1, 1))
  if len(groups) == 0:
    raise ValueError("No missing data found.")
  r = np.concatenate(groups, axis=1)
  group_stats = r.T @ r
  m = len(groups)
  df = (n - 1) * m
  chi2_stat = group_stats.trace()
  p_value = chi2.sf(chi2_stat, df)
  if p_value < 0.05:
    print("The data are not MCAR.")
  else:
    print("The data are MCAR.")

  return {"chi2_stat": chi2_stat, "degrees_of_freedom": df, "p_value": p_value}

data = pd.read_csv('/content/House Prices.csv')
little_mcar_test(data)

The data are MCAR.


{'chi2_stat': 7829, 'degrees_of_freedom': 27721, 'p_value': 1.0}

**Interpreting the Results**

Chi-square statistic: This measures how much the observed missing data pattern deviates from what would be expected if the data were
MCAR.

Degrees of freedom: The number of independent pieces of information used to calculate the chi-square statistic.

P-value: If this value is below a certain threshold (commonly 0.05), it suggests that the data are not MCAR.

