In [5]:
import numpy as np
import pulp
import scipy

def balanced_pair_matching(treated_idx, untreated_idx, distance_matrix, Bp, Be):
    """
    Solves the balanced pair matching problem using Integer Linear Programming (ILP).
    
    Parameters:
        treated_idx (list): Indices of treated patients.
        untreated_idx (list): Indices of untreated patients.
        distance_matrix (numpy.ndarray): Pairwise Mahalanobis distance matrix.
        Bp (numpy.ndarray): Binary attribute matrix for treated patients (size: |T| x K).
        Be (numpy.ndarray): Binary attribute matrix for untreated patients (size: |E| x K).
    
    Returns:
        list: List of matched pairs (treated, untreated).
    """
    model = pulp.LpProblem("Balanced_Pair_Matching", pulp.LpMinimize)
    
    # Decision variables
    # understand this
    x = {(p, q): pulp.LpVariable(f"x_{p}_{q}", cat='Binary') 
         for p in treated_idx for q in untreated_idx}
    
    # Objective function: Minimize total Mahalanobis distance
    model += pulp.lpSum(distance_matrix[p, q] * x[p, q] for p in treated_idx for q in untreated_idx)
    
    # Constraint: Each untreated patient is matched exactly once
    for q in untreated_idx:
        model += pulp.lpSum(x[p, q] for p in treated_idx) == 1
    
    # Constraint: Each treated patient is matched at most once
    for p in treated_idx:
        model += pulp.lpSum(x[p, q] for q in untreated_idx) <= 1



    # Balance constraints for each binary attribute k
    # understand this
    K = Bp.shape[1]  # Number of binary attributes
    for k in range(K):
        print("Bp keys:", Bp.shape)  # See what indices are available
        print("p:", p, "k:", k)  # See which value caused the error
        model += pulp.lpSum(Bp[p, k] * x[p, q] for p in treated_idx for q in untreated_idx) == \
                 pulp.lpSum(Be[q, k] * x[p, q] for p in treated_idx for q in untreated_idx)
    
    # Solve the optimization problem
    model.solve()
    
    # Extract matched pairs
    matched_pairs = [(p, q) for p in treated_idx for q in untreated_idx if pulp.value(x[p, q]) == 1]
    
    return matched_pairs

# Example usage
num_patients = 400
num_features = 6
num_attributes = 3  # Number of binary attributes

# Simulated patient data
X = np.random.rand(num_patients, num_features)
treatment_labels = np.random.choice([0, 1], size=num_patients)
treated_idx = np.where(treatment_labels == 1)[0]
untreated_idx = np.where(treatment_labels == 0)[0]

# Binary attributes for treated and untreated patients
# right now this is wrong because the value of Be is dependent on the treated patient our untreated patient is paired with.
# understand this
Bp = np.random.randint(0, 2, size=(len(treated_idx), num_attributes))
Be = np.random.randint(0, 2, size=(len(untreated_idx), num_attributes))

# Compute distance matrix
cov_matrix = np.cov(X.T)
VI = np.linalg.inv(cov_matrix)
distance_matrix = scipy.spatial.distance.cdist(X, X, metric='mahalanobis', VI=VI)

# Solve balanced pair matching
# the error started here
matched_pairs = balanced_pair_matching(treated_idx, untreated_idx, distance_matrix, Bp, Be)
print("Balanced Pair Matching:", matched_pairs)


Bp keys: (202, 3)
p: 398 k: 0


IndexError: index 202 is out of bounds for axis 0 with size 202