# Nearest Neighbor Propensity Score Matching (PSM)
This R function create_matched_cohort() uses the MatchIt package to perform 1:1 nearest neighbor matching based on a logistic regression-derived propensity score. The goal is to create a matched cohort for comparing individuals with and without a COVID-19 diagnosis prior to delivery, balancing key baseline characteristics.

Input: File_add_bin_age – a data frame containing the binary treatment indicator and the binary/categorical/numeric covariates listed below.

Method: Logistic regression to estimate propensity scores; nearest neighbor matching (method = "nearest", ratio = 1).

Covariates: Age, race, vaccine doses, obesity, hypertension, lung disease, smoking, diabetes, and healthcare utilization.

Output: A matched dataset (returned by the function) and a Love plot (printed) showing absolute standardized mean differences before and after matching.

It also prints a summary of balance statistics to assess covariate balance post-matching.

The MatchIt package performs the nearest neighbor matching on the propensity score; the cobalt package draws the Love plot.

In [None]:
# Create matched cohort
library(MatchIt)
library(cobalt)


#' Build a 1:1 propensity-score-matched cohort and report covariate balance.
#'
#' Fits a propensity score model for the COVID-19 diagnosis indicator on a
#' fixed set of baseline covariates, performs 1:1 nearest neighbor matching
#' with MatchIt, prints a Love plot (cobalt/ggplot2) and the MatchIt summary,
#' and returns the matched data.
#'
#' All package functions are namespace-qualified so the function works even
#' when ggplot2 (or MatchIt/cobalt) has not been attached with `library()`.
#'
#' @param File_add_bin_age A data frame holding the treatment indicator column
#'   and every covariate column named in the function body.
#' @return The matched data frame produced by `MatchIt::match.data()`.
#'   Stops with an error if the input is not a data frame or if any required
#'   column is missing.
create_matched_cohort <- function(File_add_bin_age) {
    if (!is.data.frame(File_add_bin_age)) {
        stop("`File_add_bin_age` must be a data frame.", call. = FALSE)
    }

    # Treatment (exposure) indicator used as the response of the PS model
    treatment <- "LL_COVID_diagnosis_1y_during_1y_prior_to_1st_delivery_indicator"

    # Baseline covariates entering the propensity score model
    covariates <- c(
        "age",
        "race_White",
        "total_number_of_COVID_vaccine_doses",
        "OBESITY_1y_during_1y_prior_to_1st_delivery_indicator",
        "hypertension_1y_during_1y_prior_to_1st_delivery_indicator",
        "chronic_lung_disease_1y_during_1y_prior_to_1st_delivery_indicator",
        "tobacco_smoker_1y_during_1y_prior_to_1st_delivery_indicator",
        "diabetes_combined_1y_during_1y_prior_to_1st_delivery_indicator",
        "total_visits"
    )

    # Fail early with a readable message instead of an opaque error raised
    # from inside the model-fitting code.
    missing_cols <- setdiff(c(treatment, covariates), names(File_add_bin_age))
    if (length(missing_cols) > 0) {
        stop(
            "`File_add_bin_age` is missing required column(s): ",
            paste(missing_cols, collapse = ", "),
            call. = FALSE
        )
    }

    # treatment ~ covariate_1 + covariate_2 + ...
    ps_formula <- stats::reformulate(covariates, response = treatment)

    # Nearest neighbor matching on the propensity score.
    # NOTE(review): newer MatchIt releases document the logistic PS model as
    # distance = "glm" with link = "logit"; "logit" is kept here to preserve
    # the original call -- confirm against the installed MatchIt version.
    match_out <- MatchIt::matchit(
        formula = ps_formula,
        data = File_add_bin_age,
        method = "nearest",   # for 1:1 PSM
        distance = "logit",   # logistic model for PS
        ratio = 1             # 1:1 matching
    )

    matched_df <- MatchIt::match.data(match_out)

    # Display labels for the Love plot (names = column names in the data)
    var_labels <- c(
        age = "Age",
        race_White = "White Race",
        total_number_of_COVID_vaccine_doses = "Respiratory\nDisease\nVaccine Doses",
        OBESITY_1y_during_1y_prior_to_1st_delivery_indicator = "Obesity",
        hypertension_1y_during_1y_prior_to_1st_delivery_indicator = "Hypertension",
        chronic_lung_disease_1y_during_1y_prior_to_1st_delivery_indicator = "Chronic\nLung Disease",
        tobacco_smoker_1y_during_1y_prior_to_1st_delivery_indicator = "Tobacco Use",
        diabetes_combined_1y_during_1y_prior_to_1st_delivery_indicator = "Diabetes",
        total_visits = "Total Visits"
    )

    # Love plot of absolute standardized mean differences with custom labels
    # and enlarged fonts/points; the dashed line marks the 0.1 threshold.
    love_plot <- cobalt::love.plot(
        match_out,
        binary = "std",
        threshold = 0.1,
        var.names = var_labels,
        abs = TRUE,
        size = 7
    ) +
        ggplot2::theme_bw(base_size = 18) +
        ggplot2::theme(
            legend.position = "right",
            legend.title = ggplot2::element_text(size = 22),
            legend.text = ggplot2::element_text(size = 20),
            axis.title = ggplot2::element_text(size = 22),
            axis.title.y = ggplot2::element_text(size = 20),
            axis.text.x = ggplot2::element_text(size = 20),
            axis.text.y = ggplot2::element_text(size = 20)
        ) +
        ggplot2::scale_x_continuous(
            breaks = seq(0, 1, by = 0.1),
            limits = c(0, 1)
        ) +
        ggplot2::geom_vline(
            xintercept = 0.1,
            linetype = "dashed",
            color = "darkblue",
            linewidth = 1.2
        ) +
        ggplot2::guides(
            color = ggplot2::guide_legend(override.aes = list(size = 6))
        )

    print(love_plot)

    # Print the matching results (sample sizes and balance statistics)
    print(summary(match_out))

    matched_df
}