# Medicare Advantage: Star Ratings Data

This notebook reproduces the GA 2022 Medicare Advantage star ratings example. It merges plan data with contract-level ratings on `contractid` and constructs a simple `raw_rating` measure (the row-wise mean of the individual rating columns, ignoring missing values) in both R and Python.

In [None]:
!pip -q install rpy2
%load_ext rpy2.ipython

## R: Load data, merge, and construct `raw_rating`

In [None]:
%%R
library(tidyverse)

# Plan data for GA; its partc_score column is removed before the merge.
# NOTE(review): presumably so that the partc_score kept below is the one
# supplied by the ratings file -- confirm against the two CSV schemas.
ga_ma_2022 <- select(
  read_csv("https://raw.githubusercontent.com/imccart/empirical-methods-content/main/data/output/ma-snippets/ga-ma-data-2022.csv"),
  -partc_score
)

# Ratings data, joined to the plan data by contractid below.
ma_ratings <- read_csv("https://raw.githubusercontent.com/imccart/empirical-methods-content/main/data/output/ma-snippets/ga-ratings-2022.csv")

# Columns that are averaged to form raw_rating.
rating_cols <- c(
  "breastcancer_screen", "rectalcancer_screen", "flu_vaccine",
  "physical_monitor", "specialneeds_manage", "older_medication", "older_pain",
  "osteo_manage", "diabetes_eye", "diabetes_kidney", "diabetes_bloodsugar",
  "ra_manage", "falling", "bladder", "medication", "statin", "nodelays",
  "carequickly", "customer_service", "overallrating_care", "overallrating_plan",
  "coordination", "complaints_plan", "leave_plan", "improve", "appeals_timely",
  "appeals_review", "ttyt_available"
)

# Columns retained in the final data set, in output order.
keep_cols <- c(
  "contractid", "planid", "fips", "plan_type", "partd", "avg_enrollment",
  "avg_eligibles", "avg_enrolled", "premium", "premium_partc", "premium_partd",
  "rebate_partc", "ma_rate", "bid", "avg_ffscost", "partc_score",
  "partcd_score", "raw_rating"
)

# Left join: every row of the plan data is kept.
ga_ma_merged <- left_join(ga_ma_2022, ma_ratings, by = "contractid")

# raw_rating is the row-wise mean of the rating columns; NAs are skipped.
ga_ma_merged$raw_rating <- rowMeans(
  as.matrix(ga_ma_merged[rating_cols]),
  na.rm = TRUE
)

ga_ma_full <- select(ga_ma_merged, all_of(keep_cols))

# Quick look at the data
head(ga_ma_full)

## Python: Load data, merge, and construct `raw_rating`

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Plan data; any partc_score column in this file is dropped (no error if absent).
ga_ma_2022_py = pd.read_csv(
    "https://raw.githubusercontent.com/imccart/empirical-methods-content/main/data/output/ma-snippets/ga-ma-data-2022.csv"
).drop(columns=["partc_score"], errors="ignore")

# Ratings data, merged onto the plan data by contractid below.
ma_ratings_py = pd.read_csv(
    "https://raw.githubusercontent.com/imccart/empirical-methods-content/main/data/output/ma-snippets/ga-ratings-2022.csv"
)

# Columns averaged to form raw_rating.
rating_vars = [
    "breastcancer_screen",
    "rectalcancer_screen",
    "flu_vaccine",
    "physical_monitor",
    "specialneeds_manage",
    "older_medication",
    "older_pain",
    "osteo_manage",
    "diabetes_eye",
    "diabetes_kidney",
    "diabetes_bloodsugar",
    "ra_manage",
    "falling",
    "bladder",
    "medication",
    "statin",
    "nodelays",
    "carequickly",
    "customer_service",
    "overallrating_care",
    "overallrating_plan",
    "coordination",
    "complaints_plan",
    "leave_plan",
    "improve",
    "appeals_timely",
    "appeals_review",
    "ttyt_available",
]

# Columns retained in the final frame, in output order.
keep_cols = [
    "contractid", "planid", "fips", "plan_type", "partd", "avg_enrollment",
    "avg_eligibles", "avg_enrolled", "premium", "premium_partc",
    "premium_partd", "rebate_partc", "ma_rate", "bid", "avg_ffscost",
    "partc_score", "partcd_score", "raw_rating",
]

# Left-merge on contractid, add the row-wise mean of the rating columns
# (missing values skipped), then keep only the columns of interest.
ga_ma_full_py = (
    ga_ma_2022_py
    .merge(ma_ratings_py, on="contractid", how="left")
    .assign(raw_rating=lambda merged: merged[rating_vars].mean(axis=1, skipna=True))
    .loc[:, keep_cols]
)

# Quick look at the data
ga_ma_full_py.head()

## Raw vs rounded ratings: scatterplots

In [None]:
%%R
library(ggplot2)

# Scatter of the constructed raw_rating (x) against partc_score (y),
# with semi-transparent points so overlapping plans remain visible.
rating_scatter <- ggplot(
  data = ga_ma_full,
  mapping = aes(x = raw_rating, y = partc_score)
) +
  geom_point(alpha = 0.6) +
  xlab("Raw score") +
  ylab("Rounded star rating") +
  ggtitle("Raw score vs rounded star rating (R)") +
  theme_minimal()

print(rating_scatter)

In [None]:
# Python scatterplot: raw_rating on the x-axis, partc_score on the y-axis
fig, ax = plt.subplots()
ax.scatter(
    x=ga_ma_full_py["raw_rating"],
    y=ga_ma_full_py["partc_score"],
    alpha=0.6,  # semi-transparent so overlapping points stay visible
)
ax.set(
    xlabel="Raw score",
    ylabel="Rounded star rating",
    title="Raw score vs rounded star rating (Python)",
)
plt.show()

## Rounding around the 3.75 threshold

In [None]:
%%R
# Count plans by raw score (below / at-or-above 3.75) and star rating (3.5 / 4.0).
#
# Rows kept by the filter:
#   * raw_rating and partc_score are both non-missing,
#   * partc_score is 3.5 or 4.0 and raw_rating lies in [3.5, 4.0],
#   * the star rating agrees with rounding at 3.75:
#       raw_rating >= 3.75 with 4.0 stars, or raw_rating < 3.75 with 3.5 stars.
#
# The 3.5-star branch uses a strict "<" so that it matches the "Raw Score"
# label below; with "<=" a plan at exactly 3.75 holding 3.5 stars was kept
# and then reported in the "≥ 3.75" row.
rounding_counts_3_75 <- ga_ma_full %>%
  filter(
    !is.na(raw_rating), !is.na(partc_score),
    partc_score %in% c(3.5, 4.0),
    raw_rating >= 3.5, raw_rating <= 4.0,
    (raw_rating >= 3.75 & partc_score == 4.0) |
      (raw_rating < 3.75 & partc_score == 3.5)
  ) %>%
  mutate(
    `Raw Score` = if_else(raw_rating >= 3.75, "≥ 3.75", "< 3.75"),
    `Star`      = if_else(partc_score == 4.0, "4.0", "3.5")
  ) %>%
  count(`Raw Score`, `Star`, name = "Plans")

rounding_counts_3_75

In [None]:
# Python version of rounding table around 3.75 threshold
#
# Rows kept by the mask:
#   * raw_rating and partc_score are both non-missing,
#   * partc_score is 3.5 or 4.0 and raw_rating lies in [3.5, 4.0]
#     (Series.between is inclusive on both ends by default),
#   * the star rating agrees with rounding at 3.75:
#       raw_rating >= 3.75 with 4.0 stars, or raw_rating < 3.75 with 3.5 stars.
#
# The 3.5-star branch uses a strict "<" so that it matches the "Raw Score"
# label below; with "<=" a plan at exactly 3.75 holding 3.5 stars was kept
# and then reported in the "≥ 3.75" row.
raw_score = ga_ma_full_py["raw_rating"]
star_rating = ga_ma_full_py["partc_score"]

mask = (
    raw_score.notna()
    & star_rating.notna()
    & star_rating.isin([3.5, 4.0])
    & raw_score.between(3.5, 4.0)
    & (
        ((raw_score >= 3.75) & (star_rating == 4.0))
        | ((raw_score < 3.75) & (star_rating == 3.5))
    )
)

# Work on a copy so the label columns are not written into ga_ma_full_py.
sub = ga_ma_full_py.loc[mask].copy()
sub["Raw Score"] = np.where(sub["raw_rating"] >= 3.75, "≥ 3.75", "< 3.75")
sub["Star"] = sub["partc_score"].astype(str)

# One row per (Raw Score, Star) combination with the number of plan rows in it.
rounding_counts_3_75_py = (
    sub.groupby(["Raw Score", "Star"])['contractid']
    .size()
    .reset_index(name="Plans")
)

rounding_counts_3_75_py