# Nurse Staffing Recommendations

Analysis of the Centers for Medicare & Medicaid Services (CMS) Nurse
Staffing Dataset

Matthew Bain  
2024-03-22

I investigate nurse staffing data to provide informed recommendations to
a medical staffing organization.

\[…\]

## Imports

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from great_tables import GT
from pandas.plotting import scatter_matrix

from src.stylesheet import customize_plots
from src.inspection import make_df, display

## The dataset

We begin by exploring the data to get to know the features and patterns
on which we will base our analysis.

In [9]:
# Read the raw CSV archive only once per kernel session: if `data` is already
# bound in this namespace, skip the read and just report that it is loaded.
if 'data' in locals():
    print("data loaded.")
else:
    raw_path = "../data/raw/PBJ_Daily_Nurse_Staffing_Q1_2024.zip"
    data = pd.read_csv(raw_path, encoding='ISO-8859-1')

In [10]:
# Peek at five randomly drawn rows (a different draw on every run).
data.sample(n=5)

In [11]:
# TODO: pivot on day

# Mean of Hrs_RN for every STATE (rows) x WorkDate (columns) combination.
#
# reset_index() is chained onto the pivot so that STATE ends up as an ordinary
# first column: the cells below select it by name (rowname_col="STATE") and
# skip it positionally (iloc[:, 1:], columns[1:]). Chaining, rather than
# calling reset_index(inplace=True) in a separate statement, keeps this cell
# idempotent -- re-running it always rebuilds the same frame.
data_pivoted = (
    data.pivot_table(
        index="STATE",
        columns="WorkDate",
        values="Hrs_RN",
        aggfunc='mean'
    )
    .reset_index()
)
data_pivoted

In [24]:
# Every column of the pivot except the first one.
data_pivoted[data_pivoted.columns[1:]]

In [None]:
# All pivot columns except the first are handed to the nanoplot formatters.
day_columns = data_pivoted.columns[1:]

# NOTE(review): rowname_col="STATE" presumably needs STATE to be a regular
# column of data_pivoted (not its index) -- confirm before running this cell.
staffing_table = GT(data_pivoted, rowname_col="STATE")

# First pass: default plot type with a mean reference line and a min-to-q1 band.
staffing_table = staffing_table.fmt_nanoplot(
    columns=day_columns,
    reference_line="mean",
    reference_area=["min", "q1"]
)

# Second pass over the same columns: bar plots with a max reference line and
# a max-to-median band.
# NOTE(review): both passes target identical columns -- check which one wins.
staffing_table = staffing_table.fmt_nanoplot(
    columns=day_columns,
    plot_type="bar",
    reference_line="max",
    reference_area=["max", "median"]
)

staffing_table

In [80]:
# Summary statistics from DataFrame.describe(), rounded to one decimal place.
summary_stats = data.describe()
summary_stats.round(decimals=1)
# display(Markdown(data.describe().to_markdown()))

In [81]:
# Pairwise scatter matrix of selected staffing-hours columns.
# NOTE(review): "Hrs_LPN_ctr" is the only "_ctr" column in this list -- confirm
# it is intended rather than a plain "Hrs_LPN" column.
attributes = ["Hrs_RN", "Hrs_LPN_ctr", "Hrs_CNA", "Hrs_NAtrn", "Hrs_MedAide"]
n = len(attributes)

# Plot a subsample to keep the figure light. The seed is fixed so the figure
# is identical on every run, and the size is capped at the number of available
# rows so the cell also works on a truncated dataset.
scatter_sample = data[attributes].sample(
    min(200, len(data)),
    random_state=42
)

fig, axs = plt.subplots(n, n, figsize=(8, 8))
scatter_matrix(
    scatter_sample,
    ax=axs, alpha=.7,
    hist_kwds=dict(bins=15, linewidth=0)
)

# Line up the outer axis labels along the left column and the bottom row.
fig.align_ylabels(axs[:, 0])
fig.align_xlabels(axs[-1, :])
for ax in axs.flatten():
    ax.tick_params(axis='both', which='both', length=3.5)

# save_fig("scatter_matrix_plot")

plt.show()

In [125]:
from great_tables import GT

# Columns that together identify a provider and its location.
provider_columns = [
    "STATE",
    "COUNTY_NAME", "COUNTY_FIPS",
    "CITY",
    "PROVNAME", "PROVNUM",
    # "MDScensus"
]

# Count how often each distinct combination occurs among the rows labelled
# 150000 and above, then show the ten most frequent as a formatted table.
df = (
    data.loc[150000:, provider_columns]
    .value_counts()
    .reset_index()
)
GT(df.head(10))

## Some GT examples

In [91]:
from typing import Any
from IPython.display import display as ipy_display, HTML
import numpy as np

def display2(
    *args,
    globs: dict[str, Any] | None = None,
    bold: bool = True,
    width: str = "400px"  # Fixed width for each block
) -> None:
    """
    Display an informative representation of multiple objects side-by-side in Jupyter.

    Parameters
    ----------
    *args : tuple
        Tuple of expressions to evaluate and display.
    globs : dict[str, Any], default=None
        Global namespace, to give eval() access to nonlocals passed by name.
    bold : bool, default=True
        Option to enable/disable string styling.
    width : str, default="400px"
        Fixed width for each displayed block in the Jupyter notebook.

    Warnings
    --------
    This function uses `eval()` to render expressions it receives
    as strings. Access to variables in the global namespace is controlled
    by `globs`. Take care to only pass trusted expressions to the function.
    """

    if globs is None:
        globs = {}

    outputs = []
    for arg in args:
        name = f"<b>{arg}</b>" if bold else arg
        value = np.round(eval(arg, globs), 2)
        shape = np.shape(value)
        content = f"<div style='width:{width}; padding:10px; float:left;'><pre>{name}\n--- {repr(shape)} ---\n{repr(value)}</pre></div>"
        outputs.append(content)

    # Clearfix for layout
    clearfix = "<div style='clear: both;'></div>"

    # Display the HTML content in Jupyter
    html_output = ''.join(outputs) + clearfix
    ipy_display(HTML(html_output))

    return None

In [92]:
# Small worked example: a 2x2 matrix, a 1x2 row vector, and their product
# once the vector is transposed into a column.
A = np.array([
    [1, 3],
    [2, 4],
])
x = np.array([[0, 1]])

demo_expressions = ("A", "x.T", "np.dot(A, x.T)")
display2(*demo_expressions, globs=globals(), bold=True, width="100px")

In [93]:
# Side-by-side value counts for five columns of `data`; one expression string
# is generated per column name.
count_expressions = [
    f"data['{column}'].value_counts()"
    for column in ("STATE", "COUNTY_NAME", "CITY", "PROVNAME", "MDScensus")
]
display2(*count_expressions, width="340px", globs=globals())

In [94]:
# Show only the CY_Qtr, WorkDate and MDScensus columns.
data.loc[:, ["CY_Qtr", "WorkDate", "MDScensus"]]

## SQL