# **CCC-BIDMC Schedule Generator**

**Ayush Noori** <br>
**Crimson Care Collaborative Clinic** <br>
**May 19, 2024**

---

In [1]:
# Standard imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Path management
from pathlib import Path

# Folder the form-response CSV exports are read from (the user's Downloads directory)
downloads = Path.home().joinpath("Downloads")

Read in the availability data.

In [2]:
# File names of the two form-response exports (returning and new volunteers)
returning_responses = "CCC-BIDMC Summer 2024 Returning Interest Form (Responses) - Form Responses 1.csv"
new_responses = "CCC-BIDMC Summer 2024 New Volunteer Scheduling (Responses) - Form Responses 1.csv"

# Read both exports and tag each row with whether it came from the new-volunteer form
df = pd.read_csv(downloads / returning_responses)
df_new = pd.read_csv(downloads / new_responses)
df["new"] = "No"
df_new["new"] = "Yes"

# Give both frames the same short column names, then stack them.
# NOTE(review): this renames purely by position, so it assumes both forms export
# their questions in the same order -- confirm against the forms.
response_columns = ['timestamp', 'first', 'last', 'role', 'return', 'epic', 'shifts', 'comments', 'new']
df.columns = response_columns
df_new.columns = response_columns
df = pd.concat([df, df_new], ignore_index=True)

# Strip stray whitespace so the same person always maps to the same name
df["first"] = df["first"].str.strip()
df["last"] = df["last"].str.strip()

# Convert time stamp to datetime
df["timestamp"] = pd.to_datetime(df["timestamp"])

# Keep only each person's most recent response, as one whole row.
# `groupby(...).last()` would instead take the last *non-null* value of every column,
# silently back-filling blanks in the newest response with answers from older ones.
df = (
    df.dropna(subset=["first", "last"])  # rows without a name cannot be scheduled
    .sort_values("timestamp", kind="stable")
    .drop_duplicates(subset=["first", "last"], keep="last")
    .sort_values(["first", "last"], kind="stable")  # alphabetical, as the groupby produced
)

# Only keep respondents who answered "Yes" in the 'return' column
df = df[df["return"] == "Yes"].reset_index(drop=True)

Generate availability table.

In [3]:
# First and last clinic dates of the summer
start_date = "June 4, 2024"
end_date = "September 17, 2024"

# One clinic every Tuesday between the start and end dates (inclusive)
clinic_days = pd.date_range(start=start_date, end=end_date, freq="W-TUE")

# Format as M/D/YYYY without zero padding. Built from the date fields directly because
# the "%-m" / "%-d" strftime flags are a platform extension and fail on Windows.
dates = np.array([f"{day.month}/{day.day}/{day.year}" for day in clinic_days], dtype=object)

# Empty availability table: one row per person (added later), one column per clinic date
availability = pd.DataFrame(columns=["Name", "Role"] + list(dates))

Populate availability table with form responses.

In [4]:
# The form's shift answers carry no year, so one is appended before parsing
year = 2024

# Add one availability row per form response
for i, row in df.iterrows():

    # Get the name and role
    name = f"{row['first']} {row['last']}"
    role = row["role"]

    # Parse the comma-separated shift list into the same M/D/YYYY strings used for the
    # date columns. A missing or blank answer (NaN from the CSV) means no shifts selected.
    raw_shifts = row["shifts"]
    if isinstance(raw_shifts, str) and raw_shifts.strip():
        parsed = [pd.to_datetime(f"{shift} {year}") for shift in raw_shifts.split(", ")]
        # Built from the date fields because "%-m" / "%-d" strftime flags are not portable
        shifts = {f"{day.month}/{day.day}/{day.year}" for day in parsed}
    else:
        shifts = set()

    # "" marks a date the person can work, "x" marks a date they cannot
    schedule = [name, role] + ["" if date in shifts else "x" for date in dates]

    # Add the row to the dataframe
    availability.loc[i] = schedule

Mark all new volunteers as unavailable for the first two weeks.

In [5]:
# Responses that came from the new-volunteer form
new_volunteers = df[df["new"] == "Yes"]

# Full names, built the same way as the "Name" column of the availability table
new_names = new_volunteers["first"] + " " + new_volunteers["last"]
for name in new_names:
    print("Updating availability for", name)

# Block out the first two clinic dates for every new volunteer in one assignment
is_new = availability["Name"].isin(new_names)
availability.loc[is_new, dates[:2]] = "x"

Updating availability for Amanda Law
Updating availability for Caroline Diggins
Updating availability for Corinne Auger
Updating availability for Denny Lu
Updating availability for Emily Yang
Updating availability for Esmé Wheeler
Updating availability for Fielding Fischer
Updating availability for Massoud Sharif
Updating availability for Michael Chen
Updating availability for Nishmi Abeyweera
Updating availability for Sojas Wagle


Add PCC (Senior Clinicians) every two weeks, and grant them highest scheduling priority. Finally, add back-up junior and senior clinicians.

In [6]:
# Placeholder rows for the four PCC senior clinicians
pcc_clinicans = ["PCC 1", "PCC 2", "PCC 3", "PCC 4"]

# PCC start and end dates: two separate every-other-week runs
pcc_start_1 = "June 4, 2024"
pcc_end_1 = "June 18, 2024"
pcc_start_2 = "July 9, 2024"
pcc_end_2 = "September 17, 2024"


def _biweekly_tuesdays(start, end):
    """Return every other Tuesday from `start` to `end` as M/D/YYYY strings.

    The strings are built from the date fields rather than with the "%-m" / "%-d"
    strftime flags, which are a platform extension and fail on Windows.
    """
    days = pd.date_range(start=start, end=end, freq="2W-TUE")
    return np.array([f"{day.month}/{day.day}/{day.year}" for day in days], dtype=object)


# The PCC dates are identical for every PCC, so compute them once, outside the loop
pcc_dates_1 = _biweekly_tuesdays(pcc_start_1, pcc_end_1)
pcc_dates_2 = _biweekly_tuesdays(pcc_start_2, pcc_end_2)
pcc_dates = np.concatenate([pcc_dates_1, pcc_dates_2])

# "" marks a date the PCC can work, "x" marks a date they cannot
pcc_avail = ["" if date in pcc_dates else "x" for date in dates]

# Add a row for each PCC
for pcc in pcc_clinicans:

    # Skip PCCs that already have a row so re-running this cell does not duplicate them
    if (availability["Name"] == pcc).any():
        continue

    availability.loc[len(availability)] = [pcc, "Senior Clinician"] + pcc_avail
    print("Adding availability for", pcc)

Adding availability for PCC 1
Adding availability for PCC 2
Adding availability for PCC 3
Adding availability for PCC 4


Process and save availability table.

In [8]:
# Display order of the roles; rows are grouped in this order, then alphabetically by name
role_order = ["Senior Clinician", "Junior Clinician", "Senior Director", "Administrative Manager"]

# An ordered categorical makes the sort follow `role_order` instead of the alphabet
ordered_roles = pd.Categorical(availability["Role"], categories=role_order, ordered=True)
availability["Role"] = ordered_roles
availability = availability.sort_values(by=["Role", "Name"], ignore_index=True)

Schedule clinic shifts based on availability and even distribution of shifts.

In [15]:
# Start by scheduling everyone on every date they are available: "" -> "s".
# Only the date columns are touched, so Name and the categorical Role stay as they are.
schedule = availability.copy()
date_columns = list(dates)
schedule[date_columns] = schedule[date_columns].replace("", "s")

# Maximum number of people of each role on any single clinic date
max_shifts = {
    "Senior Clinician": 6,
    "Junior Clinician": 6,
    "Senior Director": 3,
    "Administrative Manager": 1
}

# Count each person's scheduled dates. Comparing and summing yields 0 for someone with
# no available date, where `value_counts()["s"]` would raise a KeyError.
schedule["Counts"] = schedule[date_columns].eq("s").sum(axis=1)

# Grant PCC highest priority by setting counter to 0
# (the scheduling step removes the people with the highest counts first)
schedule.loc[schedule["Name"].str.contains("PCC"), "Counts"] = 0

Generate schedule table.

In [16]:
# Trim every clinic date down to the per-role headcount cap in `max_shifts`
for date in dates:
    for role, cap in max_shifts.items():

        # Everyone of this role still marked "s" on this date
        on_shift = schedule[schedule[date] == "s"]
        on_shift = on_shift[on_shift["Role"] == role]

        # Nothing to do when the role is at or under its cap
        surplus = len(on_shift) - cap
        if surplus <= 0:
            continue

        # Remove the people with the highest shift counts first, so those with the
        # fewest remaining shifts keep theirs
        ranked = on_shift.sort_values("Counts", ascending=False)
        for person_name in ranked["Name"].iloc[:surplus]:
            is_person = schedule["Name"] == person_name
            schedule.loc[is_person, date] = ""
            schedule.loc[is_person, "Counts"] -= 1

Save to file.

In [21]:
# Write both tables to the working directory as CSV; the "utf-8-sig" encoding adds a
# byte-order mark at the start of each file
for table, filename in ((availability, "availability.csv"), (schedule, "schedule.csv")):
    table.to_csv(filename, index=False, encoding="utf-8-sig")