In [None]:
# Dependencies
import pandas as pd
from datetime import datetime, timedelta

In [32]:
# Learning Exercise - Get next record using .shift

# Toy data: 6 encounters for 3 patients.
# Each tuple is one encounter: (patient_key, patient_name, admit_date, discharge_date).
# The rows are not in patient/date order, and one encounter has no discharge
# date (None), which pandas displays as NaT.
encounter_rows = [
    ('PatientC', 'Clark',    datetime(2024, 2, 1), datetime(2024, 2, 2)),
    ('PatientA', 'Anderson', datetime(2024, 1, 5), datetime(2024, 6, 15)),
    ('PatientA', 'Anderson', datetime(2024, 7, 5), datetime(2024, 7, 10)),
    ('PatientB', 'Brown',    datetime(2024, 1, 5), datetime(2024, 1, 6)),
    ('PatientA', 'Anderson', datetime(2024, 8, 1), None),
    ('PatientC', 'Clark',    datetime(2024, 2, 3), datetime(2024, 2, 10)),
]
column_names = ['patient_key', 'patient_name', 'admit_date', 'discharge_date']

# Pivot the row tuples into the column-oriented dict that pd.DataFrame consumes.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*encounter_rows))
}

encounters_df = pd.DataFrame(data)
encounters_df

Unnamed: 0,patient_key,patient_name,admit_date,discharge_date
0,PatientC,Clark,2024-02-01,2024-02-02
1,PatientA,Anderson,2024-01-05,2024-06-15
2,PatientA,Anderson,2024-07-05,2024-07-10
3,PatientB,Brown,2024-01-05,2024-01-06
4,PatientA,Anderson,2024-08-01,NaT
5,PatientC,Clark,2024-02-03,2024-02-10


In [33]:
# Sort the DataFrame

# Order each patient's encounters chronologically by ADMIT date.
# discharge_date is missing (NaT) for an encounter with no discharge recorded,
# so it is used only as a tie-breaker; sort_values places NaT last by default.
# The per-patient shift(-1) steps later in the notebook depend on this row order.
encounters_df = encounters_df.sort_values(['patient_key', 'admit_date', 'discharge_date'])
encounters_df

Unnamed: 0,patient_key,patient_name,admit_date,discharge_date
1,PatientA,Anderson,2024-01-05,2024-06-15
2,PatientA,Anderson,2024-07-05,2024-07-10
4,PatientA,Anderson,2024-08-01,NaT
3,PatientB,Brown,2024-01-05,2024-01-06
0,PatientC,Clark,2024-02-01,2024-02-02
5,PatientC,Clark,2024-02-03,2024-02-10


In [34]:
# Shift discharge_date up by one row
# Doesn't give us 100% what we're looking for...

# First attempt: a plain .shift(-1) over the entire column. Each row receives the
# value from the row below it, regardless of which patient that row belongs to.
whole_column_shift = encounters_df['discharge_date'].shift(periods=-1)
encounters_df['next_discharge'] = whole_column_shift

print("\nAfter shift(-1):")
encounters_df


After shift(-1):


Unnamed: 0,patient_key,patient_name,admit_date,discharge_date,next_discharge
1,PatientA,Anderson,2024-01-05,2024-06-15,2024-07-10
2,PatientA,Anderson,2024-07-05,2024-07-10,NaT
4,PatientA,Anderson,2024-08-01,NaT,2024-01-06
3,PatientB,Brown,2024-01-05,2024-01-06,2024-02-02
0,PatientC,Clark,2024-02-01,2024-02-02,2024-02-10
5,PatientC,Clark,2024-02-03,2024-02-10,NaT


In [35]:
# Shift discharge_date up by one row
# Let's try this again...

# Second attempt: group by patient first, so the shift never pulls a value
# across the boundary from one patient into the next. The last row of each
# patient group has no following row and therefore gets NaT.
discharge_by_patient = encounters_df.groupby('patient_key')['discharge_date']
encounters_df['next_discharge'] = discharge_by_patient.shift(periods=-1)

print("\nAfter groupby + shift(-1):")
encounters_df


After groupby + shift(-1):


Unnamed: 0,patient_key,patient_name,admit_date,discharge_date,next_discharge
1,PatientA,Anderson,2024-01-05,2024-06-15,2024-07-10
2,PatientA,Anderson,2024-07-05,2024-07-10,NaT
4,PatientA,Anderson,2024-08-01,NaT,NaT
3,PatientB,Brown,2024-01-05,2024-01-06,NaT
0,PatientC,Clark,2024-02-01,2024-02-02,2024-02-10
5,PatientC,Clark,2024-02-03,2024-02-10,NaT


In [36]:
# Calculate Days Between Discharges

# Date minus date yields a timedelta; .dt.days keeps only the whole-day count.
# Rows where either date is NaT come out as NaN.
discharge_gap = encounters_df['next_discharge'].sub(encounters_df['discharge_date'])
encounters_df['days_to_next'] = discharge_gap.dt.days

print("\nWith days calculated:")
encounters_df


With days calculated:


Unnamed: 0,patient_key,patient_name,admit_date,discharge_date,next_discharge,days_to_next
1,PatientA,Anderson,2024-01-05,2024-06-15,2024-07-10,25.0
2,PatientA,Anderson,2024-07-05,2024-07-10,NaT,
4,PatientA,Anderson,2024-08-01,NaT,NaT,
3,PatientB,Brown,2024-01-05,2024-01-06,NaT,
0,PatientC,Clark,2024-02-01,2024-02-02,2024-02-10,8.0
5,PatientC,Clark,2024-02-03,2024-02-10,NaT,


In [37]:
# Create Readmission Flag
# Logic: If days_to_next <= 30, then readmitted = 1

# Binary readmission flag: compare first, then cast True/False to 1/0.
# A missing days_to_next (NaN) compares as False, so those rows get 0.
within_30_days = encounters_df['days_to_next'].le(30)
encounters_df['readmitted'] = within_30_days.astype(int)

print("\nWith readmission flag:")
encounters_df


With readmission flag:


Unnamed: 0,patient_key,patient_name,admit_date,discharge_date,next_discharge,days_to_next,readmitted
1,PatientA,Anderson,2024-01-05,2024-06-15,2024-07-10,25.0,1
2,PatientA,Anderson,2024-07-05,2024-07-10,NaT,,0
4,PatientA,Anderson,2024-08-01,NaT,NaT,,0
3,PatientB,Brown,2024-01-05,2024-01-06,NaT,,0
0,PatientC,Clark,2024-02-01,2024-02-02,2024-02-10,8.0,1
5,PatientC,Clark,2024-02-03,2024-02-10,NaT,,0


In [39]:
# Edge Case 1: Use Admit Date, Not Discharge Date for "Next"
# Question: What date defines the "next admission"?
# Answer: The ADMIT date of the next encounter (not the discharge date).

# Readmission window in days (inclusive) -- the same 30-day rule as the earlier flag.
READMISSION_WINDOW_DAYS = 30

# Corrected calculation: get next ADMISSION date (not next discharge date)
encounters_df['next_admit_date'] = encounters_df.groupby('patient_key')['admit_date'].shift(-1)

# Calculate days from discharge to next admission
encounters_df['days_to_next_admit'] = (encounters_df['next_admit_date'] - encounters_df['discharge_date']).dt.days

# Flag readmission: the next admission must fall 0..30 days AFTER this discharge.
# A negative gap means the next encounter started before this discharge
# (overlapping encounters), which is not a readmission, so it is excluded.
# NaN gaps (no next encounter, or no discharge date yet) evaluate to False -> 0.
encounters_df['readmitted'] = (
    encounters_df['days_to_next_admit']
    .between(0, READMISSION_WINDOW_DAYS)
    .astype(int)
)

encounters_df

Unnamed: 0,patient_key,patient_name,admit_date,discharge_date,next_discharge,days_to_next,readmitted,next_admit_date,days_to_next_admit
1,PatientA,Anderson,2024-01-05,2024-06-15,2024-07-10,25.0,1,2024-07-05,20.0
2,PatientA,Anderson,2024-07-05,2024-07-10,NaT,,1,2024-08-01,22.0
4,PatientA,Anderson,2024-08-01,NaT,NaT,,0,NaT,
3,PatientB,Brown,2024-01-05,2024-01-06,NaT,,0,NaT,
0,PatientC,Clark,2024-02-01,2024-02-02,2024-02-10,8.0,1,2024-02-03,1.0
5,PatientC,Clark,2024-02-03,2024-02-10,NaT,,0,NaT,
