<h1>Patient Pathway</h1>

This notebook was used to gain information on the patient pathways in the dataset.

In [29]:
import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
#seaborn settings
sns.set_theme(style="whitegrid")
sns.set_context("paper")
sns.despine(left=True,right=True)

<Figure size 640x480 with 0 Axes>

In [30]:
df = pd.read_csv("example_dataset.csv")

In [31]:
#number of admissions with stay_number > 1
df_stay = df[df['stay_number'] > 1]
print("Number of admissions with stay_number > 1: ", len(df_stay))

Number of admissions with stay_number > 1:  78


In [32]:
#number of linkid with stay_number > 1
df_stay_linkid = df_stay['patient_id'].unique()
print("Number of linkid with stay_number > 1: ", len(df_stay_linkid))

Number of linkid with stay_number > 1:  53


In [33]:
# Group rows by 'linkid'
groups = df.groupby('patient_id')

# Check if a group has multiple rows and at least one 'ward_level' is 'unknown'
result = groups.filter(lambda x: (x['ward_level'] == 'unknown').any() and len(x) > 1)

# Print the result
result

Unnamed: 0,patient_id,number_of_children,sex,birth_weight,hoftiezer,nicu_admission,stay_number,start_date,end_date,hospital_id,...,others_indicator,region,hospital,ward,ward_level,c_section,season,month,year,gestational_age


In [34]:
#filter out ward_level unknown
df = df[df['ward_level'] != 'unknown']


In [35]:
df['ward_level'].value_counts(normalize=True)

ward_level
Medium    0.42
NICU      0.31
High      0.27
Name: proportion, dtype: float64

In [36]:
patient_journeys = df.groupby('patient_id')['ward_level'].apply(list).reset_index()


In [37]:
patient_journeys.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65 entries, 0 to 64
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   patient_id  65 non-null     int64 
 1   ward_level  65 non-null     object
dtypes: int64(1), object(1)
memory usage: 1.1+ KB


In [38]:
patient_journeys

Unnamed: 0,patient_id,ward_level
0,1000,[Medium]
1,1001,"[NICU, High, NICU]"
2,1005,[NICU]
3,1006,[NICU]
4,1007,[High]
...,...,...
60,1093,[NICU]
61,1094,[Medium]
62,1095,[Medium]
63,1096,[Medium]


In [39]:
test = df.groupby('patient_id').agg({'ward_level': lambda x: tuple(x),
                                     'length_of_stay':['sum','mean']
                                     }).reset_index()



In [40]:
def combine_consecutive_ward_levels(group):
    combined_wards = []
    combined_stays = []
    previous_ward = None
    for ward, stay in zip(group['ward'], group['length_of_stay']):
        if ward == previous_ward:
                    combined_stays[-1] += stay
        else:
                    combined_wards.append(ward)
                    combined_stays.append(stay)
        previous_ward = ward
 
    return pd.Series({'combined_wards': tuple(combined_wards), 'total_length_of_stay': sum(combined_stays)})
 
combined_df = df.groupby('patient_id').apply(combine_consecutive_ward_levels).reset_index()
 
# Step 2: Group by the combined ward combinations to get total and average length of stay
ward_level_summary = combined_df.groupby('combined_wards').agg(
    total_length_of_stay=('total_length_of_stay', 'sum'),
    number_of_patients=('patient_id', 'nunique'),
    number_of_admissions=('patient_id', 'size')  # count of admissions for each combination
).reset_index()
 
# Step 3: Calculate the average length of stay per admission
ward_level_summary['average_length_of_stay_per_admission'] = ward_level_summary['total_length_of_stay'] / ward_level_summary['number_of_admissions']
 
# Step 4: Calculate the total number of admissions
total_admissions = ward_level_summary['number_of_admissions'].sum()
 
# Step 5: Calculate the percentage of total admissions for each combination
ward_level_summary['percentage_of_total_admissions'] = (ward_level_summary['number_of_admissions'] / total_admissions) * 100

  combined_df = df.groupby('patient_id').apply(combine_consecutive_ward_levels).reset_index()


In [41]:
ward_level_summary.sort_values('number_of_patients',ascending=False)

Unnamed: 0,combined_wards,total_length_of_stay,number_of_patients,number_of_admissions,average_length_of_stay_per_admission,percentage_of_total_admissions
4,"(Medium,)",340,19,19,17.894737,29.230769
0,"(High,)",282,18,18,15.666667,27.692308
7,"(NICU,)",149,9,9,16.555556,13.846154
10,"(NICU, Medium)",152,5,5,30.4,7.692308
3,"(High, NICU)",162,3,3,54.0,4.615385
8,"(NICU, High)",149,3,3,49.666667,4.615385
1,"(High, Medium)",46,2,2,23.0,3.076923
5,"(Medium, High)",91,2,2,45.5,3.076923
9,"(NICU, High, NICU)",107,2,2,53.5,3.076923
2,"(High, Medium, High)",54,1,1,54.0,1.538462


In [42]:
def combine_consecutive_ward_levels(group):
    combined_wards = []
    combined_stays = []
    previous_ward = None
    for ward, stay in zip(group['ward_level'], group['length_of_stay']):
        if ward == previous_ward:
                    combined_stays[-1] += stay
        else:
                    combined_wards.append(ward)
                    combined_stays.append(stay)
        previous_ward = ward
 
    return pd.Series({'combined_wards': tuple(combined_wards), 'total_length_of_stay': sum(combined_stays)})
 
combined_df = df.groupby('patient_id').apply(combine_consecutive_ward_levels).reset_index()
 
# Step 2: Group by the combined ward combinations to get total and average length of stay
ward_level_summary = combined_df.groupby('combined_wards').agg(
    total_length_of_stay=('total_length_of_stay', 'sum'),
    number_of_patients=('patient_id', 'nunique'),
    number_of_admissions=('patient_id', 'size')  # count of admissions for each combination
).reset_index()
 
# Step 3: Calculate the average length of stay per admission
ward_level_summary['average_length_of_stay_per_admission'] = ward_level_summary['total_length_of_stay'] / ward_level_summary['number_of_admissions']
 
# Step 4: Calculate the total number of admissions
total_admissions = ward_level_summary['number_of_admissions'].sum()
 
# Step 5: Calculate the percentage of total admissions for each combination
ward_level_summary['percentage_of_total_admissions'] = (ward_level_summary['number_of_admissions'] / total_admissions) * 100

  combined_df = df.groupby('patient_id').apply(combine_consecutive_ward_levels).reset_index()


In [43]:
#sort by number of patients
ward_level_summary.sort_values('number_of_patients',ascending=False)

Unnamed: 0,combined_wards,total_length_of_stay,number_of_patients,number_of_admissions,average_length_of_stay_per_admission,percentage_of_total_admissions
4,"(Medium,)",374,23,23,16.26087,35.384615
0,"(High,)",200,11,11,18.181818,16.923077
8,"(NICU,)",188,11,11,17.090909,16.923077
1,"(High, Medium)",130,4,4,32.5,6.153846
7,"(Medium, NICU)",143,4,4,35.75,6.153846
3,"(High, NICU)",123,3,3,41.0,4.615385
5,"(Medium, High)",53,2,2,26.5,3.076923
10,"(NICU, High, NICU)",118,2,2,59.0,3.076923
11,"(NICU, Medium)",100,2,2,50.0,3.076923
2,"(High, Medium, NICU)",75,1,1,75.0,1.538462


In [44]:
ward_level_summary.sum()

combined_wards                          (High, High, Medium, High, Medium, NICU, High,...
total_length_of_stay                                                                 1578
number_of_patients                                                                     65
number_of_admissions                                                                   65
average_length_of_stay_per_admission                                           445.283597
percentage_of_total_admissions                                                      100.0
dtype: object

In [45]:
def combine_consecutive_ward_levels(group):
    combined_wards = []
    combined_stays = []
    previous_ward = None
    for ward, stay in zip(group['ward_level'], group['length_of_stay']):
        if ward == previous_ward:
                    combined_stays[-1] += stay
        else:
                    combined_wards.append(ward)
                    combined_stays.append(stay)
        previous_ward = ward
 
    return pd.Series({'combined_wards': tuple(combined_wards), 'total_length_of_stay': sum(combined_stays)})
 
combined_df = df.groupby('patient_id').apply(combine_consecutive_ward_levels).reset_index()
#filter combined_df for only ward_level nicu
combined_df = combined_df[combined_df['combined_wards'].apply(lambda x: 'nicu' in x)]
# Step 2: Group by the combined ward combinations to get total and average length of stay
ward_level_summary = combined_df.groupby('combined_wards').agg(
    total_length_of_stay=('total_length_of_stay', 'sum'),
    number_of_patients=('patient_id', 'nunique'),
    number_of_admissions=('patient_id', 'size')  # count of admissions for each combination
).reset_index()
 
# Step 3: Calculate the average length of stay per admission
ward_level_summary['average_length_of_stay_per_admission'] = ward_level_summary['total_length_of_stay'] / ward_level_summary['number_of_admissions']
 
# Step 4: Calculate the total number of admissions
total_admissions = ward_level_summary['number_of_admissions'].sum()
 
# Step 5: Calculate the percentage of total admissions for each combination
ward_level_summary['percentage_of_total_admissions'] = (ward_level_summary['number_of_admissions'] / total_admissions) * 100
ward_level_summary

  combined_df = df.groupby('patient_id').apply(combine_consecutive_ward_levels).reset_index()


Unnamed: 0,combined_wards,total_length_of_stay,number_of_patients,number_of_admissions,average_length_of_stay_per_admission,percentage_of_total_admissions


In [46]:
#count where combined_wards starts with high
ward_level_summary['combined_wards'].apply(lambda x: x[0]).value_counts()

Series([], Name: count, dtype: int64)

In [47]:
unique_combinations = patient_journeys['ward_level'].apply(tuple).value_counts(normalize=True)

In [48]:
combinations = unique_combinations.to_frame().reset_index().rename(columns={'index':'ward_level', 'ward_level':'ward_level'})

In [49]:
combinations

Unnamed: 0,ward_level,proportion
0,"(Medium,)",0.323077
1,"(NICU,)",0.153846
2,"(High,)",0.153846
3,"(Medium, NICU)",0.046154
4,"(NICU, High, NICU)",0.030769
5,"(High, NICU)",0.030769
6,"(High, Medium, Medium)",0.030769
7,"(High, Medium)",0.030769
8,"(Medium, Medium)",0.030769
9,"(Medium, High)",0.015385


In [50]:
#make if the same ward level is visited multiple times in a row, it is only counted once
def remove_consecutive_duplicates(ward_level_list):
    return [ward_level_list[i] for i in range(len(ward_level_list)) if i == 0 or ward_level_list[i] != ward_level_list[i-1]]

patient_journeys['ward_level'] = patient_journeys['ward_level'].apply(remove_consecutive_duplicates)


In [51]:
#sum all percentages where ward_level starts with nicu
combinations[combinations['ward_level'].apply(lambda x: x[0] == 'nicu')]['proportion'].sum()

0.0

In [52]:
combinations[combinations['ward_level'].apply(lambda x: x[0] == 'high')]['proportion'].sum()

0.0

In [53]:
combinations[combinations['ward_level'].apply(lambda x: x[0] == 'medium')]['proportion'].sum()

0.0

In [54]:
#show all combinations starting with medium
unique_combinations[unique_combinations.index.map(lambda x: x[0] == 'medium')]

Series([], Name: proportion, dtype: float64)

In [55]:
#sum all combinations starting with medium
unique_combinations[unique_combinations.index.map(lambda x: x[0] == 'medium')].sum()

0.0

In [56]:
#show all combinations starting with high
unique_combinations[unique_combinations.index.map(lambda x: x[0] == 'high')]

Series([], Name: proportion, dtype: float64)

In [57]:
unique_combinations[unique_combinations.index.map(lambda x: x[0] == 'high')].sum()

0.0

In [58]:
#show all combinations starting with high
unique_combinations[unique_combinations.index.map(lambda x: x[0] == 'nicu')]


Series([], Name: proportion, dtype: float64)

In [59]:
unique_combinations[unique_combinations.index.map(lambda x: x[0] == 'unknown')]

Series([], Name: proportion, dtype: float64)

In [60]:
unique_combinations[unique_combinations.index.map(lambda x: x[0] == 'nicu')].sum()

0.0

In [61]:
#filter only first stay
df_first = df[df['stay_number'] == 1]
df_first['ward_level'].value_counts(normalize=True)

ward_level
NICU      0.454545
Medium    0.318182
High      0.227273
Name: proportion, dtype: float64

In [62]:
df_first.groupby('ward_level')['length_of_stay'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
ward_level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
High,5.0,14.8,9.093954,4.0,11.0,12.0,19.0,28.0
Medium,7.0,15.428571,10.533394,1.0,8.5,14.0,23.5,29.0
NICU,10.0,18.3,6.733828,8.0,15.25,17.5,23.75,28.0
