# Patient Hospital Stay Timeline Visualization
A simple, patient-friendly visualization showing the progression of events during hospitalization

In [1]:


import sys
from pathlib import Path

# Add the project root to Python path so we can import from utils
project_root = Path().resolve().parent  # Go up one level from tests/ to project root
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Now we can import from utils
from utils.Dataset import MIMICDataset
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime, timedelta
import numpy as np

In [2]:
# Initialize dataset
dataset = MIMICDataset(max_patients=1)

Loading MIMIC-IV data tables...
  - Loading patients...
  - Loading admissions...
  - Limited to 1 patients
  - Filtered to 1 patients, 4 admissions
  - Loading ICU stays...
  - Loading diagnosis dictionary...
  - Loading lab items dictionary...
  - Loading procedure dictionary...
  - Loading diagnoses...
  - Loading procedures...
  - Loading prescriptions...
  - Loading lab events (filtering by hadm_id)...
  - Loading microbiology events...


  self.microbiology_df = pd.read_csv(self.hosp_dir / "microbiologyevents.csv.gz", compression='gzip')


  - ICU time-series data will be loaded on-demand...
Indexing data for fast lookups...
  - Indexing prescriptions...
  - Indexing lab events...
  - Indexing microbiology events...
  - Preparing ICU time-series data (indexed on-demand)...
Data loading complete!


In [3]:
# Get patient 10000032 (patients[0])
patients = dataset.get_all_patients()
patient = patients[0]
hadm_id = patient.admissions[0].hadm_id

print(f"Patient ID: {patient.subject_id}")
print(f"Admission ID: {hadm_id}")

  - Loading ICU chart events (on-demand)...
  - Loading ICU input events (on-demand)...
  - Loading ICU output events (on-demand)...
  - Loading ICU procedure events (on-demand)...
Patient ID: 10000032
Admission ID: 22595853


In [4]:
from utils.patient_json import patient_to_json
json = patient_to_json(hadm_id, dataset, save=True, filename=f"data/patient_{hadm_id}.json")

In [4]:
# Get timeline data using existing Dataset method
# This method consolidates data from admission_info, diagnoses, procedures, lab_results, and medications
timeline = dataset.get_admission_timeline(hadm_id)
timeline

Unnamed: 0,timestamp,event_type,description,details
0,2180-05-06 22:23:00,Admission,Admitted via TRANSFER FROM HOSPITAL,"{'subject_id': 10000032, 'hadm_id': 22595853, ..."
1,2180-05-06 22:23:00,Diagnosis,Posttraumatic stress disorder,"{'subject_id': 10000032, 'hadm_id': 22595853, ..."
2,2180-05-06 22:23:00,Diagnosis,"Bipolar disorder, unspecified","{'subject_id': 10000032, 'hadm_id': 22595853, ..."
3,2180-05-06 22:23:00,Diagnosis,"Chronic airway obstruction, not elsewhere clas...","{'subject_id': 10000032, 'hadm_id': 22595853, ..."
4,2180-05-06 22:23:00,Diagnosis,Personal history of tobacco use,"{'subject_id': 10000032, 'hadm_id': 22595853, ..."
5,2180-05-06 22:23:00,Diagnosis,Cirrhosis of liver without mention of alcohol,"{'subject_id': 10000032, 'hadm_id': 22595853, ..."
6,2180-05-06 22:23:00,Diagnosis,Other ascites,"{'subject_id': 10000032, 'hadm_id': 22595853, ..."
7,2180-05-06 22:23:00,Diagnosis,Portal hypertension,"{'subject_id': 10000032, 'hadm_id': 22595853, ..."
8,2180-05-06 22:23:00,Diagnosis,Unspecified viral hepatitis C without hepatic ...,"{'subject_id': 10000032, 'hadm_id': 22595853, ..."
9,2180-05-07 00:00:00,Procedure,Percutaneous abdominal drainage,"{'subject_id': 10000032, 'hadm_id': 22595853, ..."


In [5]:
# Debug: Check timeline data before processing
print(f"Timeline shape: {timeline.shape}")
print(f"Timeline columns: {timeline.columns.tolist()}")
print(f"Timeline memory usage: {timeline.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
print(f"\nEvent type distribution:")
print(timeline['event_type'].value_counts())
print(f"\nTimestamp dtype: {timeline['timestamp'].dtype}")
print(f"First few timestamps: {timeline['timestamp'].head()}")

Timeline shape: (44, 4)
Timeline columns: ['timestamp', 'event_type', 'description', 'details']
Timeline memory usage: 0.02 MB

Event type distribution:
event_type
Lab Result    19
Medication    14
Diagnosis      8
Admission      1
Procedure      1
Discharge      1
Name: count, dtype: int64

Timestamp dtype: datetime64[ns]
First few timestamps: 0   2180-05-06 22:23:00
1   2180-05-06 22:23:00
2   2180-05-06 22:23:00
3   2180-05-06 22:23:00
4   2180-05-06 22:23:00
Name: timestamp, dtype: datetime64[ns]


In [5]:
def prepare_timeline_data(timeline_df):
    """
    Consolidate timeline events into patient-friendly categories.
    Optimized for performance with better memory handling.
    """
    # Early return if empty
    if timeline_df.empty:
        return pd.DataFrame()
    
    # Ensure timestamp is datetime (avoid repeated conversions in groupby)
    if not pd.api.types.is_datetime64_any_dtype(timeline_df['timestamp']):
        timeline_df['timestamp'] = pd.to_datetime(timeline_df['timestamp'])
    
    events = []
    
    # Group by event type and timestamp - use sort=False for speed
    grouped = timeline_df.groupby(['timestamp', 'event_type'], sort=False)
    
    for (timestamp, event_type), group in grouped:
        # Get descriptions once
        descriptions = group['description'].tolist()
        count = len(descriptions)
        
        # Map event types to categories (avoid multiple if-elif checks)
        if event_type == 'Admission':
            events.append({
                'timestamp': timestamp,
                'category': 'Admission',
                'title': 'Admitted to Hospital',
                'details': descriptions[0],
                'count': 1,
                'items': descriptions
            })
        
        elif event_type == 'Diagnosis':
            events.append({
                'timestamp': timestamp,
                'category': 'Diagnosis',
                'title': f'Diagnoses ({count})',
                'details': '<br>'.join([f"• {d}" for d in descriptions]),
                'count': count,
                'items': descriptions
            })
        
        elif event_type == 'Procedure':
            events.append({
                'timestamp': timestamp,
                'category': 'Procedure',
                'title': f'Procedures ({count})',
                'details': '<br>'.join([f"• {p}" for p in descriptions]),
                'count': count,
                'items': descriptions
            })
        
        elif event_type == 'Lab Result':
            # Limit display to first 10, but keep all in items
            details = '<br>'.join([f"• {lab}" for lab in descriptions[:10]])
            if count > 10:
                details += f"<br>• ... and {count - 10} more"
            
            events.append({
                'timestamp': timestamp,
                'category': 'Lab Tests',
                'title': f'Lab Tests ({count})',
                'details': details,
                'count': count,
                'items': descriptions
            })
        
        elif event_type == 'Medication':
            # Limit display to first 10, but keep all in items
            details = '<br>'.join([f"• {med}" for med in descriptions[:10]])
            if count > 10:
                details += f"<br>• ... and {count - 10} more"
            
            events.append({
                'timestamp': timestamp,
                'category': 'Medication',
                'title': f'Medications ({count})',
                'details': details,
                'count': count,
                'items': descriptions
            })
        
        elif event_type == 'Discharge':
            events.append({
                'timestamp': timestamp,
                'category': 'Discharge',
                'title': 'Discharged from Hospital',
                'details': descriptions[0],
                'count': 1,
                'items': descriptions
            })
    
    # Convert to DataFrame once at the end
    return pd.DataFrame(events)

# Prepare the data
print("Preparing timeline data...")
print(f"Timeline has {len(timeline)} rows")
events_df = prepare_timeline_data(timeline)
print(f"Consolidated into {len(events_df)} event groups")
events_df

Preparing timeline data...
Timeline has 44 rows
Consolidated into 11 event groups


Unnamed: 0,timestamp,category,title,details,count,items
0,2180-05-06 22:23:00,Admission,Admitted to Hospital,Admitted via TRANSFER FROM HOSPITAL,1,[Admitted via TRANSFER FROM HOSPITAL]
1,2180-05-06 22:23:00,Diagnosis,Diagnoses (8),• Posttraumatic stress disorder<br>• Bipolar d...,8,"[Posttraumatic stress disorder, Bipolar disord..."
2,2180-05-07 00:00:00,Procedure,Procedures (1),• Percutaneous abdominal drainage,1,[Percutaneous abdominal drainage]
3,2180-05-07 00:00:00,Medication,Medications (3),• Heparin - 5000 UNIT<br>• Sodium Chloride 0.9...,3,"[Heparin - 5000 UNIT, Sodium Chloride 0.9% Fl..."
4,2180-05-07 00:10:00,Lab Tests,Lab Tests (1),• Urobilinogen: 4.0 (abnormal),1,[Urobilinogen: 4.0 (abnormal)]
5,2180-05-07 01:00:00,Medication,Medications (8),• Albuterol Inhaler - 2 PUFF<br>• Nicotine Pat...,8,"[Albuterol Inhaler - 2 PUFF, Nicotine Patch - ..."
6,2180-05-07 02:00:00,Medication,Medications (1),• Ipratropium Bromide Neb - 1 NEB,1,[Ipratropium Bromide Neb - 1 NEB]
7,2180-05-07 05:05:00,Lab Tests,Lab Tests (11),• PT: 16.6 (abnormal)<br>• Urea Nitrogen: 25.0...,11,"[PT: 16.6 (abnormal), Urea Nitrogen: 25.0 (abn..."
8,2180-05-07 10:11:00,Lab Tests,Lab Tests (7),"• RBC, Ascites: 8800.0 (abnormal)<br>• Total N...",7,"[RBC, Ascites: 8800.0 (abnormal), Total Nuclea..."
9,2180-05-07 17:15:00,Discharge,Discharged from Hospital,Discharged to HOME,1,[Discharged to HOME]


In [8]:
def prepare_timeline_list(timeline_df):
    """
    Consolidate timeline events into patient-friendly categories.
    Optimized for performance with better memory handling.
    """
    # Early return if empty
    if timeline_df.empty:
        return {}
    
    # Ensure timestamp is datetime (avoid repeated conversions in groupby)
    if not pd.api.types.is_datetime64_any_dtype(timeline_df['timestamp']):
        timeline_df['timestamp'] = pd.to_datetime(timeline_df['timestamp'])
    
    events = []
    
    # Group by event type and timestamp - use sort=False for speed
    grouped = timeline_df.groupby(['timestamp', 'event_type'], sort=False)
    
    for (timestamp, event_type), group in grouped:
        # Get descriptions once
        descriptions = group['description'].tolist()
        count = len(descriptions)
        
        # Map event types to categories (avoid multiple if-elif checks)
        if event_type == 'Admission':
            events.append({
                'timestamp': timestamp,
                'category': 'Admission',
                'title': 'Admitted to Hospital',
                'details': descriptions[0],
                'count': 1,
                'items': descriptions
            })
        
        elif event_type == 'Diagnosis':
            events.append({
                'timestamp': timestamp,
                'category': 'Diagnosis',
                'title': f'Diagnoses ({count})',
                'details': '<br>'.join([f"• {d}" for d in descriptions]),
                'count': count,
                'items': descriptions
            })
        
        elif event_type == 'Procedure':
            events.append({
                'timestamp': timestamp,
                'category': 'Procedure',
                'title': f'Procedures ({count})',
                'details': '<br>'.join([f"• {p}" for p in descriptions]),
                'count': count,
                'items': descriptions
            })
        
        elif event_type == 'Lab Result':
            # Limit display to first 10, but keep all in items
            details = '<br>'.join([f"• {lab}" for lab in descriptions[:10]])
            if count > 10:
                details += f"<br>• ... and {count - 10} more"
            
            events.append({
                'timestamp': timestamp,
                'category': 'Lab Tests',
                'title': f'Lab Tests ({count})',
                'details': details,
                'count': count,
                'items': descriptions
            })
        
        elif event_type == 'Medication':
            # Limit display to first 10, but keep all in items
            details = '<br>'.join([f"• {med}" for med in descriptions[:10]])
            if count > 10:
                details += f"<br>• ... and {count - 10} more"
            
            events.append({
                'timestamp': timestamp,
                'category': 'Medication',
                'title': f'Medications ({count})',
                'details': details,
                'count': count,
                'items': descriptions
            })
        
        elif event_type == 'Discharge':
            events.append({
                'timestamp': timestamp,
                'category': 'Discharge',
                'title': 'Discharged from Hospital',
                'details': descriptions[0],
                'count': 1,
                'items': descriptions
            })
    
    # Convert to DataFrame once at the end
    return events

prepare_timeline_list(timeline)

[{'timestamp': Timestamp('2180-05-06 22:23:00'),
  'category': 'Admission',
  'title': 'Admitted to Hospital',
  'details': 'Admitted via TRANSFER FROM HOSPITAL',
  'count': 1,
  'items': ['Admitted via TRANSFER FROM HOSPITAL']},
 {'timestamp': Timestamp('2180-05-06 22:23:00'),
  'category': 'Diagnosis',
  'title': 'Diagnoses (8)',
  'details': '• Posttraumatic stress disorder<br>• Bipolar disorder, unspecified<br>• Chronic airway obstruction, not elsewhere classified<br>• Personal history of tobacco use<br>• Cirrhosis of liver without mention of alcohol<br>• Other ascites<br>• Portal hypertension<br>• Unspecified viral hepatitis C without hepatic coma',
  'count': 8,
  'items': ['Posttraumatic stress disorder',
   'Bipolar disorder, unspecified',
   'Chronic airway obstruction, not elsewhere classified',
   'Personal history of tobacco use',
   'Cirrhosis of liver without mention of alcohol',
   'Other ascites',
   'Portal hypertension',
   'Unspecified viral hepatitis C without hepat

In [12]:
def prepare_lab_results_list(lab_results_df):
    """
    Consolidate lab results into a list of dictionaries.
    """
    if lab_results_df['all'].empty:
        return []
    if not lab_results_df['flagged'].empty:
        return lab_results_df[['charttime', 'label', 'valuenum', 'valueuom', 'flag']].to_dict(orient='records')

lab_results = dataset.get_lab_results_summary(hadm_id)
lab_results_list = prepare_lab_results_list(lab_results)

TypeError: unhashable type: 'list'

In [7]:
def create_timeline_visualization(events_df):
    """
    Create an interactive timeline visualization using Plotly
    """
    # Color scheme for different event categories
    color_map = {
        'Admission': '#2E86AB',      # Blue
        'Diagnosis': '#A23B72',      # Purple
        'Procedure': '#F18F01',      # Orange
        'Lab Tests': '#C73E1D',      # Red
        'Medication': '#6A994E',     # Green
        'Discharge': '#2E86AB'       # Blue
    }
    
    # Assign y-position based on category for better spacing
    category_y = {
        'Admission': 5,
        'Diagnosis': 4,
        'Procedure': 3,
        'Lab Tests': 2,
        'Medication': 1,
        'Discharge': 5
    }
    
    events_df['y_pos'] = events_df['category'].map(category_y)
    events_df['color'] = events_df['category'].map(color_map)
    
    # Create the figure
    fig = go.Figure()
    
    # Add timeline line
    fig.add_trace(go.Scatter(
        x=[events_df['timestamp'].min(), events_df['timestamp'].max()],
        y=[3, 3],
        mode='lines',
        line=dict(color='lightgray', width=3),
        showlegend=False,
        hoverinfo='skip'
    ))
    
    # Add events as markers
    for category in events_df['category'].unique():
        cat_data = events_df[events_df['category'] == category]
        
        # Create hover text with full details
        hover_text = [
            f"<b>{row['title']}</b><br><br>{row['details']}<br><br><i>{row['timestamp'].strftime('%Y-%m-%d %H:%M')}</i>"
            for _, row in cat_data.iterrows()
        ]
        
        fig.add_trace(go.Scatter(
            x=cat_data['timestamp'],
            y=cat_data['y_pos'],
            mode='markers+text',
            marker=dict(
                size=15 + cat_data['count'] * 2,  # Size based on number of items
                color=cat_data['color'],
                line=dict(color='white', width=2)
            ),
            text=cat_data['title'],
            textposition='top center',
            textfont=dict(size=10),
            name=category,
            hovertext=hover_text,
            hoverinfo='text'
        ))
    
    # Update layout
    fig.update_layout(
        title={
            'text': f'Patient Hospital Stay Timeline<br><sub>Patient {patient.subject_id} - Admission {hadm_id}</sub>',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 20}
        },
        xaxis=dict(
            title='Time',
            showgrid=True,
            gridcolor='lightgray',
            tickformat='%b %d\n%H:%M'
        ),
        yaxis=dict(
            title='Event Category',
            showgrid=False,
            tickmode='array',
            tickvals=[1, 2, 3, 4, 5],
            ticktext=['Medication', 'Lab Tests', 'Procedure', 'Diagnosis', 'Admission/Discharge'],
            range=[0, 6]
        ),
        hovermode='closest',
        height=600,
        plot_bgcolor='white',
        showlegend=True,
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='right',
            x=1
        )
    )
    
    return fig

# Create and display the visualization
fig = create_timeline_visualization(events_df)
fig.show()

## Alternative: Horizontal Gantt-style Timeline

In [None]:
def create_gantt_timeline(events_df):
    """
    Create a single-row horizontal timeline with non-overlapping bars.
    Events are shown in chronological order with equal spacing, not to time scale.
    """
    # Color scheme
    color_map = {
        'Admission': '#2E86AB',
        'Diagnosis': '#A23B72',
        'Procedure': '#F18F01',
        'Lab Tests': '#C73E1D',
        'Medication': '#6A994E',
        'Discharge': '#2E86AB'
    }
    
    # Filter out events after discharge
    # Find discharge time
    discharge_events = events_df[events_df['category'] == 'Discharge']
    if not discharge_events.empty:
        discharge_time = discharge_events['timestamp'].iloc[0]
        # Keep only events up to and including discharge
        events_filtered = events_df[events_df['timestamp'] <= discharge_time].copy()
    else:
        events_filtered = events_df.copy()
    
    # Sort events by timestamp, then by original index to maintain order for simultaneous events
    events_sorted = events_filtered.sort_values(['timestamp']).reset_index(drop=True)
    
    # Prepare data for timeline - all on same row with equal spacing
    gantt_data = []
    
    # Create sequential positions (not time-scaled)
    for idx, row in events_sorted.iterrows():
        # Each event gets a sequential position (0, 1, 2, 3, ...)
        # This ensures no overlap and equal spacing
        start_pos = idx
        end_pos = idx + 0.9  # Leave small gap between bars
        
        gantt_data.append(dict(
            Task='Hospital Stay',
            Start=start_pos,
            Finish=end_pos,
            Description=row['title'],
            Details=row['details'],
            Category=row['category'],
            Count=row['count'],
            Timestamp=row['timestamp'].strftime('%Y-%m-%d %H:%M')
        ))
    
    gantt_df = pd.DataFrame(gantt_data)
    
    # Create figure using plotly graph objects for more control
    fig = go.Figure()
    
    # Add bars manually to control positioning
    for idx, row in gantt_df.iterrows():
        fig.add_trace(go.Bar(
            x=[row['Finish'] - row['Start']],
            y=['Hospital Stay'],
            base=row['Start'],
            orientation='h',
            marker=dict(
                color=color_map.get(row['Category'], '#999999'),
                line=dict(color='white', width=2)
            ),
            name=row['Category'],
            text=row['Description'],
            textposition='inside',
            textfont=dict(size=10, color='white'),
            hovertemplate=(
                f"<b>{row['Description']}</b><br><br>"
                f"{row['Details']}<br><br>"
                f"<i>{row['Timestamp']}</i>"
                "<extra></extra>"
            ),
            showlegend=idx == gantt_df[gantt_df['Category'] == row['Category']].index[0]  # Show legend once per category
        ))
    
    # Update layout
    fig.update_layout(
        title=f'Patient Hospital Stay Timeline - Patient {patient.subject_id}',
        height=250,
        xaxis=dict(
            title='Event Sequence',
            showticklabels=False,  # Hide time scale numbers
            showgrid=False,
            zeroline=False
        ),
        yaxis=dict(
            title='',
            showticklabels=False,
            showgrid=False
        ),
        showlegend=True,
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='center',
            x=0.5
        ),
        hovermode='closest',
        plot_bgcolor='white',
        barmode='overlay',
        bargap=0
    )
    
    return fig

# Create and display horizontal timeline
fig_gantt = create_gantt_timeline(events_df)
fig_gantt.show()

## Summary Statistics

In [13]:
# Display summary of the hospital stay
admission_time = events_df[events_df['category'] == 'Admission']['timestamp'].iloc[0]
discharge_time = events_df[events_df['category'] == 'Discharge']['timestamp'].iloc[0]
length_of_stay = discharge_time - admission_time

print("=" * 60)
print("HOSPITAL STAY SUMMARY")
print("=" * 60)
print(f"\nAdmission: {admission_time.strftime('%Y-%m-%d %H:%M')}")
print(f"Discharge: {discharge_time.strftime('%Y-%m-%d %H:%M')}")
print(f"Length of Stay: {length_of_stay.days} days, {length_of_stay.seconds // 3600} hours")
print("\n" + "-" * 60)
print("Event Summary:")
print("-" * 60)

for category in ['Diagnosis', 'Procedure', 'Lab Tests', 'Medication']:
    cat_events = events_df[events_df['category'] == category]
    if len(cat_events) > 0:
        total_items = cat_events['count'].sum()
        num_sessions = len(cat_events)
        print(f"\n{category}:")
        print(f"  - Total {category.lower()}: {total_items}")
        print(f"  - Administered in {num_sessions} session(s)")
        
print("\n" + "=" * 60)

HOSPITAL STAY SUMMARY

Admission: 2180-05-06 22:23
Discharge: 2180-05-07 17:15
Length of Stay: 0 days, 18 hours

------------------------------------------------------------
Event Summary:
------------------------------------------------------------

Diagnosis:
  - Total diagnosis: 8
  - Administered in 1 session(s)

Procedure:
  - Total procedure: 1
  - Administered in 1 session(s)

Lab Tests:
  - Total lab tests: 19
  - Administered in 3 session(s)

Medication:
  - Total medication: 14
  - Administered in 4 session(s)



## Detailed Event List (Expandable)

In [12]:
# Use Dataset's get_discharge_medications() method
# This shows medications prescribed at/after discharge
discharge_meds = dataset.get_discharge_medications(hadm_id)

print(f"Discharge Medications: {len(discharge_meds)}")
if not discharge_meds.empty:
    print("\nMedications to Continue After Discharge:")
    display(discharge_meds[['drug', 'dose_val_rx', 'dose_unit_rx', 'route', 'starttime', 'stoptime']].head(10))
else:
    print("No discharge medications found")

Discharge Medications: 11

Medications to Continue After Discharge:


Unnamed: 0,drug,dose_val_rx,dose_unit_rx,route,starttime,stoptime
0,Acetaminophen,500.0,mg,PO/NG,2180-05-07 01:00:00,2180-05-07 22:00:00
1,Albuterol Inhaler,2.0,PUFF,IH,2180-05-07 01:00:00,2180-05-07 22:00:00
2,Emtricitabine-Tenofovir (Truvada),1.0,TAB,PO,2180-05-07 01:00:00,2180-05-07 22:00:00
3,Furosemide,40.0,mg,PO/NG,2180-05-08 08:00:00,2180-05-07 22:00:00
4,Heparin,5000.0,UNIT,SC,2180-05-07 00:00:00,2180-05-07 22:00:00
5,Influenza Vaccine Quadrivalent,0.5,mL,IM,2180-05-07 00:00:00,2180-05-07 22:00:00
6,Ipratropium Bromide Neb,1.0,NEB,IH,2180-05-07 02:00:00,2180-05-07 22:00:00
7,Nicotine Patch,14.0,mg,TD,2180-05-07 01:00:00,2180-05-07 22:00:00
8,Raltegravir,400.0,mg,PO,2180-05-07 01:00:00,2180-05-07 22:00:00
9,Sodium Chloride 0.9% Flush,3.0,mL,IV,2180-05-07 00:00:00,2180-05-07 22:00:00


In [11]:
# Use Dataset's get_lab_results_summary() method for detailed lab analysis
# This method categorizes labs as positive/negative/abnormal/normal
lab_summary = dataset.get_lab_results_summary(hadm_id, include_normal=True)

print("Lab Results Summary:")
print(f"  - Flagged (abnormal) results: {len(lab_summary['flagged'])}")
print(f"  - Positive/High results: {len(lab_summary['positive'])}")
print(f"  - Negative/Low results: {len(lab_summary['negative'])}")
print(f"  - Total lab results: {len(lab_summary['all'])}")

# Show some abnormal lab results
print("\nSample Abnormal Lab Results:")
if not lab_summary['flagged'].empty:
    display(lab_summary['flagged'][['charttime', 'label', 'valuenum', 'valueuom', 'flag']].head(10))
else:
    print("No flagged lab results")

Lab Results Summary:
  - Flagged (abnormal) results: 19
  - Positive/High results: 19
  - Negative/Low results: 38
  - Total lab results: 57

Sample Abnormal Lab Results:


Unnamed: 0,charttime,label,valuenum,valueuom,flag
7,2180-05-07 00:10:00,Urobilinogen,4.0,mg/dL,abnormal
21,2180-05-07 05:05:00,"Calcium, Total",7.8,mg/dL,abnormal
23,2180-05-07 05:05:00,Creatinine,0.3,mg/dL,abnormal
28,2180-05-07 05:05:00,Urea Nitrogen,25.0,mg/dL,abnormal
29,2180-05-07 05:05:00,Asparate Aminotransferase (AST),98.0,IU/L,abnormal
32,2180-05-07 05:05:00,INR(PT),1.5,,abnormal
33,2180-05-07 05:05:00,Alanine Aminotransferase (ALT),88.0,IU/L,abnormal
36,2180-05-07 05:05:00,MCH,33.4,pg,abnormal
38,2180-05-07 05:05:00,MCV,99.0,fL,abnormal
39,2180-05-07 05:05:00,Platelet Count,71.0,K/uL,abnormal


## Additional Data Views Using Dataset Methods

In [10]:
# Show detailed breakdown by category
for category in events_df['category'].unique():
    cat_data = events_df[events_df['category'] == category]
    print(f"\n{'='*60}")
    print(f"{category.upper()}")
    print(f"{'='*60}")
    
    for _, row in cat_data.iterrows():
        print(f"\n{row['timestamp'].strftime('%Y-%m-%d %H:%M')} - {row['title']}")
        for item in row['items']:
            print(f"  • {item}")


ADMISSION

2180-05-06 22:23 - Admitted to Hospital
  • Admitted via TRANSFER FROM HOSPITAL

DIAGNOSIS

2180-05-06 22:23 - Diagnoses (8)
  • Posttraumatic stress disorder
  • Bipolar disorder, unspecified
  • Chronic airway obstruction, not elsewhere classified
  • Personal history of tobacco use
  • Cirrhosis of liver without mention of alcohol
  • Other ascites
  • Portal hypertension
  • Unspecified viral hepatitis C without hepatic coma

PROCEDURE

2180-05-07 00:00 - Procedures (1)
  • Percutaneous abdominal drainage

MEDICATION

2180-05-07 00:00 - Medications (3)
  • Heparin - 5000 UNIT
  • Sodium Chloride 0.9%  Flush - 3 mL
  • Influenza Vaccine Quadrivalent - 0.5 mL

2180-05-07 01:00 - Medications (8)
  • Albuterol Inhaler - 2 PUFF
  • Nicotine Patch - 14 mg
  • Emtricitabine-Tenofovir (Truvada) - 1 TAB
  • Spironolactone - 50 mg
  • Raltegravir - 400 mg
  • Potassium Chloride - 40 mEq
  • Furosemide - 20 mg
  • Acetaminophen - 500 mg

2180-05-07 02:00 - Medications (1)
  • Ipra