
# Lab Turnaround Time (TAT) Optimization — Baseline EDA

This notebook performs:
- Data loading & cleaning
- KPI calculations (TATs, SLA hit rates)
- Summary tables by test, shift, bench, and priority
- Visualizations of TAT distributions
- Bottleneck analysis setup


In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the lab event log; every workflow timestamp column is parsed as datetime
# so the later cells can subtract them directly.
timestamp_cols = [
    "collection_time",
    "receipt_time",
    "start_analysis_time",
    "verification_time",
    "report_time",
]
df = pd.read_csv("../data/lab_events.csv", parse_dates=timestamp_cols)

# Preview the first rows as a sanity check on the load.
df.head()


In [None]:

# Compute stage TATs (minutes) from the parsed timestamp columns.
# NOTE(review): tat_pre ends at receipt_time while tat_anal starts at
# start_analysis_time, so the receipt -> start-of-analysis wait belongs to no
# stage and the three stages need not add up to tat_total.
df["tat_pre"]  = (df["receipt_time"] - df["collection_time"]).dt.total_seconds()/60
df["tat_anal"] = (df["verification_time"] - df["start_analysis_time"]).dt.total_seconds()/60
df["tat_post"] = (df["report_time"] - df["verification_time"]).dt.total_seconds()/60
df["tat_total"]= (df["report_time"] - df["collection_time"]).dt.total_seconds()/60

# Drop invalid values (negative or > 7 days). Rows whose tat_total is NaN fail
# both comparisons and are dropped as well.
max_tat_min = 7*24*60
valid_total = (df["tat_total"]>=0) & (df["tat_total"]<=max_tat_min)
# .copy() detaches the filtered rows from the original frame; without it the
# column assignments below are made on a slice and pandas emits
# SettingWithCopyWarning.
df = df[valid_total].copy()

# SLA thresholds (minutes); test codes missing from the map fall back to the default.
sla_map = {"CBC":120, "CMP":240, "PT/INR":60, "UA":120, "PathReview":2880}
default_sla_min = 240
df["sla_min"] = df["test_code"].map(sla_map).fillna(default_sla_min)
# 1 when the total TAT is within the SLA threshold, else 0.
df["sla_hit"] = (df["tat_total"] <= df["sla_min"]).astype(int)

# Select the TAT columns first so only these four columns are summarised.
df[["tat_pre","tat_anal","tat_post","tat_total"]].describe()


In [None]:

# Summaries
def _summarize_tat(frame, by, with_p95=False):
    """Aggregate total-TAT KPIs of ``frame`` per value of the ``by`` column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``order_id``, ``tat_total``, ``sla_hit`` and the ``by`` column.
    by : str
        Name of the column to group on.
    with_p95 : bool, default False
        When True, add a ``tat_p95`` column (95th percentile of ``tat_total``).

    Returns
    -------
    pandas.DataFrame
        One row per group with columns ``by``, ``n`` (non-null ``order_id`` count),
        ``tat_median``, optionally ``tat_p95``, and ``sla_hit_rate``
        (mean of ``sla_hit``).
    """
    # Insertion order of this dict fixes the output column order.
    spec = {
        "n": ("order_id", "count"),
        "tat_median": ("tat_total", "median"),
    }
    if with_p95:
        # Series.quantile skips NaN, whereas np.percentile would return NaN
        # for a group containing any missing value.
        spec["tat_p95"] = ("tat_total", lambda s: s.quantile(0.95))
    spec["sla_hit_rate"] = ("sla_hit", "mean")
    return frame.groupby(by).agg(**spec).reset_index()


# Per-test summary is ordered from fastest to slowest median TAT.
by_test = _summarize_tat(df, "test_code", with_p95=True).sort_values("tat_median")
by_shift = _summarize_tat(df, "shift")
by_bench = _summarize_tat(df, "bench", with_p95=True)
by_priority = _summarize_tat(df, "priority")

# display() renders each table with its rich HTML repr; a bare tuple of frames
# would be shown as a single plain-text repr instead.
display(by_test, by_shift, by_bench, by_priority)


In [None]:

# Apply seaborn's white-grid style to the figures drawn below.
sns.set(style="whitegrid")

# Total TAT spread per test: one box per test code.
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x="test_code", y="tat_total", ax=ax)
ax.set_title("Distribution of Total TAT by Test")
ax.set_ylabel("TAT (minutes)")
plt.show()

# Median-TAT bar charts, drawn in order: shift view, then priority view.
bar_views = [
    (by_shift, "shift", "Median TAT by Shift"),
    (by_priority, "priority", "Median TAT by Priority"),
]
for summary, group_col, chart_title in bar_views:
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.barplot(data=summary, x=group_col, y="tat_median", ax=ax)
    ax.set_title(chart_title)
    plt.show()


In [None]:

# Stage contribution ratio: each stage's TAT as a fraction of the same row's total TAT.
stage_cols = ["tat_pre", "tat_anal", "tat_post"]
# Division by a zero total yields +/-inf (or NaN for 0/0); map inf to NaN.
# Undefined shares are deliberately left as NaN rather than filled with 0:
# a missing stage is not a zero-length stage, and zeros would drag the
# per-test medians below down. median() skips NaN by default.
stage_share = (df[stage_cols].div(df["tat_total"], axis=0)
               .replace([np.inf, -np.inf], np.nan))
df_share = pd.concat([df[["test_code","shift"]], stage_share], axis=1)

# Each stage's median is taken independently, so the stacked bars need not
# add up to exactly 1.
stage_medians = df_share.groupby("test_code")[stage_cols].median()
ax = stage_medians.plot(kind="bar", stacked=True, figsize=(8,5))
ax.set_title("Median Stage Contributions to Total TAT (by Test)")
ax.set_ylabel("Share of total TAT")
plt.show()
