In [9]:
import pandas as pd
import numpy as np

In [10]:
# Load the cleaned patient dataset into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
PatientData = pd.read_csv("CleanedTGHdata.csv")

In [11]:
def merge_intervals(intervals):
    """Merge overlapping or touching (start, end) intervals.

    Parameters
    ----------
    intervals : iterable of (start, end) pairs
        Intervals in any order. The input is left untouched.

    Returns
    -------
    list
        Intervals ordered by start. An interval whose start is at or before
        the end of the previously kept interval is folded into it, so the
        result contains no overlapping or touching entries. Folded entries
        are new ``(start, end)`` tuples; entries that needed no merging are
        the original objects.
    """
    merged = []
    # sorted() returns a new list, so the caller's sequence keeps its order.
    for interval in sorted(intervals, key=lambda pair: pair[0]):
        if not merged or merged[-1][1] < interval[0]:
            # Strictly after the previous interval: start a new run.
            merged.append(interval)
        else:
            # Overlaps or touches the previous run: extend its end if needed.
            merged[-1] = (merged[-1][0], max(merged[-1][1], interval[1]))
    return merged

In [15]:
# READ
# How value-add time is computed for each patient:
# 1. Each row contributes the interval from 'Arrival to Order' to
#    'Arrival to Order Complete'. An order that starts at 15 and completes at 60
#    adds 45 minutes.
# 2. Overlapping intervals are merged first so shared minutes are counted once,
#    and gaps are not counted: if one order completes at 70 and the next starts
#    at 75, those 5 minutes are excluded.
# 3. An interval that runs past the patient's largest 'ArriveTo LastDispo Minutes'
#    is cut off at that value, so only the part before it is counted.
def calculate_total_value_add_time(df):
    """Compute the total value-add time per patient.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Patient ID', 'Arrival to Order',
        'Arrival to Order Complete' and 'ArriveTo LastDispo Minutes'.

    Returns
    -------
    pandas.DataFrame
        One row per patient with the columns 'Patient ID',
        'Total Value Add Time' and 'ArriveTo LastDispo Minutes' (the largest
        value seen for that patient). The columns are present even when
        ``df`` has no rows.
    """
    result_columns = ['Patient ID', 'Total Value Add Time', 'ArriveTo LastDispo Minutes']
    total_time_by_patient = []
    for patient_id, group in df.groupby('Patient ID'):
        # Rows missing either endpoint do not describe an interval. Leaving
        # them in would let a NaN end become the running end during merging
        # (every comparison with NaN is False) and swallow later intervals.
        order_times = group[['Arrival to Order', 'Arrival to Order Complete']].dropna()
        intervals = list(zip(order_times['Arrival to Order'], order_times['Arrival to Order Complete']))
        merged_intervals = merge_intervals(intervals)

        total_value_add_time = 0
        max_last_dispo_minutes = group['ArriveTo LastDispo Minutes'].max()
        for start, end in merged_intervals:
            # Ensure that end does not exceed max_last_dispo_minutes
            end = min(end, max_last_dispo_minutes)
            # Add only positive time intervals (an interval starting after the
            # cut-off would otherwise contribute a negative duration)
            total_value_add_time += max(0, end - start)

        total_time_by_patient.append({'Patient ID': patient_id, 'Total Value Add Time': total_value_add_time, 'ArriveTo LastDispo Minutes': max_last_dispo_minutes})

    # Passing columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(total_time_by_patient, columns=result_columns)

In [20]:
# Per-patient value-add totals computed from the loaded PatientData frame.
ValueAddTime = calculate_total_value_add_time(PatientData)

# Value-add time as a percentage of each patient's total time to last disposition.
ValueAddTime['Value Add Percentage'] = (
    ValueAddTime['Total Value Add Time']
    .div(ValueAddTime['ArriveTo LastDispo Minutes'])
    .mul(100)
)

# Reporting table: rename the total column, then keep only the reporting
# columns in their display order.
ValueAddPercentageTable = (
    ValueAddTime
    .rename(columns={'Total Value Add Time': 'Value Add Time'})
    [['Patient ID', 'Value Add Time', 'Value Add Percentage', 'ArriveTo LastDispo Minutes']]
)

Unnamed: 0,Patient ID,Value Add Time,Value Add Percentage,ArriveTo LastDispo Minutes
0,20000,171.5,39.561707,433.5
1,20002,233.1,95.104039,245.1
2,20003,18.0,13.931889,129.2
3,20004,441.0,72.880516,605.1
4,20005,97.7,83.718937,116.7
5,20007,105.0,44.285112,237.1
6,20008,502.0,75.750717,662.7
7,20009,277.2,98.228207,282.2
8,20010,203.0,66.688568,304.4
9,20011,188.1,62.058726,303.1


In [18]:
# Order patients from the smallest to the largest value-add time, then display the table.
ValueAddPercentageTable = ValueAddPercentageTable.sort_values('Value Add Time')
ValueAddPercentageTable

Unnamed: 0,Patient ID,Value Add Time,Value Add Percentage,ArriveTo LastDispo Minutes
6136,26740,0.0,0.000000,17.6
6708,27374,0.0,0.000000,17.0
8155,28966,0.0,0.000000,232.9
1777,21944,0.0,0.000000,144.1
437,20488,0.0,0.000000,42.5
...,...,...,...,...
2084,22274,1520.8,97.939207,1552.8
12711,33954,1984.3,99.051565,2003.3
10849,31921,2098.1,98.129180,2138.1
1662,21821,2938.6,96.328591,3050.6
