## Finding time 0 values (time where troponin has changed by more than or equals to 20%)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read and display data
df = pd.read_csv("data-1.csv")
df.head()

Unnamed: 0,subject_id,hospital_expire_flag,deathtime,gender,anchor_age,dod,valuenum,valueuom,itemid,charttime,storetime,icu_intime,icu_outtime,aspirin,p2y12,other_antiplatelet,starttime,stoptime
0,10881690,0,,F,74,,4.9,g/dL,51222,25/4/2163 14:26,25/4/2163 15:20,25/4/2163 10:42,30/4/2163 22:43,0,0,1,25/4/2163 13:00,25/4/2163 13:00
1,10881690,0,,F,74,,4.8,g/dL,51222,25/4/2163 16:15,25/4/2163 16:48,25/4/2163 10:42,30/4/2163 22:43,0,0,1,25/4/2163 13:00,25/4/2163 13:00
2,10881690,0,,F,74,,8.6,g/dL,51222,25/4/2163 19:36,25/4/2163 20:16,25/4/2163 10:42,30/4/2163 22:43,0,0,1,25/4/2163 13:00,25/4/2163 13:00
3,10881690,0,,F,74,,9.0,g/dL,51222,25/4/2163 23:29,25/4/2163 23:58,25/4/2163 10:42,30/4/2163 22:43,0,0,1,25/4/2163 13:00,25/4/2163 13:00
4,10881690,0,,F,74,,8.2,g/dL,51222,26/4/2163 3:36,26/4/2163 4:18,25/4/2163 10:42,30/4/2163 22:43,0,0,1,25/4/2163 13:00,25/4/2163 13:00


In [3]:
# Determine dataset dimensions
df.shape

(245361, 18)

In [4]:
# Determine data types
df.dtypes

subject_id                int64
hospital_expire_flag      int64
deathtime                object
gender                   object
anchor_age                int64
dod                      object
valuenum                float64
valueuom                 object
itemid                    int64
charttime                object
storetime                object
icu_intime               object
icu_outtime              object
aspirin                   int64
p2y12                     int64
other_antiplatelet        int64
starttime                object
stoptime                 object
dtype: object

In [5]:
# Descriptive Statistics, might need to modify dataset to get accurate statistics (e.g. remove hemoglobin rows to get correct mean values for troponin levels)
df.describe()

Unnamed: 0,subject_id,hospital_expire_flag,anchor_age,valuenum,itemid,aspirin,p2y12,other_antiplatelet
count,245361.0,245361.0,245361.0,245361.0,245361.0,245361.0,245361.0,245361.0
mean,15027120.0,0.144106,67.736246,8.850248,51193.370169,0.844759,0.153749,0.001492
std,2911408.0,0.351198,13.454805,3.434529,73.826066,0.362135,0.360709,0.038594
min,10000980.0,0.0,18.0,0.01,51003.0,0.0,0.0,0.0
25%,12412350.0,0.0,59.0,8.0,51222.0,1.0,0.0,0.0
50%,15141650.0,0.0,69.0,9.4,51222.0,1.0,0.0,0.0
75%,17551800.0,0.0,78.0,10.8,51222.0,1.0,0.0,0.0
max,19997750.0,1.0,91.0,42.6,51222.0,1.0,1.0,1.0


In [6]:
# Remove rows with hemoglobin values (to reduce dataset size for the purposes of analysing troponin values only)
df_hemoglobin_removed = df[df["itemid"] != 51222] # itemid of hemoglobin is 51222
df_hemoglobin_removed.shape

(32076, 18)

In [7]:
# Count the total number of unique subjects (patients)
n = len(pd.unique(df_hemoglobin_removed['subject_id']))
print("No.of.patients :", n)

# Generate list of subject IDs
patient_ids = pd.unique(df_hemoglobin_removed['subject_id'])
print("List of Subject IDs: ", patient_ids)

No.of.patients : 5153
List of Subject IDs:  [12588030 12859888 13660560 ... 19995780 19997293 19997752]


In [8]:
# Create list to store indexes of records which represent times where troponin change is above or equal to 20%
indexes_with_troponin_change = []

# Check whether troponin levels change by more than or equal to 20% for each subject
for i in patient_ids:
    # Isolate the various rows of records for the individual subject
    subject = df_hemoglobin_removed[df_hemoglobin_removed["subject_id"] == i]
    
    # Sort the rows by storetime (time troponin measurements were made available to clinicians) in ascending order
    subject1 = subject.sort_values(by=['storetime'])
    
    # Drop duplicate rows showing troponin values taken from the same measurement
    subject1 = subject1.drop_duplicates(subset=['storetime'])
    
    if len(subject1.index) == 1:   # Exclude people with only 1 troponin data point
        continue
    
    else:
        # Convert storetime variable into datetime data type and calculate time difference between troponin measurements. Store time difference between troponin measurements in a new variable
        subject1['Time_diff_between_Troponin_measurements'] = pd.to_datetime(subject1["storetime"].astype(str), format='%d/%m/%Y %H:%M').diff(1).dt.total_seconds().div(3600)
    
        for i in range(len(subject1.index) - 1):   # Iterate over the rows (representing different troponin measurements, in chronological order) for each subject
            # Get troponin value from initial row
            troponin_1st_reading = subject1._get_value(subject1.index[i], 'valuenum')

            # Get troponin value from subsequent row
            troponin_2nd_reading = subject1._get_value(subject1.index[i+1], 'valuenum')
            
            # Get time difference between troponin measurements in initial and subsequent row
            time_diff_between_troponin_measurements = subject1._get_value(subject1.index[i+1], 'Time_diff_between_Troponin_measurements')
            
            # If loop to determine if the absolute difference in troponin values is greater than 20% and if the time difference between troponin measurements is less than 12 hours
            if np.abs((troponin_2nd_reading - troponin_1st_reading)/troponin_1st_reading) >= 0.2 and time_diff_between_troponin_measurements < 12:
                indexes_with_troponin_change.append(subject1.index[i+1])   # Include subject_id in list of subjects with troponin change greater than 20%

In [9]:
print(indexes_with_troponin_change)
print(len(indexes_with_troponin_change))

[38, 71, 66, 78, 173, 178, 223, 194132, 242, 197723, 197772, 197777, 295, 377, 372, 373, 386, 433, 492, 38581, 605, 638, 653, 670, 714, 782, 790, 788, 800, 799, 807, 808, 830, 872, 846, 853, 820, 889, 895, 900, 972, 981, 1009, 1017, 1069, 1071, 1072, 1087, 1096, 41245, 1174, 1172, 1177, 1179, 1192, 1194, 1195, 1249, 1372, 1370, 1374, 1394, 1440, 1476, 1475, 1504, 1502, 1507, 1516, 1532, 1583, 1624, 1766, 1817, 1818, 44315, 1829, 1910, 1908, 1964, 1969, 1978, 1981, 1996, 45331, 45279, 45287, 45282, 45336, 2013, 2074, 2122, 2152, 2183, 2203, 2205, 2336, 2333, 2395, 2408, 2498, 48317, 48292, 48295, 2684, 48884, 49041, 2933, 2938, 2970, 2979, 2981, 3072, 3237, 3251, 3301, 3302, 51239, 51237, 51242, 3363, 3387, 52193, 3410, 3412, 3414, 3432, 3463, 3494, 3505, 3499, 3501, 3498, 3570, 3571, 3632, 3614, 3606, 3645, 3643, 3743, 3790, 3799, 3815, 3821, 3822, 3845, 55459, 3856, 3909, 3986, 4001, 4002, 4021, 4027, 4048, 4054, 56104, 56663, 56681, 56709, 4184, 4210, 4249, 4275, 4312, 4310, 4321, 57

In [10]:
# Find rows with subject_id and storetime values. These storetime values represent the time0 values where the troponin level has changed by more than or equal to 20%
relevant_indexes_with_time0_values = df.loc[np.array(indexes_with_troponin_change), ["subject_id","storetime"]]
relevant_indexes_with_time0_values

Unnamed: 0,subject_id,storetime
38,12588030,10/12/2129 21:19
71,12859888,1/3/2121 22:24
66,12859888,1/3/2121 6:27
78,13660560,22/9/2154 19:39
173,16711022,25/10/2204 5:29
...,...,...
244654,19979663,9/5/2159 16:45
244699,19982539,24/4/2175 17:15
244709,19982539,30/4/2175 3:36
244808,19985259,20/12/2129 0:20


## Finding Anti Platelet Treatments within 24 hours of >20% Troponin Change

In [11]:
modified_df = df.loc[np.array(indexes_with_troponin_change), ["subject_id","storetime", "aspirin", "p2y12", "other_antiplatelet", "starttime"]]
modified_df.head()

Unnamed: 0,subject_id,storetime,aspirin,p2y12,other_antiplatelet,starttime
38,12588030,10/12/2129 21:19,0,0,1,11/12/2129 20:00
71,12859888,1/3/2121 22:24,0,0,1,1/3/2121 6:00
66,12859888,1/3/2121 6:27,0,0,1,1/3/2121 6:00
78,13660560,22/9/2154 19:39,0,0,1,22/9/2154 18:00
173,16711022,25/10/2204 5:29,0,0,1,25/10/2204 1:00


In [17]:
# Convert storetime variable into datetime data type and calculate time difference between troponin measurements. Store time difference between troponin measurements in a new variable
pd.to_datetime(modified_df["starttime"].astype(str), format='%d/%m/%Y %H:%M')
modified_df['Time_to_treatment_after_troponin_change'] = modified_df["storetime"] - modified_df["starttime"]
#modified_df['Time_to_treatment_after_troponin_change'] = pd.Timedelta(modified_df["starttime"] - modified_df["storetime"]).seconds / 3600.0
modified_df.head()

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [None]:
# If loop to determine if the absolute difference in troponin values is greater than 20% and if the time difference between troponin measurements is less than 12 hours
if np.abs((troponin_2nd_reading - troponin_1st_reading)/troponin_1st_reading) >= 0.2 and time_diff_between_troponin_measurements < 12:
                indexes_with_troponin_change.append(subject1.index[i+1])   # Include subject_id in list of subjects with troponin change greater than 20%

## Code for Single Patient Case (For Reference, can ignore)

In [12]:
subject = df_hemoglobin_removed[df_hemoglobin_removed["subject_id"] == 16243121]
subject1 = subject.sort_values(by=['storetime'])
subject1 = subject1.drop_duplicates(subset=['storetime'])
subject1.head()

Unnamed: 0,subject_id,hospital_expire_flag,deathtime,gender,anchor_age,dod,valuenum,valueuom,itemid,charttime,storetime,icu_intime,icu_outtime,aspirin,p2y12,other_antiplatelet,starttime,stoptime
23377,16243121,0,,M,57,,6.28,ng/mL,51003,7/12/2117 10:18,7/12/2117 11:44,7/12/2117 7:01,8/12/2117 15:03,0,1,0,8/12/2117 8:00,10/12/2117 19:00
23379,16243121,0,,M,57,,8.35,ng/mL,51003,7/12/2117 18:00,7/12/2117 19:59,7/12/2117 7:01,8/12/2117 15:03,0,1,0,8/12/2117 8:00,10/12/2117 19:00
23380,16243121,0,,M,57,,7.11,ng/mL,51003,7/12/2117 19:45,7/12/2117 20:30,7/12/2117 7:01,8/12/2117 15:03,0,1,0,8/12/2117 8:00,10/12/2117 19:00
23381,16243121,0,,M,57,,6.71,ng/mL,51003,8/12/2117 5:40,8/12/2117 7:36,7/12/2117 7:01,8/12/2117 15:03,0,1,0,8/12/2117 8:00,10/12/2117 19:00


In [13]:
# Convert storetime variable into datetime data type and calculate time difference between troponin measurements
subject1['Time_diff_between_Troponin_measurements'] = pd.to_datetime(subject1["storetime"].astype(str), format='%d/%m/%Y %H:%M').diff(1).dt.total_seconds().div(3600)

subject1.head()

Unnamed: 0,subject_id,hospital_expire_flag,deathtime,gender,anchor_age,dod,valuenum,valueuom,itemid,charttime,storetime,icu_intime,icu_outtime,aspirin,p2y12,other_antiplatelet,starttime,stoptime,Time_diff_between_Troponin_measurements
23377,16243121,0,,M,57,,6.28,ng/mL,51003,7/12/2117 10:18,7/12/2117 11:44,7/12/2117 7:01,8/12/2117 15:03,0,1,0,8/12/2117 8:00,10/12/2117 19:00,
23379,16243121,0,,M,57,,8.35,ng/mL,51003,7/12/2117 18:00,7/12/2117 19:59,7/12/2117 7:01,8/12/2117 15:03,0,1,0,8/12/2117 8:00,10/12/2117 19:00,8.25
23380,16243121,0,,M,57,,7.11,ng/mL,51003,7/12/2117 19:45,7/12/2117 20:30,7/12/2117 7:01,8/12/2117 15:03,0,1,0,8/12/2117 8:00,10/12/2117 19:00,0.516667
23381,16243121,0,,M,57,,6.71,ng/mL,51003,8/12/2117 5:40,8/12/2117 7:36,7/12/2117 7:01,8/12/2117 15:03,0,1,0,8/12/2117 8:00,10/12/2117 19:00,11.1


In [14]:
if len(subject1.index) == 1:    # Exclude people with only 1 troponin data point
    continue
else:    # Iterate over the rows for each subject
    for i in range(len(subject1.index) - 1):
        troponin_1st_reading = subject1._get_value(subject1.index[i], 'valuenum')     # Get troponin value from initial row
        troponin_2nd_reading = subject1._get_value(subject1.index[i+1], 'valuenum')   # Get troponin value from subsequent row
        time_diff_between_troponin_measurements = subject1._get_value(subject1.index[i+1], 'Time_diff_between_Troponin_measurements')   # Get time difference between troponin measurements in initial and subsequent row
        #print(troponin_1st_reading, troponin_2nd_reading, time_diff_between_troponin_measurements)
        if np.abs((troponin_2nd_reading - troponin_1st_reading)/troponin_1st_reading) >= 0.2 and time_diff_between_troponin_measurements < 12:   # If loop to determine if the absolute difference in troponin values is greater than 20% and if the time difference between troponin measurements is less than 12 hours
            subject_id_list.append(subject1.index[i+1])
            break

SyntaxError: 'continue' not properly in loop (<ipython-input-14-81025fc6b4a7>, line 2)