# Deidentify STS cross-reference output

Generate a new cross-reference file with MRNs replaced by IDs

In [None]:
import os
import pandas as pd

In [None]:
# Read the cross-reference output (MGH ECGs ∩ MGH STS) into a dataframe
xref_path = '/data/sts-data/mgh-preop-ecg-outcome-labels.csv'
df = pd.read_csv(xref_path)

In [None]:
# Read the MGH MRN -> new ID lookup table stored in the user's home directory
fpath = os.path.expanduser('~/mgh-ecg-deid-map.csv')
df_map = pd.read_csv(fpath)

# Index the table by MRN and keep only the new_id column as a {mrn: new_id} dict
mrn_map = df_map.set_index('mrn')['new_id'].to_dict()

In [None]:
# Report how many rows of the cross-reference dataframe carry an MRN that is a
# key of the MRN map dict, next to the total row count. The two printed counts
# are equal when every MRN is covered by the map. This cell only reports the
# counts; it does not stop execution when they differ.
num_mrn_in_xref_df_also_in_map = int(
    df.partners_ecg_patientid_clean.isin(list(mrn_map)).sum()  # vectorised count of True
)
print(f"Number of MRNs in xref dataframe also in MRN map: {num_mrn_in_xref_df_also_in_map}")
print(f"Number of MRNs in xref dataframe: {df.shape[0]}")

In [None]:
# Replace value in partners_ecg_patientid_clean with new ID in map.
#
# Series.map looks each MRN up in the dict and yields NaN when the MRN is not
# a key. DataFrame.replace would instead leave such an MRN untouched, letting
# a raw MRN slip into the de-identified output, so unmapped MRNs are treated
# as a hard error here.
new_ids = df['partners_ecg_patientid_clean'].map(mrn_map)

# Rows that had an MRN but did not receive a new ID (rows whose MRN was already
# missing are not counted: there is nothing to de-identify in them).
unmapped = new_ids.isna() & df['partners_ecg_patientid_clean'].notna()
if unmapped.any():
    raise ValueError(
        f"{int(unmapped.sum())} MRNs in xref dataframe have no new ID in MRN map"
    )

# Build a new dataframe with the column swapped, as the original rebinding did
df = df.assign(partners_ecg_patientid_clean=new_ids)

In [None]:
# Give the patient ID column the shorter name 'mrn', then store its values as int
column_renames = {'partners_ecg_patientid_clean': 'mrn'}
df = df.rename(columns=column_renames)
df['mrn'] = df['mrn'].astype(int)

In [None]:
# Remove the 'medrecn' column from df in place.
# NOTE(review): 'medrecn' presumably holds a raw medical record number - confirm.
df.drop(labels=['medrecn'], axis='columns', inplace=True)

In [None]:
# Write df to a new CSV file in the user's home directory and report the path.
# NOTE(review): the dataframe index is written as the first CSV column
# (to_csv default) - confirm downstream readers expect it.
fpath = os.path.expanduser('~/mgh-preop-ecg-outcome-labels-deid.csv')
df.to_csv(path_or_buf=fpath)
print(f"Saved {fpath}")