In [None]:
import pandas as pd
import seaborn as sns
# IPython imports
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Get the order of categories from MGH and use for all cohorts
df = pd.read_csv('phewas_mgh_with_mgh_v2022_11_30.csv')
# Per-phecode AUC gain of the model with ECG over the model without it.
df['AUC_diff'] = df['AUC_with_ECG'] - df['AUC_no_ECG']
# Median gain per category, ranked from largest to smallest; `ms` is kept as a
# list of (category, median) pairs and `cat_order` holds just the category names.
ms = dict(df.groupby('category')['AUC_diff'].median())
ms = list(reversed(sorted(ms.items(), key=lambda x: x[1])))
cat_order = [m[0] for m in ms]
# The original print called len() with no argument, which raises TypeError.
# Report the number of rows whose phecode passes the Bonferroni threshold
# (0.05 divided by the number of rows), mirroring the count printed in the next cell.
sig_phecodes = df.loc[df.p_value < 0.05 / len(df), 'phecode']
print(f'len(df) {len(df)} SIG len(df) {len(df[df.phecode.isin(sig_phecodes.values.tolist())])}')

In [None]:
# Bonferroni-corrected significance: keep rows with p_value below 0.05 divided
# by the number of rows in df.
# NOTE(review): this relies on `df` still being the MGH frame loaded in the
# previous cell; running it after a later cell rebinds `df` changes sig_df.
sig_df = df[df.p_value < 0.05 / len(df)]
# (A bare `sig_df.phecode` expression used to sit here; it was not the last
# statement of the cell, so it displayed nothing and has been removed.)
print(f'len(df) {len(df)} SIG len(df) {len(df[df.phecode.isin(sig_df.phecode.values.tolist())])}')

In [None]:
# Load the UKB results file and draw per-category boxplots of both AUC columns
# (outliers hidden, category labels rotated by 90 degrees).
df = pd.read_csv('./phewas_ukb_with_mgh_vectors_2022_11_30.csv')
boxplot = df.boxplot(
    column=['AUC_with_ECG', 'AUC_no_ECG'],
    by=['category'],
    rot=90,
    showfliers=False,
    figsize=(12, 5),
)
# Add the AUC difference column and order the rows by category name.
df = df.assign(AUC_diff=df['AUC_with_ECG'] - df['AUC_no_ECG'])
df = df.sort_values('category', ascending=True)



In [None]:
# Reload the UKB results; the first pair of boxplots is drawn from every row,
# before the frame is restricted to the phecodes present in sig_df.
df = pd.read_csv('./phewas_ukb_with_mgh_vectors_2022_11_30.csv')
boxplot = df.boxplot(
    column=['AUC_with_ECG', 'AUC_no_ECG'],
    by=['category'],
    rot=90,
    showfliers=False,
    figsize=(12, 5),
)
for panel_idx in (0, 1):
    boxplot[panel_idx].set_xlabel('UKB')

# AUC difference per phecode, then keep only phecodes listed in sig_df.
df = df.assign(AUC_diff=df['AUC_with_ECG'] - df['AUC_no_ECG'])
significant_phecodes = sig_df.phecode.values.tolist()
df = df[df.phecode.isin(significant_phecodes)]
df = df.sort_values('category', ascending=True)

# Boxplot of the AUC difference by category for the retained rows.
boxplot = df.boxplot(column=['AUC_diff'], by=['category'], rot=90, showfliers=False)
boxplot.set_title('UKB', fontsize=10)
boxplot.set_xlabel('')

# Median, lower quartile and upper quartile of the difference per category.
for q in (0.5, 0.25, 0.75):
    print(df.groupby('category')['AUC_diff'].quantile(q))

In [None]:
# Mean AUC_with_ECG within each category of the current df.
df['AUC_with_ECG'].groupby(df['category']).mean()

In [None]:
import numpy as np
# Upper quartile (75th percentile) of AUC_with_ECG within each category.
df['AUC_with_ECG'].groupby(df['category']).quantile(0.75)

In [None]:
# Figure 1: AUC_with_ECG by category, with categories in the shared cat_order.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
sns.boxplot(data=df, x='category', y="AUC_with_ECG", order=cat_order, fliersize=0, ax=ax)
plt.xticks(rotation=90)
ax.set_ylabel(r'$AUC_{ECG Model}$')
ax.set_title('AUC with ECG + Age + Sex + Race in UKB')
ax.set_ylim(0.4, 1.0)

# Figure 2: AUC_no_ECG by category on the same y-range.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
sns.boxplot(data=df, x='category', y="AUC_no_ECG", order=cat_order, fliersize=0, ax=ax)
plt.xticks(rotation=90)
ax.set_ylabel(r'$AUC_{Base Model}$')
ax.set_title('AUC with Age + Sex + Race in UKB')
ax.set_ylim(0.4, 1.0)

In [None]:
# AUC difference (AUC_diff column) by category, categories in cat_order.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
sns.boxplot(data=df, x='category', y="AUC_diff", order=cat_order, fliersize=0, ax=ax)
plt.xticks(rotation=90)
ax.set_ylabel(r'$AUC_{ECG Model}$ – $AUC_{Base Model}$')
ax.set_title('AUC Differences By Phecode Category in UKB')
ax.set_ylim(-0.01, 0.15)

In [None]:
# Same AUC_diff-by-category boxplot without title, axis label or y-limits.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
sns.boxplot(data=df, x='category', y="AUC_diff", order=cat_order, fliersize=0, ax=ax)
plt.xticks(rotation=90)

In [None]:
# Load the BWH results; the first pair of boxplots is drawn from every row,
# before the frame is restricted to the phecodes present in sig_df.
df = pd.read_csv('phewas_bwh_with_mgh_v2022_11_30.csv')
boxplot = df.boxplot(
    column=['AUC_with_ECG', 'AUC_no_ECG'],
    by=['category'],
    rot=90,
    showfliers=False,
    figsize=(12, 5),
)
for panel_idx in (0, 1):
    boxplot[panel_idx].set_xlabel('BWH')

# AUC difference per phecode, then keep only phecodes listed in sig_df.
df = df.assign(AUC_diff=df['AUC_with_ECG'] - df['AUC_no_ECG'])
significant_phecodes = sig_df.phecode.values.tolist()
df = df[df.phecode.isin(significant_phecodes)]

# Boxplot of the AUC difference by category for the retained rows.
boxplot = df.boxplot(column=['AUC_diff'], by=['category'], rot=90, showfliers=False)
boxplot.set_title('BWH', fontsize=10)
boxplot.set_xlabel('')

# Median, lower quartile and upper quartile of the difference per category.
for q in (0.5, 0.25, 0.75):
    print(df.groupby('category')['AUC_diff'].quantile(q))


In [None]:
# Figure 1: AUC_with_ECG by category, with categories in the shared cat_order.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
sns.boxplot(data=df, x='category', y="AUC_with_ECG", order=cat_order, fliersize=0, ax=ax)
plt.xticks(rotation=90)
ax.set_ylabel(r'$AUC_{ECG Model}$')
ax.set_title('AUC with ECG + Age + Sex + Race in BWH')
ax.set_ylim(0.4, 1.0)

# Figure 2: AUC_no_ECG by category on the same y-range.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
sns.boxplot(data=df, x='category', y="AUC_no_ECG", order=cat_order, fliersize=0, ax=ax)
plt.xticks(rotation=90)
ax.set_ylabel(r'$AUC_{Base Model}$')
ax.set_title('AUC with Age + Sex + Race in BWH')
ax.set_ylim(0.4, 1.0)

# Figure 3: AUC_diff by category.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
sns.boxplot(data=df, x='category', y="AUC_diff", order=cat_order, fliersize=0, ax=ax)
plt.xticks(rotation=90)
ax.set_ylabel(r'$AUC_{ECG Model}$ – $AUC_{Base Model}$')
ax.set_title('AUC Differences By Phecode Category in BWH')
ax.set_ylim(-0.01, 0.2)

In [None]:
# Load the MGH results; the first pair of boxplots is drawn from every row,
# before the frame is restricted to the phecodes present in sig_df.
df = pd.read_csv('phewas_mgh_with_mgh_v2022_11_30.csv')
boxplot = df.boxplot(
    column=['AUC_with_ECG', 'AUC_no_ECG'],
    by=['category'],
    rot=90,
    showfliers=False,
    figsize=(12, 5),
)
for panel_idx in (0, 1):
    boxplot[panel_idx].set_xlabel('MGH')

# AUC difference per phecode, then keep only phecodes listed in sig_df.
df = df.assign(AUC_diff=df['AUC_with_ECG'] - df['AUC_no_ECG'])
significant_phecodes = sig_df.phecode.values.tolist()
df = df[df.phecode.isin(significant_phecodes)]

# Boxplot of the AUC difference by category for the retained rows.
boxplot = df.boxplot(column=['AUC_diff'], by=['category'], rot=90, showfliers=False)
boxplot.set_title('MGH', fontsize=10)
boxplot.set_xlabel('')
boxplot.set_ylabel('AUC Difference')

# Median, lower quartile and upper quartile of the difference per category.
for q in (0.5, 0.25, 0.75):
    print(df.groupby('category')['AUC_diff'].quantile(q))

In [None]:
# Figure 1: AUC_with_ECG by category, with categories in the shared cat_order.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
sns.boxplot(data=df, x='category', y="AUC_with_ECG", order=cat_order, fliersize=0, ax=ax)
plt.xticks(rotation=90)
ax.set_ylabel(r'$AUC_{ECG Model}$')
ax.set_title('AUC with ECG + Age + Sex + Race in MGH')
ax.set_ylim(0.4, 1.0)

# Figure 2: AUC_no_ECG by category on the same y-range.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
sns.boxplot(data=df, x='category', y="AUC_no_ECG", order=cat_order, fliersize=0, ax=ax)
plt.xticks(rotation=90)
ax.set_ylabel(r'$AUC_{Base Model}$')
ax.set_title('AUC with Age + Sex + Race in MGH')
ax.set_ylim(0.4, 1.0)

# Figure 3: AUC_diff by category.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
sns.boxplot(data=df, x='category', y="AUC_diff", order=cat_order, fliersize=0, ax=ax)
plt.xticks(rotation=90)
ax.set_ylabel(r'$AUC_{ECG Model}$ – $AUC_{Base Model}$')
ax.set_title('AUC Differences By Phecode Category in MGH')
ax.set_ylim(-0.01, 0.2)

In [None]:
# Number of rows in the current (filtered) df.
df.shape[0]

In [None]:
# Re-rank the (category, median AUC_diff) pairs from largest to smallest median.
# `ms` was already rebound to a list of pairs in the MGH category-order cell, and
# a list has no .items(), so the original call raised AttributeError. Passing it
# through dict() accepts either a mapping or a list of pairs.
ms = list(reversed(sorted(dict(ms).items(), key=lambda x: x[1])))

In [None]:
# Display the (category, median AUC_diff) pairs.
ms

In [None]:
# Display the category order passed as `order=` to the seaborn boxplots.
cat_order