In [None]:
import os
import csv 
import pandas as pd 
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Load the ECG metadata table into `data`.
# The `with` block closes the file handle when it exits, so no explicit
# close() call is needed afterwards.
with open(os.path.expanduser("~/dropbox/sts_ecg/results/explore_on_ecg_metadata/tensors_all_intersect.csv")) as f:
    data = pd.read_csv(f)

In [None]:
# Show the available columns so the hand-written names below can be checked
# against the loaded table.
print(data.columns)

# Predictor columns; the coefficient table built later pairs this list
# positionally with the fitted coefficients, so the order matters.
feature_cols = [
    'ecg_rate_md_newest_sts',
    'ecg_pr_md_newest_sts',
    'ecg_qrs_md_newest_sts',
    'ecg_qt_md_newest_sts',
    'ecg_qtc_md_newest_sts',
    'ecg_paxis_md_newest_sts',
    'ecg_raxis_md_newest_sts',
    'ecg_taxis_md_newest_sts',
    'ecg_qonset_md_newest_sts',
    'ecg_qoffset_md_newest_sts',
    'ecg_ponset_md_newest_sts',
    'ecg_poffset_md_newest_sts',
    'ecg_toffset_md_newest_sts',
    'ecg_sex_newest_sts female',
    'ecg_sex_newest_sts male',
    'ecg_age_newest_sts',
]

# Design matrix and target column.
X = data[feature_cols]
y = data['sts_death sts_death']

# Hold out 20% of the rows for evaluation; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Logistic regression with the penalty switched off. fit() returns the
# estimator itself, so construction and fitting can be chained.
# NOTE(review): scikit-learn 1.2 deprecated the string 'none' in favour of
# penalty=None -- confirm against the installed version before changing it.
lr = LogisticRegression(penalty='none', max_iter=9000).fit(X_train, y_train)
y_pred = lr.predict(X_test)

In [None]:
# Plot the ROC curve of the fitted model on the held-out split, annotate it
# with the AUC, and save the figure.
sns.set_style("whitegrid")
sns.set_context("poster")
fig, ax = plt.subplots(figsize=(7, 6))

# Column 1 of predict_proba holds the probability of lr.classes_[1].
y_pred_proba = lr.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)

# Draw on the explicit Axes created above instead of the implicit pyplot
# state, and label the axes so the saved figure stands on its own.
ax.plot(fpr, tpr, label=f"AUC={auc:.3f}")
ax.set_xlabel("False positive rate")
ax.set_ylabel("True positive rate")
ax.legend(frameon=False)  # single legend call; the AUC is shown in its label
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])

# Keep the axis labels inside the canvas before writing the PNG.
fig.tight_layout()
fig.savefig(os.path.expanduser("~/dropbox/sts_ecg/results/logistic-regression-metadata/roc_curve.png"))
plt.show()

In [None]:
# Raw coefficient array of the fitted model.
print(lr.coef_)

# Pair every feature name with its coefficient.
coeffs = pd.DataFrame({
    'feature_name': feature_cols,
    'model_coefficient': lr.coef_.T.flatten(),
})

# Shorten the names for display by removing the shared prefix and suffixes.
coeffs['feature_name'] = (
    coeffs['feature_name']
    .str.replace('ecg_', '')
    .str.replace('_md_newest_sts', '')
    .str.replace('_newest_sts', '')
)

# Largest coefficient first, rounded to three decimals.
coeffs = coeffs.sort_values('model_coefficient', ascending=False).round(3)

# Bar chart of the coefficients. sns.barplot returns the Axes it drew on,
# so `fig` below is an Axes object despite its name.
plt.figure(figsize=(10, 7))
fig = sns.barplot(data=coeffs, x='feature_name', y='model_coefficient', palette="Blues_d")
fig.set_xticklabels(coeffs['feature_name'], rotation=90)
fig.set_xlabel('Feature')
fig.set_ylabel('Coefficient')
plt.tight_layout()
plt.savefig(os.path.expanduser("~/dropbox/sts_ecg/results/logistic-regression-metadata/coefficients.png"))

# Write the same table (sorted and rounded) to CSV without the index column.
coeffs.to_csv(os.path.expanduser("~/dropbox/sts_ecg/results/logistic-regression-metadata/coefficients.csv"), index=False)