In [None]:
import os
import numpy as np 
import pandas as pd
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

%matplotlib inline
pd.options.display.max_rows = 4000
plt.rcParams["font.weight"] = "bold"


In [13]:
# Read the combined measurements CSV (path is relative to the notebook's
# working directory) into the frame used by every later cell.
data_path = r'./data/Combined_data.csv'
df = pd.read_csv(data_path)

# Dump the frame as plain text for a quick visual check of columns and values.
print(df)

    SOC cTnI  Tropsensor  Elevated_troponin
0       2.50    0.431106                  0
1       2.50    0.583282                  0
2     210.00    0.530215                  1
3      69.00    0.440601                  1
4      75.00    0.625246                  1
5      60.00    0.746548                  1
6     289.00    0.458436                  1
7      20.00    0.673166                  0
8     279.00    0.839362                  1
9      70.00    0.461351                  1
10    110.00    0.773147                  1
11    120.00    0.723993                  1
12   1837.00    0.897539                  1
13    235.00    0.874225                  1
14     26.00    0.505942                  0
15    286.00    0.991186                  1
16     58.00    0.985097                  1
17      5.00    0.659110                  0
18      2.50    0.627531                  0
19      9.70    0.388004                  0
20  20000.00    0.856864                  1
21  20000.00    0.966910        

In [None]:
# Turn each row of Y_val into a binary label: 1 when the row's first entry is
# strictly greater than 19, otherwise 0.
# NOTE(review): Y_val is not assigned in any cell visible here, so this cell
# fails on a fresh kernel unless it is defined elsewhere -- confirm where it
# comes from (presumably a single-column selection of df; verify).
final_Y = [1 if row[0] > 19 else 0 for row in Y_val.values]

In [None]:
# Store the binary labels on the frame as the 'elevated_troponin' column.
df['elevated_troponin'] = final_Y

# Single-feature design matrix (kept 2-D by the double brackets) and binary target.
# NOTE(review): the frame printed by the load cell lists 'SOC cTnI', 'Tropsensor'
# and 'Elevated_troponin'; confirm the CSV really contains an 'Optical Data' column.
X = df[['Optical Data']]
y = final_Y

# Hold out 20% of the rows for evaluation; random_state pins the shuffle so the
# split is reproducible.
trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.2, random_state=2)

# Constant "no skill" baseline scores, one per held-out sample. They are scored
# against testy further down, so the length must follow the test split rather
# than the training split (a length mismatch raises in roc_auc_score/roc_curve).
ns_probs = [0] * len(testy)

In [None]:
# Logistic regression on the training split: L-BFGS solver, capped at 300
# optimisation iterations.
lr_settings = {'solver': 'lbfgs', 'max_iter': 300}
model = LogisticRegression(**lr_settings)

# Kept as the final expression so the notebook displays the fitted estimator.
model.fit(trainX, trainy)

In [None]:
# Predict class probabilities for the held-out rows. The scores below are
# compared against testy, so the predictions must come from testX: predicting
# on trainX yields a different number of rows and evaluates on the wrong data.
lr_probs = model.predict_proba(testX)

# keep probabilities for the positive outcome only (second column)
lr_probs = lr_probs[:, 1]

# ROC AUC of the constant baseline and of the logistic model on the test split
ns_auc = roc_auc_score(testy, ns_probs)
lr_auc = roc_auc_score(testy, lr_probs)

# summarize scores
# print('Random: ROC AUC=%.3f' % (ns_auc))
print('Logistic Regression: ROC AUC=%.3f' % (lr_auc))
print('X:optical sensor data ---> Y:elevated CTNI')

# ROC curve points (false/true positive rates); thresholds are discarded
ns_fpr, ns_tpr, _ = roc_curve(testy, ns_probs)
lr_fpr, lr_tpr, _ = roc_curve(testy, lr_probs)


In [None]:
# Plot the ROC curves. The notebook imports matplotlib.pyplot as `plt`; the
# name `pyplot` is never bound, so every call here goes through `plt`.
# Dashed line: constant baseline (empty label). Dotted markers: logistic model.
plt.plot(ns_fpr, ns_tpr, linestyle='--', label='')
plt.plot(lr_fpr, lr_tpr, marker='.', label='Logistic')

# axis labels
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')

# show the legend and a background grid
plt.legend()
plt.grid()

# show the plot
plt.show()