In [60]:
# Import the modules
import numpy as np
import pandas as pd
import hvplot.pandas
import plotly.express as px
from pathlib import Path
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
%matplotlib inline

---

In [61]:
# Read the CSV file from the Resources folder into a Pandas DataFrame
#file_path = Path("CTG3.csv")
#file_path = Path("data_preprocessed.csv")
file_path = Path("data_normal_path.csv")
df = pd.read_csv(file_path)

# Review the DataFrame
df.head()

Unnamed: 0,LB,AC,FM,UC,DL,DS,DP,ASTV,MSTV,ALTV,MLTV,NSP
0,120,0,0,0,0,0,0,73,0.5,43,2.4,1
1,132,4,0,4,2,0,0,17,2.1,0,10.4,0
2,133,2,0,5,2,0,0,16,2.1,0,13.4,0
3,134,2,0,6,2,0,0,16,2.4,0,23.0,0
4,132,4,0,5,0,0,0,16,2.4,0,19.9,0


In [62]:
# Select relevant data and apply StandardScaler
scaled_data = StandardScaler().fit_transform(df[["LB", "ASTV", "MSTV", "ALTV"]])
scaled_df = pd.DataFrame(scaled_data, columns=["LB", "ASTV", "MSTV", "ALTV"])
#scaled_df["NSP"] = df["NSP"]
X = scaled_df

In [63]:
# Define feature set
# Note most of the columns are not relevant to the required analysis and require removal in order to avoid
# confusing the model
#X = df[["LB", "AC", "FM", "UC", "DL", "DS", "DP", "ASTV", "MSTV", "ALTV", "MLTV", "Width", "Min", "Max", "Nmax", "Nzeros", "Mode", "Mean", "Median", "Variance", "Tendency"]]
#X = df[["LB", "AC", "FM", "UC", "DL", "DS", "DP", "ASTV", "MSTV", "ALTV", "MLTV"]]

X.head()

Unnamed: 0,LB,ASTV,MSTV,ALTV
0,-1.351659,1.512264,-0.942778,1.80181
1,-0.131892,-1.745392,0.868481,-0.535233
2,-0.030245,-1.803564,0.868481,-0.535233
3,0.071402,-1.803564,1.208092,-0.535233
4,-0.131892,-1.803564,1.208092,-0.535233


In [64]:
# Define target vector
y = df["NSP"].ravel()
y[:5]

array([1, 0, 0, 0, 0])

### Step 3: Split the data into training and testing datasets by using `train_test_split`.

In [65]:
# Split the data using train_test_split
# Assign a random_state of 1 to the function
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)
X_train.shape

(1593, 4)

In [66]:
# check data has been split correctly
print(X_train.shape)
print(X_test.shape)
print(X.shape)

(1593, 4)
(531, 4)
(2124, 4)


---

## Create a Logistic Regression Model with the Original Data

###  Step 1: Fit a logistic regression model by using the training data (`X_train` and `y_train`).

In [67]:
# Import the LogisticRegression module from SKLearn
from sklearn.linear_model import LogisticRegression

# Instantiate the Logistic Regression model
# Assign a random_state parameter of 1 to the model
logistic_regression_model = LogisticRegression(solver='lbfgs',
                                max_iter=500,
                                random_state=1)

# Fit the model using training data
lr_model = logistic_regression_model.fit(X_train, y_train)



In [68]:
# generate training predictions
training_predictions = lr_model.predict(X_train)

# generate testing predictions
testing_predictions = logistic_regression_model.predict(X_test)

### Step 2: Save the predictions on the testing data labels by using the testing feature data (`X_test`) and the fitted model.

In [69]:
# Confusion matrix for training data
training_matrix = confusion_matrix(y_train, training_predictions)

print(training_matrix)

[[1190   51]
 [ 172  180]]


In [70]:
# Print training classification report

#target_names = ["State 0: Normal", "State 1: Suspicious", "State 2: Pathologic"]
target_names = ["State 0: Normal", "State 1: Pathologic"]
training_report = classification_report(y_train, training_predictions, target_names=target_names)
print(training_report)

                     precision    recall  f1-score   support

    State 0: Normal       0.87      0.96      0.91      1241
State 1: Pathologic       0.78      0.51      0.62       352

           accuracy                           0.86      1593
          macro avg       0.83      0.74      0.77      1593
       weighted avg       0.85      0.86      0.85      1593



In [71]:
# Confusion matrix for training data
testing_matrix = confusion_matrix(y_test, testing_predictions)

print(testing_matrix)

[[385  28]
 [ 59  59]]


In [72]:
# Print testing classification report

target_names = ["State 0: Normal", "State 1: Pathologic"]
testing_report = classification_report(y_test, testing_predictions, target_names=target_names)
print(testing_report)

                     precision    recall  f1-score   support

    State 0: Normal       0.87      0.93      0.90       413
State 1: Pathologic       0.68      0.50      0.58       118

           accuracy                           0.84       531
          macro avg       0.77      0.72      0.74       531
       weighted avg       0.83      0.84      0.83       531



In [73]:
df.hvplot.scatter(
    x="LB",
    y="ASTV",
    by="NSP",
    xlabel="Baseline heart rate",
    ylabel="Abnormal Short Term Variability (%Time)"
)

In [74]:
fig = px.scatter_3d(df, x='LB', y='ASTV', z='ALTV', color='NSP')
fig.show()

In [75]:
# Get real world data


#file_path = Path("data_real_world.csv")
file_path = Path("data_real_world_np.csv")
df = pd.read_csv(file_path)

# Review the DataFrame
df.head()

Unnamed: 0,LB,AC,FM,UC,DL,DS,DP,ASTV,MSTV,ALTV,MLTV,NSP
0,148,0,0,1,0,0,0,38,0.9,17,12.2,1
1,148,2,0,8,0,0,0,42,0.7,12,7.1,0
2,148,2,0,3,0,0,0,37,0.8,0,7.1,0
3,148,0,0,10,0,0,0,44,0.6,16,7.3,1
4,148,0,0,9,0,0,0,44,0.6,19,6.9,1


In [76]:
# Select relevant data and apply StandardScaler
scaled_data = StandardScaler().fit_transform(df[["LB", "ASTV", "MSTV", "ALTV"]])
scaled_df = pd.DataFrame(scaled_data, columns=["LB", "ASTV", "MSTV", "ALTV"])
#scaled_df["NSP"] = df["NSP"]
X = scaled_df
X

Unnamed: 0,LB,ASTV,MSTV,ALTV
0,1.244956,0.514107,-0.780869,2.439021
1,1.244956,1.207286,-1.405564,1.572071
2,1.244956,0.340813,-1.093216,-0.508611
3,1.244956,1.553875,-1.717911,2.265631
4,1.244956,1.553875,-1.717911,2.785801
5,1.370287,0.340813,-0.156174,1.22529
6,1.370287,0.167518,0.780869,1.22529
7,1.370287,-0.179071,0.468521,-0.335221
8,1.370287,-1.045544,1.717911,-0.508611
9,1.370287,-0.87225,1.093216,-0.508611


In [77]:
real_world_predictions = logistic_regression_model.predict(X)
real_world_predictions
predictions_df = pd.DataFrame(real_world_predictions, columns=["predictions"])


In [78]:

# build the result dataframe
result_df = scaled_df
result_df["NSP"] = df["NSP"]
result_df["predictions"] = predictions_df["predictions"]
result_df


Unnamed: 0,LB,ASTV,MSTV,ALTV,NSP,predictions
0,1.244956,0.514107,-0.780869,2.439021,1,1
1,1.244956,1.207286,-1.405564,1.572071,0,1
2,1.244956,0.340813,-1.093216,-0.508611,0,0
3,1.244956,1.553875,-1.717911,2.265631,1,1
4,1.244956,1.553875,-1.717911,2.785801,1,1
5,1.370287,0.340813,-0.156174,1.22529,1,1
6,1.370287,0.167518,0.780869,1.22529,1,1
7,1.370287,-0.179071,0.468521,-0.335221,0,0
8,1.370287,-1.045544,1.717911,-0.508611,0,0
9,1.370287,-0.87225,1.093216,-0.508611,0,0


In [None]:
# Plot the data points based on the predictions
result_df.hvplot.scatter(
    x="ASTV", 
    y="MSTV", 
    by="NSP",
    #xlabel="Abnormal Short Term Variability (%time)",
    #ylabel="Average Short Term Variability"
)

In [None]:
# Plot the data points based on the predictions
result_df.hvplot.scatter(
    x="ASTV", 
    y="MSTV", 
    by="predictions",
    #xlabel="Abnormal Short Term Variability (%time)",
    #ylabel="Average Short Term Variability"
)