In [29]:
# Import our dependencies
from statistics import mean
from sklearn.model_selection import train_test_split, cross_validate, RepeatedStratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd
import tensorflow as tf
from imblearn.pipeline import Pipeline

In [30]:
# Import our input dataset
df = pd.read_csv('../eda/reduced_filtered_df.csv')
df.head()

Unnamed: 0,ACD,LT,VCD,SPORTHR,DADMY,delta_spheq,total_positive_screen,MYOPIC
0,3.702,3.392,15.29,4,1,1.358,8,0
1,3.462,3.514,15.52,14,0,1.929,10,0
2,3.224,3.556,15.36,10,1,2.494,26,0
3,3.186,3.654,15.49,12,1,1.433,16,0
4,3.732,3.584,15.08,12,0,2.022,8,0


In [38]:
# Define X,y
label = df["MYOPIC"].copy()
X = df.iloc[:,:-1].copy()
X.head()

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, label, 
    random_state=42, 
    stratify=label, 
    test_size=200
)

In [39]:
# Preprocess numerical data for neural network

# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [40]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu", input_dim=7))
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50)

# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7/7 - 0s - loss: 0.3129 - accuracy: 0.8900
Loss: 0.31285056471824646, Accuracy: 0.8899999856948853


# Predict results and evaluate

In [41]:
y_pred = nn_model.predict(X_test_scaled)

In [42]:
# convert to binary output
y_pred_binary = [0 if i <=0.5 else 1 for i in y_pred]


In [43]:
confusion_matrix(y_test, y_pred_binary)

array([[173,   1],
       [ 21,   5]], dtype=int64)

In [44]:
print(classification_report(y_test, y_pred_binary))

              precision    recall  f1-score   support

           0       0.89      0.99      0.94       174
           1       0.83      0.19      0.31        26

    accuracy                           0.89       200
   macro avg       0.86      0.59      0.63       200
weighted avg       0.88      0.89      0.86       200



# Changing the Threshold Criteria

In [45]:
# convert to binary output
y_pred_binary = [0 if i <=0.3 else 1 for i in y_pred]

In [46]:
confusion_matrix(y_test, y_pred_binary)

array([[164,  10],
       [ 17,   9]], dtype=int64)

In [47]:
print(classification_report(y_test, y_pred_binary))

              precision    recall  f1-score   support

           0       0.91      0.94      0.92       174
           1       0.47      0.35      0.40        26

    accuracy                           0.86       200
   macro avg       0.69      0.64      0.66       200
weighted avg       0.85      0.86      0.86       200



## Pipeline for Neural Network

In [48]:
# create process steps
pipes = [
    ("scaler", StandardScaler()),
    ("nn", nn_model)
]

# create pipeline
pipeline = Pipeline(pipes)

# Train the scaler with the X_train data.
pipeline.fit(X_train, y_train)

# # evaluate model
# print(f"Score:  {pipeline.score(X_train, y_train):>10}")

y_train_pred = pipeline.predict(X_train)
y_pred_binary = [0 if i <=0.3 else 1 for i in y_train_pred]
print(f" Training Classification Report:  \n {classification_report(y_train, y_pred_binary)}")
print(f"-----"*10)
y_test_pred = pipeline.predict(X_test)
y_pred_binary = [0 if i <=0.3 else 1 for i in y_test_pred]
print(f" Testing Classification Report:  \n {classification_report(y_test, y_pred_binary)}")

 Training Classification Report:  
               precision    recall  f1-score   support

           0       0.92      0.96      0.94       149
           1       0.65      0.48      0.55        23

    accuracy                           0.90       172
   macro avg       0.78      0.72      0.75       172
weighted avg       0.89      0.90      0.89       172

--------------------------------------------------
 Testing Classification Report:  
               precision    recall  f1-score   support

           0       0.91      0.94      0.92       174
           1       0.47      0.35      0.40        26

    accuracy                           0.86       200
   macro avg       0.69      0.64      0.66       200
weighted avg       0.85      0.86      0.86       200

