In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_predict, StratifiedKFold, TimeSeriesSplit
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd
import time

import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Dense

print(tf.__version__)


2.15.0


Dataset: https://www.kaggle.com/datasets/kukuroo3/body-signal-of-smoking/code

In [2]:
def recall(tp, fn):
    """Calculate Recall."""
    return tp / (tp + fn)

def precision(tp, fp):
    """Calculate Precision."""
    return tp / (tp + fp)

def f1_score(tp, fp, fn):
    """Calculate F1 Score."""
    return (2 * tp) / (2 * tp + fp + fn)

def accuracy(tp, fp, tn, fn):
    """Calculate Accuracy."""
    return (tp + tn) / (tp + fp + tn + fn)

def tpr(tp, fn):
    """Calculate True Positive Rate (Sensitivity, Recall)."""
    return tp / (tp + fn)

def tnr(tn, fp):
    """Calculate True Negative Rate (Specificity)."""
    return tn / (tn + fp)

def fpr(fp, tn):
    """Calculate False Positive Rate."""
    return fp / (fp + tn)

def fnr(fn, tp):
    """Calculate False Negative Rate."""
    return fn / (fn + tp)

def tss(tp, fn, fp, tn):
    """Calculate True Skill Statistic (TSS)."""
    return (tp / (tp + fn)) - (fp / (fp + tn))

def hss(tp, fn, fp, tn):
    """Calculate Heidke Skill Score (HSS)."""
    numerator = 2 * ((tp * tn) - (fp * fn))
    denominator = (tp + fn) * (fn + tn) + (tp + fp) * (fp + tn)
    return numerator / denominator

In [3]:
X_train = pd.read_csv('competition_format/x_train.csv')
X_test = pd.read_csv('competition_format/x_test.csv')
y_train = pd.read_csv('competition_format/y_train.csv')
y_test = pd.read_csv('competition_format/y_test.csv')

X_train = X_train.drop('ID', axis=1)
X_test = X_test.drop('ID', axis=1)
y_train = y_train.drop('ID', axis=1)
y_test = y_test.drop('ID', axis=1)

categorical_columns = ['gender', 'oral', 'tartar']
X_train = pd.get_dummies(X_train, columns=categorical_columns, prefix=categorical_columns)
X_test = pd.get_dummies(X_test, columns=categorical_columns, prefix=categorical_columns)

y_train_flat = y_train.values.ravel()

<h1>Random Forest</h1>

In [4]:
# Create Random Forest model
random_forest_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)

# Define a 10-fold stratified cross-validation
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Perform cross-validated predictions
rand_forest_cv_start = time.time()
y_pred = cross_val_predict(random_forest_model, X_train, y_train_flat, cv=cv)
rand_forest_cv_end = time.time()

print(f"Random Forest Cross Validation Execution Time: {(rand_forest_cv_end - rand_forest_cv_start):.1f} seconds")

mean_tp = 0
mean_tn = 0
mean_fp = 0
mean_fn = 0

mean_tpr = 0
mean_tnr = 0
mean_fpr = 0
mean_fnr = 0

mean_recall = 0
mean_precision = 0
mean_f1 = 0
mean_accuracy = 0
mean_error_rate = 0

mean_tss = 0
mean_hss = 0

# Calculate and print metrics for each fold
for fold_num in range(1, 11):
    fold_indices = (y_train.index % 10 == fold_num - 1)
    y_fold_true = y_train[fold_indices]
    y_fold_pred = y_pred[fold_indices]

    confusion = confusion_matrix(y_fold_true, y_fold_pred)

    tp = confusion[1][1]
    mean_tp += tp
    tn = confusion[0][0]
    mean_tn += tn
    fp = confusion[0][1]
    mean_fp += fp
    fn = confusion[1][0]
    mean_fn += fn

    fold_recall = recall(tp, fn)
    mean_recall += fold_recall
    
    fold_precision = precision(tp, fp)
    mean_precision += fold_precision
    
    fold_f1 = f1_score(tp, fp, fn)
    mean_f1 += fold_f1
    
    fold_accuracy = accuracy(tp, fp, tn, fn)
    mean_accuracy += fold_accuracy
    
    fold_error_rate = 1 - fold_accuracy
    mean_error_rate += fold_error_rate

    fold_tpr = tpr(tp, fn)
    mean_tpr += fold_tpr

    fold_tnr = tnr(tn, fp)
    mean_tnr += fold_tnr
    
    fold_fpr = fpr(fp, tn)
    mean_fpr += fold_fpr
    
    fold_fnr = fnr(fn, tp)
    mean_fnr += fold_fnr

    fold_tss = tss(tp, fn, fp, tn)
    mean_tss += fold_tss

    fold_hss = hss(tp, fn, fp, tn)
    mean_hss += fold_hss    

    print(f"Fold {fold_num} Metrics:")
    print("Recall:", fold_recall)
    print("Precision:", fold_precision)
    print("F1 Score:", fold_f1)
    print("Accuracy:", fold_accuracy)
    print("Error Rate:", fold_error_rate)   
    
    print("Confusion Matrix:")
    print(confusion)
    print("True Positive:", tp)
    print("True Positive Rate:", fold_tpr)
    print("True Negative:", tn)
    print("True Negative Rate:", fold_tnr)
    print("False Positive:", fp)
    print("False Positive Rate:", fold_fpr)
    print("False Negative:", fn)
    print("False Negative Rate:", fold_fnr)
    print("True Skill Statistic:", fold_tss)
    print("Heidke Skill Score:", fold_hss)
    print("\n")
    
# Average metric values
mean_tp = mean_tp / 10
mean_tn = mean_tn / 10 
mean_fp = mean_fp / 10 
mean_fn = mean_fn / 10 

mean_tpr = mean_tpr / 10 
mean_tnr = mean_tnr / 10
mean_fpr = mean_fpr / 10 
mean_fnr = mean_fnr / 10

mean_recall = mean_recall / 10 
mean_precision = mean_precision / 10 
mean_f1 = mean_f1 / 10 
mean_accuracy = mean_accuracy / 10 
mean_error_rate = mean_error_rate / 10 

mean_tss = mean_tss / 10 
mean_hss = mean_hss / 10

print(f"Average Fold Metrics:")
print("Recall:", mean_recall)
print("Precision:", mean_precision)
print("F1 Score:", mean_f1)
print("Accuracy:", mean_accuracy)
print("Error Rate:", mean_error_rate)   

print("True Positive:", mean_tp)
print("True Positive Rate:", mean_tpr)
print("True Negative:", mean_tn)
print("True Negative Rate:", mean_tnr)
print("False Positive:", mean_fp)
print("False Positive Rate:", mean_fpr)
print("False Negative:", mean_fn)
print("False Negative Rate:", mean_fnr)
print("True Skill Statistic:", mean_tss)
print("Heidke Skill Score:", mean_hss)
print("\n")

# Train the model on the entire training data for later evaluation on the test set
rand_forest_train_start = time.time()
random_forest_model.fit(X_train, y_train)
rand_forest_train_end = time.time()

print(f"Random Forest Training Execution Time: {(rand_forest_train_end - rand_forest_train_start):.1f} seconds")

# Get predictions on the test set
y_test_pred = random_forest_model.predict(X_test)

# # Evaluate the model on the test set
confusion = confusion_matrix(y_test, y_test_pred)

test_tp = confusion[1][1]
test_tn = confusion[0][0]
test_fp = confusion[0][1]
test_fn = confusion[1][0]

test_recall = recall(test_tp, test_fn)
test_precision = precision(test_tp, test_fp)
test_f1 = f1_score(test_tp, test_fp, test_fn)
test_accuracy = accuracy(test_tp, test_fp, test_tn, test_fn)
test_error_rate = 1 - test_accuracy

test_tpr = tpr(test_tp, test_fn)
test_tnr = tnr(test_tn, test_fp)
test_fpr = fpr(test_fp, test_tn)
test_fnr = fnr(test_fn, test_tp)

test_tss = tss(test_tp, test_fn, test_fp, test_tn)
test_hss = hss(test_tp, test_fn, test_fp, test_tn)

print(f"Test Metrics:")
print("Recall:", test_recall)
print("Precision:", test_precision)
print("F1 Score:", test_f1)
print("Accuracy:", test_accuracy)
print("Error Rate:", test_error_rate)   

print("Confusion Matrix:")
print(confusion)
print("True Positive:", test_tp)
print("True Positive Rate:", test_tpr)
print("True Negative:", test_tn)
print("True Negative Rate:", test_tnr)
print("False Positive:", test_fp)
print("False Positive Rate:", test_fpr)
print("False Negative:", test_fn)
print("False Negative Rate:", test_fnr)
print("True Skill Statistic:", test_tss)
print("Heidke Skill Score:", test_hss)
print("\n")

Random Forest Cross Validation Execution Time: 78.2 seconds
Fold 1 Metrics:
Recall: 0.7330508474576272
Precision: 0.6538876889848813
F1 Score: 0.6912100456621004
Accuracy: 0.757181328545781
Error Rate: 0.24281867145421898
Confusion Matrix:
[[2163  641]
 [ 441 1211]]
True Positive: 1211
True Positive Rate: 0.7330508474576272
True Negative: 2163
True Negative Rate: 0.771398002853067
False Positive: 641
False Positive Rate: 0.22860199714693294
False Negative: 441
False Negative Rate: 0.2669491525423729
True Skill Statistic: 0.5044488503106942
Heidke Skill Score: 0.49220795853231913


Fold 2 Metrics:
Recall: 0.7532307692307693
Precision: 0.6465927099841522
F1 Score: 0.6958499147242752
Accuracy: 0.7598743267504489
Error Rate: 0.24012567324955114
Confusion Matrix:
[[2162  669]
 [ 401 1224]]
True Positive: 1224
True Positive Rate: 0.7532307692307693
True Negative: 2162
True Negative Rate: 0.7636877428470505
False Positive: 669
False Positive Rate: 0.23631225715294948
False Negative: 401
False

  return fit_method(estimator, *args, **kwargs)


Random Forest Training Execution Time: 8.1 seconds
Test Metrics:
Recall: 0.8018523031927858
Precision: 0.706766917293233
F1 Score: 0.7513130851792647
Accuracy: 0.8044707783463506
Error Rate: 0.1955292216536494
Confusion Matrix:
[[5671 1365]
 [ 813 3290]]
True Positive: 3290
True Positive Rate: 0.8018523031927858
True Negative: 5671
True Negative Rate: 0.8059977259806709
False Positive: 1365
False Positive Rate: 0.19400227401932915
False Negative: 813
False Negative Rate: 0.19814769680721422
True Skill Statistic: 0.6078500291734567
Heidke Skill Score: 0.5912702097727168




<h1>Decision Trees</h1>

In [5]:
# Instantiate the Decision Tree model
decision_tree_model = DecisionTreeClassifier(max_depth=10, random_state=42)

# Define a 10-fold stratified cross-validation
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Perform cross-validated predictions
dec_tree_cv_start = time.time()
y_pred_decision_tree = cross_val_predict(decision_tree_model, X_train, y_train_flat, cv=cv)
dec_tree_cv_end = time.time()

print(f"Decision Trees Cross Validation Execution Time: {(dec_tree_cv_end - dec_tree_cv_start):.1f} seconds")

mean_tp = 0
mean_tn = 0
mean_fp = 0
mean_fn = 0

mean_tpr = 0
mean_tnr = 0
mean_fpr = 0
mean_fnr = 0

mean_recall = 0
mean_precision = 0
mean_f1 = 0
mean_accuracy = 0
mean_error_rate = 0

mean_tss = 0
mean_hss = 0

# Calculate and print metrics for each fold
for fold_num in range(1, 11):
    fold_indices = (y_train.index % 10 == fold_num - 1)
    y_fold_true = y_train[fold_indices]
    y_fold_pred = y_pred[fold_indices]

    confusion = confusion_matrix(y_fold_true, y_fold_pred)

    tp = confusion[1][1]
    mean_tp += tp
    tn = confusion[0][0]
    mean_tn += tn
    fp = confusion[0][1]
    mean_fp += fp
    fn = confusion[1][0]
    mean_fn += fn

    fold_recall = recall(tp, fn)
    mean_recall += fold_recall
    
    fold_precision = precision(tp, fp)
    mean_precision += fold_precision
    
    fold_f1 = f1_score(tp, fp, fn)
    mean_f1 += fold_f1
    
    fold_accuracy = accuracy(tp, fp, tn, fn)
    mean_accuracy += fold_accuracy
    
    fold_error_rate = 1 - fold_accuracy
    mean_error_rate += fold_error_rate

    fold_tpr = tpr(tp, fn)
    mean_tpr += fold_tpr

    fold_tnr = tnr(tn, fp)
    mean_tnr += fold_tnr
    
    fold_fpr = fpr(fp, tn)
    mean_fpr += fold_fpr
    
    fold_fnr = fnr(fn, tp)
    mean_fnr += fold_fnr

    fold_tss = tss(tp, fn, fp, tn)
    mean_tss += fold_tss

    fold_hss = hss(tp, fn, fp, tn)
    mean_hss += fold_hss    

    print(f"Fold {fold_num} Metrics:")
    print("Recall:", fold_recall)
    print("Precision:", fold_precision)
    print("F1 Score:", fold_f1)
    print("Accuracy:", fold_accuracy)
    print("Error Rate:", fold_error_rate)   
    
    print("Confusion Matrix:")
    print(confusion)
    print("True Positive:", tp)
    print("True Positive Rate:", fold_tpr)
    print("True Negative:", tn)
    print("True Negative Rate:", fold_tnr)
    print("False Positive:", fp)
    print("False Positive Rate:", fold_fpr)
    print("False Negative:", fn)
    print("False Negative Rate:", fold_fnr)
    print("True Skill Statistic:", fold_tss)
    print("Heidke Skill Score:", fold_hss)
    print("\n")
    
# Average metric values
mean_tp = mean_tp / 10
mean_tn = mean_tn / 10 
mean_fp = mean_fp / 10 
mean_fn = mean_fn / 10 

mean_tpr = mean_tpr / 10 
mean_tnr = mean_tnr / 10
mean_fpr = mean_fpr / 10 
mean_fnr = mean_fnr / 10

mean_recall = mean_recall / 10 
mean_precision = mean_precision / 10 
mean_f1 = mean_f1 / 10 
mean_accuracy = mean_accuracy / 10 
mean_error_rate = mean_error_rate / 10 

mean_tss = mean_tss / 10 
mean_hss = mean_hss / 10

print(f"Average Fold Metrics:")
print("Recall:", mean_recall)
print("Precision:", mean_precision)
print("F1 Score:", mean_f1)
print("Accuracy:", mean_accuracy)
print("Error Rate:", mean_error_rate)   

print("True Positive:", mean_tp)
print("True Positive Rate:", mean_tpr)
print("True Negative:", mean_tn)
print("True Negative Rate:", mean_tnr)
print("False Positive:", mean_fp)
print("False Positive Rate:", mean_fpr)
print("False Negative:", mean_fn)
print("False Negative Rate:", mean_fnr)
print("True Skill Statistic:", mean_tss)
print("Heidke Skill Score:", mean_hss)
print("\n")

# Train the model on the entire training data for later evaluation on the test set
dec_tree_train_start = time.time()
decision_tree_model.fit(X_train, y_train)
dec_tree_train_end = time.time()

print(f"Decision Tree Training Execution Time: {(dec_tree_train_end - dec_tree_train_start):.1f} seconds")

# Get predictions on the test set
y_test_pred = decision_tree_model.predict(X_test)

# # Evaluate the model on the test set
confusion = confusion_matrix(y_test, y_test_pred)

test_tp = confusion[1][1]
test_tn = confusion[0][0]
test_fp = confusion[0][1]
test_fn = confusion[1][0]

test_recall = recall(test_tp, test_fn)
test_precision = precision(test_tp, test_fp)
test_f1 = f1_score(test_tp, test_fp, test_fn)
test_accuracy = accuracy(test_tp, test_fp, test_tn, test_fn)
test_error_rate = 1 - test_accuracy

test_tpr = tpr(test_tp, test_fn)
test_tnr = tnr(test_tn, test_fp)
test_fpr = fpr(test_fp, test_tn)
test_fnr = fnr(test_fn, test_tp)

test_tss = tss(test_tp, test_fn, test_fp, test_tn)
test_hss = hss(test_tp, test_fn, test_fp, test_tn)

print(f"Test Metrics:")
print("Recall:", test_recall)
print("Precision:", test_precision)
print("F1 Score:", test_f1)
print("Accuracy:", test_accuracy)
print("Error Rate:", test_error_rate)   

print("Confusion Matrix:")
print(confusion)
print("True Positive:", test_tp)
print("True Positive Rate:", test_tpr)
print("True Negative:", test_tn)
print("True Negative Rate:", test_tnr)
print("False Positive:", test_fp)
print("False Positive Rate:", test_fpr)
print("False Negative:", test_fn)
print("False Negative Rate:", test_fnr)
print("True Skill Statistic:", test_tss)
print("Heidke Skill Score:", test_hss)
print("\n")

Decision Trees Cross Validation Execution Time: 5.8 seconds
Fold 1 Metrics:
Recall: 0.7330508474576272
Precision: 0.6538876889848813
F1 Score: 0.6912100456621004
Accuracy: 0.757181328545781
Error Rate: 0.24281867145421898
Confusion Matrix:
[[2163  641]
 [ 441 1211]]
True Positive: 1211
True Positive Rate: 0.7330508474576272
True Negative: 2163
True Negative Rate: 0.771398002853067
False Positive: 641
False Positive Rate: 0.22860199714693294
False Negative: 441
False Negative Rate: 0.2669491525423729
True Skill Statistic: 0.5044488503106942
Heidke Skill Score: 0.49220795853231913


Fold 2 Metrics:
Recall: 0.7532307692307693
Precision: 0.6465927099841522
F1 Score: 0.6958499147242752
Accuracy: 0.7598743267504489
Error Rate: 0.24012567324955114
Confusion Matrix:
[[2162  669]
 [ 401 1224]]
True Positive: 1224
True Positive Rate: 0.7532307692307693
True Negative: 2162
True Negative Rate: 0.7636877428470505
False Positive: 669
False Positive Rate: 0.23631225715294948
False Negative: 401
False

<h1>LSTM</h1>

In [10]:
df = pd.DataFrame(X_train)
df_y = pd.DataFrame(y_train)
df_X_test = pd.DataFrame(X_test)
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)

# Reshape
X_train_reshaped = df.values.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_reshaped = df_X_test.values.reshape((X_test.shape[0], X_test.shape[1], 1))
y_train_reshaped = df_y.values.reshape((y_train.shape[0],))

print("Shape of X_train_reshaped:", X_train_reshaped.shape)
print("Shape of y_train_reshaped:", y_train_reshaped.shape)

Shape of X_train: (44553, 27)
Shape of y_train: (44553, 1)
Shape of X_train_reshaped: (44553, 27, 1)
Shape of y_train_reshaped: (44553,)


<h2>Cross Validation for LSTM</h2>

In [11]:
# Create a TimeSeriesSplit with 10 splits
tscv = TimeSeriesSplit(n_splits=10)

mean_tp = 0
mean_tn = 0
mean_fp = 0
mean_fn = 0

mean_tpr = 0
mean_tnr = 0
mean_fpr = 0
mean_fnr = 0

mean_recall = 0
mean_precision = 0
mean_f1 = 0
mean_accuracy = 0
mean_error_rate = 0

mean_tss = 0
mean_hss = 0

lstm_cv_start = time.time()

# Iterate over the splits
for fold_index, (train_index, test_index) in enumerate(tscv.split(X_train_reshaped)):

    X_train_fold, X_test_fold = X_train_reshaped[train_index], X_train_reshaped[test_index]
    y_train_fold, y_test_fold = y_train_reshaped[train_index], y_train_reshaped[test_index]

    # Reshape data
    X_train_fold_reshaped = X_train_fold.reshape((X_train_fold.shape[0], X_train_fold.shape[1], 1))
    X_test_fold_reshaped = X_test_fold.reshape((X_test_fold.shape[0], X_test_fold.shape[1], 1))
    y_train_fold_reshaped = y_train_fold.reshape((y_train_fold.shape[0],))

    # Define LSTM model
    model = Sequential()
    model.add(LSTM(50, input_shape=(X_train_fold_reshaped.shape[1], X_train_fold_reshaped.shape[2])))
    model.add(Dense(1, activation='sigmoid'))
    
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    # Train Fold
    model.fit(X_train_fold_reshaped, y_train_fold_reshaped, epochs=10, batch_size=32, verbose=0)

    # Fold Predictions
    y_pred_fold = (model.predict(X_test_fold_reshaped) > 0.5).astype(int)
        
    # Metrics
    confusion = confusion_matrix(y_test_fold, y_pred_fold)

    tp = confusion[1][1]
    mean_tp += tp
    tn = confusion[0][0]
    mean_tn += tn
    fp = confusion[0][1]
    mean_fp += fp
    fn = confusion[1][0]
    mean_fn += fn

    fold_recall = recall(tp, fn)
    mean_recall += fold_recall
    
    fold_precision = precision(tp, fp)
    mean_precision += fold_precision
    
    fold_f1 = f1_score(tp, fp, fn)
    mean_f1 += fold_f1
    
    fold_accuracy_lstm = accuracy(tp, fp, tn, fn)
    mean_accuracy += fold_accuracy_lstm
    
    fold_error_rate = 1 - fold_accuracy_lstm
    mean_error_rate += fold_error_rate

    fold_tpr = tpr(tp, fn)
    mean_tpr += fold_tpr

    fold_tnr = tnr(tn, fp)
    mean_tnr += fold_tnr
    
    fold_fpr = fpr(fp, tn)
    mean_fpr += fold_fpr
    
    fold_fnr = fnr(fn, tp)
    mean_fnr += fold_fnr

    fold_tss = tss(tp, fn, fp, tn)
    mean_tss += fold_tss

    fold_hss = hss(tp, fn, fp, tn)
    mean_hss += fold_hss    

    print(f"Fold {fold_index + 1} Metrics:")
    print("Recall:", fold_recall)
    print("Precision:", fold_precision)
    print("F1 Score:", fold_f1)
    print("Accuracy:", fold_accuracy_lstm)
    print("Error Rate:", fold_error_rate)   
    
    print("Confusion Matrix:")
    print(confusion)
    print("True Positive:", tp)
    print("True Positive Rate:", fold_tpr)
    print("True Negative:", tn)
    print("True Negative Rate:", fold_tnr)
    print("False Positive:", fp)
    print("False Positive Rate:", fold_fpr)
    print("False Negative:", fn)
    print("False Negative Rate:", fold_fnr)
    print("True Skill Statistic:", fold_tss)
    print("Heidke Skill Score:", fold_hss)
    print("\n")

# Average metric values
mean_tp = mean_tp / 10
mean_tn = mean_tn / 10 
mean_fp = mean_fp / 10 
mean_fn = mean_fn / 10 

mean_tpr = mean_tpr / 10 
mean_tnr = mean_tnr / 10
mean_fpr = mean_fpr / 10 
mean_fnr = mean_fnr / 10

mean_recall = mean_recall / 10 
mean_precision = mean_precision / 10 
mean_f1 = mean_f1 / 10 
mean_accuracy = mean_accuracy / 10 
mean_error_rate = mean_error_rate / 10 

mean_tss = mean_tss / 10 
mean_hss = mean_hss / 10

print(f"Average Fold Metrics:")
print("Recall:", mean_recall)
print("Precision:", mean_precision)
print("F1 Score:", mean_f1)
print("Accuracy:", mean_accuracy)
print("Error Rate:", mean_error_rate)   

print("True Positive:", mean_tp)
print("True Positive Rate:", mean_tpr)
print("True Negative:", mean_tn)
print("True Negative Rate:", mean_tnr)
print("False Positive:", mean_fp)
print("False Positive Rate:", mean_fpr)
print("False Negative:", mean_fn)
print("False Negative Rate:", mean_fnr)
print("True Skill Statistic:", mean_tss)
print("Heidke Skill Score:", mean_hss)
print("\n")

lstm_cv_end = time.time()
print(f"LSTM Cross Validation Execution Time:{(lstm_cv_end - lstm_cv_start):.1f}")

# Fit model on entire dataset
lstm_start = time.time()
model.fit(X_train_reshaped, y_train_reshaped, epochs=10, batch_size=32)
lstm_end = time.time()

print(f"LSTM Training Execution Time:{(lstm_end - lstm_start):.1f}")

y_test_pred = (model.predict(X_test_reshaped) > 0.5).astype(int)

confusion = confusion_matrix(y_test, y_test_pred)

test_tp = confusion[1][1]
test_tn = confusion[0][0]
test_fp = confusion[0][1]
test_fn = confusion[1][0]

test_recall = recall(test_tp, test_fn)
test_precision = precision(test_tp, test_fp)
test_f1 = f1_score(test_tp, test_fp, test_fn)
test_accuracy = accuracy(test_tp, test_fp, test_tn, test_fn)
test_error_rate = 1 - test_accuracy

test_tpr = tpr(test_tp, test_fn)
test_tnr = tnr(test_tn, test_fp)
test_fpr = fpr(test_fp, test_tn)
test_fnr = fnr(test_fn, test_tp)

test_tss = tss(test_tp, test_fn, test_fp, test_tn)
test_hss = hss(test_tp, test_fn, test_fp, test_tn)

print(f"Test Metrics:")
print("Recall:", test_recall)
print("Precision:", test_precision)
print("F1 Score:", test_f1)
print("Accuracy:", test_accuracy)
print("Error Rate:", test_error_rate)   

print("Confusion Matrix:")
print(confusion)
print("True Positive:", test_tp)
print("True Positive Rate:", test_tpr)
print("True Negative:", test_tn)
print("True Negative Rate:", test_tnr)
print("False Positive:", test_fp)
print("False Positive Rate:", test_fpr)
print("False Negative:", test_fn)
print("False Negative Rate:", test_fnr)
print("True Skill Statistic:", test_tss)
print("Heidke Skill Score:", test_hss)
print("\n")


Fold 1 Metrics:
Recall: 0.8140180430256766
Precision: 0.6024653312788906
F1 Score: 0.692443919716647
Accuracy: 0.7427160493827161
Error Rate: 0.25728395061728393
Confusion Matrix:
[[1835  774]
 [ 268 1173]]
True Positive: 1173
True Positive Rate: 0.8140180430256766
True Negative: 1835
True Negative Rate: 0.7033346109620544
False Positive: 774
False Positive Rate: 0.2966653890379456
False Negative: 268
False Negative Rate: 0.18598195697432338
True Skill Statistic: 0.517352653987731
Heidke Skill Score: 0.47965178432052885


Fold 2 Metrics:
Recall: 0.7557732680195941
Precision: 0.6185567010309279
F1 Score: 0.6803149606299213
Accuracy: 0.7493827160493827
Error Rate: 0.25061728395061733
Confusion Matrix:
[[1955  666]
 [ 349 1080]]
True Positive: 1080
True Positive Rate: 0.7557732680195941
True Negative: 1955
True Negative Rate: 0.7458985120183136
False Positive: 666
False Positive Rate: 0.25410148798168636
False Negative: 349
False Negative Rate: 0.24422673198040587
True Skill Statistic: 0.