In [5]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from joblib import dump

# Load Data
file_path = ' ' #File path here
data = pd.read_csv(file_path)

# Select Features and Target
features = ['Gender', 'Subluxation_percent', 'Femoral_neck_angle', 'Lateral_center_edge_angle', 'extrusion_index']
target = 'Cluster' # Replace with the actual column name for the cluster
X = data[features]
y = data[target]

# Preprocess Data
X['Subluxation_percent'].fillna(X['Subluxation_percent'].mean(), inplace=True)
X['Femoral_neck_angle'].fillna(X['Femoral_neck_angle'].mean(), inplace=True)
X['Lateral_center_edge_angle'].fillna(X['Lateral_center_edge_angle'].mean(), inplace=True)
X['extrusion_index'].fillna(X['extrusion_index'].mean(), inplace=True)
if X['Gender'].dtype == 'object':
    X['Gender'] = X['Gender'].astype('category').cat.codes
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split Data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Build and Train ANN
mlp = MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=2000, random_state=42) # Increase max_iter
mlp.fit(X_train, y_train)

# Evaluate Model
y_pred = mlp.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print(classification_report(y_test, y_pred))

# Display Number of Iterations
print(f"Number of iterations to reach convergence: {mlp.n_iter_}")

# Save Model (Optional)
dump(mlp, 'cluster_model.joblib')


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Subluxation_percent'].fillna(X['Subluxation_percent'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Femoral_neck_angle'].fillna(X['Femoral_neck_angle'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Lateral_center_edge_angle'].fillna(X['Lateral_center_edge_angle'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.or

Accuracy: 58.90%
              precision    recall  f1-score   support

           0       0.33      0.43      0.38        21
           1       0.74      0.65      0.69        52

    accuracy                           0.59        73
   macro avg       0.54      0.54      0.53        73
weighted avg       0.62      0.59      0.60        73

Number of iterations to reach convergence: 1883


['cluster_model.joblib']

In [6]:
#calculate metrics
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Predict Class Labels
y_pred = mlp.predict(X_test)

# Calculate Accuracy
accuracy = accuracy_score(y_test, y_pred)

# Calculate Precision
precision = precision_score(y_test, y_pred)

# Calculate Recall
recall = recall_score(y_test, y_pred)

# Calculate F1-Score
f1 = f1_score(y_test, y_pred)

# Calculate Misclassification Rate
misclassification_rate = 1 - accuracy

# Calculate AUC-ROC
# Note: You'll need to use predict_proba to get the probability estimates for the positive class
y_prob = mlp.predict_proba(X_test)[:, 1]
auc_roc = roc_auc_score(y_test, y_prob)

# Print Results
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1-Score: {f1 * 100:.2f}%")
print(f"Misclassification Rate: {misclassification_rate * 100:.2f}%")
print(f"AUC of ROC: {auc_roc * 100:.2f}%")


Accuracy: 58.90%
Precision: 73.91%
Recall: 65.38%
F1-Score: 69.39%
Misclassification Rate: 41.10%
AUC of ROC: 56.32%


In [7]:
##CSV of internal dataset
# Create a new DataFrame with the original data
new_data = data.copy()

# Add the predicted cluster column
new_data['predicted_cluster'] = mlp.predict(scaler.transform(X))

# Save the new DataFrame as a CSV file
output_path = ''
new_data.to_csv(output_path, index=False)

print(f"File saved to {output_path}")


File saved to C:/Work/AI_Sports_Medicine/Hip/DDH/Cluster/Prediction_ANN/internal_predicted_clusters.csv


In [8]:
##External validatoin
# Read the external dataset
external_file_path = '' #File path here
external_data = pd.read_csv(external_file_path)

# Select features and preprocess
external_X = external_data[features]
external_X['Subluxation_percent'].fillna(external_X['Subluxation_percent'].mean(), inplace=True)
external_X['Femoral_neck_angle'].fillna(external_X['Femoral_neck_angle'].mean(), inplace=True)
external_X['Lateral_center_edge_angle'].fillna(external_X['Lateral_center_edge_angle'].mean(), inplace=True)
external_X['extrusion_index'].fillna(external_X['extrusion_index'].mean(), inplace=True)
if external_X['Gender'].dtype == 'object':
    external_X['Gender'] = external_X['Gender'].astype('category').cat.codes

# Scale the features
external_X_scaled = scaler.transform(external_X)

# Groundtruth cluster label (updated column name)
external_y = external_data['Groundtruth_Cluster']

# Predict clusters for the external dataset
external_y_pred = mlp.predict(external_X_scaled)

# Calculate metrics
external_accuracy = accuracy_score(external_y, external_y_pred)
external_precision = precision_score(external_y, external_y_pred)
external_recall = recall_score(external_y, external_y_pred)
external_f1 = f1_score(external_y, external_y_pred)
external_misclassification_rate = 1 - external_accuracy
external_y_prob = mlp.predict_proba(external_X_scaled)[:, 1]
external_auc_roc = roc_auc_score(external_y, external_y_prob)

# Print results
print(f"External Accuracy: {external_accuracy * 100:.2f}%")
print(f"External Precision: {external_precision * 100:.2f}%")
print(f"External Recall: {external_recall * 100:.2f}%")
print(f"External F1-Score: {external_f1 * 100:.2f}%")
print(f"External Misclassification Rate: {external_misclassification_rate * 100:.2f}%")
print(f"External AUC of ROC: {external_auc_roc * 100:.2f}%")

# Create a new DataFrame with the external data
external_data_with_prediction = external_data.copy()

# Adding the ground truth cluster labels (y_external) to the external predicted data DataFrame
external_data_with_prediction['ground_truth_cluster'] = external_data['Groundtruth_Cluster']

# Add the predicted cluster column
external_data_with_prediction['predicted_cluster'] = external_y_pred

# Save the new DataFrame as a CSV file
external_output_path = ' '
external_data_with_prediction.to_csv(external_output_path, index=False)

print(f"External file saved to {external_output_path}")



External Accuracy: 71.43%
External Precision: 78.69%
External Recall: 81.36%
External F1-Score: 80.00%
External Misclassification Rate: 28.57%
External AUC of ROC: 70.92%
External file saved to C:/Work/AI_Sports_Medicine/Hip/DDH/Cluster/Prediction_ANN/external_predicted_clusters.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  external_X['Subluxation_percent'].fillna(external_X['Subluxation_percent'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  external_X['Femoral_neck_angle'].fillna(external_X['Femoral_neck_angle'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  external_X['Lateral_center_edge_angle'].fillna(external_X['Lateral_center_edge_angle'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the 