In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.calibration import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Show all columns and rows when a frame is displayed
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Import the data
df = pd.read_csv('diabetes_prediction_dataset.csv')

# Encode the two categorical columns as integer codes.
# Each column gets its own encoder: refitting a single shared encoder would
# overwrite the first column's fitted mapping (`classes_`), making it impossible
# to decode gender codes or to encode new rows consistently later on.
gender_encoder = LabelEncoder()
smoking_history_encoder = LabelEncoder()
df['gender_encoded'] = gender_encoder.fit_transform(df['gender'])
df['smoking_history_encoded'] = smoking_history_encoder.fit_transform(df['smoking_history'])

# Backward-compatible alias: the previously shared encoder ended up fitted on
# `smoking_history`, so keep that name pointing at the same fitted state.
label_encoder = smoking_history_encoder

df.head()

In [None]:
# Bar plot of the target: number of rows for each value of `diabetes`.
# The column is passed by keyword (`data=` / `x=`); recent seaborn releases treat the
# first positional argument as `data`, which would not produce per-class counts.
fig, ax = plt.subplots(figsize=(20, 10))
sns.countplot(data=df, x='diabetes', ax=ax)
ax.tick_params(axis='x', rotation=90)
plt.show()

In [None]:
# Pairwise correlation of the numeric columns only. `df` still carries the raw
# `gender` / `smoking_history` columns next to their encoded copies; non-numeric
# columns make `DataFrame.corr()` raise on pandas >= 2.0, so they are filtered out first.
df.select_dtypes(include='number').corr()

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
train, test = train_test_split(df, test_size=0.3, random_state=42)

# Model inputs (numeric and encoded columns) and the column to predict
FEATURE_COLUMNS = [
    'age',
    'hypertension',
    'heart_disease',
    'bmi',
    'HbA1c_level',
    'blood_glucose_level',
    'gender_encoded',
    'smoking_history_encoded',
]
TARGET_COLUMN = 'diabetes'

# Separate features (X) from the target (y) for each partition
X_train, y_train = train[FEATURE_COLUMNS], train[TARGET_COLUMN]
X_test, y_test = test[FEATURE_COLUMNS], test[TARGET_COLUMN]



In [None]:
# StandardScaler lives in sklearn.preprocessing; other sklearn modules only re-import it.
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler is fitted on the training rows only and then
# reused for the test rows, so no statistics from the test split leak into training.
# The inputs are always re-derived from `train` / `test`, which keeps this cell
# idempotent when it is run more than once.
feature_names = list(X_train.columns)

scaler = StandardScaler()
xtrain = scaler.fit_transform(train[feature_names])
xtest = scaler.transform(test[feature_names])

# The cells below train, evaluate and predict with `X_train` / `X_test`. Rebind those
# names to the scaled values (same columns, same index) so the scaling is actually
# used instead of being computed and then ignored.
X_train = pd.DataFrame(xtrain, columns=feature_names, index=train.index)
X_test = pd.DataFrame(xtest, columns=feature_names, index=test.index)

In [None]:
from tensorflow.keras.metrics import AUC, MeanSquaredError

# Feed-forward binary classifier: two hidden ReLU layers of 32 units followed by a
# single sigmoid unit. The input width is taken from the number of feature columns.
model = Sequential(
    [
        Dense(32, input_shape=(X_train.shape[1],), activation='relu'),
        Dense(32, activation="relu"),
        Dense(1, activation="sigmoid"),
    ]
)

# Metric names are pinned explicitly. Metrics requested by string get auto-generated
# names that pick up a numeric suffix ("auc_1", ...) when this cell is re-run in the
# same kernel, which breaks later lookups such as history.history["auc"].
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        AUC(name='auc'),
        MeanSquaredError(name='mean_squared_error'),
    ],
)

In [None]:
# Training settings: 100 passes over the training data in mini-batches of 32.
# The held-out split is passed as validation data, so Keras also records
# `val_*` metrics on it after every epoch.
fit_options = dict(
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
)

history = model.fit(X_train, y_train, **fit_options)

In [None]:
# Final scores on the held-out split. evaluate() returns the loss followed by the
# compiled metrics in the order they were given to compile().
test_loss, test_accuracy, test_auc, test_mean_squared_error = model.evaluate(X_test, y_test)

# One panel per metric: (history key, panel title, short label, legend position).
# Every panel shows the per-epoch training curve next to the per-epoch curve on the
# held-out split (the `val_*` entries recorded by fit()), so all four panels are
# directly comparable instead of mixing curves with flat final-score lines.
panels = [
    ("loss", 'Loss', 'Loss', 'upper right'),
    ("auc", 'AUC', 'AUC', 'lower right'),
    ("mean_squared_error", 'Mean Squared Error (MSE)', 'MSE', 'upper right'),
    ("accuracy", 'Accuracy', 'Accuracy', 'lower right'),
]

fig, axes = plt.subplots(2, 2, figsize=(12, 10))

for ax, (key, title, short_label, legend_loc) in zip(axes.flat, panels):
    ax.plot(history.history[key], label=f'Training {short_label}', color='b')
    ax.plot(history.history[f"val_{key}"], label=f'Test {short_label}', color='r')
    ax.set_title(title)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(short_label)
    ax.legend(loc=legend_loc)

plt.tight_layout()
plt.show()

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)
print("Test AUC:", test_auc)
print("Test MSE:", test_mean_squared_error)

# Persist the weights alone, and the full model in the native Keras format.
# NOTE(review): Keras 3 requires weight files to end in ".weights.h5"; the name is
# left unchanged here in case other code loads 'cnn2.h5' -- confirm and rename if needed.
model.save_weights('cnn2.h5')
model.save("modeldigit.keras")


In [None]:
from sklearn.metrics import confusion_matrix, classification_report, f1_score

# Step 1: Predict on the test data.
# The network ends in a single sigmoid unit, so predict() yields one probability per
# row. Thresholding at 0.5 turns it into a 0/1 label; argmax over that single column
# would always return 0.
y_pred = model.predict(X_test)
y_pred_classes = (np.asarray(y_pred).ravel() > 0.5).astype(int)

# Step 2: Compute the confusion matrix against the true labels.
# `y_test` already holds one binary label per row (it is not one-hot encoded),
# so it is only flattened to a plain integer array.
y_true_classes = np.asarray(y_test).ravel().astype(int)
confusion_mtx = confusion_matrix(y_true_classes, y_pred_classes)

# Step 3: Calculate the F1-score, averaged over classes weighted by their support
f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')

# Save the classification report to a text file
with open('classification_report.txt', 'w') as report_file:
    report = classification_report(y_true_classes, y_pred_classes)
    report_file.write(report)

# Print the confusion matrix
print("Confusion Matrix:")
print(confusion_mtx)

# Print the F1-score
print("\nWeighted F1-Score:", f1)
