In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
import numpy as np
import joblib

# End-to-end training script: load the preprocessed diabetes CSV, split it
# into train/validation/test sets, standardise the continuous columns, train
# a small dense binary classifier with balanced class weights, evaluate it on
# the held-out test set, and save the model, the scaler and the training
# history to disk.

# --- Step 1: Load and Analyze Data ---
file_name = '/content/preprocessed_diabetes_data.csv'
target_column = 'Diabetes_Diagnosis'
df = pd.read_csv(file_name)
print(f"Successfully loaded '{file_name}'")

# --- Data Diagnosis ---
print("\n--- Data Diagnosis ---")
target_counts = df[target_column].value_counts()
print("Distribution of Target Variable ('Diabetes_Diagnosis'):")
print(target_counts)
print("----------------------\n")

# --- Step 2: Prepare Data ---
y = df[target_column]
X = df.drop(columns=target_column)

# Stratify so every split keeps the class ratio printed above; without it an
# imbalanced target can end up with a different minority share in each split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Carve a validation set out of the training data. Early stopping selects the
# best epoch on this set, so the test set stays untouched until the final
# evaluation and the reported test metrics are not biased by model selection.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)
print(f"Split sizes -> train: {len(X_train)}, validation: {len(X_val)}, test: {len(X_test)}")

# --- Step 3: Scale Data ---
scaler = StandardScaler()
columns_to_scale = ['Age', 'BMI']
# Work on explicit copies: the frames returned by train_test_split are
# selections of `X`, and assigning columns into them can raise pandas'
# SettingWithCopyWarning.
X_train, X_val, X_test = X_train.copy(), X_val.copy(), X_test.copy()
# Fit on the training rows only; validation and test rows are transformed
# with the training statistics.
X_train[columns_to_scale] = scaler.fit_transform(X_train[columns_to_scale])
X_val[columns_to_scale] = scaler.transform(X_val[columns_to_scale])
X_test[columns_to_scale] = scaler.transform(X_test[columns_to_scale])

# --- Step 4: Calculate Class Weights ---
classes = np.unique(y_train)
weights = class_weight.compute_class_weight('balanced', classes=classes, y=y_train)
# Key each weight by its actual class label (not by its position in the
# array) so the mapping stays correct regardless of how labels are encoded.
class_weights = {int(label): float(weight) for label, weight in zip(classes, weights)}
print(f"Calculated Class Weights: {class_weights}")

# --- Step 5: Build and Compile Model ---
n_features = X_train.shape[1]
model = Sequential([
    # tf.keras.Input(shape=...) replaces InputLayer(input_shape=...), whose
    # `input_shape` argument is deprecated in Keras 3.
    tf.keras.Input(shape=(n_features,)),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])
model.summary()
custom_optimizer = Adam(learning_rate=0.0005)
model.compile(optimizer=custom_optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# --- Step 6: Train the Model ---
print("\nStarting model training...")
# Stop once validation loss has not improved for 15 epochs and roll the model
# back to the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1)
history = model.fit(
    X_train, y_train,
    epochs=150, batch_size=20,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    class_weight=class_weights,
    verbose=1
)
print("Model training complete.")

# --- Step 7: Evaluate the Final Model ---
# The test set has not been used for training or early stopping.
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print("\nFinal Model Evaluation:")
print(f"  - Test Loss: {loss:.4f}")
print(f"  - Test Accuracy: {accuracy * 100:.2f}%")

# --- Step 8: Save Model, Scaler, and History ---
print("\nSaving all artifacts...")
model.save('diabetes_model.keras')
# The fitted scaler is saved alongside the model so the same 'Age'/'BMI'
# transformation can be re-applied later.
joblib.dump(scaler, 'scaler.joblib')

# Convert the history object to a DataFrame and save it as a CSV
history_df = pd.DataFrame(history.history)
history_df.to_csv('training_history.csv', index=False)
print("Model, scaler, and training history saved successfully.")

print("\nTraining script finished.")