# 1. Library Management

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# 2. Load & Preprocess Data

In [None]:
# Load Dataset
data = pd.read_csv("../data/diabete.csv")

# Drop the "Unnamed: 0" column if it exists (presumably a leftover index
# column from an earlier export -- verify against the CSV). errors="ignore"
# makes this a no-op when the column is absent.
data = data.drop(columns=["Unnamed: 0"], errors="ignore")

# Features and Target
X = data.drop(columns=["target"])
y = data["target"]

# Split BEFORE scaling: 70% train, then the remaining 30% is halved into
# validation and test (15% each). The fixed random_state keeps the split
# reproducible.
X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_val_raw, X_test_raw, y_val, y_test = train_test_split(
    X_temp_raw, y_temp, test_size=0.5, random_state=42
)

# Fit the scaler on the training rows only, then apply the same transform
# to validation and test. Fitting on the full dataset would leak the
# validation/test mean and variance into training and make the reported
# test metrics optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix scaled with the training statistics; kept under this
# name because later cells reference X_scaled.
X_scaled = scaler.transform(X)

print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)
print("Testing set shape:", X_test.shape)


# 3. Model Creation

In [None]:
# Assemble the regression network one layer at a time
n_features = X_train.shape[1]  # width of the input layer

model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(n_features,)))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1))  # single unit with no activation: the regression output

# Adam optimizer, mean-squared-error loss, MAE reported as an extra metric
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# Print the layer-by-layer overview
model.summary()

# 4. Training Model

In [None]:
# Halt training once val_loss has gone 10 epochs without improving and
# restore the weights from the best epoch
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Fit on the training split; the validation split is scored after every
# epoch and drives the early-stopping callback
val_pair = (X_val, y_val)
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    verbose=1,
    validation_data=val_pair,
    callbacks=[early_stopping],
)


# 5. Model Evaluation

In [None]:
# Score the trained model on the held-out test split; evaluate() returns
# the loss followed by the compiled metric (MAE)
test_metrics = model.evaluate(X_test, y_test, verbose=0)
loss, mae = test_metrics
print(f"Test Loss: {loss:.4f}, Test MAE: {mae:.4f}")

# Draw the training and validation loss curves on a single axes
fig, ax = plt.subplots(figsize=(12, 6))
for curve_key, curve_label in (
    ('loss', 'Training Loss'),
    ('val_loss', 'Validation Loss'),
):
    ax.plot(history.history[curve_key], label=curve_label)
ax.set_title('Model Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()


## Stratified Cross Validation

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_absolute_error

# Initialize Stratified K-Fold
k = 5  # Number of folds
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

# StratifiedKFold stratifies on class labels, but `y` is a regression
# target. Stratify on quantile bins of the target instead so every fold
# covers a similar range of target values. duplicates="drop" merges bins
# whose edges coincide (heavily tied targets).
n_bins = 10
y_bins = pd.qcut(y, q=n_bins, labels=False, duplicates="drop")

# Store metrics for each fold
fold_mae_scores = []
fold_mse_scores = []

# Perform Stratified K-Fold Cross-Validation
for fold, (train_idx, val_idx) in enumerate(skf.split(X, y_bins)):
    print(f"Training fold {fold + 1}/{k}...")

    # Scale inside the fold: the scaler is fitted on this fold's training
    # rows only, so the held-out rows never influence the preprocessing.
    fold_scaler = StandardScaler()
    X_train_fold = fold_scaler.fit_transform(X.iloc[train_idx])
    X_val_fold = fold_scaler.transform(X.iloc[val_idx])
    y_train_fold, y_val_fold = y.iloc[train_idx], y.iloc[val_idx]

    # Create a fresh model for each fold. It is bound to `fold_model`, not
    # `model`, so the model trained in the earlier cells is not overwritten
    # (the saving cell further down saves whatever `model` refers to).
    fold_model = Sequential([
        Dense(128, activation='relu', input_shape=(X_train_fold.shape[1],)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1)
    ])

    # Compile the model
    fold_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # Train the model. No validation_data is passed: nothing here consumes
    # per-epoch validation metrics, and the held-out fold is scored
    # explicitly below.
    fold_model.fit(
        X_train_fold, y_train_fold,
        epochs=50,
        batch_size=32,
        verbose=0
    )

    # Evaluate the model on the validation fold. predict() returns a column
    # of shape (n, 1); flatten it once so both metrics work on 1-D arrays.
    val_predictions = fold_model.predict(X_val_fold).flatten()
    y_val_true = y_val_fold.to_numpy()
    fold_mae = mean_absolute_error(y_val_true, val_predictions)
    fold_mse = np.mean((y_val_true - val_predictions) ** 2)

    fold_mae_scores.append(fold_mae)
    fold_mse_scores.append(fold_mse)

    print(f"Fold {fold + 1} - MAE: {fold_mae:.4f}, MSE: {fold_mse:.4f}")

# Calculate average MAE and MSE across all folds
average_mae = np.mean(fold_mae_scores)
average_mse = np.mean(fold_mse_scores)

print(f"\nAverage MAE across {k} folds: {average_mae:.4f}")
print(f"Average MSE across {k} folds: {average_mse:.4f}")


# 6. Model Saving

In [None]:
# Persist the model in the native Keras format.
# NOTE: this saves whatever object is currently bound to `model`; several
# cells in this notebook assign that name, so the saved model depends on
# which cell ran last.
model_path = "diabetes_model.keras"
model.save(model_path)
print(f"Model saved as {model_path}")

# 7. Model Fine-Tuning

In [None]:
# Reload the model written to disk so training can continue from it
saved_model_path = "diabetes_model.keras"
loaded_model = load_model(saved_model_path)

# Re-compile with the same optimizer, loss and metric names used for the
# initial training; this is the place to change the learning rate or the
# architecture if desired
loaded_model.compile(loss='mse', metrics=['mae'], optimizer='adam')

# Continue training on the current train/validation split, reusing the
# `early_stopping` callback defined in an earlier cell
fine_tune_callbacks = [early_stopping]
history_fine_tune = loaded_model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    verbose=1,
    validation_data=(X_val, y_val),
    callbacks=fine_tune_callbacks,
)

# Score the fine-tuned model on the test split
fine_tuned_metrics = loaded_model.evaluate(X_test, y_test, verbose=0)
loss, mae = fine_tuned_metrics
print(f"Fine-Tuned Test Loss: {loss:.4f}, Fine-Tuned Test MAE: {mae:.4f}")


In [None]:
# Hyperparameter and Architecture Tuning

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import matplotlib.pyplot as plt

# Hyperparameter / architecture experiment: MinMax-scaled inputs, a deeper
# network, longer early-stopping patience and a smaller batch size.
# This cell previously contained the whole experiment twice, verbatim; the
# second copy retrained an identical configuration and overwrote the first
# run's results, so it is now run once.

# Split first (70% train / 15% validation / 15% test, fixed seed) ...
X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_val_raw, X_test_raw, y_val, y_test = train_test_split(
    X_temp_raw, y_temp, test_size=0.5, random_state=42
)

# ... then fit the MinMaxScaler on the training rows only, so validation
# and test minima/maxima do not leak into training. Validation/test values
# may therefore fall slightly outside [0, 1]; that is expected.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix scaled with the training statistics, kept under the
# name this cell has always (re)defined.
X_scaled = scaler.transform(X)

# Define a more complex model with adjusted parameters
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

# Compile with an explicit Adam learning rate.
# NOTE: 0.001 is Adam's default, so this is the same rate that
# optimizer='adam' uses elsewhere in the notebook, not a lower one. Change
# LEARNING_RATE here to actually experiment with it.
LEARNING_RATE = 0.001
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='mean_squared_error',
    metrics=['mean_absolute_error'],
)

# Early stopping with increased patience
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train the model with more epochs and smaller batch size
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=16,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate the model on the test data
loss, mae = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss:.4f}, Test MAE: {mae:.4f}")

# Plot Training History
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
