In [34]:
import numpy as np
import pandas as pd
# Read the raw CSV into a DataFrame; the location is kept in a named constant
# so it is easy to spot and change.
DATA_PATH = '/content/diabetes.csv'
df = pd.read_csv(DATA_PATH)

In [35]:
# Every column except the last is treated as a feature; the last column is
# the target. Both are converted to plain NumPy arrays.
feature_frame = df.iloc[:, :-1]
target_series = df.iloc[:, -1]
X = feature_frame.values
y = target_series.values

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

print("Data preparation complete.")


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
import keras_tuner as kt

def build_model(hp):
    """Build and compile a dense regression network from tuner hyperparameters.

    The search space covers the width and activation of the input layer, the
    number of hidden layers (1-3), each hidden layer's width, activation and
    dropout rate, the optimizer and its learning rate. All Dense kernels use
    He-normal initialization.

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Hyperparameter container supplied by the tuner.

    Returns
    -------
    A compiled Sequential model with a single linear output unit, MSE loss
    and an MAE metric.

    Notes
    -----
    The number of input features is read from the notebook-level ``X_train``.
    """
    model = Sequential()

    # Input layer
    model.add(Dense(
        units=hp.Int('units_input', min_value=16, max_value=128, step=16),
        activation=hp.Choice('activation_input', values=['relu', 'leaky_relu', 'elu', 'selu', 'gelu', 'swish']),
        kernel_initializer=tf.keras.initializers.HeNormal(),
        input_dim=X_train.shape[1]
    ))

    # Hidden layers
    for i in range(hp.Int('num_hidden_layers', 1, 3)):
        units = hp.Int(f'units_{i}', min_value=16, max_value=128, step=16)
        activation = hp.Choice(f'activation_{i}', values=['relu', 'leaky_relu', 'PReLU', 'selu'])
        if activation == 'PReLU':
            # PReLU has trainable parameters and is provided as a layer, not
            # as an activation identifier, so passing the string to Dense
            # fails. Use a linear Dense followed by a PReLU layer instead.
            model.add(Dense(
                units=units,
                kernel_initializer=tf.keras.initializers.HeNormal()
            ))
            model.add(tf.keras.layers.PReLU())
        else:
            model.add(Dense(
                units=units,
                activation=activation,
                kernel_initializer=tf.keras.initializers.HeNormal()
            ))
        model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.0, max_value=0.5, step=0.1)))

    # Output layer: one linear unit for regression
    model.add(Dense(
        1,
        activation='linear',
        kernel_initializer=tf.keras.initializers.HeNormal()
    ))

    # Optimizer tuning. The learning rate is declared once and shared by both
    # optimizers; an unexpected optimizer name fails loudly instead of
    # leaving `opt` unbound.
    optimizer = hp.Choice('optimizer', values=['adam', 'rmsprop'])
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    optimizer_classes = {'adam': Adam, 'rmsprop': RMSprop}
    if optimizer not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer: {optimizer!r}")
    opt = optimizer_classes[optimizer](learning_rate=learning_rate)

    # Compile the model
    model.compile(optimizer=opt, loss='mse', metrics=['mae'])
    return model


# Random search or hyperband

In [None]:
# Random-search tuner over the search space defined in build_model
tuner = kt.RandomSearch(
    build_model,
    objective='val_mae',  # Tuning for validation mean absolute error
    max_trials=10,  # Number of hyperparameter trials
    executions_per_trial=1,  # Number of model executions per trial
    directory='my_tuning_dir',  # Directory for saving tuning results
    project_name='regression_tuning'
)

# Score trials on a validation slice carved out of the training data. The
# held-out test set must not drive hyperparameter selection; otherwise any
# test metric reported afterwards is optimistically biased.
tuner.search(X_train, y_train, validation_split=0.2, epochs=50, batch_size=32, verbose=1)

# Rebuild a fresh, untrained model from the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
best_model = tuner.hypermodel.build(best_hps)

# Retrain the best configuration on the training data, again monitoring a
# validation slice of the training data rather than the test set
history = best_model.fit(X_train, y_train, validation_split=0.2, epochs=100, batch_size=32)


In [None]:
# Hyperband search over the search space defined in build_model

# Halt a trial once validation loss has gone 10 epochs without improving.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

hyperband_options = dict(
    objective='val_mae',
    max_epochs=50,
    factor=3,
    directory='tuner_dir',
    project_name='regression_with_optimizer_tuning',
)
tuner = kt.Hyperband(build_model, **hyperband_options)

# Run the search; 20% of the training rows are held back for validation.
tuner.search(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=100,
    callbacks=[early_stop],
    verbose=1,
)

# Report the winning configuration.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best hyperparameters found:")
print(best_hps.values)


In [None]:
# Train the best model

# Rebuild through the tuner's hypermodel so the retrained network is exactly
# the configuration that was tuned. A hand-written copy of the architecture
# omitted the He-normal initializers and passed the activation names straight
# to Dense, so it could silently diverge from the model builder.
model = tuner.hypermodel.build(best_hps)

# The model comes back already compiled; expose its optimizer under the name
# this cell has always defined.
opt = model.optimizer

# Train with early stopping on a validation slice of the training data
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)],
    verbose=1
)

print("Training complete.")


In [None]:
# Evaluate and save the model

# Score the trained network on the test partition; evaluate() returns the
# loss followed by the compiled metric.
test_scores = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_mae = test_scores
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# Persist the trained network to disk.
MODEL_PATH = 'final_regression_model.h5'
model.save(MODEL_PATH)
print(f"Model saved as '{MODEL_PATH}'.")


In [None]:
# Generate predictions

# Predict on the training partition first, then on the test partition.
y_train_pred, y_test_pred = (
    model.predict(partition) for partition in (X_train, X_test)
)

# Stack train and test back together (train rows first) and predict on the
# combined data. Note that this rebinds X and y to the scaled, re-ordered
# arrays.
X = np.concatenate([X_train, X_test])
y = np.concatenate([y_train, y_test])
y_pred = model.predict(X)

print("Predictions generated for all datasets.")


In [None]:
# Step 3: Calculate Metrics

def calculate_metrics(y_true, y_pred):
    """Compute regression diagnostics for one set of predictions.

    Both inputs are flattened to 1-D before comparison, so a column vector of
    predictions with shape (n, 1) is compared element-wise with targets of
    shape (n,) instead of being broadcast into an (n, n) difference matrix.
    Everything is computed with NumPy, so no extra metric imports are needed.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values; must contain as many elements as ``y_true``.

    Returns
    -------
    tuple of float
        ``(r2, rmse, mae, mape, max_error, min_error)``. ``mape`` is a
        percentage computed only over samples whose true value is non-zero
        (the relative error is undefined at zero); it is NaN when every true
        value is zero.

    Raises
    ------
    ValueError
        If the inputs are empty or contain different numbers of elements.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.size != y_pred.size:
        raise ValueError(
            f"y_true and y_pred must have the same number of elements "
            f"(got {y_true.size} and {y_pred.size})"
        )
    if y_true.size == 0:
        raise ValueError("y_true and y_pred must not be empty")

    residuals = y_true - y_pred
    abs_errors = np.abs(residuals)

    # Coefficient of determination: 1 - SS_res / SS_tot.
    ss_res = float(np.sum(residuals ** 2))
    ss_tot = float(np.sum((y_true - y_true.mean()) ** 2))
    if ss_tot > 0.0:
        r2 = 1.0 - ss_res / ss_tot
    else:
        # Constant targets: report a finite score rather than dividing by zero.
        r2 = 1.0 if ss_res == 0.0 else 0.0

    rmse = float(np.sqrt(np.mean(residuals ** 2)))
    mae = float(np.mean(abs_errors))

    # Relative error is only defined where the true value is non-zero.
    nonzero = y_true != 0
    if nonzero.any():
        mape = float(np.mean(abs_errors[nonzero] / np.abs(y_true[nonzero])) * 100)
    else:
        mape = float('nan')

    max_error = float(np.max(abs_errors))
    min_error = float(np.min(abs_errors))
    return r2, rmse, mae, mape, max_error, min_error

# Evaluate each partition, plus the combined data, with the same helper.
metrics_train = calculate_metrics(y_train, y_train_pred)
metrics_test = calculate_metrics(y_test, y_test_pred)
metrics_whole = calculate_metrics(y, y_pred)

# One row per dataset and one column per metric; the column order follows the
# tuple returned by calculate_metrics.
metric_columns = ['R2', 'RMSE', 'MAE', 'MAPE', 'Max Error', 'Min Error']
metrics_df = pd.DataFrame(
    [metrics_train, metrics_test, metrics_whole],
    columns=metric_columns,
)
metrics_df.insert(0, 'Dataset', ['Training', 'Testing', 'Whole'])

print("Metrics calculated:")
print(metrics_df)


In [None]:
# Save results and predictions

import os

# Make sure the output directory exists.
output_dir = "./output"
os.makedirs(output_dir, exist_ok=True)

# Metrics table as CSV.
metrics_csv_path = os.path.join(output_dir, 'evaluation_metrics.csv')
metrics_df.to_csv(metrics_csv_path, index=False)

# Targets, predictions and the combined feature matrix as NumPy files, one
# file per array, written in the order listed here.
arrays_to_save = {
    'y_train': y_train,
    'y_train_pred': y_train_pred,
    'y_test': y_test,
    'y_test_pred': y_test_pred,
    'X': X,
    'y_pred': y_pred,
}
for array_name, array_values in arrays_to_save.items():
    np.save(os.path.join(output_dir, array_name), array_values)

print(f"Results saved in '{output_dir}' directory.")


In [None]:
# Visualize metrics (optional)

import matplotlib.pyplot as plt

# Grouped bar chart: one group per dataset, one bar per metric.
ax = metrics_df.set_index('Dataset')[['R2', 'RMSE', 'MAE', 'MAPE']].plot(
    kind='bar', figsize=(10, 6), color=['blue', 'orange', 'green', 'red'], rot=0
)
ax.set_title('Model Evaluation Metrics')
ax.set_ylabel('Metric Value')
ax.set_xlabel('Dataset')
ax.legend(title='Metrics')
ax.grid(True)

fig = ax.get_figure()
fig.tight_layout()
# 300 dpi is ample for print. The previous 2000 dpi turned this 10x6 inch
# figure into a 20000x12000 pixel image, which is slow to render and needs
# far more memory than the plot warrants.
fig.savefig(os.path.join(output_dir, 'metrics_plot.png'), dpi=300)
plt.show()


In [None]:
# Visualize training history

import matplotlib.pyplot as plt

# Training and validation loss per epoch on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
loss_curves = (
    ('loss', 'Training Loss', 'blue'),
    ('val_loss', 'Validation Loss', 'orange'),
)
for history_key, curve_label, curve_color in loss_curves:
    ax.plot(history.history[history_key], label=curve_label, color=curve_color)
ax.set_title('Model Loss Over Epochs')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()
