In [5]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras import Sequential, layers, optimizers
from keras import regularizers
from keras_tuner import Hyperband

# Load the housing dataset.
# NOTE(review): this is an absolute, machine-specific path; point
# HOUSING_CSV_PATH at a local copy before running the notebook elsewhere.
HOUSING_CSV_PATH = '/Users/mohanganadal/Data Company/Text Processing/Programs/DocumentProcessor/Source Code/Data-Company/AI/HousingData.csv'

housing_dataset = pd.read_csv(HOUSING_CSV_PATH)

housing_dataset = housing_dataset.rename(
    columns={'MEDV': 'Price'})  # Rename target column if needed

# Every missing cell (features and target alike) is replaced with 0.
# NOTE(review): imputed cells become indistinguishable from genuine zeros;
# consider a statistic computed on the training split instead -- confirm
# which columns actually contain NaNs before changing this.
housing_dataset = housing_dataset.fillna(0)

X = housing_dataset.drop('Price', axis=1)
y = housing_dataset['Price']

# Split BEFORE scaling so that the scaler never sees the held-out rows.
# Fitting StandardScaler on the full dataset leaks the test-set mean and
# variance into the training features and makes the test score optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=33)

# Standardize the features using statistics from the training split only
standard_scalar = StandardScaler()
X_train = standard_scalar.fit_transform(X_train_raw)
X_test = standard_scalar.transform(X_test_raw)

# Full feature matrix scaled with the train-fitted scaler; kept so that any
# later cell that still refers to X_scaled keeps working.
X_scaled = standard_scalar.transform(X)

# Define the model-building function for hyperparameter tuning


def build_model(hp, input_dim=13, l2_strength=0.01, dropout_rate=0.2):
    """Build and compile a dense regression network for hyperparameter tuning.

    The search space registered on ``hp`` is:

    * ``num_layers``    -- number of hidden Dense layers, 2 to 4.
    * ``units_{i}``     -- width of hidden layer ``i``: 64 to 256, step 64.
    * ``activation``    -- ``'relu'`` or ``'tanh'``, shared by all hidden layers
      (the same hyperparameter name is requested on every iteration).
    * ``learning_rate`` -- Adam learning rate, one of 1e-2, 1e-3, 1e-4.

    Args:
        hp: The hyperparameter container passed in by the tuner.
        input_dim: Number of input features. Defaults to 13, the value that
            was previously hard-coded; pass ``X_train.shape[1]`` (e.g. via
            ``functools.partial``) when the feature count differs.
        l2_strength: L2 kernel-regularization factor applied to every hidden
            Dense layer. Defaults to the previously hard-coded 0.01.
        dropout_rate: Dropout rate applied after every hidden Dense layer.
            Defaults to the previously hard-coded 0.2.

    Returns:
        A compiled ``Sequential`` model with a single linear output unit,
        mean-squared-error loss and an MAE metric.
    """
    model = Sequential()
    model.add(layers.Input(shape=(input_dim,)))

    # Tune the number of hidden layers and the width of each one.
    for i in range(hp.Int('num_layers', 2, 4)):
        model.add(layers.Dense(
            units=hp.Int(f'units_{i}', min_value=64,
                         max_value=256, step=64),
            activation=hp.Choice('activation', ['relu', 'tanh']),
            # L2 penalty on the weights to limit overfitting
            kernel_regularizer=regularizers.l2(l2_strength)
        ))

        # Dropout after each hidden layer as a second regularizer
        model.add(layers.Dropout(dropout_rate))

    # Single linear unit: the network predicts one continuous value
    model.add(layers.Dense(1, activation='linear'))

    # Compile with a tunable Adam learning rate
    model.compile(
        optimizer=optimizers.Adam(learning_rate=hp.Choice(
            'learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='mean_squared_error',
        metrics=['mae']
    )

    return model


# Initialize the tuner (Hyperband)
tuner = Hyperband(
    build_model,
    objective='val_mae',  # Minimize validation MAE
    max_epochs=50,        # Largest epoch budget any single trial can receive
    factor=3,             # Reduction factor for epochs / models per bracket
    seed=33,              # Seed the tuner so the search is repeatable
    directory='hyperparam_tuning',  # Directory to store the results
    project_name='housing_price_prediction_optimized'
)

# Start the search for the best hyperparameters.
# No `epochs` argument is passed: Hyperband assigns each trial its own epoch
# budget (bounded by max_epochs above), so the former epochs=100 had no
# effect and contradicted max_epochs=50.
tuner.search(X_train, y_train, validation_split=0.2, batch_size=32)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The optimal number of layers is {best_hps.get('num_layers')}.
The optimal units in each layer are {[best_hps.get(f'units_{i}') for i in range(best_hps.get('num_layers'))]}.
The optimal activation function is {best_hps.get('activation')}.
The optimal learning rate is {best_hps.get('learning_rate')}.
""")

# Build a fresh (untrained) model with the selected hyperparameters
best_model = tuner.hypermodel.build(best_hps)

# Train the best model. validation_split=0.2 holds out 20% of the training
# data for validation, so the model is fitted on the remaining 80%; the
# held-out part supplies the val_* curves plotted below.
history = best_model.fit(
    X_train, y_train, validation_split=0.2, epochs=100, batch_size=32)

# Evaluate the best model on the test set
test_loss, test_mae = best_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {test_loss}")
print(f"Test MAE: {test_mae}")

# Plot the training history (optional)
plt.plot(history.history['mae'], label='Training MAE')
plt.plot(history.history['val_mae'], label='Validation MAE')
plt.title('MAE Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Mean Absolute Error (MAE)')
plt.legend()
plt.show()

ModuleNotFoundError: No module named 'keras'

In [None]:
# Show the Keras summary of best_model, which is built from the best
# hyperparameters and fitted in the first cell of this notebook.
best_model.summary()

In [None]:
# Per-epoch loss values recorded while fitting best_model.
fit_log = history.history
loss, val_loss = fit_log['loss'], fit_log['val_loss']
epochs = range(1, len(loss) + 1)  # 1-based epoch numbers for the x-axis

# Draw training and validation loss on a single set of axes.
fig, ax = plt.subplots()
ax.plot(epochs, loss, 'y', label='Training Loss')
ax.plot(epochs, val_loss, 'r', label='Validation Loss')
ax.set_title('Loss Over Epochs')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Run the trained model on the first 10 rows of the training split.
top_10_samples = X_train[:10]
predicted_values = best_model.predict(top_10_samples)

# Fetch the matching targets; a pandas Series is converted to a NumPy array,
# anything else is sliced as-is.
if isinstance(y_train, pd.Series):
    actual_values = y_train[:10].to_numpy()
else:
    actual_values = y_train[:10]


# First table: prediction next to the true value.
# Each prediction row is indexed with [0] to get its single value.
print("Predicted vs Actual values for the top 10 samples:")
print(f"{'Sample':<10}{'Predicted':<15}{'Actual':<10}")
print("-" * 35)
paired_rows = zip(predicted_values, actual_values)
for row_no, (pred_row, target) in enumerate(paired_rows, start=1):
    print(f"{row_no:<10}{pred_row[0]:<15.3f}{target:<10.3f}")

# Collapse both arrays to 1-D so they can be subtracted element-wise.
actual_values = actual_values.flatten()
predicted_values = predicted_values.flatten()

# Per-sample absolute error plus the aggregate error metrics.
absolute_errors = np.abs(predicted_values - actual_values)
mae = mean_absolute_error(actual_values, predicted_values)
mse = mean_squared_error(actual_values, predicted_values)
rmse = np.sqrt(mse)

# Second table: the same comparison with the absolute error appended.
print("Predicted vs Actual values and Errors:")
print(f"{'Sample':<10}{'Predicted':<15}{'Actual':<10}{'Abs Error':<15}")
print("-" * 50)
error_rows = zip(predicted_values, actual_values, absolute_errors)
for row_no, (pred_val, target, err) in enumerate(error_rows, start=1):
    print(f"{row_no:<10}{pred_val:<15.3f}{target:<10.3f}{err:<15.3f}")

# Aggregate metrics over the 10 samples.
print("\nError Metrics:")
print(f"Mean Absolute Error (MAE): {mae:.3f}")
print(f"Mean Squared Error (MSE): {mse:.3f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.3f}")