# Importing Dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras_tuner.tuners import RandomSearch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error, r2_score
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

: 

# Getting Data and Its Information

In [None]:
# Load the housing-prices CSV from the Kaggle input directory into a DataFrame.
dataset = pd.read_csv('/kaggle/input/housing-prices-dataset/Housing.csv')

In [None]:
# Preview the first rows of the raw data.
dataset.head()

In [None]:
# (rows, columns) of the raw data.
dataset.shape

In [None]:
# Column names, dtypes and non-null counts of the raw data.
dataset.info()

In [None]:
# Summary statistics of the raw data.
dataset.describe()

# Visualising Dataset

In [None]:
# Histogram of the target column with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(dataset['price'], kde=True, ax=ax)
ax.set(title='Distribution of Price', xlabel='Price', ylabel='Count')
plt.show()

In [None]:
# Pairwise relationships among the numerical columns (target included).
sns.pairplot(
    dataset.loc[:, ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']]
)
plt.show()

In [None]:
# How many rows fall into each 'airconditioning' category.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=dataset, x='airconditioning', ax=ax)
ax.set(title='Counts of Air Conditioning', xlabel='Air Conditioning', ylabel='Count')
plt.show()

In [None]:
 # Relationship between 2 features
fig, ax = plt.subplots(figsize=(10, 6))
# One box per bedroom count, showing the spread of price within that group.
sns.boxplot(data=dataset, x='bedrooms', y='price', ax=ax)
ax.set(title='Price by Bedrooms', xlabel='Bedrooms', ylabel='Price')
plt.show()

In [None]:
# How many rows fall into each 'furnishingstatus' category.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=dataset, x='furnishingstatus', ax=ax)
ax.set(title='Counts of Furnishing Status', xlabel='Furnishing Status', ylabel='Count')
# Tilt the category labels on the x axis.
plt.xticks(rotation=45)
plt.show()

In [None]:
# How many rows fall into each 'prefarea' category.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=dataset, x='prefarea', ax=ax)
ax.set(title='Counts of Preferred Area', xlabel='Preferred Area', ylabel='Count')
plt.show()


In [None]:
# Spread of 'price' within each 'mainroad' category.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=dataset, x='mainroad', y='price', ax=ax)
ax.set(title='Price by Mainroad', xlabel='Mainroad', ylabel='Price')
plt.show()


In [None]:
# Histogram of 'area' with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(dataset['area'], kde=True, color='green', ax=ax)
ax.set(title='Distribution of Area', xlabel='Area', ylabel='Count')
plt.show()


# Converting Categorical Columns 

In [None]:
# Columns to expand into indicator (dummy) columns.
categorical_columns = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
    'furnishingstatus',
]
encoded_data = pd.get_dummies(dataset, columns=categorical_columns)

# Show the resulting column names.
encoded_data.columns

# Understanding Correlation

In [None]:
# Heatmap of the pairwise correlations between all encoded columns.
ax = sns.heatmap(encoded_data.corr(), cmap='coolwarm')
ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# (rows, columns) after the categorical columns were expanded.
encoded_data.shape

In [None]:
# Column names, dtypes and non-null counts after encoding.
encoded_data.info()

# Dividing Target and Features

In [None]:
# Target: the 'price' column.
Y = encoded_data['price']
# Features: every column except the target.
X = encoded_data.drop(columns=['price'])

# Feature Scaling

In [None]:
# Standardise every feature column.
# NOTE(review): the scaler is fitted on all rows of X, i.e. before any
# train/test split, so held-out rows contribute to the fitted statistics.
sc = StandardScaler().fit(X)
X_scaled = sc.transform(X)

# Using Train Test Split

In [None]:
# Split the raw (unscaled) features first so the scaler never sees test rows.
# Same test_size / random_state as before, so the same rows land in each split.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=100
)

# Fit the scaler on the training rows only, then apply that same transform to
# the held-out rows. Fitting on all of X (as X_scaled does) leaks the test
# set's mean/variance into training and makes the test metrics optimistic.
sc = StandardScaler()
X_train = sc.fit_transform(X_train_raw)
X_test = sc.transform(X_test_raw)

# Model Building

In [None]:
def build_model(hp, input_dim=20):
    """Build and compile a two-hidden-layer regression network for the tuner.

    Parameters
    ----------
    hp
        Keras Tuner hyperparameter object; its ``Int`` and ``Choice`` methods
        are used to register and sample the search space:
        ``units1``/``units2`` (32..512 in steps of 32),
        ``activation1``/``activation2`` (relu, tanh or sigmoid) and
        ``learning_rate`` (1e-2, 1e-3 or 1e-4).
    input_dim : int, default 20
        Number of input features. The default keeps the previously hard-coded
        width, so ``build_model(hp)`` behaves exactly as before; pass a
        different value (e.g. via ``functools.partial``) if the feature matrix
        changes.

    Returns
    -------
    keras.Sequential
        Model compiled with the Adam optimizer, mean-squared-error loss and a
        mean-absolute-error metric.
    """
    model = keras.Sequential()
    model.add(layers.Input(shape=(input_dim,)))

    # Two tunable hidden layers; hyperparameter names stay 'units1',
    # 'activation1', 'units2', 'activation2', registered in that order.
    for layer_idx in (1, 2):
        units = hp.Int(f'units{layer_idx}', min_value=32, max_value=512, step=32)
        activation = hp.Choice(f'activation{layer_idx}',
                               values=['relu', 'tanh', 'sigmoid'])
        model.add(layers.Dense(units=units, activation=activation))

    # Single linear output unit: the model predicts one continuous value.
    model.add(layers.Dense(1, activation='linear'))

    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mean_squared_error',
        metrics=['mean_absolute_error'],
    )

    return model

# Hyperparameter Tuning

In [None]:
# Random search over the hyperparameters registered in build_model,
# ranked by validation loss.
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=3,
    # Fixed seed so the sampled hyperparameter combinations are the same on
    # every run (same value as the train/test split's random_state).
    seed=100,
    directory='my_dir',
    project_name='my_project'
)

In [None]:
# Select hyperparameters on a validation slice carved out of the training data
# (Keras holds out the last 20% of the rows), not on the test set: the test
# set is used for the final metrics, so tuning against it would bias them.
# np.asarray gives Keras a plain array to slice alongside X_train.
tuner.search(X_train, np.asarray(Y_train), validation_split=0.2, epochs=10)

In [None]:
# Print a summary of the search results.
tuner.results_summary()

# Retraining Model

In [None]:
# Take the top-ranked model from the search.
top_models = tuner.get_best_models(num_models=1)
best_model = top_models[0]

# Fit it for 50 epochs, reporting metrics on the held-out split each epoch.
best_model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=50,
)

# Evaluation Metrics

In [None]:
# Predict prices for the held-out rows. This is a regression model with one
# linear output unit, so the raw predictions are used as-is: thresholding at
# 0.5 (a classification idiom) would turn every prediction into True/False
# and make all metrics below meaningless. ravel() flattens the (n, 1) output
# to 1-D so it lines up with Y_test.
y_pred = best_model.predict(X_test).ravel()

# Mean Absolute Error (MAE)
mae = mean_absolute_error(Y_test, y_pred)
print(f'MAE: {mae}')

# Mean Squared Error (MSE)
mse = mean_squared_error(Y_test, y_pred)
print(f'MSE: {mse}')

# Root Mean Squared Error (RMSE), taken as sqrt(MSE) rather than via the
# `squared=False` argument, which newer scikit-learn releases no longer accept.
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')

# Root Mean Squared Logarithmic Error (RMSLE). The log error is undefined for
# negative values, so negative predictions are clipped to 0 for this metric only.
rmsle = np.sqrt(mean_squared_log_error(Y_test, np.clip(y_pred, 0, None)))
print(f'RMSLE: {rmsle}')

# R-squared (coefficient of determination)
r2 = r2_score(Y_test, y_pred)
print(f'R-squared (R2): {r2}')

