In [4]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

# Load the dataset
data = pd.read_csv('kc_house_data.csv')

# Define the target column and feature columns
target_column = 'price'  # Target variable for prediction
features = [
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront', 'view',
    'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated',
    'zipcode', 'lat', 'long', 'sqft_living15', 'sqft_lot15'
]

# Separate the features and the target variable
X = data[features]
y = data[target_column]

# Train a RandomForestRegressor to get feature importances
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X, y)

# Get feature importances and select the top N features
N = 7  # Number of top features to select
feature_importances = pd.Series(rf.feature_importances_, index=X.columns)
top_features = feature_importances.nlargest(N).index

# Use only the top N features for the model
X = X[top_features]

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Build a neural network model using TensorFlow
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),  # First hidden layer
    tf.keras.layers.Dropout(0.2),  # Dropout for regularization
    tf.keras.layers.Dense(32, activation='relu'),  # Second hidden layer
    tf.keras.layers.Dropout(0.2),  # Dropout for regularization
    tf.keras.layers.Dense(1, activation='linear')  # Output layer
])

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate the model using various metrics
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print evaluation metrics
print('Mean Squared Error (MSE):', mse)
print('Root Mean Squared Error (RMSE):', rmse)
print('Mean Absolute Error (MAE):', mae)
print('R^2 Score:', r2)

# Predict the output for a set of predefined feature values
random_features = {
    'bedrooms': 3,
    'bathrooms': 2,
    'sqft_living': 1800,
    'sqft_lot': 7500,
    'floors': 1,
    'waterfront': 0,
    'view': 0,
    'condition': 3,
    'grade': 7,
    'sqft_above': 1800,
    'sqft_basement': 0,
    'yr_built': 2000,
    'yr_renovated': 2010,
    'zipcode': 98105,
    'lat': 47.6205,
    'long': -122.3493,
    'sqft_living15': 1800,
    'sqft_lot15': 7500
}

# Filter random_features to include only top features
random_features = {key: random_features[key] for key in top_features}

# Convert the random features to a DataFrame and scale them
random_features_df = pd.DataFrame([random_features])
random_features_scaled = scaler.transform(random_features_df)

# Predict the output for the random features
predicted_output = model.predict(random_features_scaled)

# Print the random features and the corresponding predicted output
print('Random Features:', random_features)
print('Predicted Output:', predicted_output[0][0])


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 439345119232.0000 - val_loss: 396420186112.0000
Epoch 2/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 420628234240.0000 - val_loss: 384398557184.0000
Epoch 3/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 390994919424.0000 - val_loss: 355426631680.0000
Epoch 4/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 375638360064.0000 - val_loss: 311083270144.0000
Epoch 5/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 334329151488.0000 - val_loss: 255414616064.0000
Epoch 6/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 261361238016.0000 - val_loss: 197824102400.0000
Epoch 7/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 221847404544.0000 - val_loss: 147799785472.0000
Epoch 8/50
[1m43