In [3]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

# Load and preprocess the data
data = pd.read_csv('kc_house_data.csv')

# Define the target and feature columns
target_column = 'price'  # Replace 'price' with your actual target column name
features = [
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront', 'view',
    'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated',
    'zipcode', 'lat', 'long', 'sqft_living15', 'sqft_lot15'
]

# Separate the features and target
X = data[features]
y = data[target_column]

# Train a RandomForestRegressor to get feature importances
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X, y)

# Get feature importances and select top 7 features
N = 7  # Number of top features to select
feature_importances = pd.Series(rf.feature_importances_, index=X.columns)
top_features = feature_importances.nlargest(N).index

# Use only the top 7 features
X = X[top_features]

# Preprocess the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Build the model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='linear')
])

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate the model using various metrics
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('Mean Squared Error (MSE):', mse)
print('Root Mean Squared Error (RMSE):', rmse)
print('Mean Absolute Error (MAE):', mae)
print('R^2 Score:', r2)

# Predict the output for a set of predefined feature values
random_features = {
    'bedrooms': 3,
    'bathrooms': 2,
    'sqft_living': 1800,
    'sqft_lot': 7500,
    'floors': 1,
    'waterfront': 0,
    'view': 0,
    'condition': 3,
    'grade': 7,
    'sqft_above': 1800,
    'sqft_basement': 0,
    'yr_built': 2000,
    'yr_renovated': 2010,
    'zipcode': 98105,
    'lat': 47.6205,
    'long': -122.3493,
    'sqft_living15': 1800,
    'sqft_lot15': 7500
}

# Filter random_features to include only top features
random_features = {key: random_features[key] for key in top_features}

random_features_df = pd.DataFrame([random_features])
random_features_scaled = scaler.transform(random_features_df)
predicted_output = model.predict(random_features_scaled)

print('Random Features:', random_features)
print('Predicted Output:', predicted_output[0][0])


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 411620933632.0000 - val_loss: 396784631808.0000
Epoch 2/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 417845313536.0000 - val_loss: 387913580544.0000
Epoch 3/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 430757806080.0000 - val_loss: 366217560064.0000
Epoch 4/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 379450359808.0000 - val_loss: 330267394048.0000
Epoch 5/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 340236763136.0000 - val_loss: 283011186688.0000
Epoch 6/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 304715595776.0000 - val_loss: 231184596992.0000
Epoch 7/50
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 247490740224.0000 - val_loss: 179889635328.0000
Epoch 8/50
[1m43