In [2]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split



In [3]:
# Load the data
train_data = pd.read_csv(r"D:\kaggle\house prediction\data.csv")
test_data = pd.read_csv(r"D:\kaggle\house prediction\output.csv")



In [4]:
# Select the features and target variable
features = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 
            'waterfront', 'view', 'condition', 'sqft_above', 'sqft_basement', 
            'yr_built', 'yr_renovated', 'street', 'city', 'statezip', 'country']

X = train_data[features]
y = train_data['price']



In [5]:
# Encode categorical features
X = pd.get_dummies(X, columns=['street', 'city', 'statezip', 'country'], drop_first=True)



In [6]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Gradient Boosting Regressor model
gb_model = GradientBoostingRegressor(random_state=42)
gb_model.fit(X_train, y_train)



In [7]:
# Prepare test data
X_test_data = test_data[features]
X_test_data = pd.get_dummies(X_test_data, columns=['street', 'city', 'statezip', 'country'], drop_first=True)



In [8]:
# Align the test data columns with the training data columns
X_test_data = X_test_data.reindex(columns=X.columns, fill_value=0)



In [9]:
# Predict prices for the test data
test_data['predicted_price'] = gb_model.predict(X_test_data)

# Save the test DataFrame with predictions to a new CSV file
test_data.to_csv(r'D:\kaggle\house prediction\predicted_output_gb.csv', index=False)



In [10]:
# Calculate metrics
test_mae = mean_absolute_error(test_data['price'], test_data['predicted_price'])
test_mse = mean_squared_error(test_data['price'], test_data['predicted_price'])
test_rmse = test_mse ** 0.5
test_r2 = r2_score(test_data['price'], test_data['predicted_price'])

# Print metrics
print(f"Gradient Boosting Regressor Model Test Accuracy:")
print(f"Test Mean Absolute Error (MAE): {test_mae}")
print(f"Test Mean Squared Error (MSE): {test_mse}")
print(f"Test Root Mean Squared Error (RMSE): {test_rmse}")
print(f"Test R-squared (R²): {test_r2}")



Gradient Boosting Regressor Model Test Accuracy:
Test Mean Absolute Error (MAE): 116430.47812043784
Test Mean Squared Error (MSE): 212658196370.48645
Test Root Mean Squared Error (RMSE): 461148.77899706777
Test R-squared (R²): 0.3309278633435495
