# **Requirement: [Dataset Link](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data)**

# **Import necessary libraries**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
import xgboost as xgb

# **Load the data**

In [None]:
# Read the three competition CSV files from the working directory, in the
# order: training set, test set, sample submission.
train_data, test_data, sample_submission = (
    pd.read_csv(csv_name)
    for csv_name in ("train.csv", "test.csv", "sample_submission.csv")
)

# **Data Preprocessing**

In [None]:
# Combine train and test data so both go through one imputation pass and share
# one category-to-integer mapping per column.
# The training row count is captured first so the split at the end does not
# depend on `train_data` after that name has been rebound.
n_train = train_data.shape[0]
combined_data = pd.concat([train_data, test_data], ignore_index=True)

# Handle missing values with a forward fill (each gap takes the value of the
# previous row). `fillna(method='ffill')` is deprecated in pandas 2.x, so
# `ffill()` is called directly.
# 'SalePrice' is excluded: it is the prediction target, and rows that lack it
# (presumably every test row) would otherwise receive the last training price
# as a fabricated target.
# NOTE: a forward fill cannot fill gaps that come before a column's first
# observed value, so such leading gaps stay NaN.
feature_cols = [name for name in combined_data.columns if name != 'SalePrice']
combined_data[feature_cols] = combined_data[feature_cols].ffill()

# Encode categorical (object-dtype) variables as integer codes, keeping each
# fitted encoder so codes can be mapped back to the original labels.
categorical_cols = combined_data.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    combined_data[col] = le.fit_transform(combined_data[col])
    label_encoders[col] = le

# Split the combined data back into train and test sets. Copies are taken so
# that later column assignments on either frame do not operate on a slice of
# `combined_data`.
train_data = combined_data.iloc[:n_train, :].copy()
test_data = combined_data.iloc[n_train:, :].copy()

# **Prepare the data**

In [None]:
# Target: the sale price column of the training frame
y = train_data['SalePrice']

# Features: every remaining column except the row identifier and the target
X = train_data.drop(columns=['Id', 'SalePrice'])

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# **Model Training**

In [None]:
# Initialize XGBoost model.
# Early stopping is configured on the estimator: the `early_stopping_rounds`
# argument of `fit()` was deprecated in XGBoost 1.6 and removed in 2.0, so
# passing it to `fit()` raises a TypeError on current releases.
# (The constructor parameter requires XGBoost >= 1.6.)
model = xgb.XGBRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    random_state=42,
    early_stopping_rounds=5,
)

# Train the model on the training data; the validation split is the evaluation
# set that early stopping monitors.
model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], verbose=False)

# **Model Evaluation**

In [None]:
# Score the held-out validation rows with the fitted model
y_pred = model.predict(X_valid)

# RMSE is the square root of the mean squared error between the true and
# predicted sale prices
validation_mse = mean_squared_error(y_valid, y_pred)
rmse = np.sqrt(validation_mse)
print(f"Root Mean Squared Error (RMSE): {rmse}")

# **Plot**

In [None]:
import plotly.express as px

# Columns mapped to the x, y and z axes of the 3D plot
feature1, feature2, feature3 = 'OverallQual', 'GrLivArea', 'TotalBsmtSF'

# Build a 3D scatter of the three features from the training frame, with each
# point colored by its sale price
fig = px.scatter_3d(
    train_data,
    x=feature1,
    y=feature2,
    z=feature3,
    color='SalePrice',
    opacity=0.7,
    title='3D Scatter Plot of Features vs. Sale Price',
)

# Render the figure
fig.show()