In [None]:
import os 
import numpy as np
import pandas as pd

In [None]:
# The tuning code is imported from a package directory located under
# /kaggle/input. The import is resolved relative to the current working
# directory, so we switch there temporarily (presumably because that directory
# is not otherwise on the import path -- confirm).
#
# The switch back to /kaggle/working sits in a `finally` so that a failed
# import cannot leave the notebook running from /kaggle/input, where later
# relative paths would silently resolve against the wrong directory.
#
# NOTE(review): the star import is kept on purpose. Names used by later cells
# (HousePriceXGBoostTuner, SimpleImputer, StandardScaler, LabelEncoder) are not
# imported anywhere else in the visible notebook, so they are expected to come
# from this module.
os.chdir("/kaggle/input")
try:
    from housepricesxgbregressortuning.xgboost_hyperparameter_tuning import *
finally:
    os.chdir("/kaggle/working")

In [None]:
# Build the tuner on the competition training file and run its whole pipeline.
train_csv_path = "/kaggle/input/house-prices-advanced-regression-techniques/train.csv"
tuner = HousePriceXGBoostTuner(data_path=train_csv_path, random_state=42)

# Pipeline settings, gathered in one place so they are easy to tweak.
pipeline_options = {
    "tuning_method": 'randomized',  # Change to 'grid' for grid search
    "n_iter": 50,  # Adjust based on available time/compute
}

# Run complete pipeline
tuner.run_complete_pipeline(**pipeline_options)

In [None]:
# Prepare the competition test set for scoring with the tuned model.
#
# The test rows must be pushed through the SAME fitted transformations the
# model saw during training. Fitting the imputer, scaler and label encoders on
# the test rows themselves yields different medians, different means/variances
# and, whenever the set of categories differs between the two files, different
# integer codes for the same category. So every transformer below is fitted on
# the training file and only *applied* to the test rows.
#
# NOTE(review): this assumes the tuner fitted its own preprocessing on the full
# train.csv using the same steps as this cell (median impute + standard scale,
# 'Missing' fill + label encode, then the engineered features). Confirm against
# xgboost_hyperparameter_tuning.py; if the tuner exposes its fitted
# transformers, reuse those instead of refitting here.
best_model = tuner.best_model
train_df = pd.read_csv("/kaggle/input/house-prices-advanced-regression-techniques/train.csv")
df = pd.read_csv("/kaggle/input/house-prices-advanced-regression-techniques/test.csv")

numeric_features, categorical_features = tuner.load_and_explore_data()
all_features = numeric_features + categorical_features

# Work on a copy so the raw test frame `df` stays untouched by preprocessing.
X = df[all_features].copy()

# Process numeric features: statistics are learned from train, applied to test.
if numeric_features:
    numeric_imputer = SimpleImputer(strategy='median')
    scaler = StandardScaler()
    scaler.fit(numeric_imputer.fit_transform(train_df[numeric_features]))
    X[numeric_features] = scaler.transform(
        numeric_imputer.transform(X[numeric_features])
    )

# Process categorical features: the label -> code mapping is learned from
# train. LabelEncoder.transform raises on labels it has not seen, so the
# mapping is applied by lookup and unseen test categories get a dedicated code.
unseen_category_code = -1
for feature in categorical_features:
    # Fill missing values
    train_labels = train_df[feature].fillna('Missing').astype(str)
    test_labels = X[feature].fillna('Missing').astype(str)
    # Label encode
    le = LabelEncoder().fit(train_labels)
    code_by_label = {label: code for code, label in enumerate(le.classes_)}
    X[feature] = (
        test_labels.map(code_by_label).fillna(unseen_category_code).astype(int)
    )

original_features = len(X.columns)

# Engineered features. They are computed from the already-transformed columns,
# exactly as the original cell did, so the column set and order are unchanged.

# Total area features
area_features = [col for col in X.columns if 'SF' in str(col) or 'Area' in str(col)]
if len(area_features) >= 2:
    X['TotalArea'] = X[area_features].sum(axis=1)

# Age features
if 'YearBuilt' in X.columns and 'YrSold' in X.columns:
    X['HouseAge'] = X['YrSold'] - X['YearBuilt']

# Quality-related features
quality_features = [col for col in X.columns if 'Qual' in str(col)]
if len(quality_features) >= 2:
    X['OverallQuality'] = X[quality_features].mean(axis=1)

new_features = len(X.columns) - original_features
print(f"Added {new_features} engineered features")
print(f"Final feature set: {X.shape[1]} features")

# Update feature names
feature_names = list(X.columns)

X.shape

In [None]:
# Predict with the tuned model, then map the predictions back with expm1.
# expm1 is the inverse of log1p, so the model presumably was trained on
# log1p(SalePrice) -- confirm against the tuner's target handling.
logpreds = best_model.predict(X)
preds = np.expm1(logpreds)
# Show the converted predictions as the cell output.
preds

In [None]:
# Attach the predictions to the test frame as a new SalePrice column; the
# following cell selects Id and SalePrice from this frame for the submission.
df["SalePrice"] = preds
# Show the frame, now including the prediction column, as the cell output.
df

In [None]:
# Keep only the predicted price, keyed by Id, and write it out as a CSV file.
submission_path = "/kaggle/working/submission.csv"
submission_df = df.set_index("Id")[["SalePrice"]]
submission_df.to_csv(submission_path)

In [None]:
# Display the submission frame as a final visual check.
submission_df