In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor

In [None]:
# Load the labelled training set and split it into features and target.
data = pd.read_csv('/kaggle/input/house-prices-advanced-regression-techniques/train.csv')
X_train = data.drop(columns='SalePrice')
y_train = data['SalePrice']

# 'Id' is a row identifier, not a property of the house, so it must not be used
# as a predictor. It stays in X_train (the frame keeps its original columns) but
# is left out of both column lists below; ColumnTransformer's default
# remainder='drop' then keeps it away from the model.
feature_cols = [col_name for col_name in X_train.columns if col_name != 'Id']

# Classify columns with dtype predicates rather than exact dtype-name matches.
# Any column that lands in neither list is silently dropped by the
# ColumnTransformer, so text columns stored with a pandas string dtype and
# numeric columns of other widths (e.g. int32/float32) must be recognised too.
categorical_cols = [
    col_name for col_name in feature_cols
    if pd.api.types.is_string_dtype(X_train[col_name].dtype)
]
numerical_cols = [
    col_name for col_name in feature_cols
    if pd.api.types.is_numeric_dtype(X_train[col_name].dtype)
    and not pd.api.types.is_bool_dtype(X_train[col_name].dtype)
]

# Numeric gaps are filled with 0. strategy='constant' already uses 0 for numeric
# data when fill_value is omitted; it is spelled out here so the choice is visible.
numerical_transformer = SimpleImputer(strategy='constant', fill_value=0)

# Categorical gaps take the column's most frequent value, then each category is
# one-hot encoded. handle_unknown='ignore' makes categories that were not seen
# during fit encode as all zeros instead of raising at predict time.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoding', OneHotEncoder(handle_unknown='ignore')),
])

# Route each group of columns to its transformer; unlisted columns are dropped.
preprocessor = ColumnTransformer(transformers=[
    ('numerical', numerical_transformer, numerical_cols),
    ('categorical', categorical_transformer, categorical_cols),
])

In [None]:
# Gradient-boosting regressor; random_state=0 pins the estimator's random seed.
GBR_model = GradientBoostingRegressor(random_state=0)

# Bundle preprocessing and the estimator into one pipeline so the same fitted
# transformations are applied at both fit and predict time.
GBR_training = Pipeline(
    steps=[
        ('preprocessing', preprocessor),
        ('training', GBR_model),
    ]
)
GBR_training.fit(X_train, y_train)

# Load the competition test set and predict with the fitted pipeline.
X_test = pd.read_csv('/kaggle/input/house-prices-advanced-regression-techniques/test.csv')
predictions = GBR_training.predict(X_test)


In [None]:
# Pair each test-set Id with its predicted SalePrice and write the result to
# 'submission.csv' without the DataFrame index column.
submission_columns = {
    'Id': X_test['Id'],
    'SalePrice': predictions,
}
submissions = pd.DataFrame(data=submission_columns)
submissions.to_csv(path_or_buf='submission.csv', index=False)