In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from xgboost import XGBRegressor 

# Load the competition data, using the 'Id' column as the row index.
X_train = pd.read_csv('../input/housing-prices-competition-for-kaggle-learn-users/train.csv', index_col='Id')
X_test = pd.read_csv('../input/housing-prices-competition-for-kaggle-learn-users/test.csv', index_col='Id')

# Rows without a target value cannot be used for training.
X_train.dropna(axis=0, subset=['SalePrice'], inplace=True)

# Separate the target from the predictors.
y_train = X_train.SalePrice
X_train.drop(['SalePrice'], axis=1, inplace=True)

# Column groups are derived from the training frame only; the same lists are
# then used to select columns from the test frame.
object_cols = [col for col in X_train.columns if X_train[col].dtype == "object"]
numeric_cols = [cname for cname in X_train.columns if X_train[cname].dtype in ['int64', 'float64']]

# Median for numeric gaps, most frequent value for categorical gaps.
imputer_numeric = SimpleImputer(strategy='median')
imputer_categorical = SimpleImputer(strategy='most_frequent')

# Imputers are fitted on the training data only and then applied to the test
# data. The imputers return plain arrays, so column names and the index are
# re-attached by hand.
imputed_X_train_numeric = pd.DataFrame(
    imputer_numeric.fit_transform(X_train[numeric_cols]),
    columns=numeric_cols,
    index=X_train.index,
)
imputed_X_test_numeric = pd.DataFrame(
    imputer_numeric.transform(X_test[numeric_cols]),
    columns=numeric_cols,
    index=X_test.index,
)

imputed_X_train_categorical = pd.DataFrame(
    imputer_categorical.fit_transform(X_train[object_cols]),
    columns=object_cols,
    index=X_train.index,
)
imputed_X_test_categorical = pd.DataFrame(
    imputer_categorical.transform(X_test[object_cols]),
    columns=object_cols,
    index=X_test.index,
)

# Recombine the numeric and categorical parts side by side.
X_train = pd.concat([imputed_X_train_numeric, imputed_X_train_categorical], axis=1)
X_test = pd.concat([imputed_X_test_numeric, imputed_X_test_categorical], axis=1)

# One-hot encode each frame independently, then force the test frame onto the
# training frame's columns (join='left'): dummy columns seen only in test are
# dropped, and dummy columns seen only in train are added to test.
X_train_baseline = pd.get_dummies(X_train)
X_test_baseline = pd.get_dummies(X_test)
# fill_value=0: a category that never occurs in the test set means "indicator
# is 0" for every test row. Without it the added columns would be NaN, which
# the model would treat as missing data rather than as an absent category.
X_train_baseline, X_test_baseline = X_train_baseline.align(
    X_test_baseline, join='left', axis=1, fill_value=0
)

# Fit the gradient-boosted regressor on the full training set.
model = XGBRegressor(n_estimators=1000, learning_rate=0.05, random_state=0, n_jobs=-1)
model.fit(X_train_baseline, y_train)

# Predict on the test set and write the submission file (Id, SalePrice).
predictions_test = model.predict(X_test_baseline)
output = pd.DataFrame({'Id': X_test_baseline.index, 'SalePrice': predictions_test})
output.to_csv('submission.csv', index=False)