In [None]:
# Chios
import numpy as np 
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_absolute_error, mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the pre-processed train/test tables. The paths are absolute, so the CSV
# files must already exist under /content/houseprice before this cell runs.
train_csv = '/content/houseprice/train_processed.csv'
test_csv = '/content/houseprice/test_processed.csv'
train, test = pd.read_csv(train_csv), pd.read_csv(test_csv)

In [None]:
# Separate the label from the features: keep `resale_price` as a NumPy array and
# rebind `train` to a frame without that column.
# NOTE(review): later cells apply np.expm1 to this target and to predictions, so
# the column is presumably already log1p-transformed — confirm in preprocessing.
# This cell is not re-runnable on its own: once the column is dropped, `train`
# has to be reloaded before executing it again.
target_variable = train.resale_price.values
train = train.drop(columns=['resale_price'])

In [None]:
# Shared 10-fold splitter for cross-validation; shuffling with a fixed seed
# keeps the fold assignment the same from run to run.
kf = KFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)
# Accumulators for per-model CV results (nothing in the visible cells fills them).
cv_scores, cv_std = [], []
    
def rmse(y, y_pred):
    """Return the square root of `mean_squared_error(y, y_pred)`.

    Args:
        y: Ground-truth values.
        y_pred: Predicted values, aligned with `y`.
    """
    mse = mean_squared_error(y, y_pred)
    return np.sqrt(mse)

def cv_rmse(model):
    """Cross-validate `model` and return one RMSE value per fold.

    Relies on the notebook-level `train`, `target_variable` and `kf` objects.
    The scorer "neg_mean_squared_error" yields negated MSE values, so the sign
    is flipped before taking the square root.

    Args:
        model: Estimator passed straight through to `cross_val_score`.

    Returns:
        Array of per-fold RMSE scores.
    """
    neg_mse_per_fold = cross_val_score(
        model,
        train,
        target_variable,
        scoring="neg_mean_squared_error",
        cv=kf,
    )
    return np.sqrt(-neg_mse_per_fold)

In [None]:
# Hyper-parameters for the XGBoost regressor, grouped by role so they are easier
# to scan and tune.
xgb_params = dict(
    # learning task and boosting schedule
    objective="reg:squarederror",
    n_estimators=2800,
    learning_rate=0.05,
    max_depth=8,
    # row / column subsampling
    subsample=0.8,
    colsample_bytree=0.8,
    colsample_bylevel=0.65,
    # regularisation terms
    reg_alpha=0.95,
    reg_lambda=0.69,
    # reproducibility and parallelism
    random_state=42,
    n_jobs=-1,
    # NOTE(review): 'gpu_hist' needs a GPU-enabled XGBoost build; newer XGBoost
    # releases deprecate it in favour of tree_method='hist' with device='cuda'
    # — check against the installed version before changing.
    tree_method='gpu_hist',
)
xgb = XGBRegressor(**xgb_params)

In [None]:
# Score the model with K-fold CV, then report the mean and the spread of the
# per-fold RMSE values.
xgb_score = cv_rmse(xgb)
fold_mean, fold_std = xgb_score.mean(), xgb_score.std()
print("mean: {}".format(fold_mean))
print("std: {}".format(fold_std))

In [None]:
# Fit the regressor on the entire training set; the cells below use this fitted
# model for the importance plot and for predictions.
xgb.fit(X=train, y=target_variable)

In [None]:
# Rank the features by the fitted model's importance scores (highest first) and
# draw them as a horizontal bar chart.
feature_scores = pd.Series(xgb.feature_importances_, index=train.columns).sort_values(ascending=False)
f, ax = plt.subplots(figsize=(30, 24))
# Draw on the axes created above rather than relying on the implicit current axes.
ax = sns.barplot(x=feature_scores, y=feature_scores.index, ax=ax)
ax.set_title("Visualize feature scores of the features")
ax.set_yticklabels(feature_scores.index)
ax.set_xlabel("Feature importance score")
ax.set_ylabel("Features")
# Save BEFORE showing: once plt.show() has rendered the figure, a following
# plt.savefig() operates on a new, empty current figure and writes a blank image.
# Saving through the figure handle also removes any dependence on pyplot state.
f.savefig("XGB-Features Importances")
plt.show()

In [None]:
# Compare in-sample predictions (the model is evaluated on the same `train` it
# was fitted on) with the true targets. Both sides go through np.expm1.
# NOTE(review): this presumes the target was log1p-transformed upstream — confirm.
data = pd.DataFrame({
    'prediction': np.expm1(xgb.predict(train)).ravel(),  # flatten to 1-D
    'target': np.expm1(target_variable)
})
# `sns.regplot` has no `palette` parameter, so passing one raises TypeError.
# The argument is dropped and the default colour is used; pass `color=` to
# choose a specific one.
sns.regplot(x='prediction', y='target', data=data)

In [None]:
# Predict on the test set, undo the log-scale with expm1, and write the
# two-column submission file (row position as Id, prediction as Predicted).
prediction = np.expm1(xgb.predict(test))
data = dict(
    Id=np.arange(len(prediction)),
    Predicted=prediction.ravel(),
)
submission = pd.DataFrame(data)
submission.to_csv('submission.csv', index=False)