In [None]:
# Chios
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from catboost import CatBoostRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import KFold, cross_val_score

In [None]:
# Read the pre-processed training and test tables from the Colab data folder.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/houseprice/train_processed.csv',
        '/content/houseprice/test_processed.csv',
    )
)

In [None]:
# Pull the regression target out as a NumPy array, then rebind `train` to the
# feature-only frame. Re-running this cell without reloading `train` fails,
# because the column is already gone.
target_variable = train['resale_price'].values
train = train.drop(columns=['resale_price'])

In [None]:
# 10 shuffled folds with a fixed seed so the splits are repeatable between runs.
kf = KFold(n_splits=10, shuffle=True, random_state=42)
# Accumulators for cross-validation results (not filled in by the cells shown here).
cv_scores, cv_std = [], []
    
def rmse(y, y_pred):
    """Return the root-mean-squared error between ``y`` and ``y_pred``."""
    mse = mean_squared_error(y, y_pred)
    return np.sqrt(mse)

def cv_rmse(model):
    """Cross-validate ``model`` and return one RMSE value per fold.

    Uses the notebook-level ``train`` features, ``target_variable`` targets and
    the ``kf`` splitter. scikit-learn reports the *negated* MSE, so the sign is
    flipped before taking the square root.
    """
    neg_mse_per_fold = cross_val_score(
        model,
        train,
        target_variable,
        scoring="neg_mean_squared_error",
        cv=kf,
    )
    return np.sqrt(-neg_mse_per_fold)

In [None]:
# Hyper-parameters for the CatBoost regressor, collected in one place so they
# are easy to read and tweak. `task_type='GPU'` requests GPU training and
# `verbose=0` silences the per-iteration log.
cat_params = dict(
    loss_function='RMSE',
    random_seed=42,
    max_depth=10,
    learning_rate=0.05,
    random_strength=1,
    max_ctr_complexity=1,
    l2_leaf_reg=8,
    grow_policy='Lossguide',
    task_type='GPU',
    iterations=3000,
    verbose=0,
)
cat = CatBoostRegressor(**cat_params)

In [None]:
# Cross-validated RMSE per fold, summarised by its mean and spread.
cat_score = cv_rmse(cat)
fold_mean, fold_std = cat_score.mean(), cat_score.std()
print("mean: {}".format(fold_mean))
print("std: {}".format(fold_std))

In [None]:
# Refit on the full training set; the cross-validation above only scored the model.
cat.fit(X=train, y=target_variable)

In [None]:
# In-sample check: predicted vs. actual values on the training set itself, so
# the fit shown here is optimistic compared with the cross-validated score.
# NOTE(review): np.expm1 assumes `resale_price` was log1p-transformed during
# preprocessing — confirm against the preprocessing step.
data = pd.DataFrame({
    'prediction': np.expm1(np.ravel(cat.predict(train))),
    'target': np.expm1(target_variable)
})
# `palette` is not a parameter of seaborn.regplot (passing it raises
# TypeError), so the plot is drawn with the default colour.
sns.regplot(x='prediction', y='target', data=data)

In [None]:
# Rank the features by the fitted model's importance scores, highest first,
# and draw them as a horizontal bar chart.
feature_scores = pd.Series(cat.feature_importances_, index=train.columns).sort_values(ascending=False)
f, ax = plt.subplots(figsize=(30, 24))
# barplot already labels the y ticks with the feature names, so no manual
# set_yticklabels call is needed.
sns.barplot(x=feature_scores, y=feature_scores.index, ax=ax)
ax.set_title("Visualize feature scores of the features")
ax.set_xlabel("Feature importance score")
ax.set_ylabel("Features")
# Save BEFORE showing: with the inline backend plt.show() closes the figure,
# so a later plt.savefig() would write an empty image.
f.savefig("RF-Features Importances")
plt.show()

In [None]:
# Predict on the test set and map back from the model's output scale.
# NOTE(review): np.expm1 assumes the model was trained on a log1p target — confirm.
prediction = np.expm1(cat.predict(test))
data = {
    'Id': np.arange(len(prediction)),
    'Predicted': prediction.ravel(),
}
# Write the two-column submission file without the DataFrame index.
submission = pd.DataFrame(data)
submission.to_csv('submission.csv', index=False)