# Setup

In [1]:
!pip install sklearn --upgrade -q
!pip install catboost -q

In [2]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from catboost import CatBoostRegressor

# Preprocessing

In [3]:
# Read the competition's training table into memory.
train_df = pd.read_csv(
    filepath_or_buffer=r"../input/ventilator-pressure-prediction/train.csv"
)

In [4]:
# Set the row ids aside, then strip both identifier columns from the frame.
# NOTE(review): this cell rebinds train_df, so re-running it without reloading
# the CSV raises a KeyError on the already-removed "id" column.
train_ids = train_df.loc[:, "id"]
train_df = train_df.drop(["breath_id", "id"], axis="columns")

In [5]:
# Separate the regression target ("pressure") from the predictor columns.
y = train_df.loc[:, "pressure"]
X = train_df.drop("pressure", axis="columns")

In [6]:
# Column groups: continuous inputs are imputed and scaled, the remaining
# inputs are treated as categories and one-hot encoded.
num_attribs = ["time_step", "u_in"]
cat_attribs = ["R", "C", "u_out"]

# Numeric branch: fill gaps with the column median, then standardise.
num_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("standard_scaler", StandardScaler()),
    ]
)

# Route each column group through its own transformer; columns not listed in
# either group are dropped by ColumnTransformer's default remainder setting.
full_pipeline = ColumnTransformer(
    transformers=[
        ("num_pipeline", num_pipeline, num_attribs),
        ("1hot_encoder", OneHotEncoder(), cat_attribs),
    ]
)

# Learn the preprocessing statistics on the training features and encode them.
X_tfm = full_pipeline.fit_transform(X)

# Training

In [7]:
# Two gradient-boosted regressors, both requesting GPU training.
# (Despite the "_clf" suffix these are regressors, not classifiers.)
xgb_clf = XGBRegressor(tree_method="gpu_hist")
cb_clf = CatBoostRegressor(task_type="GPU")

# Disabled alternatives, not used by the rest of the notebook.
# NOTE(review): criterion="mae" was renamed to "absolute_error" in newer
# scikit-learn releases; confirm before re-enabling either line.
# rnd_frst_clf = RandomForestRegressor(criterion="mae", max_depth=15, n_jobs=-1)
# ext_trs_clf = ExtraTreesRegressor(criterion="mae", max_depth=15, n_jobs=-1)

In [8]:
# NOTE(review): n_obs_train is assigned but not referenced by the fits below,
# which train on every row of X_tfm — confirm whether subsampling was intended.
n_obs_train = 100000

# Fit both regressors on the full encoded training matrix and raw target array.
xgb_clf.fit(X_tfm, y.to_numpy())
cb_clf.fit(X_tfm, y.to_numpy())

# Inference

In [9]:
# Read the test table, keep its row ids for the submission file, and remove
# the id column from the frame that is passed on to preprocessing.
test_df = pd.read_csv(
    filepath_or_buffer=r"../input/ventilator-pressure-prediction/test.csv"
)
test_ids = test_df.loc[:, "id"]
test_df = test_df.drop("id", axis="columns")

In [10]:
# Encode the test rows with the preprocessing that was fitted on the training
# data. Calling fit_transform here would re-estimate the imputer medians,
# scaler statistics and one-hot categories from the test set, so the models
# would receive features on a different scale/encoding than they were trained on.
# Only the columns the pipeline was fitted on are passed in (test_df still
# carries breath_id, which is not a model input).
X_test = full_pipeline.transform(test_df[num_attribs + cat_attribs])

# Per-model predictions, then a simple unweighted average of the two.
preds_xgb = xgb_clf.predict(X_test)
preds_1 = cb_clf.predict(X_test)
preds = (preds_xgb + preds_1) / 2

# Submission

In [11]:
# Pair each test row id with its averaged prediction and write the result
# as a two-column CSV without the index.
submission = test_ids.to_frame(name="id").assign(pressure=preds)
submission.to_csv("submission.csv", index=False)