# Setup

In [1]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor

# Preprocessing

In [2]:
# Load the competition's training table from the Kaggle input directory.
train_csv_path = r"../input/ventilator-pressure-prediction/train.csv"
train_df = pd.read_csv(train_csv_path)

In [3]:
# Keep the row identifiers aside, then strip both identifier columns so they
# are not passed on as model features.
# NOTE: this cell rebinds `train_df`; re-running it without reloading the CSV
# raises a KeyError because "id" is already gone.
id_columns = ["breath_id", "id"]
train_ids = train_df.loc[:, "id"]
train_df = train_df.drop(id_columns, axis="columns")

In [4]:
# Split the training frame into the feature columns (X) and the regression
# target (y).
target_column = "pressure"
X = train_df.drop(target_column, axis="columns")
y = train_df[target_column]

In [5]:
# Column groups: the first is imputed and standardised, the second is
# one-hot encoded.
num_attribs = ["time_step", "u_in"]
cat_attribs = ["R", "C", "u_out"]

# Numeric branch: fill missing values with the column median, then scale.
numeric_steps = [
    ("imputer", SimpleImputer(strategy="median")),
    ("standard_scaler", StandardScaler()),
]
num_pipeline = Pipeline(steps=numeric_steps)

# Route each column group through its own preprocessing branch.
column_branches = [
    ("num_pipeline", num_pipeline, num_attribs),
    ("1hot_encoder", OneHotEncoder(), cat_attribs),
]
full_pipeline = ColumnTransformer(transformers=column_branches)

# Fit the preprocessing on the training features and transform them in one go.
X_tfm = full_pipeline.fit_transform(X)

# Training

In [6]:
# Gradient-boosted tree regressor. "gpu_hist" selects XGBoost's GPU histogram
# tree builder, so this cell presumably needs a GPU-enabled runtime.
xgb_params = {"tree_method": "gpu_hist"}
clf = XGBRegressor(**xgb_params)

In [7]:
# Fit on only the first 100,000 rows of the transformed training data; the
# rest of the training set is not used by this cell.
n_train_rows = 100000
clf.fit(X_tfm[:n_train_rows], y.values[:n_train_rows])

# Inference

In [8]:
# Load the test table, remember its row ids for the submission file, and
# remove the id column from the frame.
test_csv_path = r"../input/ventilator-pressure-prediction/test.csv"
test_df = pd.read_csv(test_csv_path)
test_ids = test_df.loc[:, "id"]
test_df = test_df.drop("id", axis="columns")

In [9]:
# Apply the preprocessing that was already fitted on the training features.
# Use `transform`, not `fit_transform`: re-fitting here would re-learn the
# imputation medians, scaling statistics and one-hot categories from the test
# set, so the model would receive inputs scaled differently from those it was
# trained on.
X_test = full_pipeline.transform(test_df)

# Predict one pressure value per test row with the trained regressor.
preds = clf.predict(X_test)

# Submission

In [10]:
# Pair each test row id with its predicted pressure and write the table to
# disk without the DataFrame index.
submission_columns = {"id": test_ids, "pressure": preds}
submission = pd.DataFrame(submission_columns)
submission.to_csv("submission.csv", index=False)