# Setup

In [14]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error

# Preprocessing

In [2]:
# Read the training split of the ventilator-pressure-prediction data into a DataFrame.
train_csv_path = r"../input/ventilator-pressure-prediction/train.csv"
train_df = pd.read_csv(train_csv_path)

In [4]:
# Set the row ids aside, then remove both identifier columns from the frame.
# NOTE: this cell rebinds train_df, so it must run exactly once after loading.
id_columns = ["breath_id", "id"]
train_ids = train_df["id"]
train_df = train_df.drop(labels=id_columns, axis="columns")

In [6]:
# Separate the regression target from every other remaining column.
target_col = "pressure"
X = train_df.drop(columns=[target_col])
y = train_df[target_col]

In [8]:
# Column groups: the first is imputed and scaled, the second is one-hot encoded.
num_attribs = ["time_step", "u_in"]
cat_attribs = ["R", "C", "u_out"]

# Numeric branch: fill missing values with the column median, then standardise.
num_steps = [
    ("imputer", SimpleImputer(strategy="median")),
    ("standard_scaler", StandardScaler()),
]
num_pipeline = Pipeline(steps=num_steps)

# Route each column group to its own transformer.
column_transformers = [
    ("num_pipeline", num_pipeline, num_attribs),
    ("1hot_encoder", OneHotEncoder(), cat_attribs),
]
full_pipeline = ColumnTransformer(transformers=column_transformers)

# Fit the preprocessing on the training features and transform them in one step.
X_tfm = full_pipeline.fit_transform(X)

# Training

In [12]:
# Fit a shallow regression tree on the leading slice of the training data.
#
# max_depth caps how many levels of splits the tree may grow. With a depth of 2
# the tree has at most 4 leaves, so it can emit at most 4 distinct predictions.
TREE_MAX_DEPTH = 2
# Only the first N_TRAIN_ROWS rows are used for fitting.
N_TRAIN_ROWS = 100_000
# random_state is pinned so that tie-breaking between equally good splits is
# repeatable from run to run.
clf = DecisionTreeRegressor(max_depth=TREE_MAX_DEPTH, random_state=42)
clf.fit(X_tfm[:N_TRAIN_ROWS], y.values[:N_TRAIN_ROWS])

In [16]:
# Load the test split, keep its row ids for the submission, and drop the id column.
test_csv_path = r"../input/ventilator-pressure-prediction/test.csv"
test_df = pd.read_csv(test_csv_path)
test_ids = test_df["id"]
test_df = test_df.drop(labels=["id"], axis="columns")

# Inference

In [18]:
# Apply the preprocessing exactly as it was fitted on the training features.
# Calling fit_transform here would re-estimate the imputation medians, scaling
# statistics and one-hot categories from the test set, overwriting the fitted
# state and handing the tree features on a different scale than it was trained on.
# Columns the ColumnTransformer was not configured for are dropped by its
# default remainder="drop" setting.
X_test = full_pipeline.transform(test_df)
preds = clf.predict(X_test)

# Submission

In [22]:
# Pair each test row id with its predicted pressure and write the result to
# submission.csv without the DataFrame index.
submission = pd.DataFrame({"id": test_ids}).assign(pressure=preds)
submission.to_csv("submission.csv", index=False)