In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, root_mean_squared_error, r2_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sqlalchemy import create_engine
from xgboost import XGBClassifier, XGBRegressor

In [None]:
# Connect to the database and load the data.
#
# The connection URL can be overridden with the MIMIC_DATABASE_URL environment
# variable so the notebook runs on machines other than the original author's;
# when the variable is unset, the original local URL is used unchanged.
database_url = os.environ.get(
    "MIMIC_DATABASE_URL",
    "postgresql://gms@localhost/mimiciv",
)

engine = create_engine(database_url)

# The connection is scoped to the `with` block, so it is released as soon as
# the query result has been read into the DataFrame.
with engine.connect() as connection:
    df = pd.read_sql(sql="SELECT * FROM mimiciv_icu.full_patient_data;", con=connection)

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts, memory usage).
df.info()

In [None]:
# Build a new frame without the *_id columns and the intime/outtime columns;
# `df` itself is left untouched.
dropped_columns = ['subject_id', 'hadm_id', 'stay_id', 'intime', 'outtime']
df_simplified = df.drop(columns=dropped_columns)

In [None]:
# Keep only the rows whose 'careunit' value differs from the literal string 'NA'.
# NOTE(review): this compares against the text 'NA' only; presumably real missing
# values (NaN/None) are not expected here — confirm against the source table.
careunit_is_known = df_simplified['careunit'] != 'NA'
df_subset = df_simplified.loc[careunit_is_known]

In [None]:
# Prepare the data for training and testing: every object-dtype column is
# replaced by integer codes produced by LabelEncoder.
cat_cols = df_subset.select_dtypes(include=['object']).columns
df_encoded = df_subset.copy()  # work on a copy so df_subset keeps its original values

# A single encoder instance is re-fitted for each column, so after the loop it
# only holds the class mapping of the last column processed.
label_encoder = LabelEncoder()
for col in cat_cols:
    encoded_values = label_encoder.fit_transform(df_encoded[col])
    df_encoded[col] = encoded_values

In [None]:
# Separate the features from the 'los' target, then hold out 20% of the rows
# for testing. The fixed random_state makes the split repeatable.
dfx = df_encoded.drop(columns=['los'])
los_target = df_encoded['los']

X_train, X_test, y_train, y_test = train_test_split(
    dfx,
    los_target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit an XGBoost regressor with default settings on the training split and
# predict 'los' for the held-out rows.
model = XGBRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report the regression metrics on the test split, one per line, in a fixed order.
metric_report = (
    ("Mean Absolute Error:", mean_absolute_error),
    ("Mean Squared Error:", mean_squared_error),
    ("Root Mean Squared Error:", root_mean_squared_error),
    ("R^2 Score:", r2_score),
)
for metric_label, metric_fn in metric_report:
    print(metric_label, metric_fn(y_test, y_pred))