In [1]:
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor
import graph_utils

from sklearn.model_selection import train_test_split
from sklearn import metrics

In [2]:
mistral_frame = pd.read_csv("../processed_data/cc_mistral_ratings.csv")
chatgpt_frame = pd.read_csv("../processed_data/gptrated_frame.csv")
llama31_frame = pd.read_csv("../processed_data/cc_llama31_ratings.csv")

edstays = pd.read_csv("../raw_data/edstay_encounters.csv")

In [3]:
# remove unrated from mistral -1
mistral_frame = mistral_frame[mistral_frame["rating"] != -1]

# remove unrated from llama31 -1
llama31_frame = llama31_frame[llama31_frame["rating"] != -1]

In [4]:
mistral_ed = edstays.copy()
chatgpt_ed = edstays.copy()
llama31_ed = edstays.copy()

In [5]:
# Create a dictionary from mistral_frame
rating_dict = mistral_frame.set_index("chiefcomplaint")["rating"].to_dict()

# Map the dictionary to the 'chiefcomplaint' column in mistral_ed
mistral_ed["rating"] = mistral_ed["chiefcomplaint"].map(rating_dict)

# Replace NaN values with -9999
mistral_ed["rating"].fillna(-9999, inplace=True)

# Repeat the process for chatgpt_ed and chatgpt_frame
rating_dict = chatgpt_frame.set_index("chiefcomplaint")["rating"].to_dict()
chatgpt_ed["rating"] = chatgpt_ed["chiefcomplaint"].map(rating_dict)
chatgpt_ed["rating"].fillna(-9999, inplace=True)

# Repeat the process for llama31_ed and llama31_frame
rating_dict = llama31_frame.set_index("chiefcomplaint")["rating"].to_dict()
llama31_ed["rating"] = llama31_ed["chiefcomplaint"].map(rating_dict)
llama31_ed["rating"].fillna(-9999, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  mistral_ed['rating'].fillna(-9999, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  chatgpt_ed['rating'].fillna(-9999, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting va

In [6]:
train_columns = [
    "temperature",
    "heartrate",
    "o2sat",
    "sbp",
    "dbp",
    "resprate",
    "pain",
    "acuity",
    "age_on_adm",
    "gender",
    "arrival_transport",
    "rating",
]
feature_column = "los"

In [7]:
mistral_train = mistral_ed[train_columns + [feature_column]]
chatgpt_train = chatgpt_ed[train_columns + [feature_column]]
llama31_train = llama31_ed[train_columns + [feature_column]]

In [8]:
category_cols = ["pain", "acuity", "gender", "arrival_transport"]

# cast category columns to string
mistral_train[category_cols] = mistral_train[category_cols].apply(
    lambda col: col.astype(str)
)

# replace nan in category columns with 'missing'
mistral_train[category_cols] = mistral_train[category_cols].fillna("missing")

chatgpt_train[category_cols] = chatgpt_train[category_cols].apply(
    lambda col: col.astype(str)
)

chatgpt_train[category_cols] = chatgpt_train[category_cols].fillna("missing")

llama31_train[category_cols] = llama31_train[category_cols].apply(
    lambda col: col.astype(str)
)

llama31_train[category_cols] = llama31_train[category_cols].fillna("missing")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mistral_train[category_cols] = mistral_train[category_cols].apply(lambda col: col.astype(str))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mistral_train[category_cols] = mistral_train[category_cols].fillna('missing')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chatgpt_train[category_cols] = c

In [9]:
X_mistral = mistral_train.drop(columns=[feature_column])
y_mistral = mistral_train[feature_column]

X_chatgpt = chatgpt_train.drop(columns=[feature_column])
y_chatgpt = chatgpt_train[feature_column]

X_llama31 = llama31_train.drop(columns=[feature_column])
y_llama31 = llama31_train[feature_column]

In [10]:
X_mistral_train, X_mistral_test, y_mistral_train, y_mistral_test = train_test_split(
    X_mistral, y_mistral, test_size=0.2, random_state=42
)
X_chatgpt_train, X_chatgpt_test, y_chatgpt_train, y_chatgpt_test = train_test_split(
    X_chatgpt, y_chatgpt, test_size=0.2, random_state=42
)

X_mistral_test, X_mistral_val, y_mistral_test, y_mistral_val = train_test_split(
    X_mistral_test, y_mistral_test, test_size=0.5, random_state=42
)
X_chatgpt_test, X_chatgpt_val, y_chatgpt_test, y_chatgpt_val = train_test_split(
    X_chatgpt_test, y_chatgpt_test, test_size=0.5, random_state=42
)

X_llama31_train, X_llama31_test, y_llama31_train, y_llama31_test = train_test_split(
    X_llama31, y_llama31, test_size=0.2, random_state=42
)
X_llama31_test, X_llama31_val, y_llama31_test, y_llama31_val = train_test_split(
    X_llama31_test, y_llama31_test, test_size=0.5, random_state=42
)

In [11]:
X_mistral_test.to_csv("../predicted_data/Xtest.csv", index=False)

In [11]:
mistral_model = CatBoostRegressor(
    iterations=10000,
    loss_function="RMSE",
    verbose=100,
    early_stopping_rounds=100,
    cat_features=category_cols,
    task_type="GPU",
)

In [12]:
mistral_model.fit(
    X_mistral_train,
    y_mistral_train,
    eval_set=(X_mistral_val, y_mistral_val),
    use_best_model=True,
)

Learning rate set to 0.060467
0:	learn: 397.5620832	test: 396.7911717	best: 396.7911717 (0)	total: 37.4ms	remaining: 6m 14s
100:	learn: 380.1994026	test: 379.8039979	best: 379.8039979 (100)	total: 2.78s	remaining: 4m 32s
200:	learn: 378.8004338	test: 379.1411053	best: 379.1411053 (200)	total: 5.86s	remaining: 4m 45s
300:	learn: 377.9922687	test: 378.9124505	best: 378.9100983 (299)	total: 8.94s	remaining: 4m 48s
400:	learn: 376.9313898	test: 378.7664245	best: 378.7654705 (399)	total: 11.9s	remaining: 4m 44s
500:	learn: 376.1775028	test: 378.6829103	best: 378.6829103 (500)	total: 14.9s	remaining: 4m 42s
600:	learn: 375.6301818	test: 378.6057403	best: 378.6057403 (600)	total: 17.9s	remaining: 4m 39s
700:	learn: 375.0415185	test: 378.5968963	best: 378.5769328 (669)	total: 20.8s	remaining: 4m 35s
bestTest = 378.5769328
bestIteration = 669
Shrink model to first 670 iterations.


<catboost.core.CatBoostRegressor at 0x7f59e2aef730>

In [14]:
y_pred = mistral_model.predict(X_mistral_test)

In [15]:
# save mistral predictions with real data
mistral_test = X_mistral_test.copy()
mistral_test["los"] = y_mistral_test
mistral_test["pred"] = y_pred
mistral_test.to_csv("../predicted_data/mistralpreds.csv", index=False)

In [16]:
rmse = metrics.mean_squared_error(y_mistral_test, y_pred, squared=False)
print(f"Mistral RMSE: {rmse}")
mae = metrics.mean_absolute_error(y_mistral_test, y_pred)
print(f"Mistral MAE: {mae}")
r2 = metrics.r2_score(y_mistral_test, y_pred)
print(f"Mistral R2: {r2}")

Mistral RMSE: 365.6396459282988
Mistral MAE: 222.9922767085095
Mistral R2: 0.0932398374277239




In [14]:
chatgpt_model = CatBoostRegressor(
    iterations=10000,
    loss_function="RMSE",
    verbose=100,
    early_stopping_rounds=100,
    cat_features=category_cols,
    task_type="GPU",
)

In [15]:
chatgpt_model.fit(
    X_chatgpt_train,
    y_chatgpt_train,
    eval_set=(X_chatgpt_val, y_chatgpt_val),
    use_best_model=True,
)

Learning rate set to 0.060467
0:	learn: 397.6286687	test: 396.8653822	best: 396.8653822 (0)	total: 38.9ms	remaining: 6m 28s
100:	learn: 381.0577464	test: 380.6066160	best: 380.6066160 (100)	total: 2.77s	remaining: 4m 31s
200:	learn: 379.6839566	test: 379.9821166	best: 379.9781068 (194)	total: 5.63s	remaining: 4m 34s
300:	learn: 378.7755521	test: 379.7424232	best: 379.7398065 (299)	total: 8.54s	remaining: 4m 35s
400:	learn: 377.5884067	test: 379.6857555	best: 379.6696244 (376)	total: 11.5s	remaining: 4m 34s
500:	learn: 376.9496833	test: 379.6168326	best: 379.6156745 (496)	total: 14.2s	remaining: 4m 29s
600:	learn: 376.3484424	test: 379.6042363	best: 379.5934959 (545)	total: 17s	remaining: 4m 26s
bestTest = 379.5934959
bestIteration = 545
Shrink model to first 546 iterations.


<catboost.core.CatBoostRegressor at 0x7f5961a29330>

In [19]:
y_pred = chatgpt_model.predict(X_chatgpt_test)

In [20]:
# save predcitions with original data
chatgpt_test = X_chatgpt_test.copy()
chatgpt_test["los"] = y_chatgpt_test
chatgpt_test["pred"] = y_pred
chatgpt_test.to_csv("../predicted_data/chatgpt35.csv", index=False)

In [21]:
rmse = metrics.mean_squared_error(y_chatgpt_test, y_pred, squared=False)
print(f"ChatGPT RMSE: {rmse}")
mae = metrics.mean_absolute_error(y_chatgpt_test, y_pred)
print(f"ChatGPT MAE: {mae}")
r2 = metrics.r2_score(y_chatgpt_test, y_pred)
print(f"ChatGPT R2: {r2}")

ChatGPT RMSE: 366.1559438627346
ChatGPT MAE: 223.45400060729978
ChatGPT R2: 0.09067726559801648




In [12]:
llama31_model = CatBoostRegressor(
    iterations=10000,
    loss_function="RMSE",
    verbose=100,
    early_stopping_rounds=100,
    cat_features=category_cols,
    task_type="GPU",
)

In [13]:
llama31_model.fit(
    X_llama31_train,
    y_llama31_train,
    eval_set=(X_llama31_val, y_llama31_val),
    use_best_model=True,
)

Learning rate set to 0.060467
0:	learn: 397.5396786	test: 396.8237866	best: 396.8237866 (0)	total: 33.1ms	remaining: 5m 30s
100:	learn: 379.4918110	test: 379.4634757	best: 379.4634757 (100)	total: 2.64s	remaining: 4m 19s
200:	learn: 377.8878496	test: 378.6984156	best: 378.6984156 (200)	total: 5.27s	remaining: 4m 16s
300:	learn: 376.9558501	test: 378.4189690	best: 378.4189690 (300)	total: 8.05s	remaining: 4m 19s
400:	learn: 375.7746553	test: 378.3011057	best: 378.3011057 (400)	total: 10.9s	remaining: 4m 21s
500:	learn: 374.9194601	test: 378.2632797	best: 378.2617673 (499)	total: 13.6s	remaining: 4m 17s
600:	learn: 374.2294199	test: 378.2823046	best: 378.2483457 (518)	total: 16.5s	remaining: 4m 17s
bestTest = 378.2483457
bestIteration = 518
Shrink model to first 519 iterations.


<catboost.core.CatBoostRegressor at 0x7fcf7fa3b5e0>

In [14]:
y_pred = llama31_model.predict(X_llama31_test)

In [15]:
llama31_test = X_llama31_test.copy()
llama31_test["los"] = y_llama31_test
llama31_test["pred"] = y_pred
llama31_test.to_csv("../predicted_data/llama31.csv", index=False)

In [16]:
rmse = metrics.mean_squared_error(y_llama31_test, y_pred, squared=False)
print(f"Llama31 RMSE: {rmse}")
mae = metrics.mean_absolute_error(y_llama31_test, y_pred)
print(f"Llama31 MAE: {mae}")
r2 = metrics.r2_score(y_llama31_test, y_pred)
print(f"Llama31 R2: {r2}")

Llama31 RMSE: 364.3015499224548
Llama31 MAE: 222.0678448337494
Llama31 R2: 0.09986445818858036




In [None]:
graph_utils.plot_feature_importances(mistral_model, "Mistral")

In [None]:
graph_utils.plot_feature_importances(chatgpt_model, "ChatGPT")

In [None]:
graph_utils.plot_regression_results(y_mistral_test, y_pred, "Mistral")

In [None]:
graph_utils.plot_regression_results(y_chatgpt_test, y_pred, "ChatGPT")

In [None]:
graph_utils.plot_residuals(y_mistral_test, y_pred, "Mistral")

In [None]:
graph_utils.plot_residuals(y_chatgpt_test, y_pred, "ChatGPT")

In [None]:
graph_utils.plot_feature_importances(llama31_model, "Llama31")

In [None]:
graph_utils.plot_regression_results(y_llama31_test, y_pred, "Llama31")