In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

from utils.train_test_split import train_test_split
from utils.cross_validation import cross_validate
from utils.estimators import DecisionTree
import utils.metrics as metrics

from sklearn.metrics import PrecisionRecallDisplay, RocCurveDisplay

In [8]:
# Load the gene dataset. The relative path below assumes the notebook is
# executed from ./experimentacion.
gene_dataset = pd.read_csv(
    "../catedra/datos/data.csv",
    sep=",",
    encoding="utf-8",
)

# Separate the "target" column (labels) from the remaining columns (features)
# and convert both to NumPy arrays.
y = gene_dataset["target"].to_numpy()
X = gene_dataset.drop(columns="target").to_numpy()

# Split into train and test partitions with the project's own helper.
# NOTE(review): no seed is passed here; whether this split is reproducible
# across kernel restarts depends on utils.train_test_split -- confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
# Exercise 2.1
# Depth-3 decision tree; `model` is also handed to cross_validate in the
# exercise 2.2 cell.
model = DecisionTree(max_depth=3)

# Fit on the training split via a clone of `model`
# (presumably so `model` itself stays unfitted -- confirm in utils.estimators).
trained_model = (
    model
    .clone()
    .fit(X_train, y_train)
)

In [None]:
# Exercise 2.2


def _cv_report(label, metric_fn, use_proba):
    """Cross-validate `model` on the training split and print a summary for one metric.

    Reads the notebook globals `model`, `X_train` and `y_train`.

    Args:
        label: Text shown in the printed header, e.g. "accuracy".
        metric_fn: Scoring callable; it is passed to `cross_validate` and also
            called directly as `metric_fn(y_train, y_out)`.
        use_proba: Forwarded unchanged as the last positional argument of
            `cross_validate` (presumably selects probabilities instead of hard
            labels -- confirm in utils.cross_validation).

    Returns:
        Tuple `(split_train, split_val, y_out, tot, mean_train, mean_val)`:
        the three values returned by `cross_validate`, the metric evaluated on
        `y_train` against `y_out`, and the `.mean()` of the train and
        validation per-split scores.
    """
    split_train, split_val, y_out = cross_validate(model, X_train, y_train, metric_fn, use_proba)
    tot = metric_fn(y_train, y_out)
    mean_train = split_train.mean()
    mean_val = split_val.mean()
    # The four spaces before each "\n" reproduce the original message exactly
    # (they came from the indentation after a backslash line continuation).
    print(
        f"---{label}---\ntot:{tot},"
        f"    \n mean_train:{mean_train}, split_train:{split_train},"
        f"    \n mean_val:{mean_val}, split_val:{split_val}"
    )
    return split_train, split_val, y_out, tot, mean_train, mean_val


def _plot_curve(display_cls, y_score, title):
    """Draw a single 10x10 curve for `y_train` vs. `y_score` using a sklearn Display class.

    `from_predictions` already renders the curve, so the axes are created
    up front and passed via `ax=`. Calling `plt.figure(...)` beforehand and
    `.plot()` afterwards (as this cell used to do) leaves one empty figure and
    draws the curve twice on default-sized figures.

    Args:
        display_cls: `PrecisionRecallDisplay` or `RocCurveDisplay`.
        y_score: Scores passed as the second argument of `from_predictions`.
        title: Title set on the axes.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    display_cls.from_predictions(y_train, y_score, ax=ax)
    ax.set_title(title)
    plt.show()


# accuracy
(acc_split_train, acc_split_val, acc_y_pred,
 acc_tot, acc_mean_train, acc_mean_val) = _cv_report("accuracy", metrics.accuracy, False)

# auprc
(auprc_split_train, auprc_split_val, auprc_y_prob,
 auprc_tot, auprc_mean_train, auprc_mean_val) = _cv_report("auprc", metrics.auprc, True)
_plot_curve(PrecisionRecallDisplay, auprc_y_prob, "Precision-Recall curve")

# aucroc
(aucroc_split_train, aucroc_split_val, aucroc_y_prob,
 aucroc_tot, aucroc_mean_train, aucroc_mean_val) = _cv_report("aucroc", metrics.aucroc, True)
_plot_curve(RocCurveDisplay, aucroc_y_prob, "ROC curve")