In [1]:
import logging
import pandas as pd
import pickle

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix
)

from src.constants.constants import FEATURES_PATH, MODELS_PATH


In [2]:
# Load the training split: the feature table from X_train_scaled.parquet as a
# DataFrame, and the "target" column of y_train.parquet as a Series.
X_train, y_train = (
    pd.read_parquet(FEATURES_PATH / "X_train_scaled.parquet"),
    pd.read_parquet(FEATURES_PATH / "y_train.parquet")['target'],
)


In [3]:
# Preview the first two rows of the training feature table.
X_train.head(2)

Unnamed: 0,sma_7,sma_14,sma_21,sma_50,sma_200,ema_12,ema_26,rsi_14,macd,macd_signal,...,year,day_sin,day_cos,month_sin,month_cos,is_weekend,is_month_start,is_month_end,is_quarter_start,is_quarter_end
0,-2.342933,-2.192504,-2.144133,-1.939454,-1.491174,-2.296576,-2.185962,-2.12656,-0.715491,-0.513344,...,-1.756551,-0.609265,-1.271214,-1.240593,-0.203508,-0.633845,-0.179993,-0.179993,-0.102815,-0.102815
1,-2.384752,-2.235595,-2.159306,-1.954108,-1.490596,-2.332227,-2.208738,-2.093536,-0.804216,-0.580138,...,-1.756551,-1.374201,-0.31146,-1.240593,-0.203508,1.577673,-0.179993,-0.179993,-0.102815,-0.102815


In [156]:
# Load the held-out split: the feature table from X_test_scaled.parquet as a
# DataFrame, and the "target" column of y_test.parquet as a Series.
X_test, y_test = (
    pd.read_parquet(FEATURES_PATH / "X_test_scaled.parquet"),
    pd.read_parquet(FEATURES_PATH / "y_test.parquet")['target'],
)


In [122]:
# Candidate estimator: logistic regression.
# NOTE(review): `model` is assigned again by the estimator cells further down,
# so on a top-to-bottom run this instance is replaced before anything is fitted.
model = LogisticRegression(
    class_weight='balanced',  # to handle class imbalance
    fit_intercept=True,
    max_iter=100,
    n_jobs=1,
    random_state=42,
)

In [252]:
# Candidate estimator: gradient boosting with 5 stages.
# NOTE(review): `model` is assigned again by the RandomForest cell further
# down, so on a top-to-bottom run this instance is replaced before fitting.
model = GradientBoostingClassifier(
    n_estimators=5,
    learning_rate=0.8,
    max_depth=6,
    min_samples_split=15,
    min_samples_leaf=10,
    subsample=0.6,  # each stage is fitted on a 60% sample of the rows
    random_state=42,
    # Keep warm_start off: with it on, calling fit() again on this same
    # instance (e.g. re-running only the training cell) keeps the existing
    # stages and trains nothing new, silently reusing the previous model.
    warm_start=False,
    verbose=0,
    # validation_fraction was removed: scikit-learn only uses it when
    # n_iter_no_change is set, so it had no effect on this configuration.
)

In [392]:
# Candidate estimator: a small, shallow random forest. This is the last
# assignment to `model` before the training cell, so it is the estimator that
# gets fitted on a top-to-bottom run.
#
# oob_score is intentionally not enabled: with only 10 trees some rows never
# end up out-of-bag, scikit-learn warns about unreliable OOB estimates during
# fit(), and `oob_score_` is not read in any of the visible cells. Leaving it
# off does not change the fitted trees or their predictions.
model = RandomForestClassifier(
    n_estimators=10,
    max_depth=5,
    min_samples_split=0.05,  # float: interpreted as a fraction of the samples
    min_samples_leaf=0.01,   # float: interpreted as a fraction of the samples
    random_state=42,
    class_weight='balanced',  # reweight classes inversely to their frequency
    warm_start=False,
    max_features=0.5,  # float: fraction of the features considered per split
    n_jobs=1,
)

In [393]:
# Train the currently selected estimator and predict on the same rows it was
# fitted on; fit() returns the estimator itself, so predict() can be chained.
y_train_pred = model.fit(X_train, y_train).predict(X_train)

# Accuracy, precision, recall and F1 against the training labels, evaluated in
# that order and bound to the names the reporting cell reads.
train_accuracy, train_precision, train_recall, train_f1 = (
    scorer(y_train, y_train_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

  warn(


In [394]:
# Report the training-set metrics computed in the fitting cell.
# The emoji and the accented "é" are written as real UTF-8 characters (they
# were mis-decoded byte sequences), and strings without placeholders are plain
# literals rather than f-strings.
print("✅ Modelo entrenado")
print("📊 Métricas en TRAIN:")
print(f"   Accuracy: {train_accuracy:.4f}")
print(f"   Precision: {train_precision:.4f}")
print(f"   Recall: {train_recall:.4f}")
print(f"   F1-Score: {train_f1:.4f}")

✅ Modelo entrenado
📊 Métricas en TRAIN:
   Accuracy: 0.7322
   Precision: 0.7418
   Recall: 0.7358
   F1-Score: 0.7388


In [395]:
# Predictions on the held-out split: hard labels from predict() and the second
# predict_proba column as the score.
# NOTE(review): column 1 is assumed to be the positive class (label 1) —
# confirm against model.classes_.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# To experiment with a non-default decision threshold, derive y_pred from
# y_proba at this point, e.g. y_pred = (y_proba > 0.55).astype(int).

# Metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_proba)

# Confusion matrix. Passing labels=[0, 1] pins the layout to 2x2 with rows as
# the true class and columns as the predicted class, even if one class is
# absent from both y_test and y_pred; without it the matrix could be 1x1 and
# the cell lookups below would raise.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

print("📊 Métricas en TEST:")
print(f"   Accuracy: {accuracy:.4f}")
print(f"   Precision: {precision:.4f}")
print(f"   Recall: {recall:.4f}")
print(f"   F1-Score: {f1:.4f}")
print(f"   ROC-AUC: {roc_auc:.4f}")
print("\n📊 Confusion Matrix:")
print(f"   TN: {tn}  FP: {fp}")
print(f"   FN: {fn}  TP: {tp}")

📊 Métricas en TEST:
   Accuracy: 0.4815
   Precision: 0.4600
   Recall: 0.9583
   F1-Score: 0.6216
   ROC-AUC: 0.4965

📊 Confusion Matrix:
   TN: 3  FP: 27
   FN: 1  TP: 23


In [396]:
# df_imp = pd.DataFrame({"feature": X_train.columns, "imp": model.feature_importances_}).sort_values("imp", ascending=False)

In [351]:
# df_imp.head(10).feature.tolist()

In [352]:
# Per-row prediction table: true label, predicted label and predicted score,
# in that column order and carrying y_test's index.
df_preds = y_test.to_frame(name="y_test").assign(y_pred=y_pred, y_proba=y_proba)

In [353]:
# Show the last ten rows of the prediction table.
df_preds.tail(10)

Unnamed: 0,y_test,y_pred,y_proba
44,0,0,0.492486
45,0,0,0.492486
46,1,0,0.492486
47,1,0,0.492486
48,0,0,0.492486
49,1,0,0.492486
50,0,0,0.492486
51,0,0,0.492486
52,1,0,0.492486
53,1,0,0.492486


In [243]:
# Show the first ten rows of the prediction table.
df_preds.head(10)

Unnamed: 0,y_test,y_pred,y_proba
0,0,0,0.485039
1,1,0,0.485039
2,0,0,0.485039
3,1,0,0.485039
4,1,0,0.485039
5,1,0,0.485039
6,1,0,0.485039
7,1,0,0.485039
8,0,0,0.485039
9,0,0,0.485039


In [213]:
# Highest predicted score in the prediction table.
df_preds["y_proba"].max()

0.7106407139703336

In [214]:
# Rows whose predicted score is strictly above 0.7.
df_preds.loc[df_preds["y_proba"] > 0.7]

Unnamed: 0,y_test,y_pred,y_proba
44,0,1,0.710641
102,1,1,0.703203
