In [241]:
import logging
import pandas as pd
import pickle

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix
)

from src.constants.constants import FEATURES_PATH, MODELS_PATH


In [32]:
# Hand-picked subset of feature names, kept as a plain list so it can be used
# directly as a column selector (see the `[ft_imp]` hint on the data-loading lines).
ft_imp = [
    "rsi_14",
    "bb_width",
    "volume_ratio",
    "day_of_month",
    "macd_histogram",
    "price_diff_1",
    "volume_sma_20",
    "dist_to_min_14d",
    "sma_14",
    "volatility_14",
]

In [59]:
# Training split: scaled feature frame plus the `target` column of the label file.
# To train on the reduced feature set, append `[ft_imp]` to the X_train line.
X_train = pd.read_parquet(path=FEATURES_PATH / "X_train_scaled.parquet")
y_train = pd.read_parquet(path=FEATURES_PATH / "y_train.parquet").loc[:, "target"]


In [143]:
# Quick look at the first two rows of the training features.
X_train.head(n=2)

Unnamed: 0,sma_7,sma_14,sma_21,sma_50,sma_200,ema_12,ema_26,rsi_14,macd,macd_signal,...,year,day_sin,day_cos,month_sin,month_cos,is_weekend,is_month_start,is_month_end,is_quarter_start,is_quarter_end
0,-2.342933,-2.192504,-2.144133,-1.939454,-1.491174,-2.296576,-2.185962,-2.12656,-0.715491,-0.513344,...,-1.756551,-0.609265,-1.271214,-1.240593,-0.203508,-0.633845,-0.179993,-0.179993,-0.102815,-0.102815
1,-2.384752,-2.235595,-2.159306,-1.954108,-1.490596,-2.332227,-2.208738,-2.093536,-0.804216,-0.580138,...,-1.756551,-1.374201,-0.31146,-1.240593,-0.203508,1.577673,-0.179993,-0.179993,-0.102815,-0.102815


In [359]:
# Candidate classifiers explored in this notebook, keyed by a short name.
#
# Each estimator used to live in its own cell and every one of those cells
# rebound `model`, so a top-to-bottom run silently trained whichever cell came
# last (AdaBoost) instead of the estimator that had been picked by running
# cells out of order. Selecting by name keeps "Restart Kernel -> Run All"
# deterministic. All hyperparameters are unchanged.
#
# The default below is the RandomForest: its cell carried the execution count
# immediately preceding the fit cell.
MODEL_NAME = "random_forest"

candidate_models = {
    "gradient_boosting": GradientBoostingClassifier(
        n_estimators=10,
        learning_rate=0.05,
        max_depth=3,
        random_state=42,
        warm_start=True,
        verbose=0,
        subsample=0.8,
        # NOTE(review): per the scikit-learn docs, validation_fraction is only
        # used when n_iter_no_change is set, which it is not here.
        validation_fraction=0.5,
    ),
    "random_forest": RandomForestClassifier(
        n_estimators=100,
        max_depth=3,
        min_samples_split=10,
        min_samples_leaf=5,
        random_state=42,
        class_weight='balanced',
        verbose=0,
        warm_start=False,
        max_features=0.7,
        oob_score=True,
        n_jobs=-1,  # use all CPU cores
    ),
    "logistic_regression": LogisticRegression(
        # NOTE(review): max_iter=5 is a very low iteration cap; the solver may
        # stop before converging. Confirm this is intentional.
        max_iter=5,
        random_state=42,
        n_jobs=1,
        fit_intercept=False,
        class_weight='balanced',  # compensate for class imbalance
    ),
    "mlp": MLPClassifier(
        hidden_layer_sizes=(100,), activation='relu', solver='adam',
        alpha=0.001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001,
        power_t=0.5, max_iter=10000, shuffle=False, random_state=42, tol=0.0001, verbose=False,
        warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=True,
        validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=100, max_fun=15000
    ),
    "ada_boost": AdaBoostClassifier(estimator=None, n_estimators=10, learning_rate=1, random_state=42),
}

# The estimator every later cell (fit / predict / metrics) operates on.
# Re-running this cell yields a fresh, unfitted instance.
model = candidate_models[MODEL_NAME]

In [374]:
# Train the currently selected estimator on the training split.
model.fit(X=X_train, y=y_train)

In [375]:
# In-sample predictions, used only for the training-set metrics.
y_train_pred = model.predict(X=X_train)

In [376]:
# Score the in-sample predictions with each metric, in the order
# accuracy, precision, recall, F1.
train_accuracy, train_precision, train_recall, train_f1 = (
    metric(y_train, y_train_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

In [377]:
# Report the training-set metrics.
# The emoji and accented characters in the headers had been mis-decoded
# (UTF-8 bytes read as a single-byte encoding); they are restored here.
# Headers without placeholders are plain strings rather than f-strings.
print("✅ Modelo entrenado")
print("📊 Métricas en TRAIN:")
print(f"   Accuracy: {train_accuracy:.4f}")
print(f"   Precision: {train_precision:.4f}")
print(f"   Recall: {train_recall:.4f}")
print(f"   F1-Score: {train_f1:.4f}")

✅ Modelo entrenado
📊 Métricas en TRAIN:
   Accuracy: 0.7908
   Precision: 0.7944
   Recall: 0.8008
   F1-Score: 0.7976


In [378]:
# Test split: scaled feature frame plus the `target` column of the label file.
# To evaluate on the reduced feature set, append `[ft_imp]` to the X_test line.
X_test = pd.read_parquet(path=FEATURES_PATH / "X_test_scaled.parquet")
y_test = pd.read_parquet(path=FEATURES_PATH / "y_test.parquet").loc[:, "target"]


In [379]:
# Predictions on the test split: hard labels, plus the probability taken from
# column index 1 of predict_proba (presumably the positive class for 0/1 labels).
y_pred = model.predict(X=X_test)
y_proba = model.predict_proba(X=X_test)[:, 1]

In [380]:
#df_preds = pd.DataFrame({"y_test": y_test, "y_pred": y_pred, "y_proba": y_proba})

In [381]:
# y_pred = [1 if x > 0.75 else 0 for x in df_preds.y_proba]

In [382]:
# Test-set metrics for the fitted model.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_proba)

# Confusion matrix. The labels are pinned to [0, 1] so the result is always
# 2x2; without them, a split where only one class occurs in y_test and y_pred
# yields a 1x1 matrix and the cm[0,1] / cm[1,*] lookups below raise IndexError.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# The emoji and accented characters in the headers had been mis-decoded;
# they are restored here. Headers without placeholders are plain strings.
print("📊 Métricas en TEST:")
print(f"   Accuracy: {accuracy:.4f}")
print(f"   Precision: {precision:.4f}")
print(f"   Recall: {recall:.4f}")
print(f"   F1-Score: {f1:.4f}")
print(f"   ROC-AUC: {roc_auc:.4f}")
print("\n📊 Confusion Matrix:")
print(f"   TN: {cm[0,0]}  FP: {cm[0,1]}")
print(f"   FN: {cm[1,0]}  TP: {cm[1,1]}")

📊 Métricas en TEST:
   Accuracy: 0.3704
   Precision: 0.3864
   Recall: 0.7083
   F1-Score: 0.5000
   ROC-AUC: 0.3611

📊 Confusion Matrix:
   TN: 3  FP: 27
   FN: 7  TP: 17


In [369]:
# Pair every training column with the fitted model's importance score and rank
# them from most to least important. Requires an estimator that exposes
# `feature_importances_`.
df_imp = (
    pd.DataFrame(
        {
            "feature": X_train.columns,
            "imp": model.feature_importances_,
        }
    )
    .sort_values(by="imp", ascending=False)
)

In [370]:
# Names of the ten highest-ranked features.
df_imp["feature"].head(10).tolist()

['rsi_14',
 'sma_200',
 'volume_ratio',
 'bb_width',
 'close_lag_2',
 'macd_signal',
 'close_lag_1',
 'min_close_7d',
 'sma_14',
 'price_diff_3']

In [371]:
# Side-by-side table of true labels, predicted labels and predicted probabilities
# for the test split.
df_preds = pd.DataFrame(
    {
        "y_test": y_test,
        "y_pred": y_pred,
        "y_proba": y_proba,
    }
)

In [372]:
# First ten test-set predictions.
df_preds.iloc[:10]

Unnamed: 0,y_test,y_pred,y_proba
0,0,0,0.489948
1,1,1,0.551181
2,0,0,0.461211
3,1,0,0.461211
4,1,0,0.478736
5,1,1,0.527281
6,1,0,0.478736
7,1,0,0.495385
8,0,1,0.524736
9,0,1,0.524736


In [267]:
# (rows, columns) of the prediction table; the row count is the number of test samples.
df_preds.shape

(54, 3)

In [268]:
# Last ten test-set predictions.
df_preds.iloc[-10:]

Unnamed: 0,y_test,y_pred,y_proba
44,0,0,0.32677
45,0,0,0.333828
46,1,0,0.333828
47,1,0,0.326876
48,0,0,0.327249
49,1,0,0.333183
50,0,0,0.326141
51,0,0,0.330517
52,1,0,0.330014
53,1,0,0.331094


In [131]:
# Highest predicted probability in the test split.
df_preds["y_proba"].max()

0.7100403790310092

In [132]:
# Test rows whose predicted probability exceeds 0.7.
df_preds.loc[df_preds["y_proba"] > 0.7]

Unnamed: 0,y_test,y_pred,y_proba
37,0,1,0.71004
