In [136]:
# %%
## üìå Configuraci√≥n Inicial y Reproducibilidad
import numpy as np
import tensorflow as tf
import random

np.random.seed(42)
tf.random.set_seed(42)
random.seed(42)

import warnings
warnings.filterwarnings('ignore')


In [137]:
# %%
## üìå Importar Librer√≠as y Funciones de Descarga y Preprocesamiento

import numpy as np
import pandas as pd
import datetime
import time
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.fundamentaldata import FundamentalData
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import plotly.graph_objects as go
import yfinance as yf

# --- API Key y funci√≥n para descargar datos ---
ALPHA_VANTAGE_API_KEY = "6XE23J2QP58EE8L7"

def obtener_datos_alpha_vantage(ticker, api_key, intentos=5):
    ts = TimeSeries(key=api_key, output_format='pandas')
    for i in range(intentos):
        try:
            print(f"Descargando datos para {ticker} (Intento {i+1}/{intentos})...")
            data, _ = ts.get_daily(symbol=ticker, outputsize='full')
            data.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
            data.index = pd.to_datetime(data.index)
            return data.sort_index()
        except Exception as e:
            print(f"Error: {e}")
            time.sleep(5)
    raise Exception("No se pudieron descargar los datos.")


In [138]:
# %%
## üìå Configuraci√≥n del Ticker, Descarga de Datos e Indicadores T√©cnicos
ticker = 'TTD'
start_date = datetime.datetime(2022, 1, 1)
data = obtener_datos_alpha_vantage(ticker, ALPHA_VANTAGE_API_KEY)
data = data.loc[data.index >= start_date]
print(f"√öltima fecha disponible en los datos: {data.index.max()}")

# --- C√°lculo del RSI ---
window_rsi = 14
delta = data['Close'].diff(1)
gain = np.where(delta > 0, delta, 0)
loss = np.where(delta < 0, -delta, 0)
avg_gain = pd.Series(gain, index=data.index).ewm(span=window_rsi, adjust=False).mean()
avg_loss = pd.Series(loss, index=data.index).ewm(span=window_rsi, adjust=False).mean()
rs = avg_gain / (avg_loss + 1e-10)
data['RSI'] = 100 - (100 / (1 + rs))

# --- MACD, L√≠nea de Se√±al y Histograma ---
data['MACD'] = data['Close'].ewm(span=12, adjust=False).mean() - data['Close'].ewm(span=26, adjust=False).mean()
data['Signal_Line'] = data['MACD'].ewm(span=9, adjust=False).mean()
data['MACD_Histogram'] = data['MACD'] - data['Signal_Line']

# --- Estoc√°stico ---
data['%K'] = 100 * (data['Close'] - data['Low'].rolling(14).min()) / (data['High'].rolling(14).max() - data['Low'].rolling(14).min())
data['%D'] = data['%K'].rolling(3).mean()

# --- √çndice de Capital Institucional (ICI) basado en Volumen ---
data['Volume_MA_50'] = data['Volume'].rolling(window=50).mean()
data['Institutional_Index'] = data['Volume'] / data['Volume_MA_50']

# --- OBV ---
data['OBV'] = (np.sign(data['Close'].diff()) * data['Volume']).fillna(0).cumsum()

# --- Definir Entrada y Salida de Capital ---
entrada_umbral = 2.0
salida_umbral = 0.5
data['Entrada_Capital'] = (data['Institutional_Index'] > entrada_umbral).astype(int)
data['Salida_Capital'] = (data['Institutional_Index'] < salida_umbral).astype(int)


Descargando datos para TTD (Intento 1/5)...
√öltima fecha disponible en los datos: 2025-03-05 00:00:00


In [139]:
# %%
## üìå Preprocesamiento de Datos y Normalizaci√≥n
data.fillna(method='ffill', inplace=True)
data.dropna(inplace=True)

# --- Seleccionar caracter√≠sticas ---
features = ['RSI', 'MACD', 'Signal_Line', '%K', '%D', 'Volume', 'Institutional_Index', 'OBV']
X = data[features].values
y = data['Entrada_Capital'].values  

# --- Normalizaci√≥n ---
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler.fit_transform(X)

# Definir input_dim para el modelo LSTM
input_dim = X_scaled.shape[1]


In [140]:
# %%
## Wrapper Personalizado para KerasClassifier (Soluci√≥n para Python 3.12)
from scikeras.wrappers import KerasClassifier
from types import SimpleNamespace

class MyKerasClassifier(KerasClassifier):
    def _more_tags(self):
        return {
            "estimator_type": "classifier",
            "requires_y": True,
            "classifier_tags": {"multi_class": False, "multi_label": False}
        }



In [141]:
# %%
## üìå Pre-compilaci√≥n de predict para reducir retracing
dummy_input = np.reshape(X_scaled[:1], (1, 1, X_scaled.shape[1]))
_ = build_lstm_model(64, 25, 0.2, 0.001).predict(dummy_input)


[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 241ms/step


In [142]:
# %%
## Funci√≥n para Optimizar el Modelo LSTM con RandomizedSearchCV (parche para __sklearn_tags__)
def optimize_lstm(X_train, y_train, X_val, y_val, param_dist, cv_splits=3):
    # Configuramos EarlyStopping para evitar sobreentrenar
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    
    # Creamos la instancia del wrapper personalizado
    lstm_classifier = MyKerasClassifier(model=build_lstm_model, fit__callbacks=[early_stop], verbose=0)
    
    # Forzamos que la instancia devuelva las tags esperadas usando SimpleNamespace
    from types import SimpleNamespace
    lstm_classifier.__sklearn_tags__ = lambda: SimpleNamespace(
        estimator_type="classifier",
        requires_y=True,
        classifier_tags=SimpleNamespace(multi_class=False, multi_label=False)
    )
    
    from sklearn.model_selection import RandomizedSearchCV
    random_search = RandomizedSearchCV(estimator=lstm_classifier,
                                       param_distributions=param_dist,
                                       cv=TimeSeriesSplit(n_splits=cv_splits),
                                       scoring='accuracy',
                                       n_iter=5,
                                       random_state=42)
    random_search.fit(X_train, y_train)
    return random_search.best_estimator_.model, random_search.best_params_


In [143]:
# %%
## üìå Configuraci√≥n de Variables para la Validaci√≥n Cruzada
fold = 1
metrics_list = []
best_params_folds = []
y_test_final = None
y_pred_final = None

from sklearn.model_selection import TimeSeriesSplit
tscv = TimeSeriesSplit(n_splits=5)


In [144]:
# %%
## üìå Bucle de Validaci√≥n Cruzada y Optimizaci√≥n

for train_index, test_index in tscv.split(X_scaled):
    print(f"\n--- Fold {fold} ---")
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # --- Aplicar SMOTE si hay al menos 2 muestras de la clase minoritaria ---
    minority_class_samples = np.sum(y_train == 1)
    if minority_class_samples >= 2:
        print(f"Aplicando SMOTE en Fold {fold} con {minority_class_samples} muestras de la clase minoritaria...")
        from imblearn.over_sampling import SMOTE
        smote = SMOTE(random_state=42, k_neighbors=min(3, minority_class_samples-1))
        X_train, y_train = smote.fit_resample(X_train, y_train)
    
    # --- Remodelar datos para LSTM ---
    X_train_reshaped = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
    X_test_reshaped = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
    
    # --- Optimizaci√≥n de hiperpar√°metros para LSTM ---
    param_dist_lstm = {
        'neurons1': [64, 128],
        'neurons2': [25, 50],
        'dropout': [0.2, 0.3],
        'learning_rate': [0.001, 0.0005],
        'batch_size': [16, 32],
        'epochs': [30, 50]
    }
    best_lstm_model, best_params = optimize_lstm(X_train_reshaped, y_train, X_test_reshaped, y_test, param_dist_lstm)
    print(f"Fold {fold} - Mejores par√°metros LSTM: {best_params}")
    
    # --- Generar caracter√≠sticas usando el mejor modelo LSTM ---
    X_train_lstm_features = best_lstm_model.predict(X_train_reshaped).flatten()
    X_test_lstm_features = best_lstm_model.predict(X_test_reshaped).flatten()
    
    # --- Optimizaci√≥n de hiperpar√°metros para XGBoost ---
    param_dist_xgb = {
        'n_estimators': [100, 200],
        'learning_rate': [0.1, 0.05],
        'max_depth': [6, 8],
        'scale_pos_weight': [10, 15]
    }
    from sklearn.model_selection import RandomizedSearchCV
    grid_xgb = RandomizedSearchCV(estimator=xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
                                  param_distributions=param_dist_xgb, 
                                  cv=TimeSeriesSplit(n_splits=3), 
                                  scoring='accuracy', 
                                  n_iter=5, 
                                  random_state=42)
    grid_xgb.fit(X_train_lstm_features.reshape(-1, 1), y_train)
    print(f"Fold {fold} - Mejores par√°metros XGBoost: {grid_xgb.best_params_}")
    
    best_params_folds.append({
        'fold': fold,
        'lstm_params': best_params,
        'xgb_params': grid_xgb.best_params_
    })
    
    best_xgb_model = grid_xgb.best_estimator_
    y_pred = best_xgb_model.predict(X_test_lstm_features.reshape(-1, 1))
    
    # --- Evaluaci√≥n del fold ---
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    auc = roc_auc_score(y_test, y_pred)
    print(f"Fold {fold} - Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}, AUC-ROC: {auc:.4f}")
    
    metrics_list.append({
        'fold': fold,
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1_score': f1,
        'auc_roc': auc
    })
    
    y_test_final = y_test
    y_pred_final = y_pred
    
    fold += 1

avg_metrics = {key: np.mean([m[key] for m in metrics_list]) for key in ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc']}
print("\n--- M√©tricas Promedio en Cross-Validation ---")
for metric, value in avg_metrics.items():
    print(f"{metric}: {value:.4f}")

print("\n--- Mejores Par√°metros por Fold ---")
for params in best_params_folds:
    print(params)



--- Fold 1 ---
Aplicando SMOTE en Fold 1 con 7 muestras de la clase minoritaria...


AttributeError: 'dict' object has no attribute 'classifier_tags'

In [None]:
# %%
## üìå Visualizaci√≥n: Matriz de Confusi√≥n para el √∫ltimo fold
import seaborn as sns
plt.figure(figsize=(6, 5))
sns.heatmap(confusion_matrix(y_test_final, y_pred_final), annot=True, fmt="d", cmap="Blues")
plt.xlabel('Predicho')
plt.ylabel('Real')
plt.title('Matriz de Confusi√≥n - XGBoost (√öltimo Fold)')
plt.show()


In [None]:
# %%
## üìå Visualizaci√≥n: Gr√°fico del RSI
plt.figure(figsize=(12, 5))
plt.plot(data.index, data['RSI'], color='purple', label="RSI (14)")
plt.axhline(70, linestyle='--', color='red', alpha=0.5, label="Sobrecompra (70)")
plt.axhline(30, linestyle='--', color='green', alpha=0.5, label="Sobreventa (30)")
plt.ylim(0, 100)
plt.title(f'√çndice de Fuerza Relativa (RSI) de {ticker}')
plt.legend()
plt.show()


In [None]:
# %%
## üìå Visualizaci√≥n: Gr√°fico MACD
plt.figure(figsize=(12, 5))
plt.bar(data.index, data['MACD_Histogram'], color='gray', label="MACD Histogram")
plt.plot(data.index, data['MACD'], color='blue', label="MACD Line")
plt.plot(data.index, data['Signal_Line'], color='orange', label="L√≠nea de Se√±al")
plt.title(f'MACD de {ticker}')
plt.legend()
plt.show()


In [None]:
# %%
## üìå Visualizaci√≥n: Gr√°fico del OBV
plt.figure(figsize=(12, 5))
plt.plot(data.index, data['OBV'], color='blue', label="OBV")
plt.xlabel("Fecha")
plt.ylabel("OBV")
plt.title(f'On Balance Volume (OBV) de {ticker}')
plt.legend()
plt.show()


In [None]:
# %%
## üìå Datos Fundamentales desde Alpha Vantage y asignaci√≥n de 'Valoraci√≥n'
if 'Valoraci√≥n' not in data.columns:
    print("‚ö†Ô∏è Advertencia: La columna 'Valoraci√≥n' no existe en el DataFrame. Se asignar√° un valor por defecto.")
    data['Valoraci√≥n'] = np.nan

def obtener_valuacion_alpha(ticker, api_key):
    try:
        fd = FundamentalData(key=api_key, output_format='pandas')
        data_fundamental, _ = fd.get_company_overview(symbol=ticker)
        market_cap = float(data_fundamental.loc['MarketCapitalization'].values[0])
        return market_cap
    except Exception as e:
        print(f"‚ùå Error obteniendo datos fundamentales de Alpha Vantage: {e}")
        return np.nan

valuation_index = obtener_valuacion_alpha(ticker, ALPHA_VANTAGE_API_KEY)
if not np.isnan(valuation_index):
    data['Valoraci√≥n'] = np.full(len(data), valuation_index, dtype=np.float64)
else:
    data['Valoraci√≥n'] = 2.5  # Valor por defecto


In [None]:
# %%
## üìå Nueva condici√≥n de compra con OBV y otros indicadores
buy_signal = (data['RSI'] < 30) & \
             (data['%K'] < 20) & \
             (data['MACD'] < -5) & \
             (data['Valoraci√≥n'] < 2.5) & \
             (data['OBV'].diff() > 0)
data['Se√±al_Compra'] = 0
data.loc[buy_signal, 'Se√±al_Compra'] = 1


In [None]:
# %%
## üìå Gr√°fico de Velas Japonesas con Se√±ales Institucionales usando Plotly
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Candlestick(
    x=data.index, 
    open=data['Open'], 
    high=data['High'], 
    low=data['Low'], 
    close=data['Close'], 
    name='Precio'
))
fig.add_trace(go.Scatter(
    x=data.index[data['Entrada_Capital'] == 1], 
    y=data['Close'][data['Entrada_Capital'] == 1], 
    mode='markers', 
    marker=dict(size=10, color='green', symbol='triangle-up'), 
    name='Entrada de Capital'
))
fig.add_trace(go.Scatter(
    x=data.index[data['Salida_Capital'] == 1], 
    y=data['Close'][data['Salida_Capital'] == 1], 
    mode='markers', 
    marker=dict(size=10, color='blue', symbol='triangle-down'), 
    name='Salida de Capital'
))
fig.add_trace(go.Scatter(
    x=data.index[data['Se√±al_Compra'] == 1], 
    y=data['Close'][data['Se√±al_Compra'] == 1], 
    mode='markers', 
    marker=dict(size=10, color='yellow', symbol='triangle-up'), 
    name='Se√±al de Compra'
))
fig.update_layout(
    title=f'Se√±ales Institucionales para {ticker}',
    xaxis_title='Fecha',
    yaxis_title='Precio',
    xaxis_rangeslider_visible=False,
    template='plotly_dark',
    height=600,
    width=1000
)
fig.show()


In [None]:
# %%
## üìå Verificar valores de 'Valoraci√≥n'
print("Valores de 'Valoraci√≥n':")
print(data[['Valoraci√≥n']].dropna().head())


In [None]:
# %%
## üìå Obtenci√≥n de datos fundamentales desde Yahoo Finance
def obtener_valuacion_yahoo(ticker):
    try:
        print(f"üìä Descargando datos fundamentales para {ticker} desde Yahoo Finance...")
        stock = yf.Ticker(ticker)
        info = stock.info
        pe_ratio = info.get("trailingPE", None)
        pb_ratio = info.get("priceToBook", None)
        ev_to_ebitda = info.get("enterpriseToEbitda", None)
        valid_metrics = [m for m in [pe_ratio, pb_ratio, ev_to_ebitda] if m is not None]
        valuation_index = sum(valid_metrics) / len(valid_metrics) if valid_metrics else None
        return pe_ratio, pb_ratio, ev_to_ebitda, valuation_index
    except Exception as e:
        print(f"‚ùå Error obteniendo datos fundamentales: {e}")
        return None, None, None, None

pe_ratio, pb_ratio, ev_to_ebitda, valuation_index_yahoo = obtener_valuacion_yahoo(ticker)
if valuation_index_yahoo is not None:
    print(f"üìä P/E Ratio: {pe_ratio if pe_ratio else 'N/A'} | P/B Ratio: {pb_ratio if pb_ratio else 'N/A'} | EV/EBITDA: {ev_to_ebitda if ev_to_ebitda else 'N/A'}")
    print(f"üìä √çndice de Valoraci√≥n (Yahoo): {valuation_index_yahoo:.2f}")
    if valuation_index_yahoo > 2.5:
        print("‚ö†Ô∏è La empresa est√° MUY SOBREVALORADA üìà (Riesgo de ca√≠da)")
    elif valuation_index_yahoo < 1:
        print("‚úÖ La empresa est√° INFRAVALORADA üìâ (Oportunidad de compra)")
    else:
        print("üîç La empresa tiene una valoraci√≥n razonable.")
else:
    print("‚ùå No se pudo calcular el √≠ndice de valoraci√≥n desde Yahoo Finance.")


In [None]:
# %%
## üìå Agregar 'Valoraci√≥n' como caracter√≠stica y actualizar el conjunto de entrenamiento
features = ['RSI', 'MACD', 'Signal_Line', '%K', '%D', 'Volume', 'Institutional_Index', 'Valoraci√≥n']
X = data[features].values


In [None]:
# %%
## üìå Gr√°fico del √çndice de Valoraci√≥n con suavizado
plt.figure(figsize=(12, 5))
data['Valoraci√≥n_Suave'] = data['Valoraci√≥n'].rolling(window=5).mean()
plt.plot(data.index, data['Valoraci√≥n_Suave'], color='blue', linewidth=2, label="√çndice de Valoraci√≥n (Suavizado)")
min_valor = data['Valoraci√≥n'].min()
max_valor = data['Valoraci√≥n'].max()
margen = (max_valor - min_valor) * 0.1
plt.ylim(min_valor - margen, max_valor + margen)
plt.axhline(2.5, linestyle='--', color='red', alpha=0.5, label="Sobrevalorada (>2.5)")
plt.axhline(1, linestyle='--', color='green', alpha=0.5, label="Infravalorada (<1)")
plt.title(f'√çndice de Valoraci√≥n de {ticker}')
plt.legend()
plt.show()
