<a href="https://colab.research.google.com/github/dante161121/colapPraboomMil/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install nest_asyncio




Instala la librería nest_asyncio

In [None]:
import nest_asyncio, asyncio, json, websockets
import pandas as pd

nest_asyncio.apply()

APP_ID = 127820
TOKEN = "EkjypVRBbLlJhFA"

# buffer global donde se guardan los ticks
ticks_buffer = []

async def connect_and_stream(symbol="BOOM1000", n_ticks=50, timeout=10):
    url = f"wss://ws.derivws.com/websockets/v3?app_id={APP_ID}"
    print("Conectando a:", url)

    async with websockets.connect(url, ping_interval=None, close_timeout=timeout) as ws:
        # 1) Autorizar
        auth_req = {"authorize": TOKEN}
        print("Enviando authorize:", auth_req)
        await ws.send(json.dumps(auth_req))

        auth_resp = json.loads(await ws.recv())
        print("Auth msg_type:", auth_resp.get("msg_type"))
        if auth_resp.get("error"):
            print("ERROR en authorize:", auth_resp["error"])
            return
        print("Autorizado. Login ID:", auth_resp["authorize"]["loginid"])

        # 2) Suscribirse a ticks
        sub_req = {"ticks": symbol, "subscribe": 1}
        print("Enviando suscripción:", sub_req)
        await ws.send(json.dumps(sub_req))

        # 3) Leer n_ticks mensajes y guardarlos en ticks_buffer
        for i in range(n_ticks):
            print(f"Esperando tick {i+1}...")
            msg_raw = await asyncio.wait_for(ws.recv(), timeout=timeout)
            msg = json.loads(msg_raw)

            if msg.get("msg_type") == "tick":
                tick = msg["tick"]
                row = {
                    "symbol": tick["symbol"],
                    "epoch": tick["epoch"],
                    "quote": tick["quote"]
                }
                ticks_buffer.append(row)
                print(f"{i+1} | {row}")
            else:
                print(f"Mensaje {i+1} (no tick):", msg.get("msg_type"))

# Ejecutar: recoge 200 ticks de BOOM1000
asyncio.run(connect_and_stream(symbol="BOOM1000", n_ticks=200, timeout=10))


Conectando a: wss://ws.derivws.com/websockets/v3?app_id=127820
Enviando authorize: {'authorize': 'EkjypVRBbLlJhFA'}
Auth msg_type: authorize
Autorizado. Login ID: VRTC4588120
Enviando suscripción: {'ticks': 'BOOM1000', 'subscribe': 1}
Esperando tick 1...
1 | {'symbol': 'BOOM1000', 'epoch': 1771693231, 'quote': 14619.286}
Esperando tick 2...
2 | {'symbol': 'BOOM1000', 'epoch': 1771693232, 'quote': 14619.279}
Esperando tick 3...
3 | {'symbol': 'BOOM1000', 'epoch': 1771693233, 'quote': 14619.256}
Esperando tick 4...
4 | {'symbol': 'BOOM1000', 'epoch': 1771693234, 'quote': 14619.242}
Esperando tick 5...
5 | {'symbol': 'BOOM1000', 'epoch': 1771693235, 'quote': 14619.218}
Esperando tick 6...
6 | {'symbol': 'BOOM1000', 'epoch': 1771693236, 'quote': 14619.193}
Esperando tick 7...
7 | {'symbol': 'BOOM1000', 'epoch': 1771693237, 'quote': 14619.181}
Esperando tick 8...
8 | {'symbol': 'BOOM1000', 'epoch': 1771693238, 'quote': 14619.169}
Esperando tick 9...
9 | {'symbol': 'BOOM1000', 'epoch': 17716

para recoger 200 ticks y dejarlos almacenados en ticks_buffer

In [None]:
df = pd.DataFrame(ticks_buffer)
df["datetime"] = pd.to_datetime(df["epoch"], unit="s")
df.set_index("datetime", inplace=True)
df.head()


Unnamed: 0_level_0,symbol,epoch,quote
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2026-02-21 17:00:31,BOOM1000,1771693231,14619.286
2026-02-21 17:00:32,BOOM1000,1771693232,14619.279
2026-02-21 17:00:33,BOOM1000,1771693233,14619.256
2026-02-21 17:00:34,BOOM1000,1771693234,14619.242
2026-02-21 17:00:35,BOOM1000,1771693235,14619.218


Bloque 2 – Construir el DataFrame base

In [None]:
import numpy as np

# 1) Retorno logarítmico por tick
df["ret_log"] = np.log(df["quote"]).diff()

# 2) Volatilidad rolling (filtro de volatilidad)
df["vol_50"] = df["ret_log"].rolling(50).std()

# 3) Medias móviles sobre ticks (tendencia)
df["ma_20"] = df["quote"].rolling(20).mean()
df["ma_50"] = df["quote"].rolling(50).mean()

# 4) Señal de cruce de medias (trend-following básico)
df["signal_ma"] = np.where(df["ma_20"] > df["ma_50"], 1, -1)

# 5) Sesión intradía (para trend-following por sesión)
# Suponiendo hora UTC; ajusta si hace falta
hour = df.index.hour
df["session"] = np.select(
    [
        (hour >= 0) & (hour < 8),   # Asia
        (hour >= 8) & (hour < 16),  # Europa
        (hour >= 16) & (hour < 24)  # América
    ],
    ["asia", "europa", "america"],
    default="otro"
)

# 6) Detección simple de spikes (para probabilidad de spike y mean reversion)
# Umbral de spike -> ajusta este número según tus datos
spike_threshold = df["quote"].diff().quantile(0.995)
df["is_spike"] = (df["quote"].diff() >= spike_threshold).astype(int)

# 7) Tiempo desde el último spike (modelo de 'spike intensity')
df["ticks_since_spike"] = df["is_spike"].groupby((df["is_spike"] == 1).cumsum()).cumcount()

df.tail()


Unnamed: 0_level_0,symbol,epoch,quote,ret_log,vol_50,ma_20,ma_50,signal_ma,session,is_spike,ticks_since_spike
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2026-02-21 17:03:46,BOOM1000,1771693426,14616.266,-8.210028e-07,7.878285e-07,14616.44145,14616.67604,-1,america,0,106
2026-02-21 17:03:47,BOOM1000,1771693427,14616.246,-1.368339e-06,7.86085e-07,14616.42305,14616.65936,-1,america,0,107
2026-02-21 17:03:48,BOOM1000,1771693428,14616.245,-6.841702e-08,7.994319e-07,14616.40505,14616.6429,-1,america,0,108
2026-02-21 17:03:49,BOOM1000,1771693429,14616.219,-1.778844e-06,8.044108e-07,14616.3869,14616.6263,-1,america,0,109
2026-02-21 17:03:50,BOOM1000,1771693430,14616.2,-1.299927e-06,7.982322e-07,14616.36925,14616.60986,-1,america,0,110


Bloque 3 – Features cuantitativas básicas

Construir el DataFrame base

In [None]:
# Horizonte de predicción: próximos 10 ticks (ajusta a tu gusto)
horizon = 10

# Spike futuro: 1 si hay al menos un spike en los próximos 'horizon' ticks
df["future_spike"] = (
    df["is_spike"]
    .shift(-horizon)
    .rolling(horizon)
    .max()
)

df["future_spike"] = df["future_spike"].fillna(0).astype(int)
df[["is_spike", "future_spike"]].tail()


Unnamed: 0_level_0,is_spike,future_spike
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2026-02-21 17:03:46,0,0
2026-02-21 17:03:47,0,0
2026-02-21 17:03:48,0,0
2026-02-21 17:03:49,0,0
2026-02-21 17:03:50,0,0


Bloque 4 – Definir objetivo para Random Forest
Necesitamos decirle al modelo qué queremos predecir. Por ejemplo:

Objetivo: “¿habrá spike en los próximos N ticks?” (clasificación binaria).

In [None]:
# Codificar la sesión como variables dummy (one-hot)
df_dummies = pd.get_dummies(df, columns=["session"], drop_first=False)

# Expected session dummy columns
expected_session_dummies = ["session_asia", "session_europa", "session_america"]

# Add missing session dummy columns with 0 if they don't exist
for col in expected_session_dummies:
    if col not in df_dummies.columns:
        df_dummies[col] = 0

# Lista de features cuantitativas
feature_cols = [
    "quote",
    "ret_log",
    "vol_50",
    "ma_20",
    "ma_50",
    "signal_ma",
    "is_spike",
    "ticks_since_spike",
    # dummies de sesión:
    "session_asia",
    "session_europa",
    "session_america",
]

# Filtrar solo filas sin NaNs (por rolling)
data = df_dummies.dropna(subset=feature_cols + ["future_spike"]).copy()

X = data[feature_cols]
y = data["future_spike"]

X.head(), y.value_counts()

(                         quote       ret_log        vol_50        ma_20  \
 datetime                                                                  
 2026-02-21 17:01:21  14618.612 -2.052178e-07  6.269759e-07  14618.74010   
 2026-02-21 17:01:22  14618.609 -2.052179e-07  6.320966e-07  14618.72675   
 2026-02-21 17:01:23  14618.586 -1.573338e-06  6.320981e-07  14618.71260   
 2026-02-21 17:01:24  14618.584 -1.368121e-07  6.415987e-07  14618.69880   
 2026-02-21 17:01:25  14618.562 -1.504935e-06  6.386584e-07  14618.68470   
 
                            ma_50  signal_ma  is_spike  ticks_since_spike  \
 datetime                                                                   
 2026-02-21 17:01:21  14618.93438         -1         0                 50   
 2026-02-21 17:01:22  14618.92098         -1         0                 51   
 2026-02-21 17:01:23  14618.90758         -1         0                 52   
 2026-02-21 17:01:24  14618.89442         -1         0                 53   
 202

Bloque 5 – Preparar dataset para Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Train / test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=False  # respetar orden temporal
)

rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=None,
    random_state=42,
    n_jobs=-1
)

rf.fit(X_train, y_train)

# Evaluación básica
y_pred = rf.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[45]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        45

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45





Bloque 6 – Entrenar el modelo Random Forest

In [None]:
# Probabilidad estimada de spike
# Check if the model was trained on more than one class, specifically if class 1 is known
if 1 in rf.classes_:
    # If class 1 is known, get its probability
    proba_spike = rf.predict_proba(X_test)[:, list(rf.classes_).index(1)]
else:
    # If class 1 was not in the training data (e.g., y_train contained only class 0),
    # then the probability of spike (class 1) should be considered 0.
    proba_spike = np.zeros(len(X_test))

# Crear un DataFrame alineado con X_test
signal_df = data.loc[X_test.index].copy()
signal_df["proba_spike"] = proba_spike

# Ejemplo de regla:
# - Entrar largo sólo si probabilidad de spike es alta
# - Y la media corta está encima de la larga (tendencia a tu favor)
# - Y volatilidad en rango razonable (no demasiado baja)
p_thr = 0.5  # umbral de probabilidad, ajusta
vol_min, vol_max = signal_df["vol_50"].quantile([0.1, 0.9])

signal_df["signal_model"] = 0
signal_df.loc[
    (signal_df["proba_spike"] >= p_thr) &
    (signal_df["ma_20"] > signal_df["ma_50"]) &
    (signal_df["vol_50"].between(vol_min, vol_max)),
    "signal_model"
] = 1

# Retorno logarítmico alineado
signal_df["ret_log"] = df.loc[signal_df.index, "ret_log"]

# Retorno de la estrategia usando la señal del modelo (shift para evitar look-ahead)
signal_df["ret_strategy"] = signal_df["signal_model"].shift(1) * signal_df["ret_log"]

# Curva de equity
signal_df["equity"] = signal_df["ret_strategy"].cumsum().apply(np.exp)

signal_df[["proba_spike", "signal_model", "equity"]].tail()

Unnamed: 0_level_0,proba_spike,signal_model,equity
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2026-02-21 17:03:46,0.36,0,1.0
2026-02-21 17:03:47,0.355,0,1.0
2026-02-21 17:03:48,0.43,0,1.0
2026-02-21 17:03:49,0.44,0,1.0
2026-02-21 17:03:50,0.425,0,1.0


Bloque 7 – Convertir predicción en estrategia y equity
Celda 8 (señal del modelo + combinación con medias / filtros)

In [None]:
# Retornos y estadística básica
ret = signal_df["ret_strategy"].dropna()

mean_ret = ret.mean()
vol_ret = ret.std()
sharpe = mean_ret / vol_ret * np.sqrt(252*24*60*60)  # ajusta si consideras ticks/segundos

max_dd = (signal_df["equity"].cummax() - signal_df["equity"]).max()

print("Retorno medio por tick:", mean_ret)
print("Volatilidad estrategia:", vol_ret)
print("Sharpe aproximado:", sharpe)
print("Max drawdown:", max_dd)


Retorno medio por tick: 0.0
Volatilidad estrategia: 0.0
Sharpe aproximado: nan
Max drawdown: 0.0


  sharpe = mean_ret / vol_ret * np.sqrt(252*24*60*60)  # ajusta si consideras ticks/segundos


Bloque 8 – Estadísticas del modelo / estrategia

In [None]:
signal_df.to_csv("boom1000_rf_features_signals.csv")
