In [None]:
def get_thresholds():
    """Read threshold records from ``./bot/config/thresholds.txt``.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping are skipped, and every remaining line is parsed as a
    Python literal with ``ast.literal_eval``.

    Returns:
        list: the parsed values, in file order.

    Raises:
        OSError: if the file cannot be opened.
        ValueError, SyntaxError: if a non-empty line is not a valid literal.
    """
    with open('./bot/config/thresholds.txt', 'r') as handle:
        stripped_lines = (raw.strip() for raw in handle)
        return [ast.literal_eval(text) for text in stripped_lines if text]

In [None]:
from bot.core.exchange.http_api import ExchangeManager, BybitRestAPI, GateIORestAPI

import pandas as pd
import polars as pl
import numpy as np
# pd.options.display.float_format = '{:.2f}'.format
from datetime import datetime, timezone, timedelta

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

import warnings
warnings.filterwarnings("ignore")

from tqdm.notebook import tqdm

from bot.utils.files import get_saved_coins

In [None]:
async def get_data(symbol, interval, n_iters, exchange):
    """Fetch candles for one symbol and return the cleaned frame of one market.

    Awaits ``exc_manager.get_candles`` (``exc_manager`` is read from notebook
    state), picks the entry stored under ``f'{exchange}_linear'``, sorts it by
    index and drops rows containing missing values.

    Args:
        symbol: forwarded to ``get_candles`` as ``symbol``.
        interval: forwarded to ``get_candles`` as ``interval``.
        n_iters: forwarded to ``get_candles`` as ``n_iters``.
        exchange: prefix of the market key, e.g. ``'bybit'`` -> ``'bybit_linear'``.

    Returns:
        The index-sorted frame of the selected market with NaN rows removed.
    """
    candles = await exc_manager.get_candles(symbol=symbol, interval=interval, n_iters=n_iters)
    market_key = f'{exchange}_linear'
    ordered = candles[market_key].sort_index()
    return ordered.dropna()

In [None]:
# Create the exchange manager, register the Bybit linear market under the key
# "bybit_linear" and load instrument metadata into `coin_information`.
# NOTE(review): get_instrument_data() is called without `await`, unlike
# get_candles()/get_funding_history() elsewhere in this notebook - confirm it is synchronous.
exc_manager = ExchangeManager()
exc_manager.add_market("bybit_linear", BybitRestAPI('linear'))
coin_information = exc_manager.get_instrument_data()

In [None]:
# Exchange prefix; get_data() appends "_linear" to it to select the market key.
exchange = 'bybit'

# Symbols (in BASE_USDT form) covered by the funding / volatility report below.
token_list = [
            '1INCH_USDT',
            'ACH_USDT', 'ADA_USDT', 'AGLD_USDT', 'ALGO_USDT', 'APT_USDT', 'ARB_USDT', 'ARKM_USDT', 'ATOM_USDT',
            'BLUR_USDT',
            'CELO_USDT', 'CHZ_USDT', 'CRV_USDT',
            'DOGE_USDT', 'DOT_USDT', 'DRIFT_USDT', 'DYDX_USDT',
            'EGLD_USDT', 'ENA_USDT', 'ETHFI_USDT', 'FARTCOIN_USDT', 'FLOW_USDT',
            'GALA_USDT', 'GLM_USDT', 'GMT_USDT', 'GRT_USDT',
            'HBAR_USDT', 'IMX_USDT', 'INJ_USDT', 'IOTA_USDT',
            'JASMY_USDT', 'JUP_USDT', 'KAS_USDT', 'KSM_USDT',
            'LDO_USDT', 'LPT_USDT',
            'MANA_USDT', 'MANTA_USDT', 'MORPHO_USDT', 'MOVE_USDT',
            'NEAR_USDT', 'NXPC_USDT',
            'ONDO_USDT', 'OP_USDT', 'ORDI_USDT',
            'PNUT_USDT', 'POL_USDT', 'RENDER_USDT', 'ROSE_USDT', 'RUNE_USDT',
            'S_USDT', 'SAND_USDT', 'SEI_USDT', 'STRK_USDT',
            'STX_USDT', 'SUI_USDT', 'SUSHI_USDT',
            'THETA_USDT', 'TIA_USDT', 'TON_USDT', 'TRX_USDT', 'VET_USDT',
            'WIF_USDT', 'XLM_USDT', 'XRP_USDT', 'ZRX_USDT'
              ]

# Drop the trailing 5 characters ("_USDT") to get the bare base tickers.
syms = [col[:-5] for col in token_list]

In [None]:
# Per-symbol report: funding totals plus hourly/daily high-low range statistics.
# Re-creates the manager and registers the Bybit linear market.
exc_manager = ExchangeManager()
exc_manager.add_market("bybit_linear", BybitRestAPI('linear'))

# Funding history is requested from 60 days before the current (naive, local) time.
start_date = datetime.now().replace(microsecond=0) - timedelta(days=60)
end_date = None  # not used anywhere in this cell

for symbol in syms:
    fund_hist = await exc_manager.get_funding_history(symbol=f'{symbol}_USDT', start_date=start_date, limit=500)
    # Only the first element of the returned result is used.
    fund_hist = fund_hist[0]

    fund_sum = float(fund_hist['funding'].sum())
    fund_len = fund_hist['funding'].shape[0]

    hour_df = await get_data(symbol=f'{symbol}_USDT', interval='1h', n_iters=5, exchange=exchange)
    hour_df = hour_df.tail(4320) # keep the last half year (4320 hourly rows = 180 days)
    # NOTE(review): in-place drop on the result of .tail(); relies on warnings being silenced above.
    hour_df.drop(['Exchange', 'Market_type'], axis=1, inplace=True)
    daily_df = await get_data(symbol=f'{symbol}_USDT', interval='1d', n_iters=1, exchange=exchange)
    daily_df = daily_df.tail(180) # keep the last half year of daily rows

    # Hourly high-low range: absolute, and as a percentage of the low.
    hour_df['Max_range'] = hour_df['High'] - hour_df['Low']
    hour_df['Max_range_perc'] = (hour_df['High'] - hour_df['Low']) / hour_df['Low'] * 100
    # Count and share of hours whose range exceeded 10 %.
    hour_10 = hour_df[hour_df['Max_range_perc'] > 10].shape[0]
    hour_10_perc = 100 * hour_10 / hour_df.shape[0]
    # "ATR" here is the plain mean of the high-low range; "mtr" is its maximum.
    hour_atr = hour_df['Max_range'].mean()
    hour_atr_perc = hour_df['Max_range_perc'].mean()
    hour_mtr_perc = hour_df['Max_range_perc'].max()

    # The same statistics on daily candles.
    daily_df['Max_range'] = daily_df['High'] - daily_df['Low']
    daily_df['Max_range_perc'] = (daily_df['High'] - daily_df['Low']) / daily_df['Low'] * 100
    days_10 = daily_df[daily_df['Max_range_perc'] > 10].shape[0]
    days_10_perc = 100 * days_10 / daily_df.shape[0]
    day_atr = daily_df['Max_range'].mean()
    day_atr_perc = daily_df['Max_range_perc'].mean()
    day_mtr_perc = daily_df['Max_range_perc'].max()

    # The nested same-type quotes inside the f-strings below need Python 3.12+.
    print(f'========= {symbol} =========')
    print(f'Суммарный фандинг: {fund_sum * 100:.2f}%, расчётов по фандингу: {fund_len}')
    print(f'Фандинг. min: {fund_hist['funding'].min() * 100}%; max: {fund_hist['funding'].max() * 100}%')
    print()
    print(f'ATR за 1 день: {day_atr:.4f} ({day_atr_perc:.2f}%); за 1 час: {hour_atr:.4f} ({hour_atr_perc:.2f}%)')
    print(f'MaxTR за 1 день: {day_mtr_perc:.2f}%; за 1 час: {hour_mtr_perc:.2f}%')
    print(f'Колебания цены более 10%. Дни: {days_10} ({days_10_perc:.1f}%); Часы: {hour_10} ({hour_10_perc:.1f}%)')
    print()

In [None]:
# Display the daily frame left over from the last symbol processed by the report loop.
daily_df

#### Исследование монет

In [None]:
import matplotlib.pyplot as plt
import polars as pl
import numpy as np
from datetime import datetime, timezone, timedelta
from zoneinfo import ZoneInfo

from tqdm.notebook import tqdm

from bot.core.db.postgres_manager import DBManager
from bot.config.credentials import host, user, password, db_name
db_params = {'host': host, 'user': user, 'password': password, 'dbname': db_name}
db_manager = DBManager(db_params)

from bot.utils.data import make_price_df_from_orderbooks_bulk, normalize
from bot.utils.pair_trading import make_df_from_orderbooks
from bot.core.exchange.http_api import ExchangeManager, BybitRestAPI


In [None]:
# Time window (Europe/Moscow) used when the frames need to be filtered by time

start_date = datetime(2025, 11, 8, 12, 0, tzinfo=ZoneInfo("Europe/Moscow"))
end_date = datetime(2025, 11, 9, 2, 0, tzinfo=ZoneInfo("Europe/Moscow"))
# Example filter (disabled):
# tdf = spread_df.filter((pl.col('time') > start_date) & (pl.col('time') < end_date))

In [None]:
# Symbols (BASE_USDT) to analyse in this section
token_list = ['1INCH_USDT', 'APT_USDT', 'ARB_USDT', 'ARKM_USDT',
              'BLUR_USDT',
              'CELO_USDT', 'CHZ_USDT', 'CRV_USDT', 'CVX_USDT',
              'DOT_USDT', 'DYDX_USDT', 'FIL_USDT', 'FLOW_USDT',
              'GALA_USDT', 'GMT_USDT', 'GRT_USDT', 'JASMY_USDT',
              'IMX_USDT', 'IOTA_USDT', 'KAS_USDT', 'KSM_USDT',
              'LDO_USDT',
              'MANA_USDT', 'MANTA_USDT', 'MORPHO_USDT', 'MOVE_USDT', 'NEAR_USDT',
              'ONDO_USDT', 'OP_USDT', 'ORDI_USDT',
              'POL_USDT', 'RENDER_USDT', 'ROSE_USDT',
              'SAND_USDT', 'SEI_USDT', 'STRK_USDT',
              'STX_USDT', 'SUI_USDT', 'SUSHI_USDT',
              'TIA_USDT', 'VET_USDT', 'XRP_USDT',
              'ZEN_USDT', 'ZK_USDT']

# Bare tickers: drop the trailing 5 characters ("_USDT").
tokens = [x[:-5] for x in token_list]

# Fresh manager with the Bybit linear market; instrument metadata is kept in
# `coin_information` and read later for the per-token 'qty_step'.
exc_manager = ExchangeManager()
exc_manager.add_market("bybit_linear", BybitRestAPI('linear'))
coin_information = exc_manager.get_instrument_data()

In [None]:
# For every token: load 5-minute order-book rows from `start_date` on and plot price over time.
for token in tokens:
    df_token = db_manager.get_orderbooks(symbol=token + '_USDT', interval='5m', start_date=start_date)

    # Rows per calendar day; computed but not displayed or used later in this cell.
    daily_counts = df_token.group_by(pl.col("time").dt.date().alias("date")).agg(
        pl.len().alias("row_count")
    )

    plt.figure(figsize=(15, 3))
    plt.title(f'{token}')
    plt.plot(df_token['time'], df_token['price'])
    plt.grid()
    plt.show()

In [None]:
# Collect one order-book frame per token for batch processing, and record each
# token's 'qty_step' from the instrument metadata (kept as the rounding
# precision, per the original note).
dfs = []
token_dp = {}

for token in tokens:
    df_token = db_manager.get_orderbooks(symbol=token + '_USDT', interval='5m', start_date=start_date, end_date=end_date)
    try:
        dp = coin_information['bybit_linear'][token + '_USDT']['qty_step']
    except (KeyError, IndexError):
        # A missing market/symbol/field in a nested mapping raises KeyError, not
        # IndexError; catch both so the intended fallback of 0 is actually used.
        dp = 0

    token_dp[token] = dp
    dfs.append(df_token)

In [None]:
# Build a prices-only frame (bucketed with trunc='5m') for analysing how the coins relate
price_df = make_price_df_from_orderbooks_bulk(dfs=dfs, tokens=tokens, trunc='5m')
price_df.head(1)

In [None]:
# (rows, columns) of the combined price frame
price_df.shape

In [None]:
# Shape after dropping rows that contain NaN values.
# NOTE(review): in polars drop_nans() removes floating-point NaN, not null; if gaps
# are stored as null this will not shrink the frame - confirm which is intended.
price_df.drop_nans().shape

In [None]:
# Normalise the price columns.
# NOTE(review): the method name is spelled 'minimax' - confirm this is what normalize() expects.
normed_df = normalize(df=price_df, method='minimax', shift_to_zero=False)

In [None]:
# Plot every normalised price series on one chart
# The time axis column is 'bucket' when present, otherwise 'time'; all other columns are prices.
date_col = 'bucket' if 'bucket' in normed_df.columns else 'time'
price_cols = [c for c in normed_df.columns if c != date_col]

plt.figure(figsize=(14, 4))
for col in price_cols:
    # Labels are set per line, but no legend is drawn in this cell.
    plt.plot(normed_df[date_col].to_list(), normed_df[col].to_list(), label=col)

plt.xlabel("Время")
plt.ylabel("Нормализованная цена")
plt.title("Нормализованные цены монет")
plt.grid(True)
plt.tight_layout()

In [None]:
# Euclidean distance between every pair of normalised price series
import itertools
import math

# Rows of (column_a, column_b, distance)
results = []

# Iterate over all unique unordered pairs of price columns
for col1, col2 in itertools.combinations(price_cols, 2):
    # Squared element-wise difference, summed and square-rooted.
    diff_sq = (normed_df[col1] - normed_df[col2]) ** 2
    distance = math.sqrt(diff_sq.sum())
    results.append((col1, col2, distance))

In [None]:
# Turn the distance rows into a frame sorted by distance and look up one specific pair.
# NOTE(review): 'STG' is not in this section's token_list, so this filter
# presumably returns an empty frame - confirm the intended pair.
pl.DataFrame(results, schema=["coin1", "coin2", "dist"], orient="row").sort('dist'
            ).filter((pl.col('coin1') == 'ARKM') & (pl.col('coin2') == 'STG'))

In [None]:
# "DOT"  "XRP"   2.17 - 15.73
# "FIL"  "XRP"   1.67 - 10.63
# "IOTA" "XAI"   1.55 - 9.29
# "IOTA" "SAND"  1.36 - 5.21
# "ONDO" "XRP"   1.28 - 12.52

# "ARKM" "STG"  -1.14 - 31.48
# "CELO" "STG"  -1.21 - 26.66
# "C98"  "VET"  -1.32 - 19.26
# "NEAR" "ONDO" -2.21 - 17.78
# "IMX"  "SNX"  -3.85 - 26.0


In [None]:
# Show the last two rows of `df`.
# NOTE(review): no top-level `df` is assigned before this cell in the visible
# notebook, so this depends on leftover kernel state.
df.tail(2)

In [None]:
# Pair under study; load 1-minute order-book rows for both legs (no time bounds passed).
token_1 = 'CELO'
token_2 = 'ONDO'

df_1 = db_manager.get_orderbooks(symbol=token_1 + '_USDT', interval='1min')
df_2 = db_manager.get_orderbooks(symbol=token_2 + '_USDT', interval='1min')

In [None]:
# Window (Europe/Moscow) over which the two legs are combined.
start_time = datetime(2025, 9, 6, 12, 0, tzinfo=ZoneInfo("Europe/Moscow"))
end_time = datetime(2025, 9, 6, 21, 0, tzinfo=ZoneInfo("Europe/Moscow"))

# Combine both order books into one frame and drop every bid/ask column,
# leaving the remaining (price-level) columns only.
df = make_df_from_orderbooks(df_1, df_2, token_1, token_2, start_time=start_time, end_time=end_time)
cols = [col for col in df.columns if 'ask' in col or 'bid' in col]
df = df.drop(cols)
# Disabled z-score step; the later cells that read 'spread'/'mean'/'std' presumably relied on it:
# df = make_zscore_df(df, token_1, token_2, wind=480).drop_nulls()

# Scale factor that brings token_2 onto token_1's price level, taken from the
# first row. The plotting cell reads `coef`, and without this assignment it is
# undefined on a fresh kernel.
# NOTE(review): raises TypeError if either first-row price is null - confirm the first row is populated.
coef = df[token_1][0] / df[token_2][0]

In [None]:
# Plot both legs on a common scale: token_1 as is, token_2 multiplied by `coef`
date_col = 'bucket' if 'bucket' in df.columns else 'time'
price_cols = [c for c in df.columns if c != date_col]  # not used further in this cell

plt.figure(figsize=(14, 4))
plt.plot(df[date_col], df[token_1], label=token_1);
# `coef` is read from notebook state; it is not assigned in this cell.
plt.plot(df[date_col], coef * df[token_2], label=token_2);
plt.xlabel("Время")
plt.ylabel("Цена")
plt.title(f"Приведённые к одному масштабу цены монет. Coef: {coef:.2f}")
plt.legend()
plt.grid(True)
plt.tight_layout()

In [None]:
# Spread with a band of mean +/- `std` standard deviations.
# Reads the 'mean', 'std' and 'spread' columns of `df`; they are not created in this cell.
std = 2
upper_bound = df['mean'] + std * df['std']
lower_bound = df['mean'] - std * df['std']

plt.figure(figsize=(14, 2))
plt.plot(df[date_col], df['spread']);
plt.plot(df[date_col], upper_bound)
plt.plot(df[date_col], lower_bound)
plt.grid()

In [None]:
# Split the spread at `train_time`: rows up to and including it are used to fit
# the scaler, later rows are transformed. Both are reshaped to column vectors (n, 1).
# NOTE(review): train_time (2025-08-15) is earlier than the 2025-09-06 start_time
# used to build `df` in this section, so the training part is presumably empty - confirm.
train_time = datetime(2025, 8, 15, 3, 0, tzinfo=ZoneInfo("Europe/Moscow")) # cut-off for the data used to compute mean & std for StandardScaler

spr_train = df.filter(pl.col(date_col) <= train_time)['spread'].to_numpy().reshape(-1, 1)
spr_test = df.filter(pl.col(date_col) > train_time)['spread'].to_numpy().reshape(-1, 1)

In [None]:
# Standardise the test spread using the training window's statistics only.
# StandardScaler is never imported in this notebook, so the same computation is
# done with numpy: column mean and population standard deviation (ddof=0) of
# spr_train, with a zero deviation replaced by 1 so that a constant training
# spread does not divide by zero.
# NOTE(review): the `scaler` object is no longer created; an empty spr_train
# yields NaN here instead of an exception.
train_mean = spr_train.mean(axis=0)
train_std = spr_train.std(axis=0)
train_std = np.where(train_std == 0, 1.0, train_std)
spr_normed = ((spr_test - train_mean) / train_std).ravel()

In [None]:
# Plot the standardised test-period spread against its row position.
plt.figure(figsize=(14, 2))
plt.plot(spr_normed);
plt.title("Стандартизованный спред")
plt.grid()

#### Бэктест одной торговой пары

In [None]:
from bot.analysis.pair_trading import backtest
from bot.utils.pair_trading import make_df_from_orderbooks, make_trunc_df, create_zscore_df
from bot.analysis.strategy_analysis import analyze_strategy

from datetime import datetime, timezone, timedelta
from zoneinfo import ZoneInfo
import polars as pl
import numpy as np
import pickle
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['timezone'] = 'Europe/Moscow'

from bot.core.db.postgres_manager import DBManager
from bot.config.credentials import host, user, password, db_name
db_params = {'host': host, 'user': user, 'password': password, 'dbname': db_name}
db_manager = DBManager(db_params)

In [None]:
# --- Single-pair backtest configuration ---
token_1 = 'SUI'
token_2 = 'CRV'
spread_method = 'lr'
min_order = 40

# Lengths, in days, of the validation and training windows.
valid_length = 12
train_length = 20

# Take "now" directly in Moscow time. Stamping the naive local clock with
# replace(tzinfo=...) would mislabel the instant on any host whose local zone
# is not Europe/Moscow.
end_time = datetime.now(ZoneInfo("Europe/Moscow"))
# Validation starts at midnight `valid_length` days back; training starts
# `train_length` days before that.
valid_time = (end_time - timedelta(days=valid_length)).replace(hour=0, minute=0, second=0, microsecond=0)
start_time = valid_time - timedelta(days=train_length)
start_ts = int(valid_time.timestamp())

# Number of timeframes used for the z-score: 1 (single) or 2 (long + short).
n_tf = 1

if n_tf == 1:
    tf = '4h'
    winds = np.array([12, 18, 24, 30])
elif n_tf == 2:
    tf_long = '4h'
    tf_short = '1h'
    winds_long = np.array([12, 18, 24, 30])
    winds_short = np.array([24, 48, 72, 96, 120])
else:
    # Fail here rather than with a NameError in a later cell.
    raise ValueError(f'n_tf must be 1 or 2, got {n_tf!r}')

In [None]:
# Load tick-level order-book data for both legs over [start_time, end_time].
df_1 = db_manager.get_tick_ob(token=token_1 + '_USDT',
                                     start_time=start_time,
                                     end_time=end_time)
df_2 = db_manager.get_tick_ob(token=token_2 + '_USDT',
                                     start_time=start_time,
                                     end_time=end_time)
# Nested same-type quotes inside these f-strings need Python 3.12+.
print(f'{token_1:>8} first date: {df_1['time'][0]}')
print(f'{token_2:>8} first date: {df_2['time'][0]}')

# Combined frames in direct (token_1, token_2) and reversed (token_2, token_1) order.
# NOTE(review): `df`/`tick_df` (and `df_rev`/`tick_df_rev`) are built by identical calls.
df = make_df_from_orderbooks(df_1, df_2, token_1, token_2, start_time=start_time)
tick_df = make_df_from_orderbooks(df_1, df_2, token_1, token_2, start_time=start_time)

df_rev = make_df_from_orderbooks(df_2, df_1, token_2, token_1, start_time=start_time)
tick_df_rev = make_df_from_orderbooks(df_2, df_1, token_2, token_1, start_time=start_time)

if n_tf == 1:
    # One timeframe: z-score frames for the 'lr' and 'dist' spread methods, plus 'lr' on the reversed pair.
    # NOTE(review): spr_method is passed as a literal here; the `spread_method` variable is not used.
    agg_df = make_trunc_df(df, timeframe=tf, token_1=token_1, token_2=token_2, method='triple')
    spread_df = create_zscore_df(token_1, token_2, tick_df, agg_df, tf, winds, min_order, start_ts, median_length=6, spr_method='lr')
    spread_df_dist = create_zscore_df(token_1, token_2, tick_df, agg_df, tf, winds, min_order, start_ts, median_length=6, spr_method='dist')
    agg_df_rev = make_trunc_df(df_rev, timeframe=tf, token_1=token_2, token_2=token_1, method='triple')
    spread_df_rev = create_zscore_df(token_2, token_1, tick_df_rev, agg_df_rev, tf, winds, min_order, start_ts, median_length=6, spr_method='lr')
    # (timeframe, window) combinations to inspect.
    search_space = [(tf, int(w)) for w in winds]

elif n_tf == 2:
    # Two timeframes: build long and short z-score frames and join them on 'time'.
    # NOTE(review): this branch does not create spread_df_rev / spread_df_dist, which later cells read.
    agg_df_long = make_trunc_df(df, timeframe=tf_long, token_1=token_1, token_2=token_2, method='triple')
    agg_df_short = make_trunc_df(df, timeframe=tf_short, token_1=token_1, token_2=token_2, method='triple')

    spread_df_long = create_zscore_df(token_1, token_2, tick_df, agg_df_long, tf_long, winds_long, min_order, start_ts, median_length=6)
    spread_df_short = create_zscore_df(token_1, token_2, tick_df, agg_df_short, tf_short, winds_short, min_order, start_ts, median_length=6)
    # Drop from the short frame every column it shares with the long one, except the join key.
    cols_to_drop = [col for col in spread_df_short.columns if col in spread_df_long.columns]
    cols_to_drop.remove('time')

    spread_df = spread_df_long.join(spread_df_short.drop(cols_to_drop), on='time', coalesce=True)
    search_space = [(tf_long, int(w)) for w in winds_long] + [(tf_short, int(w)) for w in winds_short]

# Instrument metadata comes from a local pickle.
# NOTE(review): pickle.load executes arbitrary code - only load files you trust.
with open("./data/coin_information.pkl", "rb") as f:
    coin_information = pickle.load(f)

# Per-token quantity step and price scale, passed to backtest() later.
dp_1 = float(coin_information['bybit_linear'][token_1 + '_USDT']['qty_step'])
ps_1 = int(coin_information['bybit_linear'][token_1 + '_USDT']['price_scale'])
dp_2 = float(coin_information['bybit_linear'][token_2 + '_USDT']['qty_step'])
ps_2 = int(coin_information['bybit_linear'][token_2 + '_USDT']['price_scale'])

In [None]:
# For every (timeframe, window) in the search space, overlay the three z-score variants:
# direct 'lr' (green), reversed pair (red) and 'dist' (black).
for tf, wind in search_space:
    try:
        # Pick the shared columns plus this combination's z-score, renamed to 'z_score'.
        df = spread_df.select('time', 'ts', token_1, token_2, f'{token_1}_size', f'{token_2}_size',
             f'{token_1}_bid_price', f'{token_1}_ask_price', f'{token_1}_bid_size', f'{token_1}_ask_size',
             f'{token_2}_bid_price', f'{token_2}_ask_price', f'{token_2}_bid_size', f'{token_2}_ask_size',
             f'z_score_{wind}_{tf}').rename({f'z_score_{wind}_{tf}': 'z_score'})
        df_rev = spread_df_rev.select('time', 'ts', token_2, token_1, f'{token_2}_size', f'{token_1}_size',
             f'{token_2}_bid_price', f'{token_2}_ask_price', f'{token_2}_bid_size', f'{token_2}_ask_size',
             f'{token_1}_bid_price', f'{token_1}_ask_price', f'{token_1}_bid_size', f'{token_1}_ask_size',
             f'z_score_{wind}_{tf}').rename({f'z_score_{wind}_{tf}': 'z_score'})
        df_dist = spread_df_dist.select('time', 'ts', token_1, token_2, f'{token_1}_size', f'{token_2}_size',
             f'{token_1}_bid_price', f'{token_1}_ask_price', f'{token_1}_bid_size', f'{token_1}_ask_size',
             f'{token_2}_bid_price', f'{token_2}_ask_price', f'{token_2}_bid_size', f'{token_2}_ask_size',
             f'z_score_{wind}_{tf}').rename({f'z_score_{wind}_{tf}': 'z_score'})
    except pl.exceptions.ColumnNotFoundError:
        # Combinations whose columns are missing from any of the frames are skipped silently.
        continue

    plt.figure(figsize=(15, 3))
    plt.title(f'{tf}; {wind}')
    plt.plot(df['time'], df['z_score'], c='green')
    plt.plot(df_rev['time'], df_rev['z_score'], c='red')
    plt.plot(df_dist['time'], df_dist['z_score'], c='black')
    plt.grid()
    plt.show()

In [None]:
# Select one (timeframe, window) combination, build the frames used by the
# backtest, and plot prices (top) and z-scores (bottom).
t = 720 * 24 * 15 # number of trailing rows drawn for the 'long'/'rev' z-score lines; presumably 15 days of 5-second rows - confirm

tf = '4h'
wind = 24

if n_tf == 1:
    # Direct 'lr', reversed-pair 'lr' and 'dist' frames, each with the chosen z-score renamed to 'z_score'.
    df = spread_df.select('time', 'ts', token_1, token_2, f'{token_1}_size', f'{token_2}_size',
             f'{token_1}_bid_price', f'{token_1}_ask_price', f'{token_1}_bid_size', f'{token_1}_ask_size',
             f'{token_2}_bid_price', f'{token_2}_ask_price', f'{token_2}_bid_size', f'{token_2}_ask_size',
             f'z_score_{wind}_{tf}').rename({f'z_score_{wind}_{tf}': 'z_score'})
    df_rev = spread_df_rev.select('time', 'ts', token_2, token_1, f'{token_2}_size', f'{token_1}_size',
             f'{token_2}_bid_price', f'{token_2}_ask_price', f'{token_2}_bid_size', f'{token_2}_ask_size',
             f'{token_1}_bid_price', f'{token_1}_ask_price', f'{token_1}_bid_size', f'{token_1}_ask_size',
             f'z_score_{wind}_{tf}').rename({f'z_score_{wind}_{tf}': 'z_score'})
    df_dist = spread_df_dist.select('time', 'ts', token_1, token_2, f'{token_1}_size', f'{token_2}_size',
             f'{token_1}_bid_price', f'{token_1}_ask_price', f'{token_1}_bid_size', f'{token_1}_ask_size',
             f'{token_2}_bid_price', f'{token_2}_ask_price', f'{token_2}_bid_size', f'{token_2}_ask_size',
             f'z_score_{wind}_{tf}').rename({f'z_score_{wind}_{tf}': 'z_score'})
    # Combined signal: the direct z-score ('z_score_long') joined on 'ts' with the
    # negated reversed-pair z-score ('z_score_short'). When both are positive the
    # smaller is used, when both are negative the larger, otherwise 0.
    double_lr_df = spread_df.select('time', 'ts', token_1, token_2, f'{token_1}_size', f'{token_2}_size',
             f'{token_1}_bid_price', f'{token_1}_ask_price', f'{token_1}_bid_size', f'{token_1}_ask_size',
             f'{token_2}_bid_price', f'{token_2}_ask_price', f'{token_2}_bid_size', f'{token_2}_ask_size',
             f'z_score_{wind}_{tf}'
    ).rename({f'z_score_{wind}_{tf}': 'z_score_long'}
        ).join(spread_df_rev.select('ts', f'z_score_{wind}_{tf}'), on='ts'
    ).rename({f'z_score_{wind}_{tf}': 'z_score_short'}
    ).with_columns(
        -pl.col('z_score_short')
    ).with_columns(
        pl.when((pl.col('z_score_long') > 0) & (pl.col('z_score_short') > 0))
            .then(pl.min_horizontal('z_score_long', 'z_score_short'))
        .when((pl.col('z_score_long') < 0) & (pl.col('z_score_short') < 0))
            .then(pl.max_horizontal('z_score_long', 'z_score_short')).otherwise(0).alias('z_score')
    )
    
elif n_tf == 2:
    # NOTE(review): this branch reads `wind_long` / `wind_short`, which are not assigned
    # anywhere in the visible notebook, selects only the long z-score column although the
    # rename also expects the short one, and does not define df_rev / df_dist used by the
    # plot below - it looks unfinished.
    df = spread_df.select('time', 'ts', token_1, token_2, f'{token_1}_size', f'{token_2}_size',
         f'{token_1}_bid_price', f'{token_1}_ask_price', f'{token_1}_bid_size', f'{token_1}_ask_size',
         f'{token_2}_bid_price', f'{token_2}_ask_price', f'{token_2}_bid_size', f'{token_2}_ask_size',
         f'z_score_{wind}_{tf_long}')
    df = df.rename({f'z_score_{wind_long}_{tf_long}': 'z_score_long', f'z_score_{wind_short}_{tf_short}': 'z_score_short'})
    df = df.with_columns(
        pl.when((pl.col('z_score_long') > 0) & (pl.col('z_score_short') > 0))
        .then(pl.min_horizontal('z_score_long', 'z_score_short'))
        .when((pl.col('z_score_long') < 0) & (pl.col('z_score_short') < 0))
        .then(pl.max_horizontal('z_score_long', 'z_score_short')).otherwise(0).alias('z_score')
)    

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 6), sharex=True)
# --- Price panel ---
ax1.plot(df.select('time'), df.select(token_1), label=token_1, color='orange')
if n_tf == 1:
    ax1.set_title(f'{token_1} - {token_2} ({tf}, {wind})')
elif n_tf == 2:
    ax1.set_title(f'{token_1} - {token_2} ({tf_long=}, {wind_long=}; {tf_short=}, {wind_short=})')

# NOTE(review): the label text 'udst' looks like a typo for 'usdt'.
ax1.set_ylabel('udst')
ax1.legend()
ax1.grid()

# token_2 is drawn on a secondary y-axis sharing the same x-axis.
ax4 = ax1.twinx()
ax4.plot(df.select('time'), df.select(token_2), label=token_2, color='blue')

# --- z-score panel ---
# The 'long' and 'rev' lines show only the last `t` rows; the 'dist' line is drawn in full.
ax2.plot(df.select('time')[-t:], df.select('z_score')[-t:], label='long');
ax2.plot(df_rev['time'][-t:], df_rev['z_score'][-t:], c='red', label='rev')
ax2.plot(df_dist['time'], df_dist['z_score'], c='black', label='dist')
ax2.set_title('z_score')
ax2.grid()
ax2.legend();

In [None]:
# Entry / exit z-score thresholds; applied symmetrically (negative for the low side).
params = {'thresh_in': 2.0, 'thresh_out': 0.25}

leverage = 2

# Runs the backtest on the 'dist' frame; double_lr_df is the alternative input.
trades_df = backtest(df_dist, token_1, token_2, dp_1, dp_2, ps_1, ps_2,
            thresh_low_in=-params['thresh_in'], thresh_high_in=params['thresh_in'],
            thresh_low_out=-params['thresh_out'], thresh_high_out=params['thresh_out'],
            long_possible=True, short_possible=True, dist_in=0, dist_out=0,
            balance=100, order_size=50, fee_rate=0.00055, stop_loss_std=5.0, sl_method='leave',
            sl_seconds = 60,
            leverage=leverage,
            verbose=2)
# The analysis window runs from the start of validation to the last timestamp of `df`.
# NOTE(review): backtest() gets balance=100 while analyze_strategy() gets
# initial_balance=200.0 - confirm the two are meant to differ.
end_date = df['time'][-1]
metrics = analyze_strategy(trades_df, start_date=valid_time, end_date=end_date, initial_balance=200.0)

In [None]:
#                                          dist    rev     doub
# IMX (0.3886) - CELO (0.2074)  : -0.81; -11.64; -10.99;  -4.80
# ARKM (0.3036) - MANTA (0.1297): 18.40;  16.29;  15.77;  17.62
# GMT (0.02185) - GALA (0.00932):  2.85;  -1.33;   4.88;   2.20
# OP (0.3924) - TIA (0.91)      :  6.66;   6.70;  -7.24;  -5.80
# GRT (0.05918) - SEI (0.16480) :-10.95;  -4.96;  -7.21;  -9.23
# CRV (0.4458) - SUI (1.8014)   : -3.90;  -6.16;   7.55;   5.24

#### Симуляция торговли

In [1]:
from bot.analysis.pair_trading import backtest
from bot.utils.pair_trading import make_df_from_orderbooks, get_qty, run_single_tf_backtest
from bot.utils.pair_trading import run_double_tf_backtest, run_single_tf_backtest_reverse, select_cols_1tf, select_cols_2tf
from bot.analysis.strategy_analysis import analyze_strategy
from bot.utils.coins import get_step_info, get_price_scale

from datetime import datetime, timezone, timedelta
from zoneinfo import ZoneInfo
import polars as pl
import numpy as np
import pickle
import random
import json
import ast
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['timezone'] = 'Europe/Moscow'

from bot.core.db.postgres_manager import DBManager
from bot.config.credentials import host, user, password, db_name
db_params = {'host': host, 'user': user, 'password': password, 'dbname': db_name}
db_manager = DBManager(db_params)

In [2]:
# Spread method used in the parquet file names, and the simulation window (Europe/Moscow).
method = 'dist'
start_bt_time = datetime(2025, 10, 22, 0, 0, tzinfo=ZoneInfo("Europe/Moscow"))
end_time = datetime(2025, 11, 12, 0, 0, tzinfo=ZoneInfo("Europe/Moscow"))

# Instrument metadata comes from a local pickle.
# NOTE(review): pickle.load executes arbitrary code - only load files you trust.
with open("./data/coin_information.pkl", "rb") as f:
    coin_information = pickle.load(f)

# One whitespace-separated "TOKEN_A TOKEN_B" pair per line.
token_pairs = []
with open('./bot/config/token_pairs.txt', 'r') as file:
    for line in file:
        parts = line.split()
        if not parts:
            # Skip blank lines (e.g. a trailing newline at the end of the file)
            # instead of failing on the tuple unpacking below.
            continue
        # Lines with any other number of fields still raise ValueError.
        a, b = parts
        token_pairs.append((a, b))

In [3]:
def check_pos(name):
    """Tell whether the pair encoded in ``name`` is listed in ``pairs``.

    ``name`` is split on ``'_'``; its first two parts are taken as the two
    tokens and any further parts are ignored. ``pairs`` is read from the
    enclosing module state and is iterated as 3-item records, of which only
    the first two items are compared.

    Args:
        name: string of the form ``'<token_1>_<token_2>[_...]'``.

    Returns:
        bool: True if some record in ``pairs`` starts with (token_1, token_2).

    Raises:
        ValueError: if ``name`` has fewer than two ``'_'``-separated parts.
    """
    token_1, token_2, *_extra = name.split('_')
    for first, second, _unused in pairs:
        if first == token_1 and second == token_2:
            return True
    return False

In [4]:
def get_thresholds():
    """Read threshold records from ``./bot/config/thresholds_test.txt``.

    Lines are stripped of surrounding whitespace; lines left empty are
    ignored and each remaining line is parsed with ``ast.literal_eval``.

    Returns:
        list: the parsed values, in file order.

    Raises:
        OSError: if the file cannot be opened.
        ValueError, SyntaxError: if a non-empty line is not a valid literal.
    """
    parsed = []
    with open('./bot/config/thresholds_test.txt', 'r') as handle:
        for text in map(str.strip, handle):
            if not text:
                continue
            parsed.append(ast.literal_eval(text))
    return parsed

In [5]:
def create_single_tf_backtest_df(token_pairs, start_time, end_time, tf, wind, method):
    """Assemble one wide frame with order-book columns and a z-score per pair.

    The result starts as a 5-second ``time`` grid from ``start_time`` to
    ``end_time``. For every pair the file
    ``./data/pair_backtest/{token_1}_{token_2}_{method}_full.parquet`` is read,
    restricted to ``start_time <= time < end_time``, and joined onto the grid
    with a full (outer) join on ``time`` with coalesced keys:

    * each token's price/size/bid/ask columns, added only if that token is
      not already a column of the result;
    * the pair's ``z_score_{wind}_{tf}`` column, renamed to
      ``{token_1}_{token_2}_z_score``.

    Pairs whose parquet file does not exist are skipped silently.

    Args:
        token_pairs: iterable of ``(token_1, token_2)``.
        start_time, end_time: bounds of the grid and of the row filter.
        tf, wind: select the source column ``z_score_{wind}_{tf}``.
        method: spread-method tag used in the parquet file name.

    Returns:
        pl.DataFrame: the combined frame.
    """
    def _book_columns(token):
        # Price/size columns carried for a single token, in source order.
        return [token, f'{token}_size', f'{token}_bid_price', f'{token}_ask_price',
                f'{token}_bid_size', f'{token}_ask_size']

    main_df = pl.DataFrame(
        {'time': pl.datetime_range(start=start_time, end=end_time, interval="5s", eager=True)}
    )
    source_z_col = f'z_score_{wind}_{tf}'

    for token_1, token_2 in token_pairs:
        pair_z_col = f'{token_1}_{token_2}_z_score'
        try:
            spread_df = pl.read_parquet(f'./data/pair_backtest/{token_1}_{token_2}_{method}_full.parquet',
                                        low_memory=True, use_pyarrow=True)
            spread_df = spread_df.filter((pl.col('time') >= start_time) & (pl.col('time') < end_time))
            spread_df = spread_df.select('time', *_book_columns(token_1), *_book_columns(token_2), source_z_col)
            spread_df = spread_df.rename({source_z_col: pair_z_col})

            # Attach each token's book columns once, the first time the token is seen.
            for token in (token_1, token_2):
                if token not in main_df.columns:
                    token_df = spread_df.select('time', *_book_columns(token))
                    main_df = main_df.join(token_df, on='time', how='full', coalesce=True)

            z_df = spread_df.select('time', pair_z_col)
            main_df = main_df.join(z_df, on='time', how='full', coalesce=True)
        except FileNotFoundError:
            continue

    return main_df

In [6]:
def create_double_tf_backtest_df(token_pairs, start_time, end_time, tf_1, wind_1, tf_2, wind_2):
    """Assemble one wide frame with order-book columns and two z-scores per pair.

    The result starts as a 1-second ``time`` grid from ``start_time`` to
    ``end_time``. For every pair the file
    ``./data/pair_backtest/{token_1}_{token_2}_{method}_full.parquet`` is
    scanned lazily, restricted to ``start_time <= time < end_time`` and
    collected. ``method`` is not a parameter; it is read from module state.

    Joined onto the result, on ``time``:

    * each token's price/size/bid/ask columns, added only if that token is
      not already a column of the result;
    * ``z_score_{wind_1}_{tf_1}`` and ``z_score_{wind_2}_{tf_2}``, renamed to
      ``{token_1}_{token_2}_z_score_1`` and ``{token_1}_{token_2}_z_score_2``.

    No ``how`` is passed to ``join``, so polars' default join type applies.
    NOTE(review): the single-timeframe builder uses a full join with
    coalescing instead - confirm the difference is intended.

    Pairs whose parquet file does not exist are skipped silently.

    Args:
        token_pairs: iterable of ``(token_1, token_2)``.
        start_time, end_time: bounds of the grid and of the row filter.
        tf_1, wind_1: select the first z-score source column.
        tf_2, wind_2: select the second z-score source column.

    Returns:
        pl.DataFrame: the combined frame.
    """
    def _book_columns(token):
        # Price/size columns carried for a single token, in source order.
        return [token, f'{token}_size', f'{token}_bid_price', f'{token}_ask_price',
                f'{token}_bid_size', f'{token}_ask_size']

    main_df = pl.DataFrame(
        {'time': pl.datetime_range(start=start_time, end=end_time, interval="1s", eager=True)}
    )
    source_z_1 = f'z_score_{wind_1}_{tf_1}'
    source_z_2 = f'z_score_{wind_2}_{tf_2}'

    for token_1, token_2 in token_pairs:
        pair_z_1 = f'{token_1}_{token_2}_z_score_1'
        pair_z_2 = f'{token_1}_{token_2}_z_score_2'
        try:
            lazy_pair = pl.scan_parquet(f'./data/pair_backtest/{token_1}_{token_2}_{method}_full.parquet')
            lazy_pair = lazy_pair.filter((pl.col('time') >= start_time) & (pl.col('time') < end_time))
            lazy_pair = lazy_pair.select('time', *_book_columns(token_1), *_book_columns(token_2),
                                         source_z_1, source_z_2)
            spread_df = lazy_pair.rename({source_z_1: pair_z_1, source_z_2: pair_z_2}).collect()

            # Attach each token's book columns once, the first time the token is seen.
            for token in (token_1, token_2):
                if token not in main_df.columns:
                    token_df = spread_df.select('time', *_book_columns(token))
                    main_df = main_df.join(token_df, on='time')

            z_df = spread_df.select('time', pair_z_1, pair_z_2)
            main_df = main_df.join(z_df, on='time')

        except FileNotFoundError:
            continue

    return main_df

In [7]:
# Open the full dataset lazily
df = pl.scan_parquet('./data/train_data.parquet')

# Drop the columns holding spread information (names containing '_spread_'); not needed here
all_cols = df.collect_schema().names()
cols_to_drop = [col for col in all_cols if '_spread_' in col]

# Materialise the remaining columns and show the estimated in-memory size with '_' digit separators.
df = df.drop(cols_to_drop).collect()
f'{df.estimated_size():_}'

'772_437_520'

#### Поиск наилучших параметров для прямого входа на 1 таймфрейме

In [8]:
# Simulation window (Europe/Moscow).
start_time = datetime(2025, 10, 22, 0, 0, tzinfo=ZoneInfo("Europe/Moscow"))
end_time = datetime(2025, 11, 12, 0, 0, tzinfo=ZoneInfo("Europe/Moscow"))

# Result rows collected by the grid search, and the entry / exit z-score thresholds to try.
metrics_arr = []
in_params = (1.6, 1.8, 2.0, 2.25, 2.5)
out_params = (0.0, 0.25, 0.5)

# Timeframes and, for each, the rolling-window lengths to try.
tfs = ['4h', '1h']
winds = {'4h': [14, 18, 24, 30], '1h': [24, 36, 48, 72, 96, 120]}
leverage = 2
max_pairs = 5
min_order_size = 45
max_order_size = 50
fee_rate = 0.00055
sl_ratio = 0.9

# Total number of grid combinations, used as the progress-bar total. It is
# derived from `tfs` so that it stays correct when timeframes are added or
# removed, rather than hard-coding the '4h' and '1h' keys.
ln = len(in_params) * len(out_params) * sum(len(winds[tf_name]) for tf_name in tfs)
ln

150

In [9]:
# Single-timeframe simulation with direct entry and exit: run the backtest for
# every (timeframe, window, thresh_in, thresh_out) combination and rank the results.

# Start from an empty result list so that re-running this cell does not
# append duplicate rows to an earlier run.
metrics_arr = []

# Metric fields copied from the backtest result into each output row, in column order.
metric_keys = ('n_trades', 'duration_min', 'duration_max', 'duration_avg', 'stop_losses', 'liquidations',
               'profit', 'max_drawdown', 'max_profit', 'max_loss', 'avg_profit', 'profit_std', 'profit_ratio')

with tqdm(total=ln, desc="Обработка", unit="iter") as progress_bar:
    for tf in tfs:
        for wind in winds[tf]:
            # Columns for this (timeframe, window) are selected once and reused for all thresholds.
            tdf = select_cols_1tf(df, token_pairs, tf, wind)

            for in_ in in_params:
                for out_ in out_params:
                    # Combinations whose exit threshold exceeds the entry threshold are not run.
                    if abs(out_) <= abs(in_):
                        trades_df, metrics = run_single_tf_backtest(tdf, tf, wind, in_, out_, leverage, max_pairs,
                                   min_order_size, max_order_size, fee_rate, start_time, end_time, sl_ratio,
                                   coin_information)

                        # Runs without trades contribute no row.
                        if not trades_df.is_empty():
                            metrics_arr.append({'tf': tf, 'wind': wind, 'thresh_in': in_, 'thresh_out': out_,
                                                **{key: metrics[key] for key in metric_keys}})

                    # Tick once per combination, including skipped ones, so the bar reaches its total.
                    progress_bar.update(1)

str_output = pl.DataFrame(metrics_arr).sort(by='profit_ratio', descending=True).drop('duration_max', 'stop_losses', 'liquidations')

Обработка:   0%|          | 0/150 [00:00<?, ?iter/s]

In [16]:
# Ten best parameter combinations by profit_ratio
str_output.head(10)

tf,wind,thresh_in,thresh_out,n_trades,duration_min,duration_avg,profit,max_drawdown,max_profit,max_loss,avg_profit,profit_std,profit_ratio
str,i64,f64,f64,i64,duration[μs],duration[μs],f64,f64,f64,f64,f64,f64,f64
"""4h""",24,1.6,0.25,85,9m,1d 3h 1m 15s,163.57,-5.83,14.43,-5.83,1.92,2.9,19.532
"""4h""",14,1.8,0.5,101,8m 55s,22h 31m 11s,160.61,-5.59,13.52,-5.59,1.59,2.82,19.504
"""1h""",120,2.5,0.5,47,9m 10s,1d 19h 23m 24s,132.61,-2.92,12.61,-2.92,2.82,3.16,19.306
"""1h""",48,2.0,0.25,127,49m 30s,17h 30m 38s,147.61,-4.74,9.71,-3.8,1.16,1.95,19.278
"""4h""",24,1.6,0.0,107,6m 30s,20h 51m 25s,169.36,-6.87,8.24,-6.55,1.58,2.29,19.198
"""1h""",96,2.5,0.25,65,9m 10s,1d 3h 31m 48s,147.48,-4.96,12.69,-4.96,2.27,2.74,18.693
"""4h""",30,2.5,0.5,44,9m 10s,1d 19h 22m 46s,128.71,-2.93,12.87,-2.93,2.93,3.28,18.685
"""4h""",18,2.25,0.5,76,8m 55s,1d 2h 31m 56s,155.2,-5.71,15.11,-5.71,2.04,2.96,18.653
"""1h""",48,1.8,0.0,160,7m 25s,13h 48m 40s,154.34,-6.05,9.91,-4.57,0.96,1.88,18.536
"""1h""",72,1.8,0.25,96,9m,23h 3m 59s,160.77,-6.49,14.1,-6.49,1.67,2.64,18.522


In [20]:
# Best threshold combinations for the 4h timeframe with window 24
str_output.filter((pl.col('tf') == '4h') & (pl.col('wind') == 24)).head(10)

tf,wind,thresh_in,thresh_out,n_trades,duration_min,duration_avg,profit,max_drawdown,max_profit,max_loss,avg_profit,profit_std,profit_ratio
str,i64,f64,f64,i64,duration[μs],duration[μs],f64,f64,f64,f64,f64,f64,f64
"""4h""",24,1.6,0.25,85,9m,1d 3h 1m 15s,163.57,-5.83,14.43,-5.83,1.92,2.9,19.532
"""4h""",24,1.6,0.0,107,6m 30s,20h 51m 25s,169.36,-6.87,8.24,-6.55,1.58,2.29,19.198
"""4h""",24,2.25,0.25,67,7m 20s,1d 3h 49m 50s,146.81,-5.7,14.61,-5.7,2.19,3.1,17.612
"""4h""",24,2.5,0.25,59,7m 15s,1d 5h 17m 36s,131.06,-4.62,13.65,-4.62,2.22,2.68,17.016
"""4h""",24,1.8,0.0,97,5m,22h 13m 49s,146.18,-6.43,8.47,-6.43,1.51,2.31,17.007
"""4h""",24,2.5,0.5,52,9m 10s,1d 12h 21m 45s,126.39,-4.12,14.33,-4.12,2.43,2.85,16.937
"""4h""",24,2.25,0.5,59,9m 15s,1d 10h 14m 12s,136.32,-5.2,13.36,-5.2,2.31,3.04,16.908
"""4h""",24,2.0,0.25,79,7m 25s,1d 2h 37m 32s,141.03,-6.18,14.43,-6.18,1.79,2.74,16.527
"""4h""",24,2.0,0.5,72,9m 20s,1d 6h 41m,134.4,-5.68,11.5,-5.68,1.87,2.48,16.332
"""4h""",24,1.8,0.25,79,7m 30s,1d 4h 6m 15s,132.04,-5.64,14.43,-5.64,1.67,2.75,16.003


In [21]:
# Print the five best rows as aligned, comma-separated text (duration columns omitted).
for row in str_output[:5].iter_rows(named=True):
    print(f"{row['tf']:2}, {row['wind']:2}, {row['thresh_in']:>4}, {row['thresh_out']:>4}, {row['n_trades']:>3}, \
{row['profit']:>6}, {row['max_drawdown']:>6}, {row['max_profit']:>6}, {row['max_loss']:>6}, \
{row['avg_profit']:>6}, {row['profit_std']:>5}, {row['profit_ratio']:>5}")

4h, 24,  1.6, 0.25,  85, 163.57,  -5.83,  14.43,  -5.83,   1.92,   2.9, 19.532
4h, 14,  1.8,  0.5, 101, 160.61,  -5.59,  13.52,  -5.59,   1.59,  2.82, 19.504
1h, 120,  2.5,  0.5,  47, 132.61,  -2.92,  12.61,  -2.92,   2.82,  3.16, 19.306
1h, 48,  2.0, 0.25, 127, 147.61,  -4.74,   9.71,   -3.8,   1.16,  1.95, 19.278
4h, 24,  1.6,  0.0, 107, 169.36,  -6.87,   8.24,  -6.55,   1.58,  2.29, 19.198


In [None]:
# Single-timeframe simulation where the position is opened when the spread
# returns towards the threshold (reverse_in=True); exits stay direct (reverse_out=False).
metrics_arr = []

dist_in_params = (0.1, 0.2, 0.3, 0.4, 0.5)
dist_out_params = (0.1, 0.2, 0.3, 0.4, 0.5)
dist_in = 0
dist_out = 0  # fixed: only dist_in is varied in this search

# Progress-bar total: every (timeframe window, thresh_in, thresh_out, dist_in) combination.
ln = len(in_params) * len(out_params) * sum(len(winds[tf_name]) for tf_name in tfs) * len(dist_in_params)

with tqdm(total=ln, desc="Обработка", unit="iter") as progress_bar:
    for tf in tfs:
        for wind in winds[tf]:
            tdf = select_cols_1tf(df, token_pairs, tf, wind)

            for in_ in in_params:
                for out_ in out_params:
                    if abs(out_) > abs(in_):
                        # Not run; still account for the skipped dist_in combinations
                        # so the bar reaches its total.
                        progress_bar.update(len(dist_in_params))
                        continue

                    # Iterate the entry-distance grid (the values being bound to
                    # dist_in), not the exit-distance grid.
                    for dist_in in dist_in_params:
                        # NOTE(review): max_pairs and leverage are passed in the opposite order to
                        # the run_single_tf_backtest call - confirm against the function signature.
                        trades_df, metrics = run_single_tf_backtest_reverse(tdf, tf, wind, in_, out_, dist_in, dist_out,
                                max_pairs, leverage, min_order_size, max_order_size, fee_rate, start_time, end_time,
                                sl_ratio, coin_information, reverse_in=True, reverse_out=False)

                        metrics_arr.append({'tf': tf, 'wind': wind, 'thresh_in': in_, 'thresh_out': out_, 'dist_in': dist_in, 'dist_out': dist_out,
                            'n_trades': metrics['n_trades'], 'duration_min': metrics['duration_min'], 'duration_max': metrics['duration_max'],
                            'duration_avg': metrics['duration_avg'], 'stop_losses': metrics['stop_losses'], 'liquidations': metrics['liquidations'],
                            'profit': metrics['profit'], 'max_drawdown': metrics['max_drawdown'], 'max_profit': metrics['max_profit'],
                            'max_loss': metrics['max_loss'], 'avg_profit': metrics['avg_profit'], 'profit_std': metrics['profit_std'],
                            'profit_ratio': metrics['profit_ratio']})
                        progress_bar.update(1)

# Ranked by absolute profit (the direct-entry search above ranks by profit_ratio).
rev_output = pl.DataFrame(metrics_arr).sort(by='profit', descending=True).drop('duration_max', 'stop_losses', 'liquidations')

Обработка:   0%|          | 0/750 [00:00<?, ?iter/s]

In [None]:
# Display the reverse-entry results (sorted by profit, descending)
rev_output

In [None]:
# Print the ten best reverse-entry rows as aligned, comma-separated text.
for row in rev_output[:10].iter_rows(named=True):
    print(f"{row['tf']:2}, {row['wind']:2}, {row['thresh_in']:>4}, {row['thresh_out']:>4}, {row['n_trades']:>3}, {row['dist_in']:>4}, {row['dist_out']:>4}, \
{row['duration_avg']}, {row['profit']:>6}, {row['max_drawdown']:>6}, {row['max_profit']:>6}, {row['max_loss']:>6}, \
{row['avg_profit']:>6}, {row['profit_std']:>5}, {row['profit_ratio']:>5}")

In [None]:
# Peek at the first two entries of `token_params`.
# NOTE(review): in the visible notebook `token_params` is only assigned inside a
# function, so at top level this depends on leftover kernel state.
token_params[:2]

In [None]:
# First row of the loaded dataset, to check which columns are available
df.head(1)

In [None]:
def run_single_tf_best_params_backtest(main_df, leverage, max_pairs, max_order_size,
                           fee_rate, start_time, end_time, coin_information):
    """Backtest pair trading where every pair uses its own saved parameters.

    Parameter records are loaded with ``get_thresholds()``; each record is
    ``(profit, token_1, token_2, tf, wind, in_, out_)``. Records are tried in
    descending order of their first field, and records whose tokens have no
    column in ``main_df`` are ignored.

    For every row of ``main_df`` and every parameter record:
      * a long is opened when ``z_score < -in_`` and a short when
        ``z_score > in_``, provided fewer than ``max_pairs`` pairs are open and
        neither token is already in a position;
      * a long is closed when ``z_score > out_`` and a short when
        ``z_score < -out_`` (``reason=1``);
      * a position is closed on stop-loss (``reason=2``) when either leg has
        moved more than ``0.85 / leverage`` of its entry price against it.

    Args:
        main_df: polars DataFrame with a ``time`` column, one column per token,
            ``{token}_bid_price`` / ``_ask_price`` / ``_bid_size`` / ``_ask_size``
            columns and ``{token_1}_{token_2}_z_score_{wind}_{tf}`` columns.
        leverage: leverage used for sizing and for the stop-loss distance.
        max_pairs: maximum number of simultaneously open pairs.
        max_order_size: order size limit forwarded to ``get_qty`` / ``place_order``.
        fee_rate: fee rate forwarded to ``place_order``.
        start_time: forwarded to ``analyze_strategy`` as ``start_date``.
        end_time: forwarded to ``analyze_strategy`` as ``end_date``.
        coin_information: instrument data forwarded to ``get_qty``.

    Returns:
        ``(trades_df, metrics)``. ``(pl.DataFrame(), {})`` is returned when no
        trade was recorded or when a required column is missing from the trade
        records, so callers can always unpack two values.

    Note:
        ``min_order_size`` is not a parameter; it is read from the enclosing
        (notebook) scope.
    """
    # State shared with place_order(); this function only reads `pairs` and
    # `current_orders`, all updates to these containers happen in place_order().
    tokens_in_position = []
    pairs = []
    current_orders = {}
    trades = []

    # Build the filtered list in one pass. Removing items from a list while
    # iterating over it skips the element that follows every removed one.
    available_cols = set(main_df.columns)
    token_params = [
        record
        for record in sorted(get_thresholds(), key=lambda x: x[0], reverse=True)
        if record[1] in available_cols and record[2] in available_cols
    ]

    for row in main_df.iter_rows(named=True):
        time = row['time']

        for _, token_1, token_2, tf, wind, in_, out_ in token_params:
            try:
                z_score = row[f'{token_1}_{token_2}_z_score_{wind}_{tf}']
                low_in = -in_
                low_out = -out_
                high_in = in_
                high_out = out_
            except KeyError:
                # No z-score column for this (pair, wind, tf) combination.
                continue

            if z_score is None or row[token_1] is None or row[token_2] is None:
                continue

            # ----- Entry conditions -----
            if (len(pairs) < max_pairs and token_1 not in tokens_in_position and token_2 not in tokens_in_position):

                # --- Open long: token_1 priced at ask, token_2 at bid ---
                if z_score < low_in:
                    t1_price = row[f'{token_1}_ask_price']
                    t2_price = row[f'{token_2}_bid_price']
                    t1_vol = row[f'{token_1}_ask_size']
                    t2_vol = row[f'{token_2}_bid_size']
                    qty_1, qty_2 = get_qty(token_1, token_2, t1_price, t2_price, None, coin_information, 2 * max_order_size * leverage,
                              method='usdt_neutral')
                    place_order(tokens_in_position, pairs, current_orders, trades, time, token_1, token_2,
                                'open', 'long', qty_1, qty_2, t1_price, t2_price, t1_vol, t2_vol, None, z_score,
                                tf, wind, in_, out_, fee_rate, min_order_size, max_order_size, leverage, verbose=False)

                # --- Open short: token_1 priced at bid, token_2 at ask ---
                if z_score > high_in:
                    t1_price = row[f'{token_1}_bid_price']
                    t2_price = row[f'{token_2}_ask_price']
                    t1_vol = row[f'{token_1}_bid_size']
                    t2_vol = row[f'{token_2}_ask_size']
                    qty_1, qty_2 = get_qty(token_1, token_2, t1_price, t2_price, None, coin_information, 2 * max_order_size * leverage,
                              method='usdt_neutral')
                    place_order(tokens_in_position, pairs, current_orders, trades, time, token_1, token_2,
                                'open', 'short', qty_1, qty_2, t1_price, t2_price, t1_vol, t2_vol, None, z_score,
                                tf, wind, in_, out_, fee_rate, min_order_size, max_order_size, leverage, verbose=False)

            # ----- Exit conditions -----
            # --- Close long ---
            if z_score > high_out and (token_1, token_2, 'long') in pairs:
                t1_price = row[f'{token_1}_bid_price']
                t2_price = row[f'{token_2}_ask_price']
                t1_vol = row[f'{token_1}_bid_size']
                t2_vol = row[f'{token_2}_ask_size']
                qty_1 = current_orders[(token_1, token_2)]['qty_1']
                qty_2 = current_orders[(token_1, token_2)]['qty_2']
                place_order(tokens_in_position, pairs, current_orders, trades, time, token_1, token_2,
                            'close', 'long', qty_1, qty_2, t1_price, t2_price, t1_vol, t2_vol, None, z_score,
                                tf, wind, in_, out_, fee_rate, min_order_size, max_order_size, leverage, reason=1, verbose=False)

            # --- Close short ---
            if z_score < low_out and (token_1, token_2, 'short') in pairs:
                t1_price = row[f'{token_1}_ask_price']
                t2_price = row[f'{token_2}_bid_price']
                t1_vol = row[f'{token_1}_ask_size']
                t2_vol = row[f'{token_2}_bid_size']
                qty_1 = current_orders[(token_1, token_2)]['qty_1']
                qty_2 = current_orders[(token_1, token_2)]['qty_2']
                place_order(tokens_in_position, pairs, current_orders, trades, time, token_1, token_2,
                            'close', 'short', qty_1, qty_2, t1_price, t2_price, t1_vol, t2_vol, None, z_score,
                                tf, wind, in_, out_, fee_rate, min_order_size, max_order_size, leverage, reason=1, verbose=False)

            # --- Stop-loss check ---
            # The stop sits 0.85 / leverage of the entry price away from each leg.
            if (token_1, token_2, 'long') in pairs:
                op_1 = current_orders[(token_1, token_2)]['t1_price']
                op_2 = current_orders[(token_1, token_2)]['t2_price']
                t1_price = row[f'{token_1}_bid_price']
                t2_price = row[f'{token_2}_ask_price']

                sl_price_1 = op_1 - 0.85 * op_1 / leverage
                sl_price_2 = op_2 + 0.85 * op_2 / leverage

                if t1_price < sl_price_1 or t2_price > sl_price_2:
                    qty_1 = current_orders[(token_1, token_2)]['qty_1']
                    qty_2 = current_orders[(token_1, token_2)]['qty_2']
                    t1_vol = row[f'{token_1}_bid_size']
                    t2_vol = row[f'{token_2}_ask_size']

                    place_order(tokens_in_position, pairs, current_orders, trades, time, token_1, token_2,
                                'close', 'long', qty_1, qty_2, t1_price, t2_price, t1_vol, t2_vol, None, z_score,
                                tf, wind, in_, out_, fee_rate, min_order_size, max_order_size, leverage, reason=2, verbose=False)

            if (token_1, token_2, 'short') in pairs:
                op_1 = current_orders[(token_1, token_2)]['t1_price']
                op_2 = current_orders[(token_1, token_2)]['t2_price']
                t1_price = row[f'{token_1}_ask_price']
                t2_price = row[f'{token_2}_bid_price']

                sl_price_1 = op_1 + 0.85 * op_1 / leverage
                sl_price_2 = op_2 - 0.85 * op_2 / leverage

                if t1_price > sl_price_1 or t2_price < sl_price_2:
                    qty_1 = current_orders[(token_1, token_2)]['qty_1']
                    qty_2 = current_orders[(token_1, token_2)]['qty_2']
                    t1_vol = row[f'{token_1}_ask_size']
                    t2_vol = row[f'{token_2}_bid_size']

                    place_order(tokens_in_position, pairs, current_orders, trades, time, token_1, token_2,
                                'close', 'short', qty_1, qty_2, t1_price, t2_price, t1_vol, t2_vol, None, z_score,
                                tf, wind, in_, out_, fee_rate, min_order_size, max_order_size, leverage, reason=2, verbose=False)

    trades_df = pl.DataFrame(trades)
    if trades_df.is_empty():
        return pl.DataFrame(), dict()

    try:
        trades_df = trades_df.with_columns(
            (pl.col('open_time').dt.timestamp() // 1_000_000).alias('open_ts'),
            (pl.col('close_time').dt.timestamp() // 1_000_000).alias('close_ts'),
            (pl.col('close_time') - pl.col('open_time')).alias('duration'),
        )

        metrics = analyze_strategy(trades_df, start_date=start_time, end_date=end_time, initial_balance=200.0)
        return trades_df, metrics
    except pl.exceptions.ColumnNotFoundError:
        # Same shape as the "no trades" result, so `a, b = run_...()` never
        # fails with "cannot unpack non-iterable NoneType".
        return pl.DataFrame(), dict()

In [None]:
trades_df, metrics = run_single_tf_best_params_backtest(df, leverage, max_pairs, max_order_size,
                           fee_rate, start_time, end_time, coin_information)

In [None]:
trades_df

In [None]:
metrics

In [None]:
# NOTE(review): this cell contained only the tail of a row-printing f-string
# (no `print(f"...` opening), which is a SyntaxError and stops "Run All".
# The fragment is preserved below as a comment; restore the full statement or
# delete the cell.
# {row['profit']:>6}, {row['max_drawdown']:>6}, {row['max_profit']:>6}, {row['max_loss']:>6}, \
# {row['avg_profit']:>6}, {row['profit_std']:>5}, {row['profit_ratio']:>5}")

In [None]:
# ------- Top 5 profit ratio (1 timeframe, direct) -------
#  tf  w   in    out  n_tr  profit    drdwn   mx_pr    mx_ls  avg_pr  std     pr_rat
# 4h, 24,  1.6, 0.25,  85,  163.57,   -5.83,  14.43,  -5.83,   1.92,   2.9,   19.532
# 4h, 14,  1.8,  0.5, 101,  160.61,   -5.59,  13.52,  -5.59,   1.59,  2.82,   19.504
# 1h, 120, 2.5,  0.5,  47,  132.61,   -2.92,  12.61,  -2.92,   2.82,  3.16,   19.306
# 1h, 48,  2.0, 0.25, 127,  147.61,   -4.74,   9.71,   -3.8,   1.16,  1.95,   19.278
# 4h, 24,  1.6,  0.0, 107,  169.36,   -6.87,   8.24,  -6.55,   1.58,  2.29,   19.198

# ------- Top 5 profit ratio (1 timeframe, direct) -------
#  tf  w   in    out  n_tr  avg_dur   profit   drdwn  mx_pr   mx_ls   avg_pr  std   pr_rat
# 1h, 72,  2.0,  1.5,  61, 1d, 9:57,  41.86, -18.62,   6.99,  -7.03,   0.69,  2.71, 2.775
# 1h, 48,  2.0, 0.25, 167, 12:22:55,  55.42, -28.27,  11.92,  -7.83,   0.33,  2.28,  2.77
# 1h, 48,  2.0, 1.25, 108, 22:27:26,  32.96, -16.37,   8.56, -10.09,   0.31,  2.58, 2.377
# 1h, 64, 2.25, 0.25, 112, 15:34:33,  43.87, -26.72,  14.72, -10.92,   0.39,  2.92, 2.258
# 1h, 96,  1.4, 1.25,  57, 1d 18:03,  36.42, -21.08,  11.37, -10.76,   0.64,  3.85, 2.176

# 4h, 18,  2.0,  1.5,  54, 1d 13:01,  66.41,  -6.72,   8.85,  -6.31,   1.23,  2.76, 7.529
# 4h, 18,  1.8,  1.5,  57, 1d 15:36,  62.06, -17.84,  23.05, -10.05,   1.09,  4.19, 4.114
# 4h, 18,  2.0, 1.25,  61, 1d, 9:37,  40.44, -11.82,   9.34, -11.04,   0.66,  3.24, 3.482
# 4h, 24,  1.8,  1.5,  41, 2d, 1:43,   47.7, -17.59,   13.2,  -6.96,   1.16,  3.98, 3.202
# 4h, 18,  1.6, 1.25,  64, 1d 12:23,  38.54, -15.38,  21.41, -13.81,    0.6,  4.06, 2.809

# 4h, 18,  1.8, 1.25,  57,  0,  0.3, 102.86,   -7.5,  19.17,  -3.72,    1.8,  3.68, 10.949
# 4h, 18,  1.8,  1.5,  58,  0,  0.1,  104.1,  -8.09,  22.74,  -6.47,   1.79,  3.86, 10.684
# 4h, 18,  1.8, 1.25,  71,  0,  0.2,   86.0, -14.73,  19.75,  -8.63,   1.21,  3.76, 6.469
# 4h, 18,  1.8,  1.5,  37,  0,  0.3,  65.26, -12.17,  21.48, -10.44,   1.76,  5.51, 5.303
# 4h, 18,  1.8,  1.5,  51,  0,  0.2,  68.38, -14.39,  22.08,  -6.56,   1.34,   4.2, 5.173

# 4h, 18,  1.6,  1.5,  59, 0.5,   0,  60.34, -14.93,  22.29,  -9.19,   1.02,  4.18, 4.467
# 4h, 18,  1.6, 1.25,  70, 0.5,   0,   60.7, -15.67,  21.38, -12.36,   0.87,  4.14, 4.368
# 4h, 18,  1.6, 1.25,  68, 0.5,   0,  37.85, -13.37,  21.38, -13.37,   0.56,  3.94, 3.003
# 4h, 24,  1.6, 1.25,  50, 0.5,   0,  41.85, -20.38,   10.3,  -6.99,   0.84,  3.28, 2.586
# 4h, 18,  1.6,  1.5,  60, 0.5,   0,  49.58, -26.42,  22.29, -18.09,   0.83,  4.74, 2.487

#### Поиск наилучших параметров входа random search

In [None]:
# Configuration for the random parameter search below.

# NOTE(review): `method` is not read by any cell visible in this section;
# presumably it selects the linear-regression z-score variant — confirm.
method = 'lr'
# Backtest window, expressed in Moscow time.
start_time = datetime(2025, 9, 16, 0, tzinfo=ZoneInfo("Europe/Moscow"))
end_time = datetime(2025, 9, 26, 0, tzinfo=ZoneInfo("Europe/Moscow"))
# Order-size bounds and portfolio limits forwarded to the backtest functions.
min_order_size = 40
max_order_size = 50
max_pairs = 5
leverage = 2
fee_rate = 0.00055

# Candidate timeframes and, per timeframe, the candidate window lengths.
tfs = ('4h', '1h', '5m')
winds = {'4h': [10, 14, 18, 24],
         '1h': [36, 48, 60, 72, 96],
         '5m': [60, 90, 120, 180, 240, 300]}

# Number of timeframes per run is drawn uniformly from this tuple, so the
# repeated 1 makes single-timeframe runs twice as likely as two-timeframe runs.
n_tf_params = (1, 1, 2)
# Candidate entry / exit z-score thresholds.
in_params = (1.75, 2.0, 2.25, 2.5)
out_params = (0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25)
# Candidate distances used by the "reverse" entry / exit modes.
dist_in_params = (0.1, 0.3, 0.5, 0.75)
dist_out_params = (0.1, 0.3, 0.5, 0.75)

# Collects one result dict per completed backtest run.
metrics_arr = []

In [None]:
n_iter = 5_000
n_failed = 0        # iterations whose backtest raised an exception
last_error = None   # repr() of the most recent exception, reported after the loop

# Random search: every iteration samples one parameter combination, runs the
# matching backtest, appends the result to ./logs/backtest_res.jsonl and to
# `metrics_arr`, and prints a one-line summary.
#
# NOTE(review): other cells of this notebook call run_single_tf_backtest /
# run_single_tf_backtest_reverse with additional `min_order_size` and `sl_ratio`
# arguments. Confirm the calls below match the current signatures; a mismatch
# would make every single-timeframe iteration fail (see the summary printed
# after the loop).
with tqdm(total=n_iter, desc="Обработка", unit="iter") as progress_bar:
    for _ in range(n_iter):
        try:
            n_tf = random.choice(n_tf_params)
            trades_df, metrics = None, None

            if n_tf == 1:
                mode = random.choice(['1_tf_direct', '1_tf_rev_in', '1_tf_rev_out', '1_tf_rev_both'])
                tf_1 = random.choice(tfs[:2]) # '5m' is not used for single-timeframe runs
                wind_1 = random.choice(winds[tf_1])
                tf_2, wind_2 = 0, 0
                in_1 = random.choice(in_params)
                out_1 = random.choice(out_params)
                in_2, out_2 = 0, 0

                tdf = select_cols_1tf(df, token_pairs, tf_1, wind_1)

                if mode == '1_tf_direct':
                    dist_in, dist_out = 0, 0
                    trades_df, metrics = run_single_tf_backtest(tdf, tf_1, wind_1, in_1, out_1, leverage, max_pairs, max_order_size,
                           fee_rate, start_time, end_time, coin_information)
                elif mode == '1_tf_rev_in':
                    dist_in = random.choice(dist_in_params)
                    dist_out = 0
                    trades_df, metrics = run_single_tf_backtest_reverse(tdf, tf_1, wind_1, in_1, out_1, dist_in, dist_out, max_pairs, leverage,
                                   max_order_size, fee_rate, start_time, end_time, coin_information,
                                   reverse_in=True, reverse_out=False)
                elif mode == '1_tf_rev_out':
                    dist_in = 0
                    dist_out = random.choice(dist_out_params)
                    trades_df, metrics = run_single_tf_backtest_reverse(tdf, tf_1, wind_1, in_1, out_1, dist_in, dist_out, max_pairs, leverage,
                                   max_order_size, fee_rate, start_time, end_time, coin_information,
                                   reverse_in=False, reverse_out=True)
                elif mode == '1_tf_rev_both':
                    dist_in = random.choice(dist_in_params)
                    dist_out = random.choice(dist_out_params)
                    trades_df, metrics = run_single_tf_backtest_reverse(tdf, tf_1, wind_1, in_1, out_1, dist_in, dist_out, max_pairs, leverage,
                                   max_order_size, fee_rate, start_time, end_time, coin_information,
                                   reverse_in=True, reverse_out=True)

            elif n_tf == 2:
                mode = '2_tf_direct'
                tf_1, tf_2 = random.choices(tfs, k=2)
                wind_1 = random.choice(winds[tf_1])
                wind_2 = random.choice(winds[tf_2])
                dist_in, dist_out = 0, 0
                in_1 = random.choice(in_params)
                out_1 = random.choice(out_params)
                in_2 = random.choice(in_params)
                out_2 = random.choice(out_params)

                # Skip degenerate combinations: identical (tf, wind) twice, or two '5m' frames.
                if tf_1 == tf_2 and wind_1 == wind_2:
                    continue
                if tf_1 == '5m' and tf_2 == '5m':
                    continue

                tdf = select_cols_2tf(df, token_pairs, tf_1=tf_1, wind_1=wind_1, tf_2=tf_2, wind_2=wind_2)
                trades_df, metrics = run_double_tf_backtest(tdf, tf_1, wind_1,
                                                tf_2, wind_2, in_1, out_1, in_2, out_2, leverage,
                                                max_pairs, max_order_size, fee_rate, start_time, end_time,
                                                coin_information)
            else:
                print('unknown mode!')
                continue

            # An empty / None metrics object means the run produced no trades.
            if not metrics:
                continue

            log = {'n_tf': n_tf, 'tf_1': tf_1, 'tf_2': tf_2, 'wind_1': wind_1, 'wind_2': wind_2,
                    'in_1': in_1, 'in_2': in_2, 'out_1': out_1, 'out_2': out_2,
                    'dist_in': dist_in, 'dist_out': dist_out,
                    'n_trades': metrics['n_trades'],
                    'duration_min': metrics['duration_min'].total_seconds(), 'duration_max': metrics['duration_max'].total_seconds(),
                    'duration_avg': metrics['duration_avg'].total_seconds(), 'stop_losses': metrics['stop_losses'],
                    'liquidations': metrics['liquidations'],
                    'profit': metrics['profit'], 'max_drawdown': metrics['max_drawdown'], 'max_profit': metrics['max_profit'],
                    'max_loss': metrics['max_loss'], 'avg_profit': metrics['avg_profit'], 'profit_std': metrics['profit_std'],
                    'profit_ratio': metrics['profit_ratio']}
            json_log = json.dumps(log, default=float, ensure_ascii=False)
            with open('./logs/backtest_res.jsonl', 'a', encoding='utf-8') as f:
                f.write(json_log + '\n')

            # with open('./logs/trades_bt.jsonl', 'a', encoding='utf-8') as f:
            #     for trade in trades_df.to_dicts():
            #         trade.pop('open_time')
            #         trade.pop('close_time')
            #         trade.pop('duration')

            #         trade_log = json.dumps(trade, default=float, ensure_ascii=False)
            #         f.write(trade_log + '\n')

            # Pulled into locals so the f-strings below contain no nested quotes
            # (reusing the same quote inside an f-string needs Python 3.12+).
            profit = metrics['profit']
            mode_name = mode[5:]

            if n_tf == 1:
                prefix = (f'n_tf: {n_tf} ({mode_name:>8}); tf: {tf_1}; wind: {wind_1:>3}; in: {in_1:>4}; '
                          f'out: {out_1:>4}; ')
                if mode_name == 'direct':
                    print(prefix + f'profit: {profit:.1f}')
                elif mode_name == 'rev_in':
                    print(prefix + f'dist_in: {dist_in:>4}, profit: {profit:.1f}')
                elif mode_name == 'rev_out':
                    print(prefix + f'dist_out: {dist_out:>4}, profit: {profit:.1f}')
                elif mode_name == 'rev_both':
                    # This branch used to test for 'rev_out' a second time and was unreachable.
                    print(prefix + f'dist_in: {dist_in:>4}, dist_out: {dist_out:>4}, profit: {profit:.1f}')

            elif n_tf == 2:
                print(f'n_tf: {n_tf} ({mode_name:>8}); tf_1: {tf_1}; wind_1: {wind_1:>3}; tf_2: {tf_2}; '
                      f'wind_2: {wind_2:>3}; in_1: {in_1:>4}; out_1: {out_1:>4}; in_2: {in_2:>4}; out_2: {out_2:>4}; '
                      f'profit: {profit:.1f}')

            metrics_arr.append(log)
        except Exception as err:
            # Best-effort search: one failed combination must not stop the run,
            # but failures are counted so they do not go unnoticed.
            n_failed += 1
            last_error = repr(err)
        finally:
            # Advance once per attempted iteration (including skipped and failed
            # ones) so the bar actually reaches `n_iter`.
            progress_bar.update(1)

if n_failed:
    print(f'{n_failed} of {n_iter} iterations failed; last error: {last_error}')

In [None]:
output = pl.DataFrame(metrics_arr).sort(by='profit_ratio', descending=True).drop('duration_min', 'duration_avg',
                            'duration_max', 'stop_losses', 'liquidations')
output

In [None]:
# Print the five best random-search runs. `output` is built from the random-search
# log dicts, whose keys are tf_1/tf_2, wind_1/wind_2, in_*/out_*, dist_*;
# the duration columns have already been dropped from `output`.
for row in output[:5].iter_rows(named=True):
    print(row['n_tf'], row['tf_1'], row['wind_1'], row['tf_2'], row['wind_2'],
          row['in_1'], row['out_1'], row['in_2'], row['out_2'], row['dist_in'], row['dist_out'],
          row['n_trades'], row['profit'], row['max_drawdown'], row['max_profit'], row['max_loss'],
          row['avg_profit'], row['profit_std'], row['profit_ratio'])

In [None]:
mode, n_tf, tf_1, tf_2, wind_1, wind_2, dist_in, dist_out, in_1, out_1, in_2, out_2

#### EDA

In [None]:
from datetime import datetime, timezone, timedelta
from zoneinfo import ZoneInfo
import polars as pl
import numpy as np
import pickle
import random
import json
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
mpl.rcParams['timezone'] = 'Europe/Moscow'

In [None]:
trades = pl.read_ndjson('./logs/trades_bt.jsonl')
trades = trades.unique()
# trades = trades.filter(pl.col('open_z_score') > 0)
# trades = trades.filter((pl.col('open_z_score') > 1.0) & (pl.col('open_z_score') < 5.0))
# start_times = trades['open_ts'].unique().to_list()

# trades = trades.with_columns(
#     (pl.col("total_profit") > 0).alias("profit_cat")
# )

In [None]:
trades.height

In [None]:
trades['open_z_score'].min(), trades['open_z_score'].max()

In [None]:
trades['total_profit'].mean(), trades['total_profit'].median(), trades['total_profit'].min(), trades['total_profit'].max()

In [None]:
# Count, per pair, the trades that were closed for any reason other than 1.
sl_trades = (
    trades
    .filter(pl.col('reason') != 1)
    .group_by('token_1', 'token_2')
    .agg(pl.col('total_profit').len().alias('n_trades'))
)

# Every such pair becomes a removal candidate, stored as a (token_1, token_2) tuple.
pairs_to_remove = sl_trades.select('token_1', 'token_2').rows()

In [None]:
pairs_to_remove

In [None]:
profitable_trades = trades.group_by('token_1', 'token_2').agg(
    pl.col('total_profit').mean().alias('avg_profit'),
    pl.col('total_profit').min().alias('min_profit'),
    pl.col('total_profit').max().alias('max_profit'),
    pl.col('total_profit').len().alias('n_trades'),
    pl.col('reason').mean().alias('mean_reason')
).sort(by='avg_profit').filter(pl.col('avg_profit') > 0.5)

In [None]:
profitable_trades

In [None]:
t1_agg_trades = trades.group_by('token_1').agg(
    pl.col('total_profit').sum().alias('profit'),
    pl.col('total_profit').len().alias('n_trades'),
).rename({'token_1': 'token', 'profit': 'profit_1', 'n_trades': 'n_trades_1'})

t2_agg_trades = trades.group_by('token_2').agg(
    pl.col('total_profit').sum().alias('profit'),
    pl.col('total_profit').len().alias('n_trades'),
).rename({'token_2': 'token', 'profit': 'profit_2', 'n_trades': 'n_trades_2'})

In [None]:
# Full join: a token that only ever appears as token_1 (or only as token_2) must
# still get a row, otherwise it is silently excluded from the per-token average.
token_df = t1_agg_trades.join(t2_agg_trades, on='token', how='full', coalesce=True)
token_df = token_df.with_columns(
    # A missing side contributes no profit and no trades.
    pl.col('profit_1', 'profit_2', 'n_trades_1', 'n_trades_2').fill_null(0)
).with_columns(
    ((pl.col('profit_1') + pl.col('profit_2')) / (pl.col('n_trades_1') + pl.col('n_trades_2'))).alias('avg_profit')
).sort(by='avg_profit')

In [None]:
bad_coins = token_df.filter(pl.col('avg_profit') < 0).select('token').to_series().to_list()

In [None]:
profitable_trades

In [None]:
# (token_1, token_2) tuples of every pair kept in `profitable_trades`.
profitable_coins = profitable_trades.select('token_1', 'token_2').rows()

In [None]:
# bad_coins = ('C98', 'ARB', 'CELO')

In [None]:
for token_1, token_2 in profitable_coins:
    if token_1 in bad_coins or token_2 in bad_coins:
        pairs_to_remove.append((token_1, token_2))
pairs_to_remove

In [None]:
# Rebuild the list instead of calling list.remove(): `pairs_to_remove` may hold
# pairs that are not in `profitable_coins` at all, or the same pair more than
# once, and remove() raises ValueError in both cases. Filtering is also safe to
# re-run.
_excluded_pairs = set(pairs_to_remove)
profitable_coins = [pair for pair in profitable_coins if pair not in _excluded_pairs]

In [None]:
sns.relplot(data=trades, x="open_z_score", y="total_profit", height=4, aspect=3.5);
plt.grid();

In [None]:
sns.relplot(data=trades, x="spread", y="total_profit", height=4, aspect=3.5);
plt.grid();

In [None]:
# sns.displot(trades.to_pandas(), x="open_z_score", height=4, aspect=3.5, hue='profit_cat');

In [None]:
# Open the parquet file lazily; nothing is read into memory yet.
df = pl.scan_parquet('./data/full.parquet')

# List every column name from the lazy frame's schema. No column is dropped
# here; the names are used further down to pick the spread-related columns.
all_cols = df.collect_schema().names()

In [None]:
add_data = pl.DataFrame()

# Many trades share the same (token_1, token_2, wind, tf) combination, so match
# the column names once per distinct combination instead of once per trade.
trade_keys = trades.select('token_1', 'token_2', 'wind', 'tf').unique()

# dict keys keep insertion order and drop duplicates: a column matched by more
# than one combination is listed once.
matched_cols = {}
for t1, t2, w, tf in tqdm(trade_keys.iter_rows(), total=trade_keys.height):
    prefix = f"{t1}_{t2}"
    suffix = f"_{w}_{tf}"
    for name in all_cols:
        if name.startswith(prefix) and name.endswith(suffix) and 'spread' in name:
            matched_cols[name] = None

cols = list(matched_cols)

In [None]:
cols = list(set(cols))

In [None]:
# Timestamps at which trades were opened. `start_times` was previously defined
# only in a commented-out line, so this cell failed with NameError on a fresh kernel.
start_times = trades['open_ts'].unique().to_list()

# Keep only the rows at those timestamps and only the spread-related columns,
# then materialise the lazy frame.
df = df.filter(pl.col('ts').is_in(start_times)).select(['time', 'ts'] + cols).collect()

In [None]:
# For every trade, look up the spread, its mean and its std at the trade's
# opening timestamp; the three lists stay aligned with the rows of `trades`.
add_data = pl.DataFrame()
spr, spr_mean, spr_std = [], [], []

for row in tqdm(trades.iter_rows(named=True), total = trades.height):
    ts = row['open_ts']
    t1 = row['token_1']
    t2 = row['token_2']
    w = row['wind']
    tf = row['tf']

    # Spread-related columns of this pair for the trade's window and timeframe.
    col = ['ts'] + [col for col in all_cols if (col.startswith(f"{t1}_{t2}") and col.endswith(f"_{w}_{tf}") and 'spread' in col)]
    tr = df.select(col).filter(pl.col('ts') == ts)

    try:
        spr.append(tr[f'{t1}_{t2}_spread_{w}_{tf}'][0])
        spr_mean.append(tr[f'{t1}_{t2}_spread_mean_{w}_{tf}'][0])
        spr_std.append(tr[f'{t1}_{t2}_spread_std_{w}_{tf}'][0])
    except IndexError:
        # No row in `df` has this timestamp: record the trade's values as missing.
        spr.append(None)
        spr_mean.append(None)
        spr_std.append(None)

In [None]:
# trades = trades.with_columns(
#     pl.Series(spr).alias('spread'),
#     pl.Series(spr_mean).alias('spread_mean'),
#     pl.Series(spr_std).alias('spread_std'),
# )

# trades.write_parquet('./data/trades.parquet')

In [None]:
trades

In [None]:
trades.drop('close_ts')

In [None]:
trades['spread_mean'].min(), trades['spread_mean'].max()

In [None]:
trades.filter(pl.col('side') == 'long').drop('token_1', 'token_2', 'side', 'tf').corr()

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split

import pandas as pd

In [None]:
X = trades.drop("total_profit", 'token_1', 'token_2', 'open_ts', 'close_ts').to_pandas()
y = trades["total_profit"].to_pandas()

In [None]:
# `Xs` is not defined anywhere in the notebook; the importances below are
# indexed by `X.columns`, so the split has to be made on `X`.
# NOTE(review): if `X` still holds non-numeric columns they must be encoded or
# dropped before fitting — confirm against the schema of `trades`.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
rf = RandomForestRegressor(n_estimators=200, random_state=42).fit(X_train, y_train)

# Permutation importance is measured on the held-out split.
res = permutation_importance(rf, X_test, y_test, n_repeats=30, random_state=42)
imp = pd.Series(res.importances_mean, index=X.columns).sort_values(ascending=False)
print(imp.head(20))

In [None]:
# stats = pl.read_parquet('./data/trades_stats.parquet')
# Load the random-search log, add |in| + |out| per timeframe as dist_1 / dist_2,
# and drop the columns that are not analysed below.
stats = (
    pl.read_ndjson('./logs/backtest_res.jsonl')
    .with_columns(
        (pl.col('in_1').abs() + pl.col('out_1').abs()).alias('dist_1'),
        (pl.col('in_2').abs() + pl.col('out_2').abs()).alias('dist_2'),
    )
    .drop('duration_min', 'duration_max', 'duration_avg', 'liquidations', 'max_profit')
)
stats.height

In [None]:
stats.sort(by='profit_ratio', descending=True).head(10)

In [None]:
stats.group_by('tf_1', 'tf_2', 'wind_1', 'wind_2').agg(
    pl.col('profit').mean().alias('mean_run'),
    pl.col('profit').max().alias('best_run'),
    pl.col('profit').min().alias('worst_run'),
    pl.col('max_drawdown').mean().alias('mean_drdn'),
    pl.col('profit').len().alias('n_trades')
).sort(by='mean_run', descending=True).head(10)

In [None]:
# Run the same single-timeframe backtest `n_iters` times on the profitable pairs
# and report the mean of the key metrics across runs.
n_iters = 10

# Parameters under test. The *_2 values are only used by the commented-out
# two-timeframe variant.
tf_1 = '4h'
tf_2 = '5m'
wind_1 = 18
wind_2 = 60

in_1 = 2.0
in_2 = 2.0
out_1 = 0.25
out_2 = 0.25

leverage = 2
sl_ratio = 0.1
max_pairs = 5
min_order_size = 40
max_order_size = 50
fee_rate = 0.00055

# Backtest window, expressed in Moscow time.
start_time = datetime(2025, 9, 16, 0, tzinfo=ZoneInfo("Europe/Moscow"))
end_time = datetime(2025, 9, 26, 0, tzinfo=ZoneInfo("Europe/Moscow"))

# NOTE(review): `df` is whatever the name is bound to when this cell runs; an
# earlier cell rebinds it to a frame holding only spread columns — confirm the
# intended input frame.
# tdf = select_cols_2tf(df, profitable_coins, tf_1=tf_1, wind_1=wind_1, tf_2=tf_2, wind_2=wind_2)
tdf = select_cols_1tf(df, profitable_coins, tf_1, wind_1)

# One entry per run.
profit_arr = []
profit_ratio_arr = []
max_drawdown_arr = []
n_trades_arr = []

for i in tqdm(range(n_iters)):
    # trades_df, metrics = run_double_tf_backtest(tdf, tf_1, wind_1,
    #                                 tf_2, wind_2, in_1, out_1, in_2, out_2, leverage,
    #                                 max_pairs, max_order_size, fee_rate, start_time, end_time,
    #                                 coin_information)
    trades_df, metrics = run_single_tf_backtest(tdf, tf_1, wind_1, in_1, out_1, leverage, max_pairs, min_order_size, max_order_size,
                           fee_rate, start_time, end_time, sl_ratio, coin_information=coin_information)

    # NOTE(review): these lookups raise KeyError if a run returns an empty
    # metrics dict — confirm the backtest always yields trades here.
    profit_arr.append(metrics['profit'])
    profit_ratio_arr.append(metrics['profit_ratio'])
    max_drawdown_arr.append(metrics['max_drawdown'])
    n_trades_arr.append(metrics['n_trades'])

# Arithmetic means over all runs; the mean trade count is truncated to an int.
mean_profit = sum(profit_arr) / n_iters
mean_pr_ratio = sum(profit_ratio_arr) / n_iters
mean_drdn = sum(max_drawdown_arr) / n_iters
mean_n_tr = int(sum(n_trades_arr) / n_iters)

print(f'mean profit: {mean_profit:.1f}; mean pr_ratio: {mean_pr_ratio:.1f}; mean drdwn: {mean_drdn:.1f}; mean n_trades: {mean_n_tr}')

In [None]:
# 6.68 sec / it

In [None]:
# tf_1 = '5m', tf_2 = '1h', wind_1 = 60, wind_2 = 72, in_1 = 2.0, in_2 = 2.0, out_1 = 0.25, out_2 = 0.25
# train: mean profit: 40.9; mean pr_ratio: 3.8; mean drdwn: -11.3; mean n_trades: 56
# test:  mean profit: -3.6; mean pr_ratio: -0.1; mean drdwn: -38.5; mean n_trades: 42

# in_1 = 2.25, in_2 = 2.25,
# train: mean profit: 32.2; mean pr_ratio: 2.9; mean drdwn: -10.9; mean n_trades: 47
# test:  mean profit: -51.5; mean pr_ratio: -1.2; mean drdwn: -63.4; mean n_trades: 41

# tf_1 = '4h', tf_2 = '5m', wind_1 = 18, wind_2 = 180, in_1 = 2.25, in_2 = 2.25, out_1 = 0.25, out_2 = 0.25
# train: mean profit: 40.7; mean pr_ratio: 6.0; mean drdwn: -3.1; mean n_trades: 50
# test:  mean profit: -67.4; mean pr_ratio: -1.2; mean drdwn: -84.6; mean n_trades: 36

# in_1 = 2.0, in_2 = 2.0,
# train: mean profit: 43.1; mean pr_ratio: 3.7; mean drdwn: -12.0; mean n_trades: 61
# test:  mean profit: -66.4; mean pr_ratio: -1.3; mean drdwn: -80.0; mean n_trades: 40

# tf_1 = '4h', tf_2 = '5m', wind_1 = 18, wind_2 = 180, in_1 = 2.25, in_2 = 2.25, out_1 = 2.0, out_2 = 0.25
# train: mean profit: 11.0; mean pr_ratio: 1.2; mean drdwn: -8.2; mean n_trades: 34
# test:  mean profit: -45.4; mean pr_ratio: -1.1; mean drdwn: -62.3; mean n_trades: 32

# tf_1 = '4h', wind_1 = 18, in_1 = 2.0, out_1 = 0.25
# train: mean profit: 24.0; mean pr_ratio: 2.2; mean drdwn: -10.6; mean n_trades: 58
# test:  mean profit: -44.1; mean pr_ratio: -0.8; mean drdwn: -70.4; mean n_trades: 42
# test:  mean profit: 29.1; mean pr_ratio: 1.5; mean drdwn: -26.6; mean n_trades: 46 (only profitable pairs)

In [None]:
trades_df.filter(pl.col('total_profit') > 0).height, trades_df.filter(pl.col('total_profit') < 0).height

In [None]:
trades_df.drop('beta', 'fees').sort(by='total_profit').head(5)

In [None]:
from bot.utils.pair_trading import make_df_from_orderbooks, make_trunc_df, make_zscore_df
from bot.utils.pair_trading import create_zscore_df, get_lr_zscore, get_qty, round_down, calculate_profit_curve, get_thresholds

In [None]:
# Quick sanity check: 1974.1 and 4274.0 are the qty_1 / qty_2 values used in the next cell;
# 0.05 and 0.023 are presumably the approximate token prices, making this the total
# position notional in USDT -- TODO confirm.
1974.1 * 0.05 + 4274.0 * 0.023

In [None]:
# --- Replay of one position on tick order-book data ---
# Rebuilds the z-score and the profit curve of a C98/VET position between st_t and end_t.
# Relies on `db_manager`, `fee_rate` and `ZoneInfo`, none of which are defined in this cell.
token_1 = 'C98'
token_2 = 'VET'
qty_1 = 1974.1
qty_2 = 4274.0
side = 'short'


min_order = 40
tf = '4h'
wind = 18
winds = np.array([wind,])

# train_time starts the history window; st_t / end_t bound the replayed interval.
train_time = datetime(2025, 9, 24, 8, 14, tzinfo=ZoneInfo("Europe/Moscow"))
st_t = datetime(2025, 10, 2, 8, 14, 30, tzinfo=ZoneInfo("Europe/Moscow"))
end_t = datetime(2025, 10, 2, 11, 15, 30, tzinfo=ZoneInfo("Europe/Moscow"))
start_ts = int(datetime.timestamp(st_t))
median_length = 4

# Tick order books of both legs over the whole history + replay window.
df_1 = db_manager.get_tick_ob(token=token_1 + '_USDT',
                                     start_time=train_time,
                                     end_time=end_t)
df_2 = db_manager.get_tick_ob(token=token_2 + '_USDT',
                                     start_time=train_time,
                                     end_time=end_t)

avg_df = make_df_from_orderbooks(df_1, df_2, token_1, token_2, start_time=train_time)
# Aggregate with `tf` instead of a second hard-coded '4h': the z-score column selected
# below is named after `tf`, so both must change together.
# NOTE(review): offset='3h' is kept as-is; confirm it is still right if `tf` is changed.
agg_df = make_trunc_df(avg_df, timeframe=tf, token_1=token_1, token_2=token_2, method='triple', offset='3h')

tick_df = make_df_from_orderbooks(df_1, df_2, token_1, token_2, start_time=st_t)
spread_df = create_zscore_df(token_1, token_2, tick_df, agg_df, tf, winds, min_order, start_ts, median_length)

# Entry prices are taken from the first tick: a long position buys token_1 at the ask and
# sells token_2 at the bid; a short position does the opposite.
t1_op = tick_df[f'{token_1}_ask_price'][0] if side == 'long' else tick_df[f'{token_1}_bid_price'][0]
t2_op = tick_df[f'{token_2}_bid_price'][0] if side == 'long' else tick_df[f'{token_2}_ask_price'][0]

# Keep only the columns needed for the profit curve, restricted to [st_t, end_t], and
# expose the window/timeframe-specific z-score under the generic name 'z_score'.
res_df = spread_df.select('time', 'ts', token_1, token_2, f'{token_1}_size', f'{token_2}_size',
     f'{token_1}_bid_price', f'{token_1}_ask_price', f'{token_1}_bid_size', f'{token_1}_ask_size',
     f'{token_2}_bid_price', f'{token_2}_ask_price', f'{token_2}_bid_size', f'{token_2}_ask_size',
     f'z_score_{wind}_{tf}').filter(
        (pl.col('time') >= st_t) & (pl.col('time') <= end_t)
     ).rename({f'z_score_{wind}_{tf}': 'z_score'})
profit_df = calculate_profit_curve(res_df, token_1, token_2, side, t1_op, t2_op, t1_qty=qty_1, t2_qty=qty_2, fee_rate=fee_rate)

In [None]:
fig, (ax1, ax3) = plt.subplots(2, 1, figsize=(14, 6), sharex=True)

# --- Price panel: token_1 on the left axis, token_2 on a twin right axis ---
ax1.plot(tick_df['time'], tick_df[token_1], label=token_1, color='orange')
ax1.set_ylabel('usdt')
# grid(True), not grid(): with no arguments grid() toggles, which switches the grid OFF
# under the seaborn 'whitegrid' style that enables it by default.
ax1.grid(True)

ax2 = ax1.twinx()
ax2.plot(tick_df['time'], tick_df[token_2], label=token_2)
ax2.grid(False)  # avoid a second, misaligned grid from the twin axis

# One legend for both price lines, attached to the top-most (twin) axis.
price_lines = [*ax1.get_lines(), *ax2.get_lines()]
ax2.legend(price_lines, [line.get_label() for line in price_lines], loc='upper left')

# --- Signal panel: z-score on the left axis, running profit on a twin right axis ---
ax3.plot(res_df['time'], res_df['z_score'], label='z_score')
ax3.set_ylabel('z_score')
ax3.grid(True)

ax4 = ax3.twinx()
ax4.plot(profit_df['time'], profit_df['profit'], c='g', label='profit')
ax4.set_ylabel('profit')
ax4.grid(False)

# plt.legend() would target only the current (twin) axis; build the legend explicitly
# so that both the z-score and the profit line are listed.
signal_lines = [*ax3.get_lines(), *ax4.get_lines()]
ax4.legend(signal_lines, [line.get_label() for line in signal_lines], loc='upper left');

In [None]:
# Display the frame returned by calculate_profit_curve in the replay cell.
profit_df

In [None]:
# Display `metrics`; it is not defined in this part of the notebook, so an earlier cell must have produced it.
metrics

In [None]:
# Reference snapshot of a backtest metrics dict (pasted from an earlier output).
# The durations use `timedelta` directly: this notebook imports the `datetime` class
# (`from datetime import datetime, timedelta`), so `datetime.timedelta` would raise AttributeError.
{'total_days': 10.0,
 'n_trades': 55,
 'duration_min': timedelta(seconds=185),
 'duration_max': timedelta(days=2, seconds=38660),
 'duration_avg': timedelta(seconds=67501),
 'stop_losses': 0,
 'liquidations': 0,
 'initial_balance': 200.0,
 'final_balance': 261.3689,
 'profit': 61.37,
 'total_perc_return': 30.68,
 'max_drawdown': -5.14,
 'max_profit': 15.18,
 'max_loss': -4.76,
 'avg_profit': 1.12,
 'profit_std': 2.96,
 'profit_ratio': 7.654}


In [None]:
# Load the backtest trade log (newline-delimited JSON) into a polars DataFrame.
trades = pl.read_ndjson('./logs/trades_bt.jsonl')


In [None]:
# Per-pair summary of the backtest log: mean / best / worst total_profit and the number
# of rows per pair, ordered from the best mean result to the worst.
(
    trades
    .group_by('token_1', 'token_2')
    .agg(
        mean_run=pl.col('total_profit').mean(),
        best_run=pl.col('total_profit').max(),
        worst_run=pl.col('total_profit').min(),
        n_trades=pl.col('total_profit').len(),
    )
    .sort('mean_run', descending=True)
    # .filter((pl.col('token_1') == 'IOTA') & (pl.col('token_2') == 'SAND'))
)

In [None]:
# for row in trades.group_by('token_1', 'token_2').agg().iter_rows(named=True):
#     print(row['token_1'], row['token_2'])

#### Анализ совершённых сделок

In [None]:
# Pick the open and close log records of one executed trade, identified by the pair
# and by the exact `ct` value of each record.
orders = pl.read_ndjson('./logs/trades.jsonl')

token_1 = 'APT'
token_2 = 'FIL'

ot = "2025-09-16 17:07:14"
ct = "2025-09-16 23:09:17"

# Several predicates passed to filter() are combined with AND.
open_ = orders.filter(
    pl.col('token_1') == token_1,
    pl.col('token_2') == token_2,
    pl.col('action') == 'open',
    pl.col('ct') == ot,
)
close = orders.filter(
    pl.col('token_1') == token_1,
    pl.col('token_2') == token_2,
    pl.col('action') == 'close',
    pl.col('ct') == ct,
)

In [None]:
# Show the key fields of the open record: strategy settings, quotes, and signal values.
open_.select([
    'ct', 'token_1', 'token_2',
    'tf', 'wind', 'thresh_in', 'thresh_out', 'side', 'action',
    't1_bid_price', 't1_ask_price', 't2_bid_price', 't2_ask_price',
    'z_score', 'beta',
])

In [None]:
# Show the key fields of the close record: strategy settings, quotes, and signal values.
close.select([
    'ct', 'token_1', 'token_2',
    'tf', 'wind', 'thresh_in', 'thresh_out', 'side', 'action',
    't1_bid_price', 't1_ask_price', 't2_bid_price', 't2_ask_price',
    'z_score', 'beta',
])

In [None]:
# Parameters of the selected trade, read from its open/close log records.
max_position_size = 200
leverage = 2
tf, wind = open_['tf'][0], open_['wind'][0]
beta = open_['beta'][0]
side = open_['side'][0]

# First element of the 't1' / 't2' columns of each record
# (presumably the price history logged with the order -- TODO confirm).
t1_open, t2_open = (open_[leg].to_numpy()[0] for leg in ('t1', 't2'))
t1_close, t2_close = (close[leg].to_numpy()[0] for leg in ('t1', 't2'))

# Execution prices: the bought leg uses the ask, the sold leg uses the bid,
# and the closing prices use the opposite side of the book.
if side == 'long':
    side_2 = 'short'
    t1_op, t2_op = open_['t1_ask_price'][0], open_['t2_bid_price'][0]
    t1_cl, t2_cl = close['t1_bid_price'][0], close['t2_ask_price'][0]
else:
    side_2 = 'long'
    t1_op, t2_op = open_['t1_bid_price'][0], open_['t2_ask_price'][0]
    t1_cl, t2_cl = close['t1_ask_price'][0], close['t2_bid_price'][0]

In [None]:
# Recompute the z-score frame (method='lr') from the t1/t2 values stored in the open record
# and show its last two rows -- presumably to compare with the logged z_score; verify.
make_zscore_df(pl.DataFrame({token_1: t1_open, token_2: t2_open}), token_1, token_2, wind, method='lr').tail(2)

In [None]:
# --- Rebuild the 1-second z-score path of the trade picked in the orders cell ---
# Relies on `db_manager`, `ZoneInfo`, `token_1` and `token_2` from other cells.
# NOTE: `wind` and `tf` are overwritten here; earlier cells read them from the open record.
train_len = 80  # not used in this cell
wind = 10
winds = np.array((wind,))
tf = '4h'
median_length = 6
min_order = 50

# train_time starts the history window; start_time / end_time are the trade's open and close moments.
train_time = datetime(2025, 9, 12, 17, 0, tzinfo=ZoneInfo("Europe/Moscow"))
start_time = datetime(2025, 9, 16, 17, 7, 14, tzinfo=ZoneInfo("Europe/Moscow"))
end_time = datetime(2025, 9, 16, 23, 9, 17, tzinfo=ZoneInfo("Europe/Moscow"))

start_ts = int(datetime.timestamp(start_time))

df_1 = db_manager.get_tick_ob(token=token_1 + '_USDT',
                                 start_time=train_time,
                                 end_time=end_time)
df_2 = db_manager.get_tick_ob(token=token_2 + '_USDT',
                                 start_time=train_time,
                                 end_time=end_time)

avg_df = make_df_from_orderbooks(df_1, df_2, token_1, token_2, start_time=train_time)
# Aggregate with `tf` (not a second hard-coded '4h') so it cannot drift from the z-score column name.
agg_df = make_trunc_df(avg_df, timeframe=tf, token_1=token_1, token_2=token_2, method='triple', offset='3h')

# 1-second frame with bid/ask columns, padded by 6 seconds on both sides of the trade.
df_sec = make_trunc_df(avg_df, timeframe='1s', token_1=token_1, token_2=token_2,
                       start_date=start_time - timedelta(seconds=6),
                       end_date = end_time + timedelta(seconds=6),
                       method='last', return_bid_ask=True)
# Same argument order as the newer C98/VET replay cell: `tf` goes before `winds`.
# The previous call omitted `tf`, which shifted every following positional argument.
spread_df = create_zscore_df(token_1, token_2, df_sec, agg_df, tf, winds, min_order, start_ts, median_length)

# The z-score column is named after window and timeframe; expose it as plain 'z_score'.
df = spread_df.select('time', 'ts', token_1, token_2, f'{token_1}_size', f'{token_2}_size',
     f'{token_1}_bid_price', f'{token_1}_ask_price', f'{token_1}_bid_size', f'{token_1}_ask_size',
     f'{token_2}_bid_price', f'{token_2}_ask_price', f'{token_2}_bid_size', f'{token_2}_ask_size',
     f'z_score_{wind}_{tf}').filter(
        (pl.col('time') >= start_time) & (pl.col('time') <= end_time)
     ).rename({f'z_score_{wind}_{tf}': 'z_score'})

In [None]:
# Re-run the z-score construction on the 1-second frame. `tf` is passed before `winds`,
# matching the argument order used by the C98/VET replay cell earlier in the notebook.
spread_df = create_zscore_df(token_1, token_2, df_sec, agg_df, tf, winds, min_order, start_ts, median_length)


In [None]:
# Open and close execution prices of the first leg.
t1_op, t1_cl

In [None]:
# Open and close execution prices of the second leg.
t2_op, t2_cl

In [None]:
# Quantities of both legs for a budget of max_position_size * leverage, computed by
# get_qty with method='usdt_neutral' from the open prices, beta and instrument data.
qty_1, qty_2 = get_qty(token_1, token_2, t1_op, t2_op, beta, coin_information, max_position_size * leverage,
                          method='usdt_neutral')
qty_1, qty_2

In [None]:
# Profit of each leg (the second leg trades in the opposite direction, side_2) and their sum.
# `calculate_profit` is not imported in this part of the notebook; it must come from an earlier cell.
pr_1 = calculate_profit(open_price=t1_op, close_price=t1_cl, n_coins=qty_1, side=side)
pr_2 = calculate_profit(open_price=t2_op, close_price=t2_cl, n_coins=qty_2, side=side_2)
pr_1, pr_2, pr_1 + pr_2

In [None]:
# Open and close execution prices of the first leg.
# The close price is stored as `t1_cl`; `t1_cp` is not assigned by any cell in this section.
t1_op, t1_cl

In [None]:
# Recompute the z-score frame (method='lr') from the t1/t2 values stored in the close record
# and show its last row. NOTE(review): `wind` may have been overwritten by the replay cell above.
make_zscore_df(pl.DataFrame({token_1: t1_close, token_2: t2_close}), token_1, token_2, wind, method='lr').tail(1)

In [None]:
# Hand-entered numbers for a back-of-the-envelope check of one trade.
qty_1, qty_2 = 10881, 4375

# Spread values at open/close and the spread's mean/std at open (typed in manually).
open_spread, close_spread = -0.000066, 0.000021
open_mean, open_std = 0.000015, 0.00005

# Quoted bid-ask spread of each leg at the moment the position was opened.
t1_bid_ask_spread, t2_bid_ask_spread = (
    open_[f'{leg}_ask_price'][0] - open_[f'{leg}_bid_price'][0]
    for leg in ('t1', 't2')
)
beta, z_score = open_['beta'][0], open_['z_score'][0]
fee_rate = 0.00055

In [None]:
# Income per one standard deviation of the spread: first-leg quantity times open_std.
profit_per_std = qty_1 * open_std
profit_per_std

In [None]:
# Compute the profit.
# spread_profit: distance from the open spread to (mean + 0.5 std)
# -- presumably the planned exit level; TODO confirm.
spread_profit = abs(open_spread - (open_mean + 0.5 * open_std))
# real_spread_profit: actual spread move between open and close, scaled by the first-leg quantity.
real_spread_profit = (close_spread - open_spread) * qty_1
spread_profit * qty_1, real_spread_profit

In [None]:
# Fee for 4 orders of $200 each ($100 of margin at leverage 2).
fees = 4 * 200 * fee_rate

# Cost of crossing the bid-ask spread on both legs (quoted spread at open times quantity).
bid_ask_slippage = t1_bid_ask_spread * qty_1 + t2_bid_ask_spread * qty_2
fees, bid_ask_slippage

#### Обработка больших датафреймов

In [None]:
from tqdm.notebook import tqdm
import polars as pl
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo


In [None]:
# Read the list of (token_1, token_2) pairs: one whitespace-separated pair per line.
token_pairs = []
with open('./data/token_pairs.txt', 'r') as file:
    for line in file:
        fields = line.split()
        if not fields:
            # Skip blank lines (e.g. a trailing empty line) instead of failing on the unpack below.
            continue
        # Still strict about malformed lines: anything other than two fields raises ValueError.
        a, b = fields
        token_pairs.append((a, b))

In [None]:
# Load one pair's backtest parquet and show the first and last `time` values it contains.
info_df = pl.read_parquet('./data/pair_backtest/BLUR_KAS_4h_dist.parquet')
info_df['time'][0], info_df['time'][-1]

In [None]:
# Time window used to filter the per-pair parquet files in the cells below.
# NOTE: `end_time` is also assigned by the trade-replay cell earlier in the notebook; this overrides it.
start_bt_time = datetime(2025, 10, 22, 0, 0, tzinfo=ZoneInfo("Europe/Moscow"))
end_time = datetime(2025, 11, 12, 0, 0, tzinfo=ZoneInfo("Europe/Moscow"))
# Suffix that is part of the parquet file names read and written below.
method = 'dist'

In [None]:
# --- Join the per-timeframe frames of each pair into one `<t1>_<t2>_<method>_full.parquet` ---
tf_1 = '4h'
tf_2 = '1h'
# tf_3 = '5m'

# Not used by the loop below; kept because later cells rebuild and use the same names.
time_series = pl.datetime_range(start=start_bt_time, end=end_time, interval="5s", eager=True)
main_df = pl.DataFrame({'time': time_series})


def _read_tf_spread(token_1, token_2, tf):
    """Read one pair/timeframe parquet, restricted to [start_bt_time, end_time).

    Any error raised by `pl.read_parquet` propagates to the caller; the loop below
    treats FileNotFoundError as "this pair has no data for this timeframe".
    """
    return pl.read_parquet(
        f'./data/pair_backtest/{token_1}_{token_2}_{tf}_{method}.parquet',
        low_memory=True, rechunk=True, use_pyarrow=True,
    ).filter((pl.col('time') >= start_bt_time) & (pl.col('time') < end_time))


# Pairs for which at least one timeframe file is missing. They are reported after the loop
# instead of being dropped silently, so an incomplete merge is visible.
skipped_pairs = []

for token_1, token_2 in tqdm(token_pairs):
    # Columns present in every timeframe file; dropped from the right-hand frame so that the
    # join on 'ts' only adds the timeframe-specific columns.
    cols_to_drop = ['time', token_1, token_2, f'{token_1}_size', f'{token_2}_size',
               f'{token_1}_bid_price', f'{token_2}_bid_price', f'{token_1}_ask_price', f'{token_2}_ask_price',
               f'{token_1}_bid_size', f'{token_2}_bid_size', f'{token_1}_ask_size', f'{token_2}_ask_size']

    try:
        spread_df_1 = _read_tf_spread(token_1, token_2, tf_1)
        spread_df_2 = _read_tf_spread(token_1, token_2, tf_2)
        # spread_df_3 = _read_tf_spread(token_1, token_2, tf_3)
    except FileNotFoundError:
        skipped_pairs.append((token_1, token_2))
        continue

    (spread_df_1
        .join(spread_df_2.drop(cols_to_drop), on='ts', coalesce=True)
        #.join(spread_df_3.drop(cols_to_drop), on='ts', coalesce=True)
        .write_parquet(f'./data/pair_backtest/{token_1}_{token_2}_{method}_full.parquet'))

if skipped_pairs:
    print(f'Skipped {len(skipped_pairs)} pair(s) with missing timeframe files: {skipped_pairs}')

In [None]:
# Show the last 3 rows of the merged file for whatever pair token_1/token_2 currently hold
# (after the merge loop that is its last pair; the read fails if that pair was skipped).
pl.read_parquet(f'./data/pair_backtest/{token_1}_{token_2}_{method}_full.parquet').tail(3)

In [None]:
# --- Delete the per-timeframe files once they have been merged into a single frame ---
import os

for tf in ('4h', '1h', ):
    for token_1, token_2 in token_pairs:

        # Safety guard: the merge cell skips pairs with a missing timeframe file, so a
        # per-timeframe file may exist without a merged counterpart. Only delete the source
        # when the merged `_full` file is actually on disk, otherwise the data would be lost.
        merged_path = f'./data/pair_backtest/{token_1}_{token_2}_{method}_full.parquet'
        if not os.path.exists(merged_path):
            continue

        file_path = f'./data/pair_backtest/{token_1}_{token_2}_{tf}_{method}.parquet'
        if os.path.exists(file_path):
            os.remove(file_path)
            print(f'Удалён: {file_path}')

In [None]:
# Read the list of (token_1, token_2) pairs from the bot config: one whitespace-separated pair per line.
token_pairs = []
with open('./bot/config/token_pairs.txt', 'r') as file:
    for line in file:
        fields = line.split()
        if not fields:
            # Skip blank lines (e.g. a trailing empty line) instead of failing on the unpack below.
            continue
        # Still strict about malformed lines: anything other than two fields raises ValueError.
        a, b = fields
        token_pairs.append((a, b))
len(token_pairs)

In [None]:
# --- Build one wide frame out of the merged per-pair files ---
tfs = ('4h', '1h')
method = 'dist'

# Start from a bare 5-second time grid over the backtest window and full-join every pair onto it.
time_series = pl.datetime_range(start=start_bt_time, end=end_time, interval="5s", eager=True)
main_df = pl.DataFrame({'time': time_series})

for token_1, token_2 in token_pairs[:]:
    print(token_1, token_2, end='; ')
    try:
        spread_df = pl.read_parquet(
            f'./data/pair_backtest/{token_1}_{token_2}_{method}_full.parquet',
            low_memory=True, rechunk=True, use_pyarrow=True,
        ).filter(pl.col('time') >= start_bt_time, pl.col('time') < end_time)
    except FileNotFoundError:
        # No merged file for this pair: finish the progress line and move on.
        print()
        continue

    print(spread_df.height)

    # Columns whose name ends with a timeframe suffix are pair-specific:
    # prefix them with the pair name so they stay unique in the wide frame.
    cols_to_rn = [col for col in spread_df.columns if col.endswith(tfs)]
    prefix = f"{token_1}_{token_2}_"
    mapping = {col: prefix + col for col in cols_to_rn}
    spread_df = spread_df.rename(mapping)

    # Everything already present in main_df (except the join key) is dropped before joining.
    already_present = set(main_df.columns)
    cols_to_drop = [col for col in spread_df.columns if col in already_present]
    cols_to_drop.remove('time')
    spread_df = spread_df.drop(cols_to_drop)

    main_df = main_df.join(spread_df, on='time', how='full', coalesce=True)

In [None]:
# Display the combined wide frame built by the join loop.
main_df

In [None]:
# Persist the combined frame without its first and last rows.
main_df[1:-1].write_parquet('./data/train_data.parquet')

In [None]:
# --- Stitch two on-disk frames together in parts ---
# full_1 = df_1 full-joined with the first 500 columns of df_2;
# full_2 = the remaining columns of df_2 (plus 'time').
import polars as pl

df_1 = pl.read_parquet('./data/test_1.parquet', low_memory=True, use_pyarrow=True)
df_2 = pl.read_parquet('./data/test_2.parquet', low_memory=True, use_pyarrow=True)
print(df_1.shape, df_2.shape)

first_chunk = df_2[:, :500]
full_df = df_1.join(first_chunk, on='time', how='full', coalesce=True)
full_df.write_parquet('./data/full_1.parquet')

# Columns already moved into full_1 are removed from df_2; 'time' stays as the join key.
cols_to_drop = [name for name in first_chunk.columns if name != 'time']
df_2 = df_2.drop(cols_to_drop)
print(full_df.shape, df_2.shape)

df_2.write_parquet('./data/full_2.parquet')