In [6]:
import requests
import polars as pl
from pathlib import Path
import zipfile
import io
import gc

class BinanceFuturesDataDownloader:
    """
    Download and load daily aggTrades files for Binance USDT-M futures.

    Archives are fetched from ``BASE_URL``, unpacked into ``data_dir`` and
    reused from there on later calls.
    """
    BASE_URL = "https://data.binance.vision/data/futures/um"

    def __init__(self, data_dir="./data"):
        """
        Args:
            data_dir: Directory that holds the extracted CSV files. It is
                created, together with any missing parent, when absent.
        """
        self.data_dir = Path(data_dir)
        # parents=True: a nested location such as "cache/binance" must work on
        # first use instead of raising FileNotFoundError.
        self.data_dir.mkdir(parents=True, exist_ok=True)

    def download_aggtrades_daily(self, symbol, year, month, day):
        """
        Return the path of one day of aggTrades, downloading it if needed.

        Args:
            symbol: Symbol used in the URL and file name (e.g. "BTCUSDT").
            year: Year of the requested day.
            month: Month of the requested day (zero-padded to two digits).
            day: Day of the month (zero-padded to two digits).

        Returns:
            Path of the CSV file inside ``data_dir``.

        Raises:
            ValueError: If the server does not answer with HTTP 200, or if
                the archive does not contain the expected CSV file.
        """
        stem = f"{symbol}-aggTrades-{year}-{month:02d}-{day:02d}"
        csv_filename = f"{stem}.csv"
        csv_path = self.data_dir / csv_filename

        # A file already present in data_dir is reused as-is.
        if csv_path.exists():
            print(f"✓ Existe: {csv_filename}")
            return csv_path

        url = f"{self.BASE_URL}/daily/aggTrades/{symbol}/{stem}.zip"
        print(f"Téléchargement: {url}")
        response = requests.get(url, timeout=300)

        if response.status_code != 200:
            raise ValueError(f"Erreur {response.status_code}")

        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
            # Extract only the member we are going to return: this guarantees
            # csv_path exists afterwards and keeps unexpected archive entries
            # out of data_dir.
            if csv_filename not in archive.namelist():
                raise ValueError(f"Erreur: {csv_filename} absent de l'archive")
            archive.extract(csv_filename, self.data_dir)

        file_size_mb = csv_path.stat().st_size / 1e6
        print(f"✓ Téléchargé: {csv_filename} ({file_size_mb:.1f} MB)")
        return csv_path

    def download_multiple_days(self, symbol, year, month, start_day, end_day):
        """
        Download several consecutive days of the same month.

        Best effort: a day that fails is reported on stdout and skipped, so
        the returned list may be shorter than the requested range.

        Args:
            symbol: Symbol passed to ``download_aggtrades_daily``.
            year: Year of the requested days.
            month: Month of the requested days.
            start_day: First day of the range (inclusive).
            end_day: Last day of the range (inclusive).

        Returns:
            List of CSV paths, one per day that succeeded, in day order.
        """
        files = []

        for day in range(start_day, end_day + 1):
            try:
                filepath = self.download_aggtrades_daily(symbol, year, month, day)
            except Exception as e:
                # Deliberately broad: one bad day must not abort the batch.
                print(f"✗ Jour {day}: {e}")
            else:
                files.append(filepath)

        return files

    def load_aggtrades_csv(self, filepath):
        """
        Read an aggTrades CSV with Polars and add the derived columns.

        Args:
            filepath: Path of a CSV file with a header row.

        Returns:
            The DataFrame produced by ``_preprocess``.
        """
        df = pl.read_csv(filepath, has_header=True)
        df = self._preprocess(df)
        gc.collect()
        return df

    def _preprocess(self, df: pl.DataFrame) -> pl.DataFrame:
        """
        Add the derived columns used by the analysis cells.

        Reads ``transact_time``, ``is_buyer_maker``, ``quantity`` and
        ``price`` and adds:

        - ``timestamp``: ``transact_time`` interpreted as epoch milliseconds.
        - ``buy_volume``: ``quantity`` where ``is_buyer_maker`` is false, else 0.
        - ``sell_volume``: ``quantity`` where ``is_buyer_maker`` is true, else 0.
        - ``dollar_volume``: ``price * quantity``.
        - ``cumulative_volume`` / ``cumulative_dollar``: running sums of
          ``quantity`` and ``dollar_volume`` in row order.
        """
        return df.with_columns(
            pl.from_epoch(pl.col('transact_time'), time_unit='ms').alias('timestamp'),
            pl.when(~pl.col('is_buyer_maker')).then(pl.col('quantity')).otherwise(0).alias('buy_volume'),
            pl.when(pl.col('is_buyer_maker')).then(pl.col('quantity')).otherwise(0).alias('sell_volume'),
            (pl.col('price') * pl.col('quantity')).alias('dollar_volume')
        ).with_columns(
            # Second pass because dollar_volume only exists after the first one.
            pl.col('quantity').cum_sum().alias('cumulative_volume'),
            pl.col('dollar_volume').cum_sum().alias('cumulative_dollar')
        )
        



In [None]:
# Fetch one day of BTCUSDT aggTrades and load it into a DataFrame.
downloader = BinanceFuturesDataDownloader(data_dir="./data")
csv_path = downloader.download_aggtrades_daily(
    symbol="BTCUSDT", year=2024, month=12, day=25
)
df = downloader.load_aggtrades_csv(filepath=csv_path)

# Explicit collection pass once the frame is loaded.
gc.collect()

In [None]:
# Flag trades whose quantity exceeds `threshold_multiplier` times the rolling
# mean quantity; the resulting frame is the cell's displayed value.
rolling_window, threshold_multiplier = 1000, 2
(
    df
    .with_columns(
        qty_ma=pl.col('quantity').rolling_mean(window_size=rolling_window, min_samples=100)
    )
    .with_columns(
        is_big_trade=pl.col('quantity').gt(pl.col('qty_ma') * threshold_multiplier)
    )
)

In [None]:
class BarMethods:
    """Aggregate raw trades into bars carrying OHLCV and order-flow columns.

    The result is stored in ``df_agg``. Only time bars are built here:
    ``bar_type`` is kept on the instance but is not consulted.
    """

    def __init__(self, df: pl.DataFrame, bar_type: str = 'time', bar_params: dict = None):
        """
        Args:
            df: Trades with ``timestamp``, ``price``, ``quantity``,
                ``buy_volume`` and ``sell_volume`` columns.
            bar_type: Stored as-is.
            bar_params: Optional settings; ``time_window`` (default ``'5m'``)
                is the bar length.
        """
        self.df_raw = df
        self.bar_type = bar_type
        self.bar_params = bar_params if bar_params else {}
        self._time_bar()

    def _time_bar(self):
        """Build fixed-duration bars and store them in ``self.df_agg``."""
        every = self.bar_params.get('time_window', '5m')
        price = pl.col('price')

        bars = self.df_raw.group_by_dynamic('timestamp', every=every).agg(
            open=price.first(),
            high=price.max(),
            low=price.min(),
            close=price.last(),
            volume=pl.col('quantity').sum(),
            buy_volume=pl.col('buy_volume').sum(),
            sell_volume=pl.col('sell_volume').sum(),
        )

        # delta / total_volume first: imbalance and cvd are derived from them.
        bars = bars.with_columns(
            delta=pl.col('buy_volume') - pl.col('sell_volume'),
            total_volume=pl.col('buy_volume') + pl.col('sell_volume'),
        )
        self.df_agg = bars.with_columns(
            imbalance=pl.col('delta') / pl.col('total_volume'),
            cvd=pl.col('delta').cum_sum(),
        )
        
        
        



class OrderFlowIndicators:
    """Expose 30-minute time bars built from raw trades as ``self.df``."""

    def __init__(self, df: pl.DataFrame):
        self._prepare_data(df)

    def _prepare_data(self, df: pl.DataFrame):
        """Aggregate ``df`` through ``BarMethods`` and keep the bar frame."""
        params = {'time_window': '30m'}
        self.df = BarMethods(df, bar_type='time', bar_params=params).df_agg



In [None]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Build the aggregated bars and preview the first rows.
indicator = OrderFlowIndicators(df=df)
data = indicator.df

data.head(5)

In [None]:
# Four stacked panels on a shared time axis: price, delta, imbalance, CVD.
fig = make_subplots(
    rows=4,
    cols=1,
    shared_xaxes=True,
    subplot_titles=('Price (OHLC)', 'Delta Volume', 'Imbalance', 'CVD'),
    vertical_spacing=0.04,
    row_heights=[0.35, 0.2, 0.2, 0.25],
)

# Panel 1: price candlesticks.
fig.add_trace(
    go.Candlestick(
        name='Price',
        x=data['timestamp'],
        open=data['open'],
        high=data['high'],
        low=data['low'],
        close=data['close'],
        increasing_line_color='green',
        decreasing_line_color='red',
    ),
    row=1,
    col=1,
)

# Panel 2: per-bar delta, red when negative and green otherwise.
colors = ['red' if value < 0 else 'green' for value in data['delta']]
fig.add_trace(
    go.Bar(
        name='Delta',
        x=data['timestamp'],
        y=data['delta'],
        marker_color=colors,
        showlegend=False,
    ),
    row=2,
    col=1,
)
fig.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)

# Panel 3: imbalance, filled down to the zero line.
fig.add_trace(
    go.Scatter(
        name='Imbalance',
        x=data['timestamp'],
        y=data['imbalance'],
        fill='tozeroy',
        line={'color': 'blue'},
        showlegend=False,
    ),
    row=3,
    col=1,
)
fig.add_hline(y=0, line_dash="dash", line_color="gray", row=3, col=1)

# Panel 4: cumulative volume delta.
fig.add_trace(
    go.Scatter(
        name='CVD',
        x=data['timestamp'],
        y=data['cvd'],
        line={'color': 'purple', 'width': 2},
        showlegend=False,
    ),
    row=4,
    col=1,
)

fig.update_layout(
    title_text="Order Flow Analysis ",
    height=1000,
    xaxis_rangeslider_visible=False,
)

# Turn the range slider off on every x axis, not only the first one.
fig.update_xaxes(rangeslider_visible=False)
fig.show()

In [None]:
# Buy and sell volume accumulated per closing-price bin.
# histfunc='sum' is what makes the y values count: with the default
# ('count') the histogram only counts how many bars fall into each bin.
fig = go.Figure()
fig.add_trace(go.Histogram(
    x=data['close'], y=data['buy_volume'],
    histfunc='sum', nbinsx=100, name='Buy volume',
))
fig.add_trace(go.Histogram(
    x=data['close'], y=data['sell_volume'],
    histfunc='sum', nbinsx=100, name='Sell volume',
))
fig

In [None]:
e e e

In [None]:
import polars as pl
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# ========================
# ORDER FLOW INDICATORS
# ========================

class OrderFlowIndicators:
    """Order-flow metrics computed from raw trades.

    Construction tags every trade with buy/sell volume (``df_raw``) and
    builds one row per time window (``df_agg``). Methods documented as
    "raw ticks" work on ``df_raw``; the others work on ``df_agg``.
    """

    def __init__(self, df: pl.DataFrame, window: str = '30s'):
        """
        Args:
            df: Raw tick data with columns [timestamp, price, quantity, is_buyer_maker]
            window: Aggregation window for OHLC and flow indicators
        """
        self.window = window
        self.df_raw = df
        self._preprocess()

    def _preprocess(self):
        """Tag trades with buy/sell volume, then build the per-window frame."""
        qty = pl.col('quantity')
        maker = pl.col('is_buyer_maker')
        price = pl.col('price')

        # is_buyer_maker false -> quantity goes to buy_volume;
        # is_buyer_maker true  -> quantity goes to sell_volume.
        self.df_raw = self.df_raw.with_columns(
            buy_volume=pl.when(~maker).then(qty).otherwise(0),
            sell_volume=pl.when(maker).then(qty).otherwise(0),
        )

        # One row per window: OHLCV plus the two flow sums.
        bars = self.df_raw.group_by_dynamic('timestamp', every=self.window).agg(
            open=price.first(),
            high=price.max(),
            low=price.min(),
            close=price.last(),
            volume=qty.sum(),
            buy_volume=pl.col('buy_volume').sum(),
            sell_volume=pl.col('sell_volume').sum(),
        )

        # imbalance and cvd need delta / total_volume, hence two passes.
        bars = bars.with_columns(
            delta=pl.col('buy_volume') - pl.col('sell_volume'),
            total_volume=pl.col('buy_volume') + pl.col('sell_volume'),
        )
        self.df_agg = bars.with_columns(
            imbalance=pl.col('delta') / pl.col('total_volume'),
            cvd=pl.col('delta').cum_sum(),
        )

    def ohlcv(self) -> pl.DataFrame:
        """Return the timestamp and OHLCV columns of the aggregate."""
        return self.df_agg.select('timestamp', 'open', 'high', 'low', 'close', 'volume')

    def delta_imbalance_cvd(self) -> pl.DataFrame:
        """Return the flow columns: buy/sell volume, delta, imbalance, CVD."""
        flow_columns = ('timestamp', 'buy_volume', 'sell_volume', 'delta', 'imbalance', 'cvd')
        return self.df_agg.select(*flow_columns)

    def volume_profile(self, price_precision: int = 1) -> pl.DataFrame:
        """Total quantity per rounded price level (uses raw ticks)."""
        levelled = self.df_raw.with_columns(
            price_level=pl.col('price').round(price_precision)
        )
        per_level = levelled.group_by('price_level').agg(
            total_volume=pl.col('quantity').sum()
        )
        return per_level.sort('price_level')

    def volume_profile_from_agg(self, price_precision: int = 1) -> pl.DataFrame:
        """Volume profile computed from the aggregated bars (light).

        Each bar contributes half of its volume to its rounded open price and
        the other half to its rounded close price.
        """
        halves = [
            self.df_agg.select(
                pl.col(anchor).round(price_precision).alias('price_level'),
                (pl.col('volume') / 2).alias('volume'),
            )
            for anchor in ('open', 'close')
        ]

        # Stack the two halves, then sum per price level.
        stacked = pl.concat(halves)
        per_level = stacked.group_by('price_level').agg(
            total_volume=pl.col('volume').sum()
        )
        return per_level.sort('price_level')

    def big_trades(self, threshold_multiplier: float = 2.0, rolling_window: int = 100) -> pl.DataFrame:
        """Trades larger than a multiple of the rolling mean size (uses raw ticks).

        The returned rows carry ``qty_ma``, ``is_big_trade`` and a ``side``
        column ('BUY' when ``is_buyer_maker`` is false, otherwise 'SELL').
        """
        qty = pl.col('quantity')
        side = (
            pl.when(~pl.col('is_buyer_maker'))
            .then(pl.lit('BUY'))
            .otherwise(pl.lit('SELL'))
        )

        flagged = (
            self.df_raw
            .with_columns(qty_ma=qty.rolling_mean(rolling_window))
            .with_columns(
                is_big_trade=(qty > pl.col('qty_ma') * threshold_multiplier),
                side=side,
            )
        )
        return flagged.filter(pl.col('is_big_trade'))

    def big_candles(self, volume_threshold_multiplier: float = 2.0) -> pl.DataFrame:
        """Bars whose volume exceeds a multiple of the mean bar volume (light).

        ``side`` is 'BUY' when the bar's delta is strictly positive, otherwise
        'SELL'.
        """
        with_mean = self.df_agg.with_columns(avg_volume=pl.col('volume').mean())
        heavy = with_mean.filter(
            pl.col('volume') > pl.col('avg_volume') * volume_threshold_multiplier
        )
        return heavy.with_columns(
            side=pl.when(pl.col('delta') > 0)
            .then(pl.lit('BUY'))
            .otherwise(pl.lit('SELL'))
        )

    def footprint(self, price_precision: int = 1) -> pl.DataFrame:
        """Buy/sell volume and delta per window and price level (uses raw ticks)."""
        flow = ('buy_volume', 'sell_volume')

        # Collect the trades of each window as lists, then flatten them again
        # so every trade row carries its window timestamp.
        per_window = (
            self.df_raw
            .with_columns(price_level=pl.col('price').round(price_precision))
            .group_by_dynamic('timestamp', every=self.window)
            .agg(pl.col('price_level', *flow))
            .explode('price_level', *flow)
        )

        cells = per_window.group_by('timestamp', 'price_level').agg(
            pl.col(*flow).sum()
        )
        cells = cells.with_columns(
            delta=pl.col('buy_volume') - pl.col('sell_volume')
        )
        return cells.sort('timestamp', 'price_level')


# ========================
# ORDER FLOW VISUALIZER
# ========================

class OrderFlowVisualizer:
    """Plotly views over an ``OrderFlowIndicators`` instance.

    Every ``plot_*`` method builds a figure and calls ``fig.show()``; nothing
    is returned.
    """

    def __init__(self, indicators: OrderFlowIndicators):
        """
        Args:
            indicators: Source of the frames to plot.
        """
        self.indicators = indicators

    @staticmethod
    def _show_volume_profile(vp, title):
        """Render a volume-profile frame as horizontal bars (shared helper)."""
        fig = go.Figure()
        fig.add_trace(go.Bar(
            x=vp['total_volume'],
            y=vp['price_level'],
            orientation='h',
            marker_color='rgba(55, 128, 191, 0.7)',
            name='Volume'
        ))
        fig.update_layout(
            title=title,
            xaxis_title='Total Volume',
            yaxis_title='Price Level',
            height=600,
            showlegend=False
        )
        fig.show()

    @staticmethod
    def _add_side_markers(fig, frame, specs, y_column, marker_for):
        """Add one marker trace per side ('BUY'/'SELL') that has rows.

        Args:
            fig: Figure receiving the traces.
            frame: Frame with ``timestamp``, ``side`` and ``y_column``.
            specs: Iterable of ``(side, trace_name, spec)`` tuples.
            y_column: Mapping side -> column used for the marker y position.
            marker_for: Callable ``(subset, spec) -> marker dict``.
        """
        for side, trace_name, spec in specs:
            subset = frame.filter(pl.col('side') == side)
            if subset.is_empty():
                # No trace (and no legend entry) for a side without rows.
                continue
            fig.add_trace(go.Scatter(
                x=subset['timestamp'],
                y=subset[y_column[side]],
                mode='markers',
                name=trace_name,
                marker=marker_for(subset, spec)
            ))

    def plot_overview(self):
        """Plot complete overview: Price + Delta + Imbalance + CVD."""
        ohlcv = self.indicators.ohlcv()
        flow = self.indicators.delta_imbalance_cvd()

        # Both frames come from the same aggregate; join them on timestamp.
        df = ohlcv.join(flow, on='timestamp')

        fig = make_subplots(
            rows=4, cols=1,
            shared_xaxes=True,
            subplot_titles=('Price (OHLC)', 'Delta Volume', 'Imbalance', 'CVD'),
            vertical_spacing=0.04,
            row_heights=[0.35, 0.2, 0.2, 0.25]
        )

        # 1. Price candlestick
        fig.add_trace(
            go.Candlestick(
                x=df['timestamp'],
                open=df['open'],
                high=df['high'],
                low=df['low'],
                close=df['close'],
                name='Price',
                increasing_line_color='green',
                decreasing_line_color='red'
            ),
            row=1, col=1
        )

        # 2. Delta volume: red when negative, green otherwise
        colors = ['red' if x < 0 else 'green' for x in df['delta']]
        fig.add_trace(
            go.Bar(
                x=df['timestamp'],
                y=df['delta'],
                name='Delta',
                marker_color=colors,
                showlegend=False
            ),
            row=2, col=1
        )
        fig.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)

        # 3. Imbalance
        fig.add_trace(
            go.Scatter(
                x=df['timestamp'],
                y=df['imbalance'],
                name='Imbalance',
                fill='tozeroy',
                line=dict(color='blue'),
                showlegend=False
            ),
            row=3, col=1
        )
        fig.add_hline(y=0, line_dash="dash", line_color="gray", row=3, col=1)

        # 4. CVD
        fig.add_trace(
            go.Scatter(
                x=df['timestamp'],
                y=df['cvd'],
                name='CVD',
                line=dict(color='purple', width=2),
                showlegend=False
            ),
            row=4, col=1
        )

        fig.update_layout(
            height=1000,
            title_text=f"Order Flow Analysis ({self.indicators.window})",
            xaxis_rangeslider_visible=False
        )

        # Disable the range slider on every x axis, not only the first.
        fig.update_xaxes(rangeslider_visible=False)
        fig.show()

    def plot_volume_profile(self, price_precision: int = 1):
        """Plot the volume profile (HEAVY - computed from raw ticks).

        Args:
            price_precision: Decimals kept when rounding prices into levels.
        """
        vp = self.indicators.volume_profile(price_precision)
        self._show_volume_profile(vp, 'Volume Profile')

    def plot_volume_profile_light(self, price_precision: int = 1):
        """Plot the light volume profile (computed from the aggregated bars).

        Args:
            price_precision: Decimals kept when rounding prices into levels.
        """
        vp = self.indicators.volume_profile_from_agg(price_precision)
        self._show_volume_profile(
            vp, f'Volume Profile (Aggregated - {self.indicators.window})'
        )

    def plot_big_trades(self, threshold_multiplier: float = 2.0, rolling_window: int = 100):
        """Plot big trades over the close price (HEAVY - uses raw ticks).

        Args:
            threshold_multiplier: Forwarded to ``OrderFlowIndicators.big_trades``.
            rolling_window: Forwarded to ``OrderFlowIndicators.big_trades``.
        """
        ohlcv = self.indicators.ohlcv()
        big = self.indicators.big_trades(threshold_multiplier, rolling_window)

        fig = go.Figure()

        # Price line (using close)
        fig.add_trace(go.Scatter(
            x=ohlcv['timestamp'],
            y=ohlcv['close'],
            mode='lines',
            name='Price',
            line=dict(color='lightgray', width=1)
        ))

        def marker_for(subset, spec):
            fill, symbol, outline = spec
            return dict(
                # Marker size scales with the traded quantity.
                size=subset['quantity'] * 100,
                color=fill,
                opacity=0.6,
                symbol=symbol,
                line=dict(color=outline, width=1)
            )

        self._add_side_markers(
            fig,
            big,
            specs=(
                ('BUY', 'Big Buy', ('green', 'triangle-up', 'darkgreen')),
                ('SELL', 'Big Sell', ('red', 'triangle-down', 'darkred')),
            ),
            y_column={'BUY': 'price', 'SELL': 'price'},
            marker_for=marker_for,
        )

        fig.update_layout(
            title='Big Trades Detection',
            xaxis_title='Time',
            yaxis_title='Price',
            height=600
        )
        fig.show()

    def plot_big_candles(self, volume_threshold: float = 2.0):
        """Plot big-volume candles instead of big trades (light).

        Args:
            volume_threshold: Forwarded to ``OrderFlowIndicators.big_candles``.
        """
        ohlcv = self.indicators.ohlcv()
        big = self.indicators.big_candles(volume_threshold)

        fig = go.Figure()

        # All candles, in pale colours so the markers stand out.
        fig.add_trace(go.Candlestick(
            x=ohlcv['timestamp'],
            open=ohlcv['open'],
            high=ohlcv['high'],
            low=ohlcv['low'],
            close=ohlcv['close'],
            name='Price',
            increasing_line_color='lightgreen',
            decreasing_line_color='lightcoral'
        ))

        def marker_for(subset, spec):
            fill, symbol = spec
            return dict(size=12, color=fill, symbol=symbol)

        # Buy markers sit on the candle high, sell markers on the candle low.
        self._add_side_markers(
            fig,
            big,
            specs=(
                ('BUY', 'Big Buy Candle', ('green', 'triangle-up')),
                ('SELL', 'Big Sell Candle', ('red', 'triangle-down')),
            ),
            y_column={'BUY': 'high', 'SELL': 'low'},
            marker_for=marker_for,
        )

        fig.update_layout(
            title='Big Volume Candles',
            xaxis_title='Time',
            yaxis_title='Price',
            height=600,
            xaxis_rangeslider_visible=False
        )
        fig.show()

    def plot_footprint(self, price_precision: int = 1, max_candles: int = 50):
        """Plot the footprint heatmap of delta (HEAVY - uses raw ticks).

        Args:
            price_precision: Decimals kept when rounding prices into levels.
            max_candles: Number of most recent windows to display.
        """
        fp = self.indicators.footprint(price_precision)

        # Limit to last N candles
        timestamps = fp['timestamp'].unique().sort()[-max_candles:]
        fp_filtered = fp.filter(pl.col('timestamp').is_in(timestamps))

        # One row per price level, one column per window. `on=` is the
        # current name of the parameter formerly called `columns=`.
        pivot = fp_filtered.pivot(
            on='timestamp',
            index='price_level',
            values='delta'
        ).sort('price_level', descending=True)

        # Select the window columns by name so z and x always line up,
        # whatever position `price_level` has in the pivoted frame.
        time_columns = [name for name in pivot.columns if name != 'price_level']
        z_values = pivot.select(time_columns).to_numpy()

        fig = go.Figure(data=go.Heatmap(
            z=z_values,
            x=time_columns,
            y=pivot['price_level'].to_list(),
            colorscale='RdYlGn',
            zmid=0,
            colorbar=dict(title="Delta"),
            hoverongaps=False
        ))

        fig.update_layout(
            title=f'Footprint Chart ({self.indicators.window})',
            xaxis_title='Time',
            yaxis_title='Price Level',
            height=800
        )
        fig.show()



In [None]:

# ========================
# USAGE
# ========================

# Build the indicators on 5-minute windows.
indicators = OrderFlowIndicators(df=df, window='5m')

# Wrap them in the plotting helper.
viz = OrderFlowVisualizer(indicators=indicators)

# Overview: price, delta, imbalance and CVD.
viz.plot_overview()

# Light view (RECOMMENDED): computed from the aggregated bars.
viz.plot_big_candles(volume_threshold=2.0)


In [None]:
# Versions lourdes (À ÉVITER avec 3.7M ticks)
# viz.plot_volume_profile(price_precision=1)
# viz.plot_big_trades(threshold_multiplier=2.5, rolling_window=100)
# viz.plot_footprint(price_precision=1, max_candles=10)

In [None]:
# NOTE(review): `stop` is not defined anywhere in the visible source, so this
# statement raises NameError - presumably a deliberate barrier that halts a
# "Run All" before the cells below. TODO confirm.
stop 

In [None]:
import polars as pl

class AlternativeBars:
    """Alternative (non time-based) bars after López de Prado.

    Each method assigns a ``bar_id`` to every trade and aggregates the trades
    of one id into an OHLCV row. Bars are returned in order of first
    appearance of their id, i.e. in input row order.
    """

    def __init__(self, df: pl.DataFrame):
        """
        Args:
            df: Raw tick data with [timestamp, price, quantity, is_buyer_maker]
        """
        # NOTE(review): rows are presumably already in chronological order;
        # nothing here sorts them - confirm against the loader.
        self.df = df

    @staticmethod
    def _require_positive(name, value):
        """Reject thresholds that would make the bar id division meaningless."""
        if value <= 0:
            raise ValueError(f"{name} must be > 0, got {value!r}")

    @staticmethod
    def _to_bars(frame, extra_aggs=()):
        """Aggregate a frame carrying ``bar_id`` into one OHLCV row per bar.

        Args:
            frame: Trades with a ``bar_id`` column.
            extra_aggs: Extra aggregations placed between ``volume`` and
                ``tick_count``.
        """
        aggs = [
            pl.col('timestamp').first().alias('timestamp'),
            pl.col('price').first().alias('open'),
            pl.col('price').max().alias('high'),
            pl.col('price').min().alias('low'),
            pl.col('price').last().alias('close'),
            pl.col('quantity').sum().alias('volume'),
            *extra_aggs,
            pl.len().alias('tick_count'),
        ]
        return (
            frame
            # maintain_order=True: without it the group order is unspecified
            # and the bars would come back shuffled.
            .group_by('bar_id', maintain_order=True)
            .agg(aggs)
            .drop('bar_id')
        )

    def tick_bars(self, tick_threshold: int = 1000) -> pl.DataFrame:
        """
        Tick bars: a new bar every N trades.

        Args:
            tick_threshold: Number of trades per bar.

        Raises:
            ValueError: If ``tick_threshold`` is not strictly positive.
        """
        self._require_positive('tick_threshold', tick_threshold)
        numbered = self.df.with_columns(
            (pl.int_range(pl.len()) // tick_threshold).alias('bar_id')
        )
        return self._to_bars(numbered)

    def volume_bars(self, volume_threshold: float = 100.0) -> pl.DataFrame:
        """
        Volume bars: the bar id advances each time the running quantity
        crosses a multiple of the threshold.

        Args:
            volume_threshold: Cumulative quantity per bar (in units, e.g. BTC).

        Raises:
            ValueError: If ``volume_threshold`` is not strictly positive.
        """
        self._require_positive('volume_threshold', volume_threshold)
        numbered = (
            self.df
            .with_columns(pl.col('quantity').cum_sum().alias('cumulative_volume'))
            .with_columns(
                (pl.col('cumulative_volume') // volume_threshold)
                .cast(pl.Int64)
                .alias('bar_id')
            )
        )
        return self._to_bars(numbered)

    def dollar_bars(self, dollar_threshold: float = 1000000.0) -> pl.DataFrame:
        """
        Dollar bars: the bar id advances each time the running
        ``price * quantity`` crosses a multiple of the threshold.

        The result has an extra ``dollar_volume`` column (sum per bar).

        Args:
            dollar_threshold: Dollar volume per bar (e.g. $1M).

        Raises:
            ValueError: If ``dollar_threshold`` is not strictly positive.
        """
        self._require_positive('dollar_threshold', dollar_threshold)
        numbered = (
            self.df
            .with_columns((pl.col('price') * pl.col('quantity')).alias('dollar_volume'))
            .with_columns(pl.col('dollar_volume').cum_sum().alias('cumulative_dollar'))
            .with_columns(
                (pl.col('cumulative_dollar') // dollar_threshold)
                .cast(pl.Int64)
                .alias('bar_id')
            )
        )
        return self._to_bars(
            numbered,
            extra_aggs=(pl.col('dollar_volume').sum().alias('dollar_volume'),),
        )

In [None]:
# Build the alternative bars from the raw trades.
alt_bars = AlternativeBars(df)

# Tick bars: one bar every 1000 trades.
tick_bars = alt_bars.tick_bars(tick_threshold=1000)

# Volume bars: threshold of 50 units of traded quantity.
volume_bars = alt_bars.volume_bars(volume_threshold=50.0)

# Dollar bars. The threshold is $5M x 30 = $150M per bar; the label printed
# below is derived from it so the text cannot drift away from the value.
dollar_threshold = 5_000_000.0 * 30
dollar_bars = alt_bars.dollar_bars(dollar_threshold=dollar_threshold)

print(f"Original: {len(df)} ticks")
print(f"Time bars (5m): {len(indicators.df_agg)} bars")
print(f"Dollar bars (${dollar_threshold:,.0f}): {len(dollar_bars)} bars")

# NOTE: this still builds 5-minute time bars from the raw trades; the dollar
# bars computed above are not passed in.
indicators_dollar = OrderFlowIndicators(df, window='5m')

In [None]:
# Measure the traded dollar volume and the time span covered by the data.
total_dollar_volume = (df.get_column('price') * df.get_column('quantity')).sum()
duration_hours = (
    df.get_column('timestamp').max() - df.get_column('timestamp').min()
).total_seconds() / 3600

print("Analyse du dollar volume:")
print(f"- Total dollar volume: ${total_dollar_volume:,.0f}")
print(f"- Durée: {duration_hours:.1f} heures")
print(f"- Dollar volume/heure: ${total_dollar_volume/duration_hours:,.0f}")
# Twelve 5-minute slots per hour.
print(f"- Dollar volume par 5min: ${total_dollar_volume/(duration_hours*12):,.0f}")

# Average dollar volume carried by one 5-minute time bar.
num_time_bars = len(indicators.df_agg)
avg_dollar_per_time_bar = total_dollar_volume / num_time_bars

print("\nCalibration:")
print(f"- Nombre de time bars (5m): {num_time_bars}")
print(f"- Dollar volume moyen par time bar: ${avg_dollar_per_time_bar:,.0f}")
print("\nSeuils suggérés:")
print(f"- Conservative (même nombre de barres): ${avg_dollar_per_time_bar:,.0f}")
print(f"- 2x plus de barres: ${avg_dollar_per_time_bar/2:,.0f}")
print(f"- 3x plus de barres: ${avg_dollar_per_time_bar/3:,.0f}")

# Try the calibrated threshold (divide by 2 or 3 for more bars).
dollar_threshold = avg_dollar_per_time_bar
dollar_bars = alt_bars.dollar_bars(dollar_threshold=dollar_threshold)
print(f"\nDollar bars générées: {len(dollar_bars)}")

In [None]:
# Three fixed thresholds: same bar count as the time bars, then roughly
# twice and three times as many bars.
threshold_1x, threshold_2x, threshold_3x = 131_799_915, 65_899_958, 43_933_305

dollar_bars_1x, dollar_bars_2x, dollar_bars_3x = (
    alt_bars.dollar_bars(dollar_threshold=threshold)
    for threshold in (threshold_1x, threshold_2x, threshold_3x)
)

print("Résultats:")
print(f"- Time bars (5m): {len(indicators.df_agg)} barres")
print(f"- Dollar bars (1x): {len(dollar_bars_1x)} barres")
print(f"- Dollar bars (2x): {len(dollar_bars_2x)} barres")
print(f"- Dollar bars (3x): {len(dollar_bars_3x)} barres")

# Plot the time bars against the 1x dollar bars.
# NOTE(review): compare_bars is defined in a later cell, so that cell has to
# be executed before this one.
compare_bars(time_bars_df=indicators.ohlcv(), dollar_bars_df=dollar_bars_1x)

In [None]:
# Rebuild the bar factory and cut dollar bars with a $5M threshold.
alt_bars = AlternativeBars(df=df)
dollar_bars = alt_bars.dollar_bars(dollar_threshold=5_000_000.0)

# Visualisation comparative
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def compare_bars(time_bars_df, dollar_bars_df,
                 time_title='Time Bars (5m)', dollar_title='Dollar Bars ($5M)'):
    """Show time bars and dollar bars as two stacked candlestick charts.

    Args:
        time_bars_df: Frame with ``timestamp``, ``open``, ``high``, ``low``
            and ``close`` columns, drawn in the top panel.
        dollar_bars_df: Frame with the same columns, drawn in the bottom panel.
        time_title: Title of the top panel. Override it when the bars were
            not built on 5-minute windows.
        dollar_title: Title of the bottom panel. Override it when the bars
            were not built with a $5M threshold.
    """
    fig = make_subplots(
        rows=2, cols=1,
        # Each panel keeps its own x axis.
        shared_xaxes=False,
        subplot_titles=(time_title, dollar_title),
        vertical_spacing=0.1,
        row_heights=[0.5, 0.5]
    )

    panels = ((time_bars_df, 'Time Bars'), (dollar_bars_df, 'Dollar Bars'))
    for row, (bars, trace_name) in enumerate(panels, start=1):
        fig.add_trace(
            go.Candlestick(
                x=bars['timestamp'],
                open=bars['open'],
                high=bars['high'],
                low=bars['low'],
                close=bars['close'],
                name=trace_name,
                increasing_line_color='green',
                decreasing_line_color='red'
            ),
            row=row, col=1
        )

    fig.update_layout(
        height=800,
        title_text="Time Bars vs Dollar Bars Comparison",
        # One range slider per x axis; both are switched off.
        xaxis_rangeslider_visible=False,
        xaxis2_rangeslider_visible=False
    )

    fig.show()

# Compare the time bars with the dollar bars built in this cell.
compare_bars(time_bars_df=indicators.ohlcv(), dollar_bars_df=dollar_bars)