In [None]:
import pandas as pd
import numpy as np

def phase_durations(sorted_trades):
    """Measure how long each uninterrupted run of same-direction trades lasts.

    A "phase" starts at the first trade, and a new one starts at every trade
    whose direction differs from the previous trade's direction. A phase ends
    at the timestamp of the trade that starts the next phase; the final phase
    ends at the timestamp of the last trade.

    Args:
        sorted_trades: DataFrame with a 'direction' column and a 'timestamp'
            column of pandas timestamps, already in chronological order.

    Returns:
        A ``(long_phase, short_phase)`` tuple of lists holding the durations,
        in seconds and in chronological order, of the 'buy' and 'sell' phases
        respectively. Phases whose duration is not strictly positive are
        dropped, and phases with any other direction value are not recorded
        (they still terminate the phase before them).
    """
    durations = {'buy': [], 'sell': []}

    def record(direction, elapsed):
        # Single place where a finished phase is filtered and stored.
        seconds = elapsed.total_seconds()
        if seconds > 0 and direction in durations:
            durations[direction].append(seconds)

    current_phase = None
    phase_start_time = None
    last_timestamp = None

    # Walk the two needed columns directly instead of using iterrows(), which
    # builds a full Series object for every row.
    for direction, timestamp in zip(sorted_trades['direction'],
                                    sorted_trades['timestamp']):
        if phase_start_time is None:
            current_phase, phase_start_time = direction, timestamp
        elif direction != current_phase:
            record(current_phase, timestamp - phase_start_time)
            current_phase, phase_start_time = direction, timestamp
        last_timestamp = timestamp

    # Close the phase that was still open when the data ran out.
    if phase_start_time is not None:
        record(current_phase, last_timestamp - phase_start_time)

    return durations['buy'], durations['sell']


# Run the analysis only when executed directly, so phase_durations can be
# imported on its own without the CSV files being present.
if __name__ == "__main__":
    df_order_book = pd.read_csv('order_book_snapshot.csv')
    df_trades = pd.read_csv('trades.csv')
    # Raw timestamps are parsed as integer nanoseconds since the epoch.
    df_trades['timestamp'] = pd.to_datetime(df_trades['timestamp'], unit='ns')

    if not df_trades.empty:
        # --- Overall per-trade averages ---
        mean_trade_volume = df_trades['trade_volume'].mean()
        print(f"Avg volume per trade: {mean_trade_volume:.2f}")

        mean_trade_price = df_trades['trade_price'].mean()
        print(f"Avg price per trade: {mean_trade_price:.2f}")

        # --- Mean gap between consecutive trades ---
        # sort_values returns a new frame, so df_trades itself is untouched.
        df_trade_sorted = df_trades.sort_values(by='timestamp')
        time_diffs = df_trade_sorted['timestamp'].diff()
        time_diffs_seconds = time_diffs.dt.total_seconds()
        # The first diff is NaT; mean() skips it.
        mean_time_diffs_seconds = time_diffs_seconds.mean()

        print(f"Avg time between trades: {mean_time_diffs_seconds:.6f}")

        # --- Direction breakdown ---
        # value_counts must be *called*; without the parentheses this name
        # would hold the bound method rather than the counts.
        direction_counts = df_trades['direction'].value_counts()
        direction_percentage = df_trades['direction'].value_counts(normalize=True) * 100
        print(direction_percentage)

        mean_volume_by_direction = df_trades.groupby('direction')['trade_volume'].mean()
        print(mean_volume_by_direction)

        # --- Mean gap between consecutive trades of the same direction ---
        df_buys = df_trade_sorted[df_trade_sorted['direction'] == 'buy']
        if not df_buys.empty:
            time_diffs_buys = df_buys['timestamp'].diff()
            mean_time_between_buys_seconds = time_diffs_buys.dt.total_seconds().mean()
            print(f"Avg time between 'buy': {mean_time_between_buys_seconds:.6f} sec")

        df_sells = df_trade_sorted[df_trade_sorted['direction'] == 'sell']
        if not df_sells.empty:
            time_diffs_sells = df_sells['timestamp'].diff()
            mean_time_between_sells_seconds = time_diffs_sells.dt.total_seconds().mean()
            print(f"Avg time between 'sells': {mean_time_between_sells_seconds:.6f} sec")

        # --- Mean duration of buy / sell phases ---
        # The trades are already sorted above; reuse that frame instead of
        # sorting and copying df_trades a second time. The name is kept bound
        # for any code that refers to it.
        df_trade_sorted_new = df_trade_sorted
        long_phase, short_phase = phase_durations(df_trade_sorted_new)

        if long_phase:
            mean_long_phase_duration = np.mean(long_phase)
            print(f"Avg time of buy phase: {mean_long_phase_duration:.6f} sec")

        if short_phase:
            mean_short_phase_duration = np.mean(short_phase)
            print(f"Avg time of sell phase: {mean_short_phase_duration:.6f} sec")




Avg volume per trade: 51.02
Avg price per trade: 100.00
Avg time between trades: 0.000500
direction
sell    50.25
buy     49.75
Name: proportion, dtype: float64
direction
buy     51.831156
sell    50.226866
Name: trade_volume, dtype: float64
Avg time between 'buy': 0.001006 sec
Avg time between 'sells': 0.000995 sec
Avg time of buy phase: 0.000984 sec
Avg time of sell phase: 0.000995 sec
