In [5]:
import pandas as pd
import numpy as np
import jax
import jax.numpy as jnp
import chex
import sys
sys.path.append('/Users/alishihab/projects/trading/sims/jaxlob/src')
from handlers.orderbook.bybit import init, process, prepare_for_storing, write_book_to_csv
jax.config.update("jax_platform_name", "cpu")

In [2]:
file_path = '/Users/alishihab/projects/trading/research/reinforcement-learning/data/cex/bybit/sol/2025-03-14_SOLUSDT_ob500_data.json'

# Initialize the orderbook
asks, bids = init()

batch_size = 5000
ob_levels = 500

# Open and process the file in chunks
with open(file_path, 'r') as f:
    # Read lines in batches of 500
    for i in range(15):
        # Initialize storage array for orderbook snapshots
        all_obs = []
        lines = []
        for _ in range(batch_size):
            line = f.readline()
            if not line:
                break
            lines.append(line)
        
        # If no lines were read, we've reached the end of the file
        if not lines:
            break

        # Process each line in the batch
        for line in lines:
            # Parse the JSON string into a dictionary
            import ujson
            data_dict = ujson.loads(line)
            
            # Process the current data
            asks, bids = process(data_dict, asks, bids, ob_levels)
            
            # Prepare the current state for storing
            ob = prepare_for_storing(asks, bids)
            
            # Add to our collection
            all_obs.append(ob)
        
        all_obs = jnp.stack(all_obs)

        # Concatenate all observations
        if all_obs.shape[0] > 0:
            print(f"iteration {i+1}")
            # Write to CSV
            output_path = file_path.replace('.json', '.csv')
            write_book_to_csv(all_obs, output_path)
            print(f"Wrote {len(all_obs)//(ob_levels*4)} orderbook snapshots to {output_path}")
        else:
            print("No data was processed")


I0000 00:00:1742078031.740851 1267086 service.cc:145] XLA service 0x1206c50d0 initialized for platform METAL (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1742078031.740864 1267086 service.cc:153]   StreamExecutor device (0): Metal, <undefined>
I0000 00:00:1742078031.742524 1267086 mps_client.cc:406] Using Simple allocator.
I0000 00:00:1742078031.742538 1267086 mps_client.cc:384] XLA backend will use up to 11452858368 bytes on device 0 for SimpleAllocator.


Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB

iteration 1
Wrote 2 orderbook snapshots to /Users/alishihab/projects/trading/research/reinforcement-learning/data/cex/bybit/sol/2025-03-14_SOLUSDT_ob500_data.csv
iteration 2
Wrote 2 orderbook snapshots to /Users/alishihab/projects/trading/research/reinforcement-learning/data/cex/bybit/sol/2025-03-14_SOLUSDT_ob500_data.csv
iteration 3
Wrote 2 orderbook snapshots to /Users/alishihab/projects/trading/research/reinforcement-learning/data/cex/bybit/sol/2025-03-14_SOLUSDT_ob500_data.csv
iteration 4
Wrote 2 orderbook snapshots to /Users/alishihab/projects/trading/research/reinforcement-learning/data/cex/bybit/sol/2025-03-14_SOLUSDT_ob500_data.csv
iteration 5
Wrote 2 orderbook snapshots to /Users/alishihab/projects/trading/research/reinforcement-learning/data/cex/bybit/sol/2025-03-14_SOLUSDT_ob500_data.csv
iteration 6
Wrote 2 orderbook snapshots to /Users/alishihab/projects/trading/research/reinforcement-learning/

KeyboardInterrupt: 

In [15]:
import ujson
ob_file_path = '/Users/alishihab/projects/trading/research/reinforcement-learning/data/cex/bybit/sol/lobs/2025-03-14_SOLUSDT_ob500_data.json'
trade_file_path = '/Users/alishihab/projects/trading/research/reinforcement-learning/data/cex/bybit/sol/trades/SOLUSDT2025-03-14.csv'

# Initialize the orderbook
asks, bids = init()

# Open and process the file in chunks
with open(ob_file_path, 'r') as f_ob:
    df_trade = pd.read_csv(trade_file_path)
    ob_line_1 = f_ob.readline()
    
    ob_line_2 = f_ob.readline()
    ob_dict_1 = ujson.loads(ob_line_1)
    ob_dict_2 = ujson.loads(ob_line_2)
    
    # Process the current data
    asks, bids = process(ob_dict_1, asks, bids, ob_levels)
    ob_start = prepare_for_storing(asks, bids).tolist()

    #########################################################
    # 1 ms difference between ob_start and ob_end
    #########################################################

    asks_2, bids_2 = process(ob_dict_2, asks, bids, ob_levels)
    ob_end = prepare_for_storing(asks_2, bids_2).tolist()

    intermediate_ob = []
    for _, row in df_trade.iterrows():
        if ob_dict_2['ts'] < row['timestamp'] * 1000:
            break
        print(row['price'], ob_start[2])
        delta_ob = []
        for i in range(len(ob_start)):
            if i % 2 == 0 and ob_start[i] == row['price']:
                delta_ob = ob_start
                delta_ob[i+1] += (1 if row['side'] == 'Buy' else -1) * row['size']
                intermediate_ob.append(delta_ob)
                ob_start = delta_ob
                break

    intermediate_ob = jnp.stack(intermediate_ob)

    # Check if the final array in intermediate_ob is identical to ob_end
    if len(intermediate_ob) > 0:
        final_ob = intermediate_ob[-1]
        
        # Calculate similarity metrics
        absolute_diff = jnp.abs(final_ob - jnp.array(ob_end))
        max_diff = jnp.max(absolute_diff)
        mean_diff = jnp.mean(absolute_diff)
        is_identical = jnp.array_equal(final_ob, jnp.array(ob_end))
        
        # Print results
        print(f"Are the arrays identical? {is_identical}")
        print(f"Maximum absolute difference: {max_diff}")
        print(f"Mean absolute difference: {mean_diff}")
        
        # Visualize the differences
        import matplotlib.pyplot as plt
        
        # Create a figure with two subplots
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
        
        # Plot the values side by side
        x = range(len(ob_end))
        ax1.plot(x, final_ob, 'b-', label='Final Intermediate OB')
        ax1.plot(x, ob_end, 'r--', label='OB End')
        ax1.set_title('Comparison of Final Intermediate OB and OB End')
        ax1.legend()
        ax1.set_xlabel('Index')
        ax1.set_ylabel('Value')
        
        # Plot the absolute differences
        ax2.bar(x, absolute_diff)
        ax2.set_title('Absolute Differences Between Arrays')
        ax2.set_xlabel('Index')
        ax2.set_ylabel('Absolute Difference')
        
        plt.tight_layout()
        plt.show()
    else:
        print("No intermediate orderbooks were generated")






123.31 123.31999969482422
123.32 123.31999969482422
123.32 123.31999969482422
123.32 123.31999969482422
123.32 123.31999969482422
123.32 123.31999969482422
123.31 123.31999969482422
123.32 123.31999969482422
123.31 123.31999969482422
123.32 123.31999969482422
123.31 123.31999969482422
123.32 123.31999969482422
123.32 123.31999969482422
123.33 123.31999969482422
123.33 123.31999969482422
123.33 123.31999969482422
123.33 123.31999969482422
123.33 123.31999969482422
123.33 123.31999969482422
123.33 123.31999969482422
123.32 123.31999969482422
123.33 123.31999969482422


ValueError: Need at least one array to stack.