In [2]:
import pandas as pd 
import numpy as np
import time

def generate_single_order_book_row(base_timestamp, current_base_bid, current_base_ask, row_index,
                                   num_levels=10, snapshot_interval_ns=100000, level_step=0.01):
    """Build one synthetic order-book snapshot as a flat dict.

    Parameters
    ----------
    base_timestamp : int
        Timestamp assigned to the snapshot with ``row_index == 0``.
    current_base_bid : float
        Reference price for the best bid; deeper bid levels sit below it.
    current_base_ask : float
        Reference price for the best ask; deeper ask levels sit above it.
    row_index : int
        Position of this snapshot in the series; scales the timestamp offset.
    num_levels : int, default 10
        Number of price levels generated on each side of the book.
    snapshot_interval_ns : int, default 100000
        Timestamp increment between consecutive snapshots.
    level_step : float, default 0.01
        Price distance between adjacent levels on the same side.

    Returns
    -------
    dict
        Keys in insertion order: ``timestamp``; then for every level ``j`` from
        1 to ``num_levels``: ``bid_price_j``, ``bid_volume_j``, ``ask_price_j``,
        ``ask_volume_j``; finally ``spread`` (level-1 ask minus level-1 bid,
        rounded to 4 decimals).

    Raises
    ------
    ValueError
        If ``num_levels`` is smaller than 1 (the spread needs level 1).
    """
    if num_levels < 1:
        raise ValueError("num_levels must be at least 1")

    row = {'timestamp': base_timestamp + row_index * snapshot_interval_ns}

    for level in range(1, num_levels + 1):
        depth_offset = (level - 1) * level_step

        # Draw order (bid price, bid volume, ask price, ask volume) is kept
        # stable so a seeded global NumPy RNG reproduces the same book.
        # Each price gets a random nudge of up to 0.001 away from the mid:
        # bids are pushed down, asks are pushed up.
        row[f'bid_price_{level}'] = round(current_base_bid - depth_offset - np.random.rand() * 0.001, 4)
        row[f'bid_volume_{level}'] = np.random.randint(10, 500)

        row[f'ask_price_{level}'] = round(current_base_ask + depth_offset + np.random.rand() * 0.001, 4)
        row[f'ask_volume_{level}'] = np.random.randint(10, 500)

    row['spread'] = round(row['ask_price_1'] - row['bid_price_1'], 4)

    return row


# Current wall-clock time in integer nanoseconds. time.time_ns() returns an
# exact int; the float-based int(time.time() * 1e9) cannot represent
# nanosecond-scale epoch values exactly and limits the resolution to a few
# hundred nanoseconds.
initial_timestamp = time.time_ns()

# Reference best-bid / best-ask prices for the demo snapshot.
initial_base_bid = 100.00
initial_base_ask = 100.05

# Generate the snapshot for row index 0 and print the resulting dict.
single_row_data = generate_single_order_book_row(initial_timestamp, initial_base_bid,  initial_base_ask, 0)
print(single_row_data)


{'timestamp': 1751798540998823936, 'bid_price_1': 99.9999, 'bid_volume_1': 314, 'ask_price_1': 100.0503, 'ask_volume_1': 128, 'bid_price_2': 99.9895, 'bid_volume_2': 96, 'ask_price_2': 100.0606, 'ask_volume_2': 140, 'bid_price_3': 99.9798, 'bid_volume_3': 338, 'ask_price_3': 100.0706, 'ask_volume_3': 494, 'bid_price_4': 99.9699, 'bid_volume_4': 359, 'ask_price_4': 100.0803, 'ask_volume_4': 369, 'bid_price_5': 99.9597, 'bid_volume_5': 167, 'ask_price_5': 100.0908, 'ask_volume_5': 459, 'bid_price_6': 99.9494, 'bid_volume_6': 412, 'ask_price_6': 100.1002, 'ask_volume_6': 123, 'bid_price_7': 99.9393, 'bid_volume_7': 128, 'ask_price_7': 100.1108, 'ask_volume_7': 192, 'bid_price_8': 99.9298, 'bid_volume_8': 353, 'ask_price_8': 100.121, 'ask_volume_8': 317, 'bid_price_9': 99.9195, 'bid_volume_9': 147, 'ask_price_9': 100.1301, 'ask_volume_9': 55, 'bid_price_10': 99.9098, 'bid_volume_10': 232, 'ask_price_10': 100.1406, 'ask_volume_10': 479, 'spread': 0.0504}


In [3]:
def generate_order_book_data(num_rows = 1000, base_timestamp=None, seed=None):
    """Simulate a sequence of order-book snapshots and return them as a DataFrame.

    Each snapshot is produced by ``generate_single_order_book_row``. Between
    snapshots the bid and ask reference prices each take an independent
    uniform step in [-0.005, 0.005).

    Parameters
    ----------
    num_rows : int, default 1000
        Number of snapshots to generate.
    base_timestamp : int, optional
        Timestamp passed to the row generator as the series origin. Defaults
        to the current time from ``time.time_ns()``.
    seed : int, optional
        When given, reseeds NumPy's global legacy RNG (``np.random.seed``)
        before generating, so repeated calls produce the same prices and
        volumes. When ``None`` the global RNG state is left untouched.

    Returns
    -------
    pandas.DataFrame
        One row per snapshot, with the columns produced by the row generator.
    """
    if seed is not None:
        np.random.seed(seed)

    if base_timestamp is None:
        # Exact integer nanoseconds; int(time.time() * 1e9) would go through a
        # float64 and lose the low-order digits of the timestamp.
        base_timestamp = time.time_ns()

    current_base_bid = 100.00
    current_base_ask = 100.05

    all_data = []
    for i in range(num_rows):
        all_data.append(
            generate_single_order_book_row(base_timestamp, current_base_bid, current_base_ask, i)
        )

        # Random-walk both sides of the book independently.
        current_base_bid += np.random.uniform(-0.005, 0.005)
        current_base_ask += np.random.uniform(-0.005, 0.005)

        # If the walk crossed (or locked) the book, re-open the ask above the bid.
        if current_base_ask <= current_base_bid:
            current_base_ask = current_base_bid + np.random.uniform(0.01, 0.05)

    df_order_book = pd.DataFrame(all_data)
    return df_order_book

# Simulate 5000 order-book snapshots, write them to CSV without the index
# column, then print the resulting frame.
df_order_book = generate_order_book_data(5000)
df_order_book.to_csv(path_or_buf='order_book_snapshot.csv', index=False)
print(df_order_book)

                timestamp  bid_price_1  bid_volume_1  ask_price_1  \
0     1751798544260275968      99.9994           229     100.0505   
1     1751798544260375968      99.9983           292     100.0487   
2     1751798544260475968     100.0008            99     100.0524   
3     1751798544260575968      99.9961            28     100.0560   
4     1751798544260675968     100.0009           442     100.0534   
...                   ...          ...           ...          ...   
4995  1751798544759775968     100.0815           286     100.1221   
4996  1751798544759875968     100.0831            59     100.1222   
4997  1751798544759975968     100.0786           451     100.1186   
4998  1751798544760075968     100.0788           483     100.1154   
4999  1751798544760175968     100.0829           289     100.1172   

      ask_volume_1  bid_price_2  bid_volume_2  ask_price_2  ask_volume_2  \
0               69      99.9899           193     100.0601           490   
1              155 

In [4]:
def generate_trades_data(num_rows = 500, base_timestamp=None, seed=None):
    """Simulate a stream of trades and return it as a DataFrame.

    Parameters
    ----------
    num_rows : int, default 500
        Number of trades to generate. Values below 1 yield an empty frame
        that still carries the four output columns.
    base_timestamp : int, optional
        Origin of the timestamp series. Defaults to the current time from
        ``time.time_ns()``.
    seed : int, optional
        When given, reseeds NumPy's global legacy RNG (``np.random.seed``)
        before drawing, so repeated calls produce identical trades. When
        ``None`` the global RNG state is left untouched.

    Returns
    -------
    pandas.DataFrame
        Columns, in order:
        ``timestamp``    - ``base_timestamp + i * 500000`` plus a random
                           offset in [1000, 10000) for trade ``i``;
        ``trade_price``  - 100 plus a uniform offset in [-0.1, 0.1), rounded
                           to 4 decimals;
        ``trade_volume`` - random integer in [1, 100);
        ``direction``    - ``'buy'`` or ``'sell'``, chosen at random.
    """
    if seed is not None:
        np.random.seed(seed)

    if base_timestamp is None:
        # Exact integer nanoseconds; int(time.time() * 1e9) would go through a
        # float64 and lose the low-order digits of the timestamp.
        base_timestamp = time.time_ns()

    row_count = max(num_rows, 0)

    # int64 is requested explicitly so the epoch-sized timestamps cannot
    # overflow on platforms where NumPy's default integer is 32-bit.
    slot_offsets = np.arange(row_count, dtype=np.int64) * 500000
    jitter = np.random.randint(1000, 10000, size=row_count, dtype=np.int64)

    df_trades = pd.DataFrame({
        'timestamp': base_timestamp + slot_offsets + jitter,
        'trade_price': np.round(100 + np.random.uniform(-0.1, 0.1, size=row_count), 4),
        'trade_volume': np.random.randint(1, 100, size=row_count),
        'direction': np.random.choice(['buy', 'sell'], size=row_count),
    })
    return df_trades

# Simulate 2000 trades, write them to CSV without the index column, then
# print the resulting frame.
df_trades = generate_trades_data(2000)
df_trades.to_csv(path_or_buf='trades.csv', index=False)
print(df_trades)


                timestamp  trade_price  trade_volume direction
0     1751798547343006560     100.0152            33       buy
1     1751798547343503002      99.9838            61      sell
2     1751798547344004101     100.0305            27       buy
3     1751798547344504291      99.9532            14      sell
4     1751798547345008008      99.9305            31      sell
...                   ...          ...           ...       ...
1995  1751798548340501296      99.9953            69      sell
1996  1751798548341004270     100.0588            66       buy
1997  1751798548341506982      99.9786             5       buy
1998  1751798548342010025      99.9152            56      sell
1999  1751798548342507967      99.9278            79       buy

[2000 rows x 4 columns]
