In [None]:
import numpy as np
import torch

# Pick the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU. The explicit fallback is what keeps the
# notebook runnable on machines without a GPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda


In [5]:
# Read the raw order book CSV into a 2-D float64 array.
# (Author's note: roughly 2-3 seconds for the ~60MB file.)
orderbook = np.loadtxt(
    'AMZN_2012-06-21_34200000_57600000_orderbook_10.csv',
    dtype=np.float64,
    delimiter=',',
)
print(f"Loaded shape: {orderbook.shape}")

# View the flat rows as (N_snapshots, 10_levels, 4_columns).
# Assumed column order within each level: [Ask_Price, Ask_Size, Bid_Price, Bid_Size].
# The -1 lets numpy infer the snapshot count, i.e. (N, 40) -> (N, 10, 4).
orderbook_3d = np.reshape(orderbook, (-1, 10, 4))
print(f"Reshaped to: {orderbook_3d.shape}")

Loaded shape: (269748, 40)
Reshaped to: (269748, 10, 4)


In [7]:
# Split the last axis of orderbook_3d into one (N, 10) array per column.
# Moving the column axis to the front and unpacking it yields, in order,
# columns 0..3: ask prices, ask volumes, bid prices, bid volumes.
# np.moveaxis returns a view, so no data is copied here.
ask_prices, ask_volumes, bid_prices, bid_volumes = np.moveaxis(orderbook_3d, -1, 0)

print(f"Ask prices shape: {ask_prices.shape}")
print(f"Bid prices shape: {bid_prices.shape}")

Ask prices shape: (269748, 10)
Bid prices shape: (269748, 10)


In [14]:
# IMPORTANT: the gaps are computed in two variants:
#   1. relative to the FIRST snapshot's best ask/bid, or
#   2. relative to the best ask/bid of each individual snapshot.
#
# This cell implements variant 1 (fixed reference taken from the first snapshot).

# Level-1 prices of snapshot 0. These are scalars, not (N, 1) arrays.
best_ask = ask_prices[0][0]  # level-1 ask of the first snapshot
best_bid = bid_prices[0][0]  # level-1 bid of the first snapshot

print(f"Best ask: {best_ask:.2f}, Best bid: {best_bid:.2f}")

# Distance of every quoted price from the fixed reference, broadcast over all
# snapshots and levels. The bid side is reversed so that prices further below
# the reference give larger values.
# Values are in the file's raw price units.
# NOTE(review): the original comment called these "ticks" - confirm the tick size.
# With a fixed reference the gaps can be negative for other snapshots.
ask_gaps_1 = np.subtract(ask_prices, best_ask)
bid_gaps_1 = np.subtract(best_bid, bid_prices)

print(f"Ask gaps range: {np.nanmin(ask_gaps_1):.0f} to {np.nanmax(ask_gaps_1):.0f}")
print(f"Bid gaps range: {np.nanmin(bid_gaps_1):.0f} to {np.nanmax(bid_gaps_1):.0f}")

Best ask: 2239500.00, Best bid: 2231800.00
Ask gaps range: -34200 to 58500
Bid gaps range: -28400 to 42100


In [13]:
# Quick experiment with array dimensions: the nesting depth of the brackets
# determines the number of dimensions reported by .shape.
print(np.array(5).shape)                    # 0 brackets = scalar, shape: ()
print(np.array([1, 2, 3]).shape)            # 1 bracket  = 1D array, shape: (3,)
print(np.array([[1], [2], [3]]).shape)      # 2 brackets = 2D array, shape: (3, 1)
# .shape has to be read from the array *inside* print(...): print() returns
# None, so `print(arr).shape` raises AttributeError.
print(np.array([[1, 2, 3]]).shape)          # 2 brackets = 2D array, shape: (1, 3)

()
(3,)
(3, 1)
(1, 3)


In [17]:
# Variant 2: gaps relative to the best ask/bid of each individual snapshot
# (this approach was suggested by an AI assistant).

# Level-1 price of every snapshot. The `:1` slice keeps the level axis, so the
# result is (N, 1) and broadcasts row-wise against the (N, 10) price arrays.
best_ask = ask_prices[:, :1]  # (N, 1) - level-1 ask per snapshot
best_bid = bid_prices[:, :1]  # (N, 1) - level-1 bid per snapshot

# Distance of each level from its own snapshot's level 1. The bid side is
# reversed so that prices further below the best bid give larger values.
# Values are in the file's raw price units.
# NOTE(review): the original comment called these "ticks" - confirm the tick size.
ask_gaps = np.subtract(ask_prices, best_ask)
bid_gaps = np.subtract(best_bid, bid_prices)

# Author's rationale: price trends differ between snapshots, and the network
# should not be fed negative values. In the recorded run, the per-snapshot
# reference gives ranges that start at 0.

print(f"Ask gaps range: {np.nanmin(ask_gaps):.0f} to {np.nanmax(ask_gaps):.0f}")
print(f"Bid gaps range: {np.nanmin(bid_gaps):.0f} to {np.nanmax(bid_gaps):.0f}")

Ask gaps range: 0 to 58500
Bid gaps range: 0 to 42100


In [None]:
# Following the AI suggestion: pack everything into one (N, 10, 2, 2) array.
#   Dimension 0: Snapshots (269748 in the recorded run)
#   Dimension 1: Price levels (10)
#   Dimension 2: Side (0=Ask, 1=Bid)
#   Dimension 3: Features (0=Price gap, 1=Volume)
# The four (N, 10) arrays are stacked on a new last axis in the order
# [ask gap, ask volume, bid gap, bid volume]; splitting that axis of length 4
# into (2, 2) gives the (side, feature) layout described above.
tensor_data = np.stack(
    [ask_gaps, ask_volumes, bid_gaps, bid_volumes], axis=-1
).reshape(ask_gaps.shape + (2, 2))

# Convert to a float32 PyTorch tensor and move it to the selected device.
tensor = torch.from_numpy(tensor_data).to(torch.float32).to(device)

print(
    f"Final tensor shape: {tensor.shape}",
    f"Tensor dtype: {tensor.dtype}",
    f"Tensor device: {tensor.device}",
    f"Memory usage: {tensor.element_size() * tensor.nelement() / 1024**2:.1f} MB",
    sep="\n",
)

Final tensor shape: torch.Size([269748, 10, 2, 2])
Tensor dtype: torch.float32
Tensor device: cuda:0
Memory usage: 41.2 MB


In [23]:
# Optional sanity check (handy when debugging): show both sides of the first
# snapshot. Index 0 on the side axis is the ask side and index 1 the bid side;
# each printed row is one price level as (price gap, volume).
print(
    "Sample snapshot at index 0:",
    f"Ask side (price_gaps, volumes):\n{tensor[0, :, 0, :]}",
    f"\nBid side (price_gaps, volumes):\n{tensor[0, :, 1, :]}",
    sep="\n",
)

Sample snapshot at index 0:
Ask side (price_gaps, volumes):
tensor([[    0.,   100.],
        [  400.,   100.],
        [  500.,   220.],
        [ 3000.,   100.],
        [ 4500.,   547.],
        [ 5900.,   100.],
        [ 9400.,   100.],
        [28200.,   100.],
        [54800.,   100.],
        [58500.,   100.]], device='cuda:0')

Bid side (price_gaps, volumes):
tensor([[0.0000e+00, 1.0000e+02],
        [1.1000e+03, 2.0000e+02],
        [1.4000e+03, 1.0000e+02],
        [1.8000e+03, 1.0000e+01],
        [5.6000e+03, 1.0000e+02],
        [1.8800e+04, 4.0000e+03],
        [2.7800e+04, 1.0000e+02],
        [2.9300e+04, 5.0000e+03],
        [2.9800e+04, 1.0000e+02],
        [4.2100e+04, 1.0000e+02]], device='cuda:0')
