In [2]:
from lobster_loader import LoadLOBSTER_resample
import os

# Base directory of the ViT-MARL checkout. The default is the original author's
# local path; set the VIT_MARL_DIR environment variable to run this cell on
# another machine without editing it.
workspace_dir = os.environ.get(
    "VIT_MARL_DIR", r"f:\OneDrive - 7t12kn\Documents\GitHub\ViT-MARL"
)

# Initialize the loader for the AMZN data.
# NOTE: stock and time_period ARE used -- the loader's own log line
# ("self.datapath .../ViT-MARL/AMZN/2012-06-21/") shows both are appended to
# datapath to locate the message/book files.
loader = LoadLOBSTER_resample(
    datapath=workspace_dir,              # Base directory; files are looked up under <datapath>/<stock>/<time_period>/
    atpath=workspace_dir,                # Where to save cached .npz files (a saved_npz folder below it)
    stock="AMZN",                        # Stock ticker; becomes a sub-directory of datapath
    time_period="2012-06-21",            # Time period; becomes a sub-directory below the ticker
    n_Levels=10,                         # Number of order book levels
    type_="fixed_time",                  # Use time-based windows
    window_length=1800,                  # 30-minute windows (1800 seconds)
    window_resolution=60,                # New window every 60 seconds
    n_data_msg_per_step=100,             # 100 messages per step
    day_start=34200,                     # 9:30 AM (in seconds since midnight)
    day_end=57600                        # 4:00 PM (in seconds since midnight)
)

# How many raw input files the loader discovered.
print("\nFound files:")
print(f"  Message files: {len(loader.message_files)}")
print(f"  Book files: {len(loader.book_files)}")

# Load the data (will use cached .npz if available)
print("\nLoading data...")
msgs, starts, ends, obs, max_msgs = loader.run_loading()

# Summarize the five arrays returned by run_loading().
print("\nData loaded successfully!")
print(f"  Total messages: {msgs.shape}")
print(f"  Number of windows: {len(starts)}")
print(f"  Window start indices shape: {starts.shape}")
print(f"  Window end indices shape: {ends.shape}")
print(f"  Initial orderbook states shape: {obs.shape}")
print(f"  Max messages per window: {max_msgs[:5]}...")  # Show first 5

# Example: access the first window's data.
# starts/ends hold row indices into msgs, so a window is msgs[starts[i]:ends[i]].
print("\nFirst window:")
print(f"  Starts at message index: {starts[0]}")
print(f"  Ends at message index: {ends[0]}")
print(f"  Contains {ends[0] - starts[0]} messages")
print("  First 3 messages in window:")
print(msgs[starts[0]:starts[0]+3])

self.datapath f:\OneDrive - 7t12kn\Documents\GitHub\ViT-MARL/AMZN/2012-06-21/
found 1 message files
found 1 book files

Found files:
  Message files: 1
  Book files: 1

Loading data...
Loading cached arrays from f:\OneDrive - 7t12kn\Documents\GitHub\ViT-MARL\saved_npz/lobster_AMZN_2012-06-21_10_fixed_time_1800_60_100_34200_57600.npz

Data loaded successfully!
  Total messages: (267328, 8)
  Number of windows: 390
  Window start indices shape: (390,)
  Window end indices shape: (390,)
  Initial orderbook states shape: (390, 40)
  Max messages per window: [17217 17792 18569 19000 19844]...

First window:
  Starts at message index: 0
  Ends at message index: 17217
  Contains 17217 messages
  First 3 messages in window:
[[        1         1        21   2238100  11885113  11885113     34200
  189607669]
 [        1        -1        20   2239600   3911376   3911376     34200
  189607669]
 [        1         1       100   2237500  11534792  11534792     34200
  189607669]]


In [None]:
import numpy as np
# Load the arrays cached by the LOBSTER loader straight from the .npz archive.
# np.load on an .npz returns a lazy NpzFile that keeps the archive open, so it
# is used as a context manager and every array is read into a plain dict before
# the file is closed. `data` therefore stays usable (same keys) afterwards.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only use it on .npz files you produced yourself.
with np.load(r"f:\OneDrive - 7t12kn\Documents\GitHub\ViT-MARL\saved_npz\lobster_AMZN_2012-06-21_10_fixed_time_1800_60_100_34200_57600.npz", allow_pickle=True) as npz_file:
    data = {key: npz_file[key] for key in npz_file.files}
msgs, starts, ends, obs, max_msgs = data["msgs"], data["starts"], data["ends"], data["obs"], data["max_msgs_in_windows_arr"]
## NOTE: this `obs` follows my own format, not the state layout you specified.
## I have not yet tracked down where that happens, so treat that state as wrong for now.

## For now it is laid out like our own tensor; the image/tensor you asked for
## is built in the snippet below.

In [4]:
# NOTE(review): wildcard import -- it is not visible here which names this notebook
# actually takes from data_loading; prefer explicit imports once that is known.
from gymnax_exchange.jaxlobster.data_loading import *
# from gymnax_exchange.jaxob.JaxOrderBookArrays import *
# from gymnax_exchange.jaxob.jorderbook import *
import torch
# Last expression of the cell, so the notebook shows whether PyTorch can use CUDA.
torch.cuda.is_available()

True

In [5]:
import platform
# Report which operating system the kernel is running on.
os_name = platform.system()
print(os_name)

Windows


In [None]:
import torch

# ---------------------------------------------------------------------------
# NOTE for Hiệp: this cell builds the image/tensor view of the order book.
# ---------------------------------------------------------------------------

# View every flat 40-value snapshot as (levels, sides, features) in one go:
# (num_windows, 40) -> (num_windows, 10, 2, 2).
obs_tensor = obs.reshape((-1, 10, 2, 2))
window_count = obs_tensor.shape[0]

print("Original obs shape:", obs.shape)
print("Reshaped tensor shape:", obs_tensor.shape)
print(f"\nTensor structure: {window_count} windows, each with (10 levels, 2 sides, 2 features [price, qty])")
print("\nFirst window orderbook:")
print(obs_tensor[0])

# Same data as a PyTorch tensor (torch.tensor copies the numpy array) --
# this is the "image" you asked for.
obs_torch = torch.tensor(obs_tensor)
print("\nPyTorch tensor shape:", obs_torch.shape)

Original obs shape: (390, 40)
Reshaped tensor shape: (390, 10, 2, 2)

Tensor structure: 390 windows, each with (10 levels, 2 sides, 2 features [price, qty])

First window orderbook:
[[[2239500     100]
  [2231800     100]]

 [[2239900     100]
  [2230700     200]]

 [[2240000     220]
  [2230400     100]]

 [[2242500     100]
  [2230000      10]]

 [[2244000     547]
  [2226200     100]]

 [[2245400     100]
  [2213000    4000]]

 [[2248900     100]
  [2204000     100]]

 [[2267700     100]
  [2202500    5000]]

 [[2294300     100]
  [2202000     100]]

 [[2298000     100]
  [2189700     100]]]

PyTorch tensor shape: torch.Size([390, 10, 2, 2])


In [7]:
# Bare expression so the notebook echoes the reshaped array as the cell output.
obs_tensor

array([[[[2239500,     100],
         [2231800,     100]],

        [[2239900,     100],
         [2230700,     200]],

        [[2240000,     220],
         [2230400,     100]],

        ...,

        [[2267700,     100],
         [2202500,    5000]],

        [[2294300,     100],
         [2202000,     100]],

        [[2298000,     100],
         [2189700,     100]]],


       [[[2244200,     100],
         [2239200,      10]],

        [[2244500,     100],
         [2238600,     100]],

        [[2244900,     100],
         [2236600,     200]],

        ...,

        [[2250000,    2404],
         [2234300,      11]],

        [[2250100,     200],
         [2232300,      50]],

        [[2251000,     185],
         [2232000,       4]]],


       [[[2238100,       1],
         [2235000,     100]],

        [[2239500,       1],
         [2234900,      61]],

        [[2240000,      10],
         [2232300,      50]],

        ...,

        [[2248000,     250],
         [2229200,       

In [8]:
import os
# Make sure the saved_npz folder (where the loader's cached .npz lives) exists.
workspace_dir = r"f:\OneDrive - 7t12kn\Documents\GitHub\ViT-MARL"
npz_cache_dir = os.path.join(workspace_dir, "saved_npz")
os.makedirs(npz_cache_dir, exist_ok=True)

In [9]:
print("##AI-generated placeholder")
# ═══════════════════════════════════════════════════════════════
# EXPLORING THE LOADED DATA
# ═══════════════════════════════════════════════════════════════
# Prints a human-readable summary of the arrays already in the notebook
# namespace: msgs (message rows), starts/ends (per-window row indices into
# msgs) and obs (one flat order-book snapshot per window).

# Messages consumed per step. Keep in sync with the n_data_msg_per_step value
# passed to the loader; used for both the arithmetic and the printed text so
# the two can no longer disagree.
MSGS_PER_STEP = 100
# Each order-book level stores (price, qty) for each of 2 sides.
VALUES_PER_LEVEL = 2 * 2
# How many windows to show in detail.
N_PREVIEW_WINDOWS = 3

banner = "=" * 70

print(banner)
print("DATA LOADED - Let's explore what we have:")
print(banner)

print("\n1. MESSAGES ARRAY (msgs):")
print(f"   Shape: {msgs.shape}")
print(f"   → {msgs.shape[0]:,} total messages")
# Report the real column count instead of a hard-coded 8.
print(f"   → {msgs.shape[1]} features per message")
print(f"\n   First message: {msgs[0]}")
print("   Features: [type, direction, qty, price, trader_id, order_id, time_s, time_ns]")

print("\n2. EPISODE WINDOWS:")
print(f"   Number of windows: {len(starts)}")
print(f"   → This means {len(starts)} different starting points for episodes!")

print("\n3. WINDOW STRUCTURE:")
for i in range(min(N_PREVIEW_WINDOWS, len(starts))):
    start_idx = starts[i]
    end_idx = ends[i]
    n_msgs = end_idx - start_idx
    n_steps = n_msgs // MSGS_PER_STEP

    # Second-to-last column is time_s (whole seconds since midnight, per the
    # feature list printed above); split it into hours and minutes.
    first_msg_time = int(msgs[start_idx][-2])
    hours, leftover_seconds = divmod(first_msg_time, 3600)
    minutes = leftover_seconds // 60

    print(f"\n   Window {i}:")
    print(f"     Start time: {hours:02d}:{minutes:02d}")
    print(f"     Messages: [{start_idx:,} : {end_idx:,}] = {n_msgs:,} messages")
    print(f"     Steps: ~{n_steps} steps (at {MSGS_PER_STEP} msgs/step)")
    print(f"     Initial orderbook shape: {obs[i].shape}")

# Derive the level count from the snapshot width rather than assuming 10.
n_levels = obs.shape[1] // VALUES_PER_LEVEL

print("\n4. ORDERBOOK SNAPSHOTS:")
print(f"   Shape: {obs.shape}")
print(f"   → {obs.shape[0]} initial orderbooks (one per window)")
print(f"   → {obs.shape[1]} values = {n_levels} levels × 2 sides × 2 (price, qty)")
print("\n   First orderbook snapshot:")
print(f"   {obs[0]}")

print("\n" + banner)
print("✓ Data exploration complete!")
print(banner)

##AI-generated placeholder
DATA LOADED - Let's explore what we have:

1. MESSAGES ARRAY (msgs):
   Shape: (267328, 8)
   → 267,328 total messages
   → 8 features per message

   First message: [        1         1        21   2238100  11885113  11885113     34200
 189607669]
   Features: [type, direction, qty, price, trader_id, order_id, time_s, time_ns]

2. EPISODE WINDOWS:
   Number of windows: 390
   → This means 390 different starting points for episodes!

3. WINDOW STRUCTURE:

   Window 0:
     Start time: 09:30
     Messages: [0 : 17,217] = 17,217 messages
     Steps: ~172 steps (at 100 msgs/step)
     Initial orderbook shape: (40,)

   Window 1:
     Start time: 09:31
     Messages: [672 : 18,464] = 17,792 messages
     Steps: ~177 steps (at 100 msgs/step)
     Initial orderbook shape: (40,)

   Window 2:
     Start time: 09:32
     Messages: [871 : 19,440] = 18,569 messages
     Steps: ~185 steps (at 100 msgs/step)
     Initial orderbook shape: (40,)

4. ORDERBOOK SNAPSHOTS:
  