In [7]:
import pandas as pd
import json

def is_integer(string):
    """Return True if *string* is an optionally negative base-10 integer.

    The callers in this file use the result to decide whether ``int(string)``
    is safe, so the digits are checked with ``str.isdecimal`` rather than
    ``str.isdigit``: ``isdigit`` is also true for characters such as the
    superscript "²", which ``int()`` rejects.

    The empty string, a bare "-", and strings with a leading "+" are not
    considered integers.
    """
    # Drop at most one leading minus sign; "--1" must stay invalid.
    digits = string[1:] if string.startswith("-") else string
    return digits.isdecimal()

def parse_sandbox_logs(file_path: str):
    """Parse the 'Sandbox logs' section from the file into a list of JSON objects.

    The section starts at a line equal to ``Sandbox logs:`` and ends at a line
    equal to ``Activities log:`` or ``Trade History:`` (or at end of file).
    Inside the section the file holds a sequence of JSON values, each of which
    may be spread over several lines.

    Args:
        file_path: path of the log file to read (decoded as UTF-8).

    Returns:
        The decoded JSON values in file order.

    Raises:
        json.JSONDecodeError: if the section contains text that is not valid
            JSON.
    """
    decoder = json.JSONDecoder()
    sandbox_logs = []
    section_lines = []  # stripped, non-empty lines of the current section
    in_sandbox_section = False

    def flush_section():
        # Decode every JSON value buffered so far, then empty the buffer.
        # raw_decode finds each value's real end, so a nested object whose
        # opening brace sits on its own line is not mistaken for a new record.
        text = "".join(section_lines)
        section_lines.clear()
        pos = 0
        while pos < len(text):
            if text[pos].isspace():
                # raw_decode does not skip leading whitespace itself.
                pos += 1
                continue
            value, pos = decoder.raw_decode(text, pos)
            sandbox_logs.append(value)

    with open(file_path, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if line == "Sandbox logs:":
                in_sandbox_section = True
            elif line in ("Activities log:", "Trade History:"):
                in_sandbox_section = False
                flush_section()
            elif in_sandbox_section and line:
                section_lines.append(line)

    # The file may end while still inside the sandbox section.
    flush_section()

    return sandbox_logs

def extract_trader_data(sandbox_logs):
    """Extract trader-level data from sandbox logs.

    Each entry's ``lambdaLog`` text is scanned for blocks of the form::

        TRADER_BEGIN
        <key> <number>
        ...
        TRADER_END

    Args:
        sandbox_logs: iterable of mappings that each have a ``lambdaLog``
            string.

    Returns:
        A DataFrame with one row per TRADER block and one column per key.
        Values that look like integers are stored as ``int``, all others as
        ``float``.

    Raises:
        ValueError: if a non-blank line inside a block is not exactly
            ``<key> <number>``.
    """
    trader_data = []

    for entry in sandbox_logs:
        # Dict of the block currently being read, or None outside a block.
        current_trader = None

        for line in entry['lambdaLog'].split("\n"):
            line = line.strip()
            if line == "TRADER_BEGIN":
                # Start a fresh dict per block so that several blocks in one
                # entry do not end up sharing (and overwriting) one object.
                current_trader = {}
            elif line == "TRADER_END":
                # An END marker without a matching BEGIN is ignored.
                if current_trader is not None:
                    trader_data.append(current_trader)
                current_trader = None
            elif current_trader is not None and line:
                # Blank lines inside a block are skipped; split() tolerates
                # repeated whitespace between key and value.
                key, value = line.split()
                current_trader[key] = int(value) if is_integer(value) else float(value)

    return pd.DataFrame(trader_data)

def extract_product_data(sandbox_logs):
    """Extract per-product data from sandbox logs.

    Each entry's ``lambdaLog`` text is scanned for blocks of the form::

        PRODUCT_BEGIN <product>
        <key> <number>
        order <price>@<volume>
        ...
        PRODUCT_END

    Args:
        sandbox_logs: iterable of mappings that each have a ``lambdaLog``
            string.

    Returns:
        A dict mapping each product name to a DataFrame with one row per
        PRODUCT block of that product. Every row has a ``product`` column, an
        ``orders`` column holding a list of ``[price, volume]`` integer pairs,
        and one column per ``<key> <number>`` line.

    Raises:
        IndexError: if a ``PRODUCT_BEGIN`` line carries no product name.
        ValueError: if a non-blank line inside a block is not exactly
            ``<key> <value>``, or a value cannot be parsed as a number.
    """
    rows_by_product = {}

    for entry in sandbox_logs:
        # Dict of the block currently being read, or None outside a block.
        current_product = None

        for line in entry['lambdaLog'].split("\n"):
            line = line.strip()
            if line.startswith("PRODUCT_BEGIN"):
                # A new dict per block: an entry usually reports several
                # products, and reusing one dict would make every product's
                # row point at the data of the last block read.
                current_product = {"product": line.split()[1], "orders": []}
            elif line.startswith("PRODUCT_END"):
                # An END marker without a matching BEGIN is ignored.
                if current_product is not None:
                    rows = rows_by_product.setdefault(current_product["product"], [])
                    rows.append(current_product)
                current_product = None
            elif current_product is not None and line:
                # Blank lines inside a block are skipped.
                key, value = line.split()

                if key == "order":
                    price, volume = value.split("@")
                    current_product["orders"].append([int(price), int(volume)])
                else:
                    current_product[key] = int(value) if is_integer(value) else float(value)

    return {product: pd.DataFrame(rows) for product, rows in rows_by_product.items()}


In [8]:
# Read the example backtest log and decode its "Sandbox logs" section.
log_file = "../backtests/example.log"
sandbox = parse_sandbox_logs(log_file)

# Build the trader-level DataFrame and the dict of per-product DataFrames.
trader = extract_trader_data(sandbox)
products = extract_product_data(sandbox)

In [10]:
# Print the trader frame's columns, non-null counts, dtypes and memory usage.
trader.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   timestamp  2000 non-null   int64  
 1   runtime    2000 non-null   float64
dtypes: float64(1), int64(1)
memory usage: 31.4 KB
