In [1]:
import os
import ast
from collections import defaultdict

directory_path = r'/Users/lihaolin/Desktop/miniproj/LOBs'

def preprocess_line(line):
    """
Each row of data is preprocessed to ensure the correct format.
    """
    line = line.replace("Exch0", '"Exch0"')
    return line

def parse_line(line):
    """
Try to safely parse the preprocessed rows.
    """
    try:
        return ast.literal_eval(line)
    except SyntaxError as e:
        print(f"Error parsing line: {e}")
        return None

def read_and_parse_file(file_path):
    """
    Each line of the file is read and parsed.
    """
    parsed_data = []
    with open(file_path, 'r') as file:
        for line in file:
            preprocessed_line = preprocess_line(line.strip())
            data = parse_line(preprocessed_line)
            if data:
                parsed_data.append(data)
    return parsed_data

#A trader who wants to buy immediately may place an order at the lowest asking price, 
#while a trader who wants to sell immediately may place an order at the highest bid price.

def clean_lob_data(lob_data):
    cleaned_lob_data = []
    for entry in lob_data:
        time_stamp, exchange, order_book = entry
        bid_data, ask_data = order_book[0][1], order_book[1][1]
        highest_bid = max(bid_data, key=lambda x: x[0]) if bid_data else [None, None]
        lowest_ask = min(ask_data, key=lambda x: x[0]) if ask_data else [None, None]
        cleaned_entry = [time_stamp, exchange, [['bid', [highest_bid] if highest_bid != [None, None] else []], ['ask', [lowest_ask] if lowest_ask != [None, None] else []]]]
        cleaned_lob_data.append(cleaned_entry)
    return cleaned_lob_data

# Initialize a dictionary to store the data organized by day
cleaned_lob_data_by_day = defaultdict(list)

# Iterate over all files in the specified directory
for filename in os.listdir(directory_path):
    if filename.endswith(".txt"):
        file_path = os.path.join(directory_path, filename)
        print(f"Processing {file_path}...")
        lob_data = read_and_parse_file(file_path)
        cleaned_lob_data = clean_lob_data(lob_data)
        # file name 'UoB_Set01_2025-01-02LOBs.txt'
        date = filename.split('_')[2]
        cleaned_lob_data_by_day[date].extend(cleaned_lob_data)

# Store processed data with date key
for date, data in cleaned_lob_data_by_day.items():
    print(f"Date: {date}, Data: {data[:2]}") 


Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-05-13LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-02-27LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-06-30LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-06-20LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-04-15LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-02-18LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-03-21LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-03-31LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-01-14LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-03-07LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-03-17LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-01-22LOBs.txt...
Processing /Users/lihaolin/D

Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-02-05LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-05-21LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-04-08LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-03-03LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-03-13LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-06-12LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-06-02LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-01-08LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-02-14LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-02-04LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-05-30LOBs.txt...
Processing /Users/lihaolin/Desktop/miniproj/LOBs/UoB_Set01_2025-05-20LOBs.txt...
Processing /Users/lihaolin/D

Date: 2025-01-20LOBs.txt, Data: [[0.0, 'Exch0', [['bid', []], ['ask', []]]], [2.759, 'Exch0', [['bid', [[209, 6]]], ['ask', []]]]]
Date: 2025-01-30LOBs.txt, Data: [[0.0, 'Exch0', [['bid', []], ['ask', []]]], [0.806, 'Exch0', [['bid', [[221, 3]]], ['ask', []]]]]
Date: 2025-03-05LOBs.txt, Data: [[0.0, 'Exch0', [['bid', []], ['ask', []]]], [0.775, 'Exch0', [['bid', [[256, 1]]], ['ask', []]]]]
Date: 2025-05-27LOBs.txt, Data: [[0.0, 'Exch0', [['bid', []], ['ask', []]]], [2.201, 'Exch0', [['bid', [[98, 7]]], ['ask', []]]]]
Date: 2025-02-13LOBs.txt, Data: [[0.0, 'Exch0', [['bid', []], ['ask', []]]], [0.837, 'Exch0', [['bid', []], ['ask', [[448, 4]]]]]]
Date: 2025-02-03LOBs.txt, Data: [[0.0, 'Exch0', [['bid', []], ['ask', []]]], [0.744, 'Exch0', [['bid', [[202, 4]]], ['ask', []]]]]
Date: 2025-05-22LOBs.txt, Data: [[0.0, 'Exch0', [['bid', []], ['ask', []]]], [1.767, 'Exch0', [['bid', [[1, 5]]], ['ask', []]]]]
Date: 2025-02-06LOBs.txt, Data: [[0.0, 'Exch0', [['bid', []], ['ask', []]]], [1.116, '

In [4]:
import json

def save_data_by_day(cleaned_lob_data_by_day, output_directory):
    """
Save the LOB data organized by day to a text file.
- cleaned_lob_data_by_day: LOB data organized by day, in dictionary format.
- output_directory: The path to the directory where the output files are saved.
    """
    for date, data in cleaned_lob_data_by_day.items():
# Build output filename, including date
        output_filename = f"LOBs_{date}.txt"
        output_path = os.path.join(output_directory, output_filename)
        
        data_str = json.dumps(data, indent=4)
        
# write to file
        with open(output_path, 'w') as file:
            file.write(data_str)
        
        print(f"Data for {date} saved to {output_path}")

# Specify the directory path to the output file. Make sure it exists
output_directory = r'/Users/lihaolin/Desktop/miniproj/ProcessedLOBs'

# Call the function and save the processed data
save_data_by_day(cleaned_lob_data_by_day, output_directory)


Data for 2025-05-13LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-05-13LOBs.txt.txt
Data for 2025-02-27LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-02-27LOBs.txt.txt
Data for 2025-06-30LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-06-30LOBs.txt.txt
Data for 2025-06-20LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-06-20LOBs.txt.txt
Data for 2025-04-15LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-04-15LOBs.txt.txt
Data for 2025-02-18LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-02-18LOBs.txt.txt
Data for 2025-03-21LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-03-21LOBs.txt.txt
Data for 2025-03-31LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-03-31LOBs.txt.txt
Data for 2025-01-14LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-01-14LOBs.

Data for 2025-02-20LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-02-20LOBs.txt.txt
Data for 2025-05-14LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-05-14LOBs.txt.txt
Data for 2025-06-18LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-06-18LOBs.txt.txt
Data for 2025-03-27LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-03-27LOBs.txt.txt
Data for 2025-01-02LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-01-02LOBs.txt.txt
Data for 2025-06-26LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-06-26LOBs.txt.txt
Data for 2025-04-03LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-04-03LOBs.txt.txt
Data for 2025-03-18LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-03-18LOBs.txt.txt
Data for 2025-02-21LOBs.txt saved to /Users/lihaolin/Desktop/miniproj/ProcessedLOBs/LOBs_2025-02-21LOBs.