In [53]:
import os
import math
import random
import requests
import numpy as np
import pandas as pd
from subprocess import run
import multiprocessing as mp

from typing import List, Dict, Any, Tuple
from tqdm import tqdm_notebook as tqdm

random.seed(42)

In [54]:
# Inputs

cluster_name = "cluster1_16TB"
date = "20240119"
start_idx = 0
num_traces_to_include = 100
url_template = "https://storage.googleapis.com/thesios-io-traces/%s/%s/%s"
initialize_data_dir = False
remove_zero_size_ops = True
data_file_from_idx = lambda idx: "data-00%s-of-00100" % str(idx).zfill(3)
app_to_keep = "bigtable"
num_ops_to_keep = 700000
trace_folder = "traces"
keep_only = "both" # "reads", "writes", "both"

# Computed
def format_app_to_keep(s: str) -> str:
    if len(s) < 12:
        return s
    return s[:8]

assert start_idx >= 0 and start_idx <= 100, "Invalid start_idx: %d" % start_idx
end_idx = start_idx + num_traces_to_include - 1
assert end_idx >= 0 and end_idx <= 100, "Invalid end_idx: %d" % end_idx
output_trace_file = "trace_%s_%s" % (cluster_name, date)
if app_to_keep:
    output_trace_file += "_app_%s" % format_app_to_keep(app_to_keep)
if keep_only != "both":
    output_trace_file += "_%s" % keep_only
output_trace_file += ".txt"
data_dir = "data_%s_%s" % (cluster_name, date)
if app_to_keep:
    data_dir += "_app_%s" % format_app_to_keep(app_to_keep)
if keep_only != "both":
    data_dir += "_%s" % keep_only
data_dir += "_temp"


In [55]:
if not os.path.exists(trace_folder):
    os.makedirs(trace_folder, exist_ok=True)

In [56]:
# Helper functions

def download_to_file_single_arg(args):
    return download_to_file(*args)

def download_to_file(url, filename):
    response = requests.get(url)
    with open(filename, "wb") as f:
        f.write(response.content)

def format_bytes(bytes: int) -> str:
    for unit in ["B", "KB", "MB", "GB", "TB"]:
        if bytes < 1024:
            return f"{bytes:.2f} {unit}"
        bytes /= 1024
    return f"{bytes:.2f} PB"

def process_trace(trace) -> Dict[str, Dict[str, int]]:
    # We want to process the trace, operation by operation.
    # Let's simplify: assume everything pre-exists, and we only need to keep track of the last read/write offset for each file.

    # We will keep track of the last read offset for each file.
    # Initialize dictionaries to store file metadata
    file_metadata = {}

    for _, row in trace.iterrows():
        filename = row["filename"]
        offset = row["file_offset"]
        io_size = row["request_io_size_bytes"]
        operation = row["op_type"]

        if filename not in file_metadata:
            file_metadata[filename] = {
                "max_read_offset": 0,
                "max_write_offset": 0,
                "max_initialized_offset": 0,
            }

        metadata = file_metadata[filename]

        if operation == "READ":
            metadata["max_initialized_offset"] = max(metadata["max_initialized_offset"], offset + io_size)
            metadata["max_read_offset"] = max(metadata["max_read_offset"], offset + io_size)
        elif operation == "WRITE":
            metadata["max_initialized_offset"] = max(metadata["max_initialized_offset"], offset + io_size)
            metadata["max_write_offset"] = max(metadata["max_write_offset"], offset + io_size)

    return file_metadata

def print_file_metadata_stats(file_metadata) -> Tuple[int, int, int]:
    total_size_initialized = sum([metadata["max_initialized_offset"] for metadata in file_metadata.values()])
    total_size_read = sum([metadata["max_read_offset"] for metadata in file_metadata.values()])
    total_size_written = sum([metadata["max_write_offset"] for metadata in file_metadata.values()])

    print(f"Total size initialized: {format_bytes(total_size_initialized)}")
    print(f"Total size read: {format_bytes(total_size_read)}")
    print(f"Total size written: {format_bytes(total_size_written)}")
    return total_size_initialized, total_size_read, total_size_written

# Initialize a new directory and write the files needed to run the trace
def initialize_file(file_path: str, size: int):
    """Initialize a file with pseudo-random data."""
    with open(file_path, "wb") as f:
        # Write data in chunks to avoid excessive memory usage
        chunk_size = 16 * 1024 * 1024  # 16 MB chunks
        remaining_size = size

        while remaining_size > 0:
            write_size = min(chunk_size, remaining_size)
            data = np.random.bytes(write_size)
            f.write(data)
            remaining_size -= write_size
    # print(f"Initialized file: {file_path} with size: {format_bytes(size)}")

def initialize_file_single_arg(args):
    return initialize_file(*args)

def create_trace_data_dir(data_dir: str, file_metadata: Dict[str, Dict[str, int]]):
    print("Initializing data directory: %s" % data_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    else:
        print("Data directory already exists, not doing anything.")
        return

    # Process files in parallel
    total_files = len(file_metadata)
    print(f"Initializing {total_files} files...")

    with mp.Pool(mp.cpu_count()) as pool:
        tasks = []
        for filename, metadata in file_metadata.items():
            file_path = os.path.join(data_dir, filename)
            size = metadata["max_initialized_offset"]
            tasks.append((file_path, size))

        # Use multiprocessing to parallelize file initialization
        for _ in tqdm(pool.imap(initialize_file_single_arg, tasks), total=total_files):
            pass

    print("File initialization complete.")



In [57]:
local_files = []
download_tasks = []
for idx in range(start_idx, end_idx + 1):
    data_file = data_file_from_idx(idx)
    local_file = "%s_%s_%s.csv" % (cluster_name, date, data_file)
    local_file = os.path.join(trace_folder, local_file)
    download_url = url_template % (cluster_name, date, data_file)
    local_files.append(local_file)

    if not os.path.exists(local_file):
        print("Downloading %s to %s" % (download_url, local_file))
        download_tasks.append((download_url, local_file))

if len(download_tasks) > 0:
    with mp.Pool(mp.cpu_count()) as pool:
        for _ in tqdm(pool.imap(download_to_file_single_arg, download_tasks), total=len(download_tasks)):
            pass

# Combine all the traces into a single file
trace_dfs = [ pd.read_csv(local_file) for local_file in local_files ]
combined_trace = pd.concat(trace_dfs, ignore_index=True)
trace = combined_trace


In [58]:
trace.head()

Unnamed: 0,filename,file_offset,application,c_time,io_zone,redundancy_type,op_type,service_class,from_flash_cache,cache_hit,request_io_size_bytes,disk_io_size_bytes,response_io_size_bytes,start_time,disk_time,simulated_disk_start_time,simulated_latency
0,eac0f0c95eb5e8381ce9d2194fe38447a7d4f4a67a8ce6...,6582272,bigtable,1705174034,WARM,ERASURE_CODED,READ,THROUGHPUT_ORIENTED,0,0,94208,98304,94208,1705651000.0,0.005128,1705651000.0,0.005128
1,a54a6612f9ca88d16f8b0f7f2628fafac909388ddab3ee...,4332227,spanner,1705651081,WARM,REPLICATED,WRITE,LATENCY_SENSITIVE,0,-1,37975,37975,0,1705651000.0,0.0,0.0,5.2e-05
2,c7cf9b09cbcc7a9dea16054e3b723795478b6ced944e5e...,6519893,91a5869bf23221afed516b34207d3e3514152f38c4c97a...,1705651194,WARM,REPLICATED,WRITE,THROUGHPUT_ORIENTED,0,-1,0,0,0,1705651000.0,0.0,0.0,5.3e-05
3,c7cf9b09cbcc7a9dea16054e3b723795478b6ced944e5e...,6519893,91a5869bf23221afed516b34207d3e3514152f38c4c97a...,1705651194,WARM,REPLICATED,WRITE,THROUGHPUT_ORIENTED,0,-1,475861,475861,0,1705651000.0,0.0,0.0,0.000374
4,6c70c7107b38a4a7fa4fad3564dc0010c34ee1446eb48b...,4878336,spanner,1705046465,WARM,ERASURE_CODED,READ,THROUGHPUT_ORIENTED,0,0,364544,360448,364544,1705651000.0,0.001878,1705651000.0,0.004412


In [59]:
# Print unique applications
print("Number of unique applications: %d" % len(trace["application"].unique()))

# Print number of operations per application
operations_per_application = trace.groupby("application").size().sort_values(ascending=False)
print("Number of operations per application:")
print(operations_per_application)

Number of unique applications: 405
Number of operations per application:
application
eccab0ec807ba5e9c86ea4d72b7272534653995c86e7d336d87d7adda2a74b10    4829717
spanner                                                             4780942
bigtable                                                            1401456
ad263109a23dfb7acc3d450adb38df34455e00e2982ddd99436119eace25e970     421601
c1362825bb92f7917efc82b51ebad7d906bc575c1df81dc1f4f5f9fefff70773     258239
                                                                     ...   
4187ce1b2ce7527abe8ee8423f2d93402a29cff77bf07227af533f0275dd7f53          1
a2d737b427ca1079e35f9620bd0734ecef1dec0ef2794bac4ba59a000e40c46b          1
63cb71b70abf663c400d550b4020e77adf89af691e0fddf11b2654e411eb5748          1
e3c382fe554e0dbf9533148de6a9f4ef7d408091a74e120c3947fa577b5de9d9          1
611f2348c6b5d7d6d9c7e74a2bafe9c1d7c51f230d4e6165d66a9c4508923f07          1
Length: 405, dtype: int64


In [60]:
if remove_zero_size_ops:
    trace = trace[trace["request_io_size_bytes"] > 0].reset_index()
if app_to_keep and len(app_to_keep) > 0:
    trace = trace[trace["application"] == app_to_keep].head(num_ops_to_keep).reset_index(drop=True).copy()

if keep_only == "reads":
    trace = trace[trace["op_type"] == "READ"].reset_index(drop=True).copy()
elif keep_only == "writes":
    trace = trace[trace["op_type"] == "WRITE"].reset_index(drop=True).copy()

In [61]:
# How many lines is the trace?
print("Number of lines in trace: %d" % len(trace))

# What is the proportion of read and write operations?
read_ops = len(trace[trace["op_type"] == "READ"])
write_ops = len(trace[trace["op_type"] == "WRITE"])
print("Read operations: %d" % read_ops)
print("Write operations: %d" % write_ops)
print("Read proportion: %.2f%%" % (100 * read_ops / (read_ops + write_ops)))

read_bytes = trace[trace["op_type"] == "READ"]["request_io_size_bytes"].sum()
write_bytes = trace[trace["op_type"] == "WRITE"]["request_io_size_bytes"].sum()
total_bytes = read_bytes + write_bytes
print("Read bytes: %s" % format_bytes(read_bytes))
print("Write bytes: %s" % format_bytes(write_bytes))
print("Total bytes: %s" % format_bytes(total_bytes))

Number of lines in trace: 700000
Read operations: 298471
Write operations: 401529
Read proportion: 42.64%
Read bytes: 88.06 GB
Write bytes: 40.54 GB
Total bytes: 128.60 GB


In [62]:
num_unique_filenames = trace["filename"].nunique()
print(f"Number of unique filenames: {num_unique_filenames}")
print("\nUnique filenames:")
print(trace["filename"].unique())
if num_unique_filenames > 100000:
    raise Exception("Too many unique filenames: %d" % num_unique_filenames)

Number of unique filenames: 34087

Unique filenames:
['eac0f0c95eb5e8381ce9d2194fe38447a7d4f4a67a8ce658c5dd1d1ba52b02d9'
 '4f7d915d7e89c11316c83373721f324a500858d7799da0c54dfe6a79229f77e2'
 '7ce8f1322c8ffe4aa635600d2e38286475832bf2b5ccc9f470bd5ae5396d1357' ...
 '8f6dfce9900bb087d1f1a10280ba1c19caa8a242e4957d65aef70107d5fda198'
 'a27cd439f2edf5f71a65a334b70ecc6c24917aacd86ef8100b60a5993b8ef830'
 '04baa03f476619b6bccb034a0844202fe2613b3d944afe22a36801f5627be92e']


In [63]:
file_metadata = process_trace(trace)
total_size_initialized, total_size_read, total_size_written = print_file_metadata_stats(file_metadata)

Total size initialized: 122.47 GB
Total size read: 108.13 GB
Total size written: 40.54 GB


In [64]:
GiB = 2 ** 30
if total_size_initialized > 370 * GiB:
    raise Exception("Too much data initialized: %s" % format_bytes(total_size_initialized))

In [65]:
if initialize_data_dir:
    create_trace_data_dir(data_dir, file_metadata)

In [66]:
trace.head()

Unnamed: 0,index,filename,file_offset,application,c_time,io_zone,redundancy_type,op_type,service_class,from_flash_cache,cache_hit,request_io_size_bytes,disk_io_size_bytes,response_io_size_bytes,start_time,disk_time,simulated_disk_start_time,simulated_latency
0,0,eac0f0c95eb5e8381ce9d2194fe38447a7d4f4a67a8ce6...,6582272,bigtable,1705174034,WARM,ERASURE_CODED,READ,THROUGHPUT_ORIENTED,0,0,94208,98304,94208,1705651000.0,0.005128,1705651000.0,0.005128
1,51,4f7d915d7e89c11316c83373721f324a500858d7799da0...,0,bigtable,1705651200,WARM,REPLICATED,WRITE,LATENCY_SENSITIVE,0,-1,215446,215446,0,1705651000.0,0.0,0.0,0.000113
2,56,7ce8f1322c8ffe4aa635600d2e38286475832bf2b5ccc9...,6291456,bigtable,1705046784,WARM,ERASURE_CODED,READ,THROUGHPUT_ORIENTED,0,0,184320,1048576,184320,1705651000.0,0.004203,1705651000.0,0.037774
3,60,7ce8f1322c8ffe4aa635600d2e38286475832bf2b5ccc9...,6660096,bigtable,1705046784,WARM,ERASURE_CODED,READ,THROUGHPUT_ORIENTED,0,1,77824,0,77824,1705651000.0,0.0,0.0,6.8e-05
4,61,7ce8f1322c8ffe4aa635600d2e38286475832bf2b5ccc9...,7077888,bigtable,1705046784,WARM,ERASURE_CODED,READ,THROUGHPUT_ORIENTED,1,1,262144,0,262144,1705651000.0,0.0,0.0,7.7e-05


In [67]:
# Format the trace file to be used in the benchmark client
# The expected format is:
# <op_type> <filename>:<offset>:<size>
# Example:
# READ file1:0:4096
# WRITE file2:0:4096

def convert_trace_to_bench_client_format(trace: pd.DataFrame, output_file: str):
    with open(output_file, "w") as f:
        for _, row in trace.iterrows():
            filename = row["filename"]
            offset = row["file_offset"]
            io_size = row["request_io_size_bytes"]
            operation = row["op_type"]

            f.write(f"{operation} {filename}:{offset}:{io_size}\n")

if not os.path.exists(output_trace_file):
    print("Converting trace to benchmark client format...")
    convert_trace_to_bench_client_format(trace, output_trace_file)
    print("Trace file converted to benchmark client format: %s" % output_trace_file)
else:
    print("Trace file already exists: %s" % output_trace_file)
    print("Not overwriting.")

Trace file already exists: trace_cluster1_16TB_20240119_app_bigtable.txt
Not overwriting.


In [68]:
PAGE_SIZE = 4096

def convert_trace_to_libcachesim_format(trace: pd.DataFrame, output_file: str):

    with open(output_file, "w") as f:
        f.write("time,obj_id,obj_size\n")
        for _, row in trace.iterrows():
            filename = row["filename"]
            offset = row["file_offset"]
            timestamp = row["start_time"]
            io_size = row["request_io_size_bytes"]
            start_page_num = offset // PAGE_SIZE
            end_page_num = math.ceil((offset + io_size) / PAGE_SIZE)
            for page_num in range(start_page_num, end_page_num):
                offset = page_num * PAGE_SIZE
                io_size = PAGE_SIZE
                obj_id = f"{filename}:{offset}"
                f.write(f"{timestamp},{obj_id},{io_size}\n")

prefix = output_trace_file
if prefix.endswith(".txt"):
    prefix = prefix[:-4]
output_trace_file_libcachesim = prefix + "_libcachesim.csv"
if not os.path.exists(output_trace_file_libcachesim):
    print("Converting trace to libcachesim format...")
    convert_trace_to_libcachesim_format(trace, output_trace_file_libcachesim)
    print("trace file converted to libcachesim format: %s" % output_trace_file_libcachesim)
else:
    print("libcachesim trace file already exists: %s" % output_trace_file_libcachesim)
    print("Not overwriting.")


output_trace_file_libcachesim_binary = output_trace_file_libcachesim + ".oracleGeneral"
if not os.path.exists(output_trace_file_libcachesim_binary):
    print("Converting trace to libcachesim binary format...")
    trace_conv_bin = "/mydata/libCacheSim/_build/bin/traceConv"
    if not os.path.exists(trace_conv_bin):
        raise Exception("traceConv binary does not exist: %s" % trace_conv_bin)
    csv_opts = "time-col=1, obj-id-col=2, obj-size-col=3, delimiter=,, has-header=true"
    cmd = [trace_conv_bin, output_trace_file_libcachesim, "csv", "-t", csv_opts]
    run(cmd)
    print("trace file converted to libcachesim binary format: %s" % output_trace_file_libcachesim_binary)
else:
    print("libcachesim bninary trace file already exists: %s" % output_trace_file_libcachesim_binary)


libcachesim trace file already exists: trace_cluster1_16TB_20240119_app_bigtable_libcachesim.csv
Not overwriting.
libcachesim bninary trace file already exists: trace_cluster1_16TB_20240119_app_bigtable_libcachesim.csv.oracleGeneral


In [69]:
# Find lines where request_io_size_bytes != disk_io_size_bytes
mismatched_io_sizes = trace[trace["request_io_size_bytes"] != trace["disk_io_size_bytes"]]

# Display the first few rows of mismatched IO sizes
print("Rows where request_io_size_bytes != disk_io_size_bytes:")
mismatched_io_sizes.head()

Rows where request_io_size_bytes != disk_io_size_bytes:


Unnamed: 0,index,filename,file_offset,application,c_time,io_zone,redundancy_type,op_type,service_class,from_flash_cache,cache_hit,request_io_size_bytes,disk_io_size_bytes,response_io_size_bytes,start_time,disk_time,simulated_disk_start_time,simulated_latency
0,0,eac0f0c95eb5e8381ce9d2194fe38447a7d4f4a67a8ce6...,6582272,bigtable,1705174034,WARM,ERASURE_CODED,READ,THROUGHPUT_ORIENTED,0,0,94208,98304,94208,1705651000.0,0.005128,1705651000.0,0.005128
2,56,7ce8f1322c8ffe4aa635600d2e38286475832bf2b5ccc9...,6291456,bigtable,1705046784,WARM,ERASURE_CODED,READ,THROUGHPUT_ORIENTED,0,0,184320,1048576,184320,1705651000.0,0.004203,1705651000.0,0.037774
3,60,7ce8f1322c8ffe4aa635600d2e38286475832bf2b5ccc9...,6660096,bigtable,1705046784,WARM,ERASURE_CODED,READ,THROUGHPUT_ORIENTED,0,1,77824,0,77824,1705651000.0,0.0,0.0,6.8e-05
4,61,7ce8f1322c8ffe4aa635600d2e38286475832bf2b5ccc9...,7077888,bigtable,1705046784,WARM,ERASURE_CODED,READ,THROUGHPUT_ORIENTED,1,1,262144,0,262144,1705651000.0,0.0,0.0,7.7e-05
5,62,7ce8f1322c8ffe4aa635600d2e38286475832bf2b5ccc9...,6553600,bigtable,1705046784,WARM,ERASURE_CODED,READ,THROUGHPUT_ORIENTED,1,1,262144,0,262144,1705651000.0,0.0,0.0,5.4e-05


In [73]:
# How many writes don't have a corresponding read?

def convert_trace_to_per_page(trace: pd.DataFrame) -> pd.DataFrame:
    new_rows = []
    for _, row in trace.iterrows():
        filename = row["filename"]
        offset = row["file_offset"]
        timestamp = row["start_time"]
        io_size = row["request_io_size_bytes"]
        operation = row["op_type"]
        start_page_num = offset // PAGE_SIZE
        end_page_num = math.ceil((offset + io_size) / PAGE_SIZE)
        for page_num in range(start_page_num, end_page_num):
            offset = page_num * PAGE_SIZE
            io_size = PAGE_SIZE
            obj_id = f"{filename}:{offset}"
            new_rows.append({
                "timestamp": timestamp,
                "obj_id": obj_id,
                "obj_size": io_size,
                "op_type": operation,
            })
    return pd.DataFrame(new_rows)

trace_per_page_df = convert_trace_to_per_page(trace)

In [84]:
# How many writes?
num_writes = len(trace_per_page_df[trace_per_page_df["op_type"] == "WRITE"])
print("Number of writes: %d" % num_writes)

# How many writes don't have a corresponding read AFTER the write?
# Get writes and reads as separate dataframes
writes = trace_per_page_df[trace_per_page_df["op_type"] == "WRITE"]
reads = trace_per_page_df[trace_per_page_df["op_type"] == "READ"]

# Left join writes with reads on obj_id to find writes without matching reads
# Only keep writes where timestamp is less than read timestamp (if read exists)
writes_without_reads = writes.merge(
    reads,
    on="obj_id",
    how="left",
    suffixes=("_write", "_read")
)

# Filter after merge to avoid numpy array comparison issues
writes_without_reads = writes_without_reads[
    writes_without_reads["timestamp_read"].isna()
]

num_writes_without_reads = len(writes_without_reads)
print("Number of writes without corresponding reads AFTER them: %d (%.1f%%)" %
      (num_writes_without_reads, 100 * num_writes_without_reads / num_writes))


Number of writes: 10986920
Number of writes without corresponding reads AFTER them: 4228795 (38.5%)


In [87]:
writes_with_preceding_reads = writes.merge(
    reads,
    on="obj_id",
    how="inner",
    suffixes=("_write", "_read")
)

writes_with_preceding_reads = writes_with_preceding_reads[
    (writes_with_preceding_reads["timestamp_write"].isna() == False) &
    (writes_with_preceding_reads["timestamp_write"] < writes_with_preceding_reads["timestamp_read"])
]

num_writes_with_preceding_reads = len(writes_with_preceding_reads)
print("Number of writes with preceding reads: %d (%.1f%%)" %
      (num_writes_with_preceding_reads, 100 * num_writes_with_preceding_reads / num_writes))

Number of writes with preceding reads: 9121500 (83.0%)
