In [68]:
import re

import numpy as np
import pandas as pd

def parse_log_line(line):
    """Extract one latency sample from a single log line.

    Two line shapes are recognised, tried in this order:

    * ``Read``/``Write``, an optional ``key`` word, a numeric key, a colon and
      a decimal duration followed by ``ms``.
    * ``Transaction commit`` followed by a decimal duration and ``ms``.

    Returns:
        ``(action, key, duration)`` or ``None`` when neither shape matches.
        ``duration`` is always the matched text (a string).  ``key`` is the
        matched digit string for Read/Write lines and the integer placeholder
        ``999`` for commit lines, which carry no key.
    """
    access_hit = re.search(r"(Read|Write)\s+(?:key\s+)?(\d+):\s+(\d+\.\d+)\s+ms", line)
    if access_hit is not None:
        # The three capture groups are already in (action, key, duration) order.
        return access_hit.groups()

    commit_hit = re.search(r"Transaction commit (?:took)?:?\s+(\d+\.\d+)\s+ms", line)
    if commit_hit is not None:
        return 'Commit', 999, commit_hit.group(1)

    return None


def p50(x):
    """Return the 50th percentile of ``x`` via ``np.percentile``.

    Defined as a named function so that, when passed to a pandas ``agg`` call,
    the resulting column is labelled ``p50``.  Requires ``numpy`` to be
    imported as ``np`` in the same cell so the notebook runs from a fresh
    kernel.
    """
    return np.percentile(x, q=50)


def parse_log(log_file, name=None):
    """Summarise the latency samples of one log file per ``(key, action)``.

    Every line is passed through ``parse_log_line``; lines it does not
    recognise are skipped.  The remaining samples are grouped by
    ``(key, action)`` and reduced to a sample count and the ``p50`` duration.

    Args:
        log_file: Path of the log file to read.
        name: Optional column label.  When given, only the p50 column is
            returned, renamed to ``name``, so that several runs can be placed
            side by side with ``pd.concat(..., axis=1)``.

    Returns:
        A DataFrame indexed by ``(key, action)``.  With ``name=None`` the
        columns are ``('duration', 'count')`` and ``('duration', 'p50')``;
        otherwise there is a single column called ``name``.  A file without
        any recognised line yields an empty frame carrying the same labels.
    """
    records = []
    with open(log_file, 'r') as file:
        # Iterate the handle directly rather than loading every line up front.
        for line in file:
            parsed = parse_log_line(line)
            if parsed is None:
                continue
            action, key, duration = parsed
            records.append({'action': action, 'key': key, 'duration': duration})

    if not records:
        # With no samples there is no 'key' column to cast or group on, so
        # build the empty result explicitly instead of failing with KeyError.
        empty_index = pd.MultiIndex.from_arrays([[], []], names=['key', 'action'])
        if name is not None:
            columns = [name]
        else:
            columns = pd.MultiIndex.from_tuples(
                [('duration', 'count'), ('duration', 'p50')]
            )
        return pd.DataFrame(index=empty_index, columns=columns)

    df = pd.DataFrame(records).astype(
        {'key': int, 'duration': float, 'action': 'category'}
    )
    # observed=True keeps only (key, action) pairs that actually occur; the
    # implicit default for categorical groupers is deprecated in pandas.
    res = df.groupby(['key', 'action'], observed=True).agg(['count', p50]).dropna()
    if name is not None:
        # The aggregation yields two columns, so assigning a one-element list
        # to res.columns would raise; keep just the p50 column under `name`.
        res = res[('duration', 'p50')].to_frame(name)
    return res


In [69]:
# Summarise a single log; no name is passed to parse_log here.
parse_log(log_file='cloudlab/mr100term8-breakdown.log')
    

Unnamed: 0_level_0,Unnamed: 1_level_0,duration,duration
Unnamed: 0_level_1,Unnamed: 1_level_1,count,p50
key,action,Unnamed: 2_level_2,Unnamed: 3_level_2
0,Read,2704,0.713935
0,Write,2704,0.82803
1,Read,2704,0.647072
1,Write,2704,0.799216
2,Read,2704,2.128098
2,Write,2704,1.054089
3,Read,2704,1.25825
3,Write,2704,0.827968
999,Commit,2702,4.063631


In [None]:
# Summarise each cloudlab/mr0term{i}.log with parse_log (passing i as the name)
# and join the results side by side on their row index.
pd.concat(
    objs=[
        parse_log(f'cloudlab/mr0term{i}.log', f'{i}')
        for i in (1, 2, 4, 8, 16, 32, 64)
    ],
    axis='columns',
)
    

In [6]:
# Summarise each cloudlab/mr100term{i}.log with parse_log (passing i as the
# name) and join the results side by side on their row index.
pd.concat(
    objs=[
        parse_log(f'cloudlab/mr100term{i}.log', f'{i}')
        for i in (1, 2, 4, 8, 16, 32, 64)
    ],
    axis='columns',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,1,2,4,8,16,32,64
key,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,Read,0.703396,0.718276,0.689572,0.673099,0.636989,0.672628,0.70598
0,Write,0.705174,0.713797,0.694666,0.683854,0.649544,0.686987,0.714651
1,Read,0.637863,0.63878,0.616368,0.605107,0.569496,0.611,0.642427
1,Write,0.675408,0.686132,0.66644,0.659582,0.627003,0.667217,0.69157
2,Read,2.817582,2.237509,2.030181,2.091811,1.970562,2.190552,2.114638
2,Write,1.032617,0.911677,0.878635,0.874291,0.820694,0.870507,0.868419
3,Read,1.786111,1.377652,1.13153,1.14665,1.06763,1.194329,1.170411
3,Write,0.690772,0.69704,0.678967,0.671189,0.640721,0.675919,0.690254
999,Commit,4.401864,4.23507,3.994328,3.975199,3.912226,4.050221,4.121273


In [16]:
# Summarise each logs/pg-mr0-conn{i}.log with parse_log (passing i as the name)
# and join the results side by side on their row index.
pd.concat(
    objs=[
        parse_log(f'logs/pg-mr0-conn{i}.log', f'{i}')
        for i in (1, 2, 4, 8, 16, 24, 32)
    ],
    axis='columns',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,1,2,4,8,16,24,32
key,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,Read,0.157388,0.163529,0.169672,0.19376,0.202133,0.203995,0.208547
0,Write,0.150535,0.156009,0.163173,0.191032,0.200486,0.202052,0.204099
1,Read,0.134316,0.141886,0.148409,0.177863,0.187419,0.189945,0.191036
1,Write,0.141964,0.147848,0.158438,0.187312,0.197764,0.198518,0.199668
2,Read,0.132669,0.139768,0.149292,0.17766,0.186853,0.190134,0.190279
2,Write,0.141101,0.147778,0.156794,0.186786,0.196772,0.198002,0.199373
3,Read,0.132468,0.140999,0.149392,0.177506,0.186753,0.188761,0.190943
3,Write,0.141053,0.148362,0.159047,0.186432,0.195915,0.198522,0.200045


In [17]:
# Summarise each logs/pg-mr100-conn{i}.log with parse_log (passing i as the
# name) and join the results side by side on their row index.
pd.concat(
    objs=[
        parse_log(f'logs/pg-mr100-conn{i}.log', f'{i}')
        for i in (1, 2, 4, 8, 16, 24, 32)
    ],
    axis='columns',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,1,2,4,8,16,24,32
key,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,Read,0.156802,0.158601,0.167907,0.191377,0.205209,0.206754,0.209778
0,Write,0.14585,0.148731,0.158551,0.185827,0.204168,0.202758,0.204251
1,Read,0.134886,0.136091,0.145915,0.174453,0.192605,0.191823,0.195678
1,Write,0.138668,0.140515,0.154588,0.183983,0.200752,0.19984,0.202176
2,Read,0.129292,0.13311,0.147125,0.175674,0.194038,0.193843,0.197918
2,Write,0.145041,0.143738,0.153607,0.183564,0.201628,0.201024,0.203154
3,Read,0.128801,0.133656,0.145302,0.173891,0.192978,0.192348,0.195553
3,Write,0.144538,0.14411,0.153972,0.182306,0.200918,0.200379,0.202919


In [23]:
# Summarise each local/mr0clients{i}.log with parse_log (passing i as the name)
# and join the results side by side on their row index.
pd.concat(
    objs=[
        parse_log(f'local/mr0clients{i}.log', f'{i}')
        for i in (1, 2, 4, 8, 16, 24)
    ],
    axis='columns',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,1,2,4,8,16,24
key,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,Read,1.854193,1.508294,1.73375,2.001804,2.472056,2.316673
0,Write,0.658769,0.647132,0.743745,0.868408,1.027026,0.989886
1,Read,1.461736,1.189288,1.410651,1.700772,2.082902,1.943993
1,Write,0.577227,0.611391,0.681183,0.839983,1.021679,0.967564
2,Read,1.40157,1.168119,1.380452,1.708373,2.114438,1.975448
2,Write,0.5308,0.593397,0.690052,0.847765,1.022437,0.977137
3,Read,1.381507,1.160004,1.399846,1.742349,2.09692,1.941342
3,Write,0.581093,0.579615,0.687525,0.83479,1.016412,0.965293
999,Commit,2.65937,2.514147,2.914475,4.929787,4.264864,4.005625


In [25]:
# Summarise each local/mr100clients{i}.log with parse_log (passing i as the
# name) and join the results side by side on their row index.
pd.concat(
    objs=[
        parse_log(f'local/mr100clients{i}.log', f'{i}')
        for i in (1, 2, 4, 8, 16, 24)
    ],
    axis='columns',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,1,2,4,8,16,24
key,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,Read,1.724095,1.778588,1.941212,2.002164,3.041111,3.240429
0,Write,0.821402,0.736704,0.864237,0.939217,1.219845,1.280013
1,Read,1.344067,1.235948,1.559248,1.708331,2.63524,2.719262
1,Write,0.61663,0.624224,0.810574,0.908815,1.195325,1.223988
2,Read,2.761841,2.238706,2.872688,3.40985,5.420611,5.481295
2,Write,1.134095,0.921548,1.148615,1.307699,1.982451,1.886408
3,Read,1.905484,1.318735,1.660174,1.920833,3.064846,2.942326
3,Write,0.725673,0.660061,0.845139,0.925686,1.218843,1.254227
999,Commit,11.56158,10.063961,11.676925,16.913511,20.564606,21.470641


In [96]:
import re
import pandas as pd
import json

def parse_compute_log_line(line):
    """Pull the command id and its JSON payload out of a ``CMD`` log line.

    A matching line ends with ``CMD <digits> [ ... ]``; the bracketed part is
    decoded with ``json.loads``.

    Returns:
        ``(command, latency)`` where ``command`` is the matched digit string
        and ``latency`` is the decoded JSON value, or ``None`` when the line
        does not match.
    """
    found = re.search(r"CMD (\d+) (\[.+\])$", line)
    if found is None:
        return None

    command_id, payload = found.groups()
    return command_id, json.loads(payload)


def parse_compute_log(log_file, name=None):
    """Flatten the ``CMD`` lines of a log file into one row per latency entry.

    Every line is passed through ``parse_compute_log_line``; lines it does not
    recognise are skipped.  Each decoded payload is iterated as
    ``(key, duration)`` pairs, and every pair becomes one row.

    Args:
        log_file: Path of the log file to read.
        name: Not used by this function; kept so existing calls keep working.

    Returns:
        A DataFrame with the columns ``command``, ``key`` and ``ms``.  The
        columns are present even when no line matched, so downstream code can
        rely on them.
    """
    rows = []
    with open(log_file, 'r') as file:
        # Iterate the handle directly rather than loading every line up front.
        for line in file:
            parsed = parse_compute_log_line(line)
            if parsed is None:
                continue
            command, latency = parsed
            rows.extend(
                {'command': command, 'key': key, 'ms': duration}
                for key, duration in latency
            )

    # Naming the columns explicitly gives an empty result a stable schema.
    return pd.DataFrame(rows, columns=['command', 'key', 'ms'])

# Load the compute log into `df`, which the aggregation cell below groups on.
df = parse_compute_log(log_file='cloudlab/compute.log')

In [98]:
import numpy as np

def p50(x):
    """Return the 50th percentile of ``x`` as computed by ``np.percentile``."""
    halfway = np.percentile(x, 50)
    return halfway

# For every (command, key) pair: number of samples and p50 of the 'ms' column.
agg_df = df.groupby(['command', 'key']).agg(['count', p50])
# Flatten the two-level column labels into plain tuples such as ('ms', 'count').
agg_df.columns = agg_df.columns.to_flat_index()
# Turn the (command, key) index back into ordinary columns.
agg_df.reset_index(inplace=True)
# Keep the p50 scaled by 1000 under a 'us' label and drop the 'ms' version.
agg_df[('us', 'p50')] = agg_df[('ms', 'p50')] * 1000
agg_df.drop(columns=[('ms', 'p50')], inplace=True)
# Order by command ascending, then by sample count with the largest first.
out = agg_df.sort_values(by=['command', ('ms', 'count')], ascending=[True, False])
print(out.to_markdown())

|     |   command | key                        |   ('ms', 'count') |   ('us', 'p50') |
|----:|----------:|:---------------------------|------------------:|----------------:|
|  10 |         0 | read-buffer-hit-16399-0    |              5324 |          3.3915 |
|   9 |         0 | read-buffer-hit-16396-0    |              2018 |          3.053  |
|  56 |         0 | read-buffer-miss-16396-0   |               786 |        493.525  |
|  25 |         0 | read-buffer-hit-2659-1     |               743 |          1.748  |
|  27 |         0 | read-buffer-hit-2662-1     |               363 |          1.928  |
|   4 |         0 | read-buffer-hit-1249-1     |               329 |          1.493  |
|  18 |         0 | read-buffer-hit-2616-1     |               282 |          1.691  |
|  39 |         0 | read-buffer-hit-2690-1     |               202 |          1.7955 |
|   6 |         0 | read-buffer-hit-1259-1     |               200 |          1.7835 |
|  57 |         0 | read-buffer-miss-16399-

In [99]:
# Write the sorted summary to disk without the positional row index.
out.to_csv(path_or_buf='latency-breakdown.csv', index=False)

```
  oid  |                    relname
-------+-----------------------------------------------
 16406 | usertable_2_pkey
 16403 | usertable_2
 16399 | usertable_1_pkey
 16396 | usertable_1
  5002 | pg_sequence_seqrelid_index
  3456 | pg_collation
  3455 | pg_class_tblspc_relfilenode_index
  3379 | pg_statistic_ext_relid_index
  3351 | pg_partitioned_table_partrelid_index
  3350 | pg_partitioned_table
  3164 | pg_collation_name_enc_nsp_index
  3085 | pg_collation_oid_index
  2965 | pg_db_role_setting_databaseid_rol_index
  2841 | pg_toast_2619_index
  2840 | pg_toast_2619
  2839 | pg_toast_2618_index
  2838 | pg_toast_2618
  2837 | pg_toast_1255_index
  2836 | pg_toast_1255
  2704 | pg_type_typname_nsp_index
  2703 | pg_type_oid_index
  2697 | pg_tablespace_oid_index
  2696 | pg_statistic_relid_att_inh_index
  2693 | pg_rewrite_rel_rulename_index
  2691 | pg_proc_proname_args_nsp_index
  2690 | pg_proc_oid_index
  2689 | pg_operator_oprname_l_r_n_index
  2688 | pg_operator_oid_index
  2687 | pg_opclass_oid_index
  2686 | pg_opclass_am_name_nsp_index
  2685 | pg_namespace_oid_index
  2684 | pg_namespace_nspname_index
  2681 | pg_language_name_index
  2680 | pg_inherits_relid_seqno_index
  2679 | pg_index_indexrelid_index
  2678 | pg_index_indrelid_index
  2677 | pg_authid_oid_index
  2676 | pg_authid_rolname_index
  2675 | pg_description_o_c_o_index
  2674 | pg_depend_reference_index
  2673 | pg_depend_depender_index
  2672 | pg_database_oid_index
  2671 | pg_database_datname_index
  2667 | pg_constraint_oid_index
  2666 | pg_constraint_contypid_index
  2665 | pg_constraint_conrelid_contypid_conname_index
  2664 | pg_constraint_conname_nsp_index
  2663 | pg_class_relname_nsp_index
  2662 | pg_class_oid_index
  2661 | pg_cast_source_target_index
  2659 | pg_attribute_relid_attnum_index
  2658 | pg_attribute_relid_attnam_index
  2657 | pg_attrdef_oid_index
  2656 | pg_attrdef_adrelid_adnum_index
  2655 | pg_amproc_fam_proc_index
  2654 | pg_amop_opr_fam_index
  2653 | pg_amop_fam_strat_index
  2652 | pg_am_oid_index
  2651 | pg_am_name_index
  2650 | pg_aggregate_fnoid_index
  2619 | pg_statistic
  2618 | pg_rewrite
  2617 | pg_operator
  2616 | pg_opclass
  2615 | pg_namespace
  2612 | pg_language
  2611 | pg_inherits
  2610 | pg_index
  2609 | pg_description
  2606 | pg_constraint
  2605 | pg_cast
  2603 | pg_amproc
  2602 | pg_amop
  2601 | pg_am
  2600 | pg_aggregate
  2579 | pg_constraint_conparentid_index
  2187 | pg_inherits_parent_index
  1262 | pg_database
  1260 | pg_authid
  1259 | pg_class
  1255 | pg_proc
  1249 | pg_attribute
  1247 | pg_type
  1213 | pg_tablespace

```