In [1]:
import jsonlines
import json
import polars as pl

In [2]:
# Load logs.jsonl into `lines`. Each JSONL record stores its payload as a JSON
# string under the 'line' key, so every record is decoded a second time here.
# json.loads goes through the json module's shared default decoder rather than
# constructing a brand-new JSONDecoder for every record.
with jsonlines.open('logs.jsonl') as reader:
    lines = [json.loads(log['line']) for log in reader]
lines[:3]

[{'level': 30,
  'time': 1748016000227,
  'pid': 1,
  'clientAddress': '172.22.0.1',
  'method': 'POST',
  'url': 'https://enotaryo.crabdance.com/otp?/approve',
  'body': {'txnId': '2406642583', 'otp': '111111'},
  'msg': 'transaction patch attempt'},
 {'level': 30,
  'time': 1748016000227,
  'pid': 1,
  'clientAddress': '172.22.0.1',
  'method': 'POST',
  'url': 'https://enotaryo.crabdance.com/otp?/approve',
  'requestHandleTime': 0.03625600039958954},
 {'level': 30,
  'time': 1748016000577,
  'pid': 1,
  'clientAddress': '172.22.0.1',
  'method': 'PATCH',
  'url': 'https://enotaryo.crabdance.com/api/otpTransaction',
  'requestHandleTime': 0.016787000000476837}]

In [3]:
# Load tester_logs.jsonl into `testing_lines`. As with the main log, each JSONL
# record stores its payload as a JSON string under the 'line' key.
# json.loads goes through the json module's shared default decoder rather than
# constructing a brand-new JSONDecoder for every record.
with jsonlines.open('tester_logs.jsonl') as reader:
    testing_lines = [json.loads(log['line']) for log in reader]
testing_lines[:3]

[{'level': 30,
  'time': 1748016000849,
  'pid': 1020205,
  'start': '11:59:45 PM',
  'document': 0,
  'msg': 'finished test #a-0'},
 {'level': 30,
  'time': 1748016002122,
  'pid': 1020205,
  'start': '11:59:45 PM',
  'msg': 'navigating to document: a-1'},
 {'level': 30,
  'time': 1748016002141,
  'pid': 1020205,
  'start': '11:59:45 PM',
  'msg': 'clicking file input: a-1'}]

In [4]:
# Keep only the log entries that carry a non-None 'routine' field.
routines = [entry for entry in lines if entry.get('routine') is not None]

routines[:3]

[{'level': 30,
  'time': 1748016000928,
  'pid': 1,
  'clientAddress': '172.22.0.1',
  'method': 'PATCH',
  'url': 'https://enotaryo.crabdance.com/api/otpTransaction',
  'routine': 'b3',
  'elapsedTime': 350.1578550003469,
  'msg': 'routine b3'},
 {'level': 30,
  'time': 1748016003717,
  'pid': 1,
  'clientAddress': '172.22.0.1',
  'method': 'POST',
  'url': 'https://enotaryo.crabdance.com/api/document',
  'routine': 'a3.1',
  'elapsedTime': 369.9873789995909,
  'msg': 'routine a3.1'},
 {'level': 30,
  'time': 1748016004130,
  'pid': 1,
  'clientAddress': '172.22.0.1',
  'method': 'PATCH',
  'url': 'https://enotaryo.crabdance.com/api/otpTransaction',
  'routine': 'b4',
  'elapsedTime': 2852.8324120007455,
  'msg': 'routine b4'}]

In [5]:
# Total number of log entries that have a 'routine' field.
len(routines)

104757

In [6]:
# Group every 'elapsedTime' sample under the name of the routine that logged it.
from collections import defaultdict

samples_by_routine = defaultdict(list)
for entry in routines:
    samples_by_routine[entry['routine']].append(entry['elapsedTime'])

# Convert to a plain dict; unlike the defaultdict, it raises KeyError for
# unknown routine names instead of creating an empty list.
routine_times = dict(samples_by_routine)
# Shallow copy: a separate dict whose values are, for now, the same sample
# lists. A later cell stores the per-routine means under this name.
routine_mean_times = dict(routine_times)

# Show the routine names in sorted order.
sorted(routine_times)

['a1',
 'a2',
 'a3.1',
 'a3.2',
 'a4',
 'a5',
 'a6',
 'a7',
 'a8',
 'a9',
 'b1',
 'b2',
 'b3',
 'b4',
 'b5.1',
 'b5.2',
 'c1',
 'c2',
 'c3',
 'c4',
 'c5',
 'd1']

In [14]:
# Mean elapsed time per routine.
# Built as a fresh dict so this cell does not rely on routine_mean_times having
# been pre-seeded by another cell, and so it can be re-run safely. Key order
# follows routine_times.
# NOTE(review): assumes every list in routine_times is non-empty (true when the
# lists are only ever created by appending a sample); an empty list would
# raise ZeroDivisionError.
routine_mean_times = {
    routine_key: sum(samples) / len(samples)
    for routine_key, samples in routine_times.items()
}

routine_mean_times

{'b1': 2372.937910690309,
 'b2': 2372.7177096985833,
 'a1': 2487.123466662698,
 'b4': 1313.162643889485,
 'a7': 2304.2417469132797,
 'a2': 1999.771945129445,
 'c1': 1416.7231106620345,
 'a3.2': 1279.3860924850076,
 'a4': 469.53837683103944,
 'a8': 876.4957833228942,
 'c4': 707.7689616476714,
 'a5': 716.9500085890443,
 'c2': 708.2759619893642,
 'b5.1': 353.73753516790305,
 'a3.1': 355.1808570309573,
 'c5': 354.0021746355526,
 'b5.2': 353.1889834445054,
 'a9': 366.5053002220509,
 'a6': 354.8961865696161,
 'c3': 353.82609906889513,
 'b3': 352.6659504701648,
 'd1': 178.04280500113964}

In [15]:
# Number of recorded samples per routine, keyed in sorted routine-name order.
routine_totals = {name: len(routine_times[name]) for name in sorted(routine_times)}

routine_totals

{'a1': 7000,
 'a2': 7006,
 'a3.1': 7008,
 'a3.2': 7006,
 'a4': 7002,
 'a5': 6998,
 'a6': 6994,
 'a7': 6989,
 'a8': 6990,
 'a9': 6989,
 'b1': 3248,
 'b2': 3248,
 'b3': 3250,
 'b4': 3248,
 'b5.1': 2912,
 'b5.2': 2911,
 'c1': 3190,
 'c2': 3193,
 'c3': 3193,
 'c4': 3190,
 'c5': 3191,
 'd1': 1}

In [8]:
# Success rate for the document upload routine.

# First step: collect every routine code that starts with "a", in sorted order.
import re

document_upload_routine_codes = [
    name for name in sorted(routine_times) if re.match("a.*", name)
]
document_upload_routine_codes

['a1', 'a2', 'a3.1', 'a3.2', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9']

In [9]:
# Step-to-step ratios for the document upload codes: for each pair of
# neighbouring codes, the sample count of the later step divided by the sample
# count of the earlier step.
# NOTE(review): these are ratios of raw sample counts, not of matched
# transactions, so a value can exceed 1.0.
upload_steps = document_upload_routine_codes
document_upload_ratios = {}

for earlier, later in zip(upload_steps, upload_steps[1:]):
    earlier_count = len(routine_times[earlier])
    later_count = len(routine_times[later])
    print(f"{earlier} to {later}: {later_count} / {earlier_count}")
    document_upload_ratios[f"{earlier} to {later}"] = later_count / earlier_count

# Overall ratio: last step versus first step.
first_step, last_step = upload_steps[0], upload_steps[-1]
document_upload_ratios[f"{first_step} to {last_step}"] = (
    len(routine_times[last_step]) / len(routine_times[first_step])
)

document_upload_ratios

a1 to a2: 7006 / 7000
a2 to a3.1: 7008 / 7006
a3.1 to a3.2: 7006 / 7008
a3.2 to a4: 7002 / 7006
a4 to a5: 6998 / 7002
a5 to a6: 6994 / 6998
a6 to a7: 6989 / 6994
a7 to a8: 6990 / 6989
a8 to a9: 6989 / 6990


{'a1 to a2': 1.000857142857143,
 'a2 to a3.1': 1.0002854695974879,
 'a3.1 to a3.2': 0.9997146118721462,
 'a3.2 to a4': 0.9994290608050242,
 'a4 to a5': 0.9994287346472437,
 'a5 to a6': 0.9994284081166047,
 'a6 to a7': 0.9992851015155848,
 'a7 to a8': 1.000143081985978,
 'a8 to a9': 0.999856938483548,
 'a1 to a9': 0.9984285714285714}

In [10]:
# Collect every routine code that starts with "b", in sorted order.
document_approval_routine_codes = [
    name for name in sorted(routine_times) if re.match("b.*", name)
]
document_approval_routine_codes

['b1', 'b2', 'b3', 'b4', 'b5.1', 'b5.2']

In [11]:
# Step-to-step ratios for the document approval codes: for each pair of
# neighbouring codes, the sample count of the later step divided by the sample
# count of the earlier step.
# NOTE(review): these are ratios of raw sample counts, not of matched
# transactions, so a value can exceed 1.0.
approval_steps = document_approval_routine_codes
document_approval_ratios = {}

for earlier, later in zip(approval_steps, approval_steps[1:]):
    earlier_count = len(routine_times[earlier])
    later_count = len(routine_times[later])
    print(f"{earlier} to {later}: {later_count} / {earlier_count}")
    document_approval_ratios[f"{earlier} to {later}"] = later_count / earlier_count

# Overall ratio: last step versus first step.
first_step, last_step = approval_steps[0], approval_steps[-1]
document_approval_ratios[f"{first_step} to {last_step}"] = (
    len(routine_times[last_step]) / len(routine_times[first_step])
)

document_approval_ratios

b1 to b2: 3248 / 3248
b2 to b3: 3250 / 3248
b3 to b4: 3248 / 3250
b4 to b5.1: 2912 / 3248
b5.1 to b5.2: 2911 / 2912


{'b1 to b2': 1.0,
 'b2 to b3': 1.000615763546798,
 'b3 to b4': 0.9993846153846154,
 'b4 to b5.1': 0.896551724137931,
 'b5.1 to b5.2': 0.9996565934065934,
 'b1 to b5.2': 0.896243842364532}

In [12]:
# Collect every routine code that starts with "c", in sorted order.
document_denial_routine_codes = [
    name for name in sorted(routine_times) if re.match("c.*", name)
]
document_denial_routine_codes

['c1', 'c2', 'c3', 'c4', 'c5']

In [13]:
# Step-to-step ratios for the document denial codes: for each pair of
# neighbouring codes, the sample count of the later step divided by the sample
# count of the earlier step.
# NOTE(review): these are ratios of raw sample counts, not of matched
# transactions, so a value can exceed 1.0.
denial_steps = document_denial_routine_codes
document_denial_ratios = {}

for earlier, later in zip(denial_steps, denial_steps[1:]):
    earlier_count = len(routine_times[earlier])
    later_count = len(routine_times[later])
    print(f"{earlier} to {later}: {later_count} / {earlier_count}")
    document_denial_ratios[f"{earlier} to {later}"] = later_count / earlier_count

# Overall ratio: last step versus first step.
first_step, last_step = denial_steps[0], denial_steps[-1]
document_denial_ratios[f"{first_step} to {last_step}"] = (
    len(routine_times[last_step]) / len(routine_times[first_step])
)

document_denial_ratios

c1 to c2: 3193 / 3190
c2 to c3: 3193 / 3193
c3 to c4: 3190 / 3193
c4 to c5: 3191 / 3190


{'c1 to c2': 1.0009404388714733,
 'c2 to c3': 1.0,
 'c3 to c4': 0.9990604447228312,
 'c4 to c5': 1.0003134796238244,
 'c1 to c5': 1.0003134796238244}