In [1]:
import numpy as np
import polars as pl
from pprint import pprint

# Expect data rows read in to have these exact types
# when reading data in. These will later be converted
# to more convenient types.
data_row_name_map_pl_type = {
  '_ws.col.Time'           : pl.datatypes.Float64,
  '_ws.col.Source'         : pl.datatypes.Utf8,
  '_ws.col.Destination'    : pl.datatypes.Utf8,
  '_ws.col.Protocol'       : pl.datatypes.Utf8,
  '_ws.col.Length'         : pl.datatypes.Int64,
  'coap.type'              : pl.datatypes.Int64,
  'coap.retransmitted'     : pl.datatypes.Utf8,
  'coap.code'              : pl.datatypes.Int64,
  'coap.mid'               : pl.datatypes.Int64,
  'coap.token'             : pl.datatypes.Utf8,
  'coap.opt.proxy_uri'     : pl.datatypes.Utf8,
  'http.request'           : pl.datatypes.Int64,
  'http.request.method'    : pl.datatypes.Utf8,
  'http.request.full_uri'  : pl.datatypes.Utf8,
  'http.response'          : pl.datatypes.Int64,
  'http.response.code'     : pl.datatypes.Int64,
  'http.response.code.desc': pl.datatypes.Utf8,
  'http.response_for.uri'  : pl.datatypes.Utf8
}

# Specification of how to go from data row names
# to field names written out by the script.
data_row_name_map_field_name = {
  # Native wireshark columns
  "_ws.col.Time"       : "message_timestamp",
  "_ws.col.Source"     : "message_source",
  "_ws.col.Destination": "message_destination",
  "_ws.col.Protocol"   : "message_protocol",
  "_ws.col.Length"     : "message_size",

  # CoAP columns
  "coap.type"         : "coap_type",
  "coap.code"         : "coap_code",
  "coap.mid"          : "coap_message_id",
  "coap.token"        : "coap_token",
  "coap.opt.proxy_uri": "coap_proxy_uri",
  "coap.retransmitted": "coap_retransmitted",

  # HTTP columns
  "http.request"           : "http_request",
  "http.request.method"    : "http_request_method",
  "http.request.full_uri"  : "http_request_full_uri",
  "http.response.code"     : "http_response_code",
  "http.response.code.desc": "http_response_code_desc",
  "http.response_for.uri"  : "http_response_for_uri",
}

assert set(data_row_name_map_field_name.keys()) <= set(data_row_name_map_pl_type.keys())

field_name_map_pl_type = {
  'node_type'              : pl.datatypes.Utf8,
  'message_marker'         : pl.datatypes.Int64,
  'message_timestamp'      : pl.datatypes.Float64,
  'message_source'         : pl.datatypes.Utf8,
  'message_destination'    : pl.datatypes.Utf8,
  'message_protocol'       : pl.datatypes.Utf8,
  'message_size'           : pl.datatypes.Int64,
  'coap_type'              : pl.datatypes.Utf8,
  'coap_code'              : pl.datatypes.Utf8,
  'coap_message_id'        : pl.datatypes.Int64,
  'coap_token'             : pl.datatypes.Utf8,
  'coap_proxy_uri'         : pl.datatypes.Utf8,
  'coap_retransmitted'     : pl.datatypes.Boolean,
  'http_request'           : pl.datatypes.Boolean,
  'http_request_method'    : pl.datatypes.Utf8,
  'http_request_full_uri'  : pl.datatypes.Utf8,
  'http_response_code'     : pl.datatypes.Int64,
  'http_response_code_desc': pl.datatypes.Utf8,
  'http_response_for_uri'  : pl.datatypes.Utf8,
}

pre_final_field_name_map_pl_type = {f:t for f, t in field_name_map_pl_type.items() if f not in {"message_marker"}}

field_names = set(field_name_map_pl_type.keys())

ip_addr_map_host_name = {
    '10.1.1.1': 'originserver',
    '10.1.2.1': 'receiver',
    '10.1.3.1': 'attacker',
    '10.1.5.1': 'client1',
    '10.1.5.99': 'proxy',
}

type_map_text = {
  0: "con",
  1: "non",
  2: "ack",
  3: "rst",
}

coap_code_to_string = lambda c, dd : (c << 5) | dd
code_map_text = {
  # Empty Message
  coap_code_to_string(0, 00) : "empty_message",
  
  # Method Codes
  coap_code_to_string(0,  1) : "get",
  coap_code_to_string(0,  2) : "put",
  coap_code_to_string(0,  3) : "post",
  coap_code_to_string(0,  4) : "delete",

  # Response Codes
  coap_code_to_string(2,  1) : "created",
  coap_code_to_string(2,  2) : "deleted",
  coap_code_to_string(2,  3) : "valid",
  coap_code_to_string(2,  4) : "changed",
  coap_code_to_string(2,  5) : "content",
  coap_code_to_string(4, 00) : "bad_request",
  coap_code_to_string(4,  1) : "unauthorized",
  coap_code_to_string(4,  2) : "bad_option",
  coap_code_to_string(4,  3) : "forbidden",
  coap_code_to_string(4,  4) : "not_found",
  coap_code_to_string(4,  5) : "method_not_allowed",
  coap_code_to_string(4,  6) : "not_acceptable",
  coap_code_to_string(4, 12) : "precondition_failed",
  coap_code_to_string(4, 13) : "request_entity_too_large",
  coap_code_to_string(4, 15) : "unsupported_media_type",
  coap_code_to_string(5, 00) : "internal_server_error",
  coap_code_to_string(5,  1) : "not_implemented",
  coap_code_to_string(5,  2) : "bad_gateway",
  coap_code_to_string(5,  3) : "service_unavailable",
  coap_code_to_string(5,  4) : "gateway_timeout",
  coap_code_to_string(5,  5) : "proxying_not_supported",

  # Everything else is reserved
}

def pl_replace_from_to(column, from_, to_):
  """
  Produces an expression for polars to replace a `from` values
  to `to` values inside a specified column.
  """
  branch = pl.when(pl.col(column) == from_[0]).then(to_[0])
  for (from_value, to_value) in zip(from_, to_):
    branch = branch.when(pl.col(column) == from_value).then(to_value)
  return branch.otherwise(pl.col(column)).alias(column)

def pl_replace(column, mapping):
  from_ = [k for k, _ in sorted(mapping.items())]
  to_   = [v for _, v in sorted(mapping.items())]
  return pl_replace_from_to(column, from_, to_)

In [2]:
%%time

FILE_PATH = "/home/ubuntu/dos-californium/deter/expdata/real/final/client_and_attacker_httpson/1/proxy_dump.pcap.out"

p = FILE_PATH.split("/")[-1]
p = p[:p.index("_")]
node_type = "originserver" if p == "server" else p

df = (
    pl
    # Read intermediate Wireshark data in as csv
    .scan_csv(FILE_PATH,
              use_pyarrow=True,
              dtypes=data_row_name_map_pl_type,
              sep=";",
              quote_char='"')
    
    # Rename column names from Wireshark format to database format
    .rename(data_row_name_map_field_name)
    .drop("http.response")
)

df = df.with_columns([
    # Replace IP addresses with host names
    pl_replace("message_source", ip_addr_map_host_name),
    pl_replace("message_destination", ip_addr_map_host_name),
    
    # Add node type that generated the input data file
    pl.lit(node_type).alias("node_type"),
    
    # Add the message marker which will be populated later
#     pl.lit(-1).alias("message_marker"),
    
    # Lowercase protocol names
    pl.col("message_protocol").str.to_lowercase().alias("message_protocol")
])

df.collect()

CPU times: user 2.11 s, sys: 1.58 s, total: 3.68 s
Wall time: 639 ms


message_timestamp,message_source,message_destination,message_protocol,message_size,coap_type,coap_retransmitted,coap_code,coap_message_id,coap_token,coap_proxy_uri,http_request,http_request_method,http_request_full_uri,http_response_code,http_response_code_desc,http_response_for_uri,node_type
f64,str,str,str,i64,i64,str,i64,i64,str,str,i64,str,str,i64,str,str,str
1.6479e9,"""client1""","""proxy""","""coap""",106,0,,1,1,"""1d8bc74f5569de04""","""https://10.1.1.1:443/1_1D8BC74F5569DE04""",,,,,,,"""proxy"""
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,1,"""GET""","""https://10.1.1.1:443/1_1D8BC74F5569DE04""",,,,"""proxy"""
1.6479e9,"""originserver""","""proxy""","""http""",491,,,,,,,,,,200,"""OK""","""https://10.1.1.1:443/1_1D8BC74F5569DE04""","""proxy"""
1.6479e9,"""proxy""","""client1""","""coap""",158,2,,69,1,"""1d8bc74f5569de04""",,,,,,,,"""proxy"""
1.6479e9,"""client1""","""proxy""","""coap""",106,0,,1,2,"""473bbaacedb0fbb2""","""https://10.1.1.1:443/2_473BBAACEDB0FBB2""",,,,,,,"""proxy"""
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,1,"""GET""","""https://10.1.1.1:443/2_473BBAACEDB0FBB2""",,,,"""proxy"""
1.6479e9,"""originserver""","""proxy""","""http""",491,,,,,,,,,,200,"""OK""","""https://10.1.1.1:443/2_473BBAACEDB0FBB2""","""proxy"""
1.6479e9,"""proxy""","""client1""","""coap""",158,2,,69,2,"""473bbaacedb0fbb2""",,,,,,,,"""proxy"""
1.6479e9,"""client1""","""proxy""","""coap""",106,0,,1,3,"""091e25b6b4acc2e6""","""https://10.1.1.1:443/3_091E25B6B4ACC2E6""",,,,,,,"""proxy"""
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,1,"""GET""","""https://10.1.1.1:443/3_091E25B6B4ACC2E6""",,,,"""proxy"""


In [3]:
http_columns = df.select("^(http_).*$").collect().columns
coap_columns = df.select("^(coap_).*$").collect().columns

print(http_columns)
print(coap_columns)

df.collect().schema

['http_request', 'http_request_method', 'http_request_full_uri', 'http_response_code', 'http_response_code_desc', 'http_response_for_uri']
['coap_type', 'coap_retransmitted', 'coap_code', 'coap_message_id', 'coap_token', 'coap_proxy_uri']


{'message_timestamp': polars.datatypes.Float64,
 'message_source': polars.datatypes.Utf8,
 'message_destination': polars.datatypes.Utf8,
 'message_protocol': polars.datatypes.Utf8,
 'message_size': polars.datatypes.Int64,
 'coap_type': polars.datatypes.Int64,
 'coap_retransmitted': polars.datatypes.Utf8,
 'coap_code': polars.datatypes.Int64,
 'coap_message_id': polars.datatypes.Int64,
 'coap_token': polars.datatypes.Utf8,
 'coap_proxy_uri': polars.datatypes.Utf8,
 'http_request': polars.datatypes.Int64,
 'http_request_method': polars.datatypes.Utf8,
 'http_request_full_uri': polars.datatypes.Utf8,
 'http_response_code': polars.datatypes.Int64,
 'http_response_code_desc': polars.datatypes.Utf8,
 'http_response_for_uri': polars.datatypes.Utf8,
 'node_type': polars.datatypes.Utf8}

In [4]:
%%time

# Coap messages

cdf = (
    df
    
    # Filter for only coap messages
    .filter(pl.col("message_protocol") == "coap")
    
    # Keep non-http columns
#     .select("^[^(http_).*].*$")
)

cdf = cdf.with_columns([
    # Convert coap type and code to human readable string
    pl_replace("coap_type", type_map_text),
    pl_replace("coap_code", code_map_text),
    
    # Lowercase proxy uri and token
    pl.col("coap_proxy_uri").str.to_lowercase().alias("coap_proxy_uri"),
    pl.col("coap_token").str.to_lowercase().alias("coap_token"),

    # Sometimes there is a random : so we need to remove it
    pl.col("coap_token").str.replace(":", "").alias("coap_token"),
    
    # Convert coap retransmitted to a boolean
    pl.col("coap_retransmitted").is_not_null().alias("coap_retransmitted"),
    
    # Assign each message a UID
    pl.format("{}_{}", "coap_message_id", "coap_token").alias("uid")
])

# Nullify values in http columns and cast the dataframe to the final expected types
nullify_http_columns = [pl.lit(None).alias(col) for col in http_columns]
cast_to_final_types = [pl.col(col).cast(col_type).alias(col) for col, col_type in pre_final_field_name_map_pl_type.items()]
cdf = cdf.with_columns(nullify_http_columns + cast_to_final_types)

cdf.collect()

CPU times: user 3.56 s, sys: 494 ms, total: 4.06 s
Wall time: 1.23 s


message_timestamp,message_source,message_destination,message_protocol,message_size,coap_type,coap_retransmitted,coap_code,coap_message_id,coap_token,coap_proxy_uri,http_request,http_request_method,http_request_full_uri,http_response_code,http_response_code_desc,http_response_for_uri,node_type,uid
f64,str,str,str,i64,str,bool,str,i64,str,str,bool,str,str,i64,str,str,str,str
1.6479e9,"""client1""","""proxy""","""coap""",106,"""con""",false,"""get""",1,"""1d8bc74f5569de04""","""https://10.1.1.1:443/1_1D8BC74F5569DE04""",,,,,,,"""proxy""","""1_1d8bc74f5569de04"""
1.6479e9,"""proxy""","""client1""","""coap""",158,"""ack""",false,"""content""",1,"""1d8bc74f5569de04""",,,,,,,,"""proxy""","""1_1d8bc74f5569de04"""
1.6479e9,"""client1""","""proxy""","""coap""",106,"""con""",false,"""get""",2,"""473bbaacedb0fbb2""","""https://10.1.1.1:443/2_473BBAACEDB0FBB2""",,,,,,,"""proxy""","""2_473bbaacedb0fbb2"""
1.6479e9,"""proxy""","""client1""","""coap""",158,"""ack""",false,"""content""",2,"""473bbaacedb0fbb2""",,,,,,,,"""proxy""","""2_473bbaacedb0fbb2"""
1.6479e9,"""client1""","""proxy""","""coap""",106,"""con""",false,"""get""",3,"""091e25b6b4acc2e6""","""https://10.1.1.1:443/3_091E25B6B4ACC2E6""",,,,,,,"""proxy""","""3_091e25b6b4acc2e6"""
1.6479e9,"""proxy""","""client1""","""coap""",158,"""ack""",false,"""content""",3,"""091e25b6b4acc2e6""",,,,,,,,"""proxy""","""3_091e25b6b4acc2e6"""
1.6479e9,"""client1""","""proxy""","""coap""",106,"""con""",false,"""get""",4,"""d1238e937982076a""","""https://10.1.1.1:443/4_D1238E937982076A""",,,,,,,"""proxy""","""4_d1238e937982076a"""
1.6479e9,"""proxy""","""client1""","""coap""",158,"""ack""",false,"""content""",4,"""d1238e937982076a""",,,,,,,,"""proxy""","""4_d1238e937982076a"""
1.6479e9,"""client1""","""proxy""","""coap""",106,"""con""",false,"""get""",5,"""578bddc7bbcbc577""","""https://10.1.1.1:443/5_578BDDC7BBCBC577""",,,,,,,"""proxy""","""5_578bddc7bbcbc577"""
1.6479e9,"""proxy""","""client1""","""coap""",158,"""ack""",false,"""content""",5,"""578bddc7bbcbc577""",,,,,,,,"""proxy""","""5_578bddc7bbcbc577"""


In [5]:
%%time

# Http messages

hdf = (
    df
    
    # Filter for only http messages
    .filter(pl.col("message_protocol") == "http")
    
    # Keep non-coap columns
#     .select("^[^(coap_).*].*$")
)

nullify_coap_columns = [pl.lit(None).alias(column_name) \
                            for column_name in cdf.select("^(coap_).*$").collect().columns]

hdf = hdf.with_columns(nullify_coap_columns + [
    # Convert http request to a boolean
    pl.col("http_request").is_not_null().alias("http_request"),
    
    # Lowercase the http request method
    pl.col("http_request_method").str.to_lowercase().alias("http_request_method"),
    
    # Lowercase the http request and response
    pl.col("http_request_full_uri").str.to_lowercase().alias("http_request_full_uri"),
    pl.col("http_response_for_uri").str.to_lowercase().alias("http_response_for_uri"),
    
    # Coalesce the http URI across requests and responses
    pl.format(
        "{}{}",
        pl.col("http_request_full_uri").fill_null(""),
        pl.col("http_response_for_uri").fill_null("")
    )
    # Then assign each message a UID
    .str.extract(r"(\w+_\w+)", 1).alias("uid"),    
])

# Nullify values in coap columns and cast the dataframe to the final expected types
nullify_coap_columns = [pl.lit(None).alias(col) for col in coap_columns]
cast_to_final_types = [pl.col(col).cast(col_type).alias(col) for col, col_type in pre_final_field_name_map_pl_type.items()]
hdf = hdf.with_columns(nullify_coap_columns + cast_to_final_types)

hdf.collect()

CPU times: user 6.17 s, sys: 733 ms, total: 6.9 s
Wall time: 1.8 s


message_timestamp,message_source,message_destination,message_protocol,message_size,coap_type,coap_retransmitted,coap_code,coap_message_id,coap_token,coap_proxy_uri,http_request,http_request_method,http_request_full_uri,http_response_code,http_response_code_desc,http_response_for_uri,node_type,uid
f64,str,str,str,i64,str,bool,str,i64,str,str,bool,str,str,i64,str,str,str,str
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,true,"""get""","""https://10.1.1.1:443/1_1d8bc74f5569de04""",,,,"""proxy""","""1_1D8BC74F5569DE04"""
1.6479e9,"""originserver""","""proxy""","""http""",491,,,,,,,false,,,200,"""OK""","""https://10.1.1.1:443/1_1d8bc74f5569de04""","""proxy""","""1_1D8BC74F5569DE04"""
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,true,"""get""","""https://10.1.1.1:443/2_473bbaacedb0fbb2""",,,,"""proxy""","""2_473BBAACEDB0FBB2"""
1.6479e9,"""originserver""","""proxy""","""http""",491,,,,,,,false,,,200,"""OK""","""https://10.1.1.1:443/2_473bbaacedb0fbb2""","""proxy""","""2_473BBAACEDB0FBB2"""
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,true,"""get""","""https://10.1.1.1:443/3_091e25b6b4acc2e6""",,,,"""proxy""","""3_091E25B6B4ACC2E6"""
1.6479e9,"""originserver""","""proxy""","""http""",491,,,,,,,false,,,200,"""OK""","""https://10.1.1.1:443/3_091e25b6b4acc2e6""","""proxy""","""3_091E25B6B4ACC2E6"""
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,true,"""get""","""https://10.1.1.1:443/4_d1238e937982076a""",,,,"""proxy""","""4_D1238E937982076A"""
1.6479e9,"""originserver""","""proxy""","""http""",491,,,,,,,false,,,200,"""OK""","""https://10.1.1.1:443/4_d1238e937982076a""","""proxy""","""4_D1238E937982076A"""
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,true,"""get""","""https://10.1.1.1:443/5_578bddc7bbcbc577""",,,,"""proxy""","""5_578BDDC7BBCBC577"""
1.6479e9,"""originserver""","""proxy""","""http""",491,,,,,,,false,,,200,"""OK""","""https://10.1.1.1:443/5_578bddc7bbcbc577""","""proxy""","""5_578BDDC7BBCBC577"""


In [6]:
%%time 

joined_df = (
    pl.concat([cdf, hdf])
    .sort(by="message_timestamp")
)

# print(f"{len(cdf.collect())=}")
# print(f"{len(hdf.collect())=}")
# print(f"{len(joined_df.collect())=}")

joined_df.collect()

CPU times: user 6.87 s, sys: 974 ms, total: 7.84 s
Wall time: 1.6 s


message_timestamp,message_source,message_destination,message_protocol,message_size,coap_type,coap_retransmitted,coap_code,coap_message_id,coap_token,coap_proxy_uri,http_request,http_request_method,http_request_full_uri,http_response_code,http_response_code_desc,http_response_for_uri,node_type,uid
f64,str,str,str,i64,str,bool,str,i64,str,str,bool,str,str,i64,str,str,str,str
1.6479e9,"""client1""","""proxy""","""coap""",106,"""con""",false,"""get""",1,"""1d8bc74f5569de04""","""https://10.1.1.1:443/1_1D8BC74F5569DE04""",,,,,,,"""proxy""","""1_1d8bc74f5569de04"""
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,true,"""get""","""https://10.1.1.1:443/1_1d8bc74f5569de04""",,,,"""proxy""","""1_1D8BC74F5569DE04"""
1.6479e9,"""originserver""","""proxy""","""http""",491,,,,,,,false,,,200,"""OK""","""https://10.1.1.1:443/1_1d8bc74f5569de04""","""proxy""","""1_1D8BC74F5569DE04"""
1.6479e9,"""proxy""","""client1""","""coap""",158,"""ack""",false,"""content""",1,"""1d8bc74f5569de04""",,,,,,,,"""proxy""","""1_1d8bc74f5569de04"""
1.6479e9,"""client1""","""proxy""","""coap""",106,"""con""",false,"""get""",2,"""473bbaacedb0fbb2""","""https://10.1.1.1:443/2_473BBAACEDB0FBB2""",,,,,,,"""proxy""","""2_473bbaacedb0fbb2"""
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,true,"""get""","""https://10.1.1.1:443/2_473bbaacedb0fbb2""",,,,"""proxy""","""2_473BBAACEDB0FBB2"""
1.6479e9,"""originserver""","""proxy""","""http""",491,,,,,,,false,,,200,"""OK""","""https://10.1.1.1:443/2_473bbaacedb0fbb2""","""proxy""","""2_473BBAACEDB0FBB2"""
1.6479e9,"""proxy""","""client1""","""coap""",158,"""ack""",false,"""content""",2,"""473bbaacedb0fbb2""",,,,,,,,"""proxy""","""2_473bbaacedb0fbb2"""
1.6479e9,"""client1""","""proxy""","""coap""",106,"""con""",false,"""get""",3,"""091e25b6b4acc2e6""","""https://10.1.1.1:443/3_091E25B6B4ACC2E6""",,,,,,,"""proxy""","""3_091e25b6b4acc2e6"""
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,true,"""get""","""https://10.1.1.1:443/3_091e25b6b4acc2e6""",,,,"""proxy""","""3_091E25B6B4ACC2E6"""


In [7]:
%%time

message_marker_df = (
#     # Get unique UIDs
#     joined_df
#     .groupby("uid")
#     .agg(pl.count())
#     .drop("count")
    joined_df
    .unique(maintain_order=True, subset=["uid"])
    .select("uid")
    
    # Add row counter
    .with_row_count(name="message_marker", offset=1)
)

message_marker_df.collect()

CPU times: user 7.47 s, sys: 759 ms, total: 8.22 s
Wall time: 1.54 s


message_marker,uid
u32,str
1,"""1_1d8bc74f5569de04"""
2,"""1_1D8BC74F5569DE04"""
3,"""2_473bbaacedb0fbb2"""
4,"""2_473BBAACEDB0FBB2"""
5,"""3_091e25b6b4acc2e6"""
6,"""3_091E25B6B4ACC2E6"""
7,"""4_d1238e937982076a"""
8,"""4_D1238E937982076A"""
9,"""5_578bddc7bbcbc577"""
10,"""5_578BDDC7BBCBC577"""


In [8]:
%%time

final_df = joined_df.join(message_marker_df, on="uid")

final_df.collect()

CPU times: user 12.5 s, sys: 951 ms, total: 13.4 s
Wall time: 2.79 s


message_timestamp,message_source,message_destination,message_protocol,message_size,coap_type,coap_retransmitted,coap_code,coap_message_id,coap_token,coap_proxy_uri,http_request,http_request_method,http_request_full_uri,http_response_code,http_response_code_desc,http_response_for_uri,node_type,uid,message_marker
f64,str,str,str,i64,str,bool,str,i64,str,str,bool,str,str,i64,str,str,str,str,u32
1.6479e9,"""client1""","""proxy""","""coap""",106,"""con""",false,"""get""",1,"""1d8bc74f5569de04""","""https://10.1.1.1:443/1_1D8BC74F5569DE04""",,,,,,,"""proxy""","""1_1d8bc74f5569de04""",1
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,true,"""get""","""https://10.1.1.1:443/1_1d8bc74f5569de04""",,,,"""proxy""","""1_1D8BC74F5569DE04""",2
1.6479e9,"""originserver""","""proxy""","""http""",491,,,,,,,false,,,200,"""OK""","""https://10.1.1.1:443/1_1d8bc74f5569de04""","""proxy""","""1_1D8BC74F5569DE04""",2
1.6479e9,"""proxy""","""client1""","""coap""",158,"""ack""",false,"""content""",1,"""1d8bc74f5569de04""",,,,,,,,"""proxy""","""1_1d8bc74f5569de04""",1
1.6479e9,"""client1""","""proxy""","""coap""",106,"""con""",false,"""get""",2,"""473bbaacedb0fbb2""","""https://10.1.1.1:443/2_473BBAACEDB0FBB2""",,,,,,,"""proxy""","""2_473bbaacedb0fbb2""",3
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,true,"""get""","""https://10.1.1.1:443/2_473bbaacedb0fbb2""",,,,"""proxy""","""2_473BBAACEDB0FBB2""",4
1.6479e9,"""originserver""","""proxy""","""http""",491,,,,,,,false,,,200,"""OK""","""https://10.1.1.1:443/2_473bbaacedb0fbb2""","""proxy""","""2_473BBAACEDB0FBB2""",4
1.6479e9,"""proxy""","""client1""","""coap""",158,"""ack""",false,"""content""",2,"""473bbaacedb0fbb2""",,,,,,,,"""proxy""","""2_473bbaacedb0fbb2""",3
1.6479e9,"""client1""","""proxy""","""coap""",106,"""con""",false,"""get""",3,"""091e25b6b4acc2e6""","""https://10.1.1.1:443/3_091E25B6B4ACC2E6""",,,,,,,"""proxy""","""3_091e25b6b4acc2e6""",5
1.6479e9,"""proxy""","""originserver""","""http""",230,,,,,,,true,"""get""","""https://10.1.1.1:443/3_091e25b6b4acc2e6""",,,,"""proxy""","""3_091E25B6B4ACC2E6""",6


In [9]:
# Validators
assert len(df[~(df["message_timestamp"] > 0)]) == 0
assert len(set(df["message_protocol"].unique()) & {"coap", "http"}) > 0 # At least one of coap, http
assert len(set(df["message_protocol"].unique()) - {"coap", "http"}) == 0 # Nothing but coap, http
assert len(df[~(df["message_size"] > 0)]) == 0

# # Validators
# assert len({True, False} - set(cdf["coap_retransmitted"].unique())) == 0 # Nothing but True, False


df

TypeError: 'LazyFrame' object is not subscriptable