Helper functions to transform a raw *.data file into a JSON array.

In [1]:
import json
from typing import Any, Dict, Optional

def get_mock_ip_pairs(ip_net_u32: str) -> str:
    """Look up the mocked IP-pair label for a u32 network key.

    The key is the decimal string form of the u32 value. Both known keys
    (one seen on the sender side, one on the receiver side) resolve to the
    same label string.

    Raises:
        KeyError: if ``ip_net_u32`` is not one of the known mock keys.
    """
    pair_label = "[129.57.177.6, 129.57.177.5]"
    known_keys = (
        "95500673",     # sender side
        "112277889",    # receiver side
    )
    label_by_key = dict.fromkeys(known_keys, pair_label)
    return label_by_key[ip_net_u32]

In [2]:
def sum_list(x: Optional[list]) -> int:
    """Return the integer sum of the entries of ``x``.

    ``None`` or an empty list yields 0. ``None`` entries are skipped; every
    other entry is converted with ``int()`` before being added, so floats
    are truncated toward zero and integer-formatted strings are accepted.
    An entry that ``int()`` cannot convert raises the corresponding
    ``ValueError`` / ``TypeError``.
    """
    if x:
        return sum(int(item) for item in x if item is not None)
    return 0


def transform_record_line(single_line: str) -> Dict[str, Any]:
    """
    Transform exactly ONE line of input that looks like:
    {"1762923223":{"112277889":{...}}}

    The line must be a JSON object with a single timestamp key whose value
    maps u32 network keys (as strings) to metric dicts. Each metric dict may
    carry the lists "tcp_bytes", "tcp_packets", "udp_bytes", "udp_packets";
    missing or null lists are treated as empty.

    Returns:
        A dict of the form
        {"timestamp": <int>, "stats": [{<ip-pair label>: {...}}]}
        where each per-label dict holds "total_bytes" (sum of the tcp and
        udp byte lists) plus the four metric lists as read.

    Raises:
        ValueError: if the line is not valid JSON (json.JSONDecodeError is
            a ValueError subclass), the top-level value is not an object
            with exactly one key, the timestamp key is not an integer
            string, or a nested value is not a dict.
        KeyError: if a u32 key is not known to get_mock_ip_pairs.
    """

    # 1. Parse the JSON line
    raw = json.loads(single_line)

    # 2. Validate top-level structure
    # A JSON array/string/number/null would otherwise fail later with an
    # unrelated AttributeError or TypeError; reject it up front instead.
    if not isinstance(raw, dict):
        raise ValueError(
            f"Expected a JSON object at top level, got {type(raw).__name__}"
        )

    if len(raw) != 1:
        raise ValueError(
            f"Expected one top-level timestamp, got {len(raw)}"
        )

    ts_str, per_ts = next(iter(raw.items()))
    timestamp = int(ts_str)

    if not isinstance(per_ts, dict):
        raise ValueError("Expected timestamp value to be a dict")

    out_stats: Dict[str, Any] = {}

    # 3. Process the (u32 -> metrics) entry
    for u32_str, metrics in per_ts.items():
        if not isinstance(metrics, dict):
            raise ValueError(f"Metrics for {u32_str} is not a dict")

        # "or []" also normalizes an explicit null value to an empty list.
        tcp_bytes    = metrics.get("tcp_bytes", []) or []
        tcp_packets  = metrics.get("tcp_packets", []) or []
        udp_bytes    = metrics.get("udp_bytes", []) or []
        udp_packets  = metrics.get("udp_packets", []) or []

        total_packet_len = (
            sum_list(tcp_bytes) +
            sum_list(udp_bytes)
        )

        # NOTE: if two u32 keys resolve to the same label, the later entry
        # overwrites the earlier one in out_stats.
        key = get_mock_ip_pairs(u32_str)

        out_stats[key] = {
            "total_bytes": total_packet_len,
            "udp_bytes": udp_bytes,
            "udp_packets": udp_packets,
            "tcp_bytes": tcp_bytes,
            "tcp_packets": tcp_packets,
        }

    # 4. Final output
    return {
        "timestamp": timestamp,
        "stats": [out_stats],
    }


In [3]:
import json
from pathlib import Path
from typing import Iterable

def transform_data_file(input_file: str, output_file: str) -> None:
    """
    Read a *.data file line-by-line, transform each record,
    and write a single JSON array to output_file.

    Blank lines are skipped. Every other line is passed to
    transform_record_line and the results are collected in input order.
    Both files are opened as UTF-8 so the result does not depend on the
    platform's locale encoding.

    Args:
        input_file: path of the line-delimited input file.
        output_file: path of the JSON file to create or overwrite.

    Raises:
        RuntimeError: if any line fails to transform; the message names the
            file and 1-based line number and the original exception is
            chained as the cause. The output file is not written in that
            case.
    """
    input_path = Path(input_file)
    output_path = Path(output_file)

    results = []

    with input_path.open("r", encoding="utf-8") as fin:
        for lineno, line in enumerate(fin, start=1):
            line = line.strip()
            if not line:
                continue  # skip empty lines

            try:
                rec = transform_record_line(line)
            except Exception as e:
                # Re-raise with file/line context; the cause stays chained.
                raise RuntimeError(
                    f"Error processing {input_path} at line {lineno}"
                ) from e
            results.append(rec)

    with output_path.open("w", encoding="utf-8") as fout:
        json.dump(results, fout, indent=2)

    print(f"Wrote {len(results)} records -> {output_path}")

In [4]:
# Convert sender.data into the JSON array file sender.json.
transform_data_file("sender.data", "sender.json")

Wrote 11 records -> sender.json


In [5]:
# Convert receiver.data into the JSON array file receiver.json.
transform_data_file("receiver.data", "receiver.json")

Wrote 11 records -> receiver.json
