In [1]:
import csv
from collections import defaultdict, OrderedDict

def parse_line(line):
    """Extract the flow-relevant fields from one trace log line.

    The line is split on whitespace and read positionally: field 1 is the
    timestamp, field 2 is a '/'-separated path whose element at index 2
    carries the node ID, and everything from field 8 onward is treated as
    the IP header text. That text must contain an ``id <value>`` token pair
    and a ``<source> > <destination>`` pair.

    NOTE(review): this layout looks like an ns-3 ASCII trace with a
    point-to-point header ahead of the IPv4 header -- confirm against the
    tool that produces the trace.

    Returns:
        Tuple ``(source_ip, destination_ip, node_id, timestamp, packet_id)``.
        ``timestamp`` is a float; the other items are strings.

    Raises:
        IndexError: the line has too few fields, lacks the '>' pair, or
            has no value after the ``id`` token.
        ValueError: the timestamp is not numeric or no ``id`` token exists.
    """
    parts = line.split()
    timestamp = float(parts[1])

    # With a path such as '/NodeList/0/...', index 2 of the '/'-split is the
    # node number. Any literal 'NodeList' text in that element is dropped.
    node_id = parts[2].split('/')[2].replace('NodeList', '')

    header_tokens = parts[8:]
    ip_header_info = ' '.join(header_tokens)

    # The first '>' separates the source address (last word before it) from
    # the destination address (first word after it, minus a closing paren).
    before_arrow, _, after_arrow = ip_header_info.partition('>')
    source_ip = before_arrow.split()[-1]
    destination_ip = after_arrow.split()[0].rstrip(')')

    # Match 'id' as a whole token: a plain substring split would also fire on
    # any earlier word that merely contains the letters "id".
    packet_id = header_tokens[header_tokens.index('id') + 1]

    return source_ip, destination_ip, node_id, timestamp, packet_id

def track_flows(log_file):
    """Group the lines of a trace file into flows and record each flow's path.

    Every non-blank line is parsed with ``parse_line``. Lines sharing the
    same ``(source, destination, packet_id)`` triple belong to one flow.

    Args:
        log_file: Path of the trace file to read.

    Returns:
        A ``defaultdict`` keyed by ``(source, destination, packet_id)``.
        Each value is a dict with:
          * ``'nodes'`` -- an ``OrderedDict`` whose keys are the node IDs
            seen for the flow, in first-seen order (values are ``True``);
          * ``'first_timestamp'`` -- the smallest timestamp seen for the flow.

    Raises:
        Whatever ``parse_line`` raises for a non-blank line it cannot parse.
    """
    flows = defaultdict(lambda: {'nodes': OrderedDict(), 'first_timestamp': float('inf')})
    with open(log_file, 'r') as file:
        for line in file:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record; skipping them avoids an IndexError in the parser.
            if not line.strip():
                continue

            source, destination, node_id, timestamp, packet_id = parse_line(line)
            # The packet ID is part of the key so that separate packets
            # between the same endpoints are tracked independently.
            flow = flows[(source, destination, packet_id)]

            # setdefault leaves an already-visited node in its original
            # position, so the path keeps first-seen order.
            flow['nodes'].setdefault(node_id, True)
            flow['first_timestamp'] = min(flow['first_timestamp'], timestamp)

    return flows

def write_flows_to_csv(flows, output_file):
    """Dump the tracked flows to ``output_file`` as CSV, one row per flow.

    Args:
        flows: Mapping from ``(source, destination, packet_id)`` to a dict
            holding ``'nodes'`` (mapping whose keys are the visited node
            IDs, in order) and ``'first_timestamp'``.
        output_file: Path of the CSV file to create or overwrite.

    Each row holds the flow's first timestamp, its endpoints and packet ID,
    the visited nodes joined with ' > ', and the hop count (nodes minus one).
    """
    columns = ['timestamp', 'source', 'destination', 'packet id', 'path', 'number of hops']

    with open(output_file, 'w', newline='') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()

        for flow_key, record in flows.items():
            src, dst, pkt = flow_key
            visited = record['nodes']
            row_values = (
                record['first_timestamp'],
                src,
                dst,
                pkt,
                ' > '.join(visited.keys()),
                len(visited) - 1,  # hops are the links between consecutive nodes
            )
            out.writerow(dict(zip(columns, row_values)))

def process_tr_file(input_file, output_file):
    """Convert a trace file into a per-flow CSV summary.

    Reads ``input_file`` with ``track_flows``, writes the result to
    ``output_file`` with ``write_flows_to_csv``, then prints a confirmation.
    """
    write_flows_to_csv(track_flows(input_file), output_file)
    print("Processing complete. Output written to", output_file)

# Call the function with file names: read the trace 'dataset.tr' and write the
# per-flow summary to 'dataset.csv' (both paths are relative to the working directory).
process_tr_file('dataset.tr', 'dataset.csv')


Processing complete. Output written to dataset.csv
