# In [34]:
import csv
from collections import defaultdict

def read_lookup_table(filename):
    """Load the tag lookup table from a CSV file with a header row.

    The file must provide ``dstport``, ``protocol`` and ``tag`` columns.

    Returns a dict keyed by ``(dstport, protocol)`` whose values are tags.
    All three fields are stripped of surrounding whitespace, and the
    protocol is lowercased so that matching is case-insensitive. When the
    same key appears more than once, the last row wins.
    """
    with open(filename, 'r', newline='') as handle:
        return {
            (entry['dstport'].strip(), entry['protocol'].strip().lower()):
                entry['tag'].strip()
            for entry in csv.DictReader(handle)
        }

def generate_output(output_filename, tag_counts, port_protocol_counts):
    """Write the tag and port/protocol summaries as two CSV files.

    Args:
        output_filename: Path prefix; ``_tag_counts.csv`` and
            ``_port_protocol_counts.csv`` are appended to it.
        tag_counts: Mapping of tag -> number of occurrences.
        port_protocol_counts: Mapping of ``(port, protocol)`` -> number of
            occurrences.

    Tag rows are ordered by descending count, then by tag name.
    Port/protocol rows are ordered by numeric port, then by protocol name;
    ports that cannot be parsed as integers (e.g. ``-``) are listed after
    all numeric ports instead of aborting the whole report.
    """
    def _port_protocol_order(item):
        (port, protocol), _count = item
        try:
            # Numeric ports sort by value, so '9' comes before '80'.
            return (0, int(port), '', protocol)
        except (TypeError, ValueError):
            # Non-numeric ports go last, ordered by their text.
            return (1, 0, str(port), protocol)

    with open(output_filename + '_tag_counts.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Tag', 'Count'])
        for tag, count in sorted(tag_counts.items(), key=lambda x: (-x[1], x[0])):
            writer.writerow([tag, count])

    with open(output_filename + '_port_protocol_counts.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Port', 'Protocol', 'Count'])
        for (port, protocol), count in sorted(port_protocol_counts.items(),
                                              key=_port_protocol_order):
            writer.writerow([port, protocol, count])

def apply_tags_to_logs(logs_filename, lookup, output_filename):
    """Apply tags to log entries based on dstport and protocol.

    Args:
        logs_filename: Path of the space-delimited flow log file to read.
        lookup: Mapping of ``(dstport, protocol_name)`` -> tag, where
            protocol_name is lowercase (``tcp``, ``udp``, ``icmp``).
        output_filename: Path prefix for the outputs. The tagged log is
            written to ``<prefix>.csv``; the summary files are produced by
            ``generate_output`` using the same prefix.

    Each record is written back out with an extra ``Tag`` column; records
    with no match in ``lookup`` are tagged ``Untagged``. Protocol numbers
    other than 6, 17 and 1 are treated as ``unknown``.

    Blank lines and rows too short to contain both the destination port
    and the protocol are skipped rather than raising ``KeyError``.
    """
    tag_counts = defaultdict(int)
    port_protocol_counts = defaultdict(int)

    fieldnames = ['Version', 'AccountID', 'InterfaceID', 'SrcAddr', 'DstAddr', 'SrcPort', 'DstPort', 'Protocol', 'Packets', 'Bytes', 'StartTime', 'EndTime', 'Action', 'LogStatus', 'Tag']
    input_fields = fieldnames[:-1]
    # A row must reach at least the Protocol column to be taggable.
    min_fields = fieldnames.index('Protocol') + 1
    # Protocol number -> name; built once instead of on every row.
    protocol_names = {'6': 'tcp', '17': 'udp', '1': 'icmp'}

    with open(logs_filename, 'r', newline='') as infile, open(output_filename + '.csv', 'w', newline='') as outfile:
        reader = csv.reader(infile, delimiter=' ')
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            # csv.reader yields [] for blank lines; truncated rows lack the
            # columns needed to build the lookup key.
            if len(row) < min_fields:
                continue

            log_entry = dict(zip(input_fields, row))
            protocol = protocol_names.get(log_entry['Protocol'], 'unknown')
            key = (log_entry['DstPort'], protocol)
            tag = lookup.get(key, 'Untagged')
            log_entry['Tag'] = tag
            writer.writerow(log_entry)

            tag_counts[tag] += 1
            port_protocol_counts[key] += 1

    generate_output(output_filename, tag_counts, port_protocol_counts)

# Build the (dstport, protocol) -> tag mapping from the lookup CSV
lookup_table = read_lookup_table(filename='lookup.txt')
# Tag every record of the sample flow log; outputs use the 'output' prefix
apply_tags_to_logs(
    logs_filename='sampleFlowLogs.txt',
    lookup=lookup_table,
    output_filename='output',
)