# Abhinav Malik (MDS202401)

## ADA Assignment-2.1


In [None]:
import csv
from collections import defaultdict
from scapy.all import rdpcap, IP, TCP, UDP

def pcap_to_flows(pcap_path):
    """
    Group the TCP/UDP packets of a capture file into bidirectional flows.

    Both directions of a conversation are folded into a single entry: the
    flow key is whichever of the forward tuple
    (src_ip, src_port, dst_ip, dst_port, protocol) and its reverse compares
    smaller.

    Args:
        pcap_path: Path of the capture file handed to ``rdpcap``.

    Returns:
        A defaultdict mapping each flow key to a dict holding 'packets',
        'bytes', 'first_seen' and 'last_seen', or None (after printing a
        warning) when the capture cannot be read.
    """
    try:
        capture = rdpcap(pcap_path)
    except Exception as err:
        print(f"⚠️ Could not read PCAP: {err}")
        return None

    def _blank_record():
        # Fresh per-flow counters; timestamps stay None until a packet is seen.
        return {
            'packets': 0,
            'bytes': 0,
            'first_seen': None,
            'last_seen': None,
        }

    flows = defaultdict(_blank_record)

    for packet in capture:
        # Anything without an IP layer is ignored.
        if IP not in packet:
            continue

        ip_layer = packet[IP]

        # The IP protocol number must agree with the transport layer present.
        if ip_layer.proto == 6 and TCP in packet:
            transport, label = packet[TCP], "TCP"
        elif ip_layer.proto == 17 and UDP in packet:
            transport, label = packet[UDP], "UDP"
        else:
            continue

        when = float(packet.time)
        forward = (ip_layer.src, transport.sport, ip_layer.dst, transport.dport, label)
        backward = (ip_layer.dst, transport.dport, ip_layer.src, transport.sport, label)

        # Direction-independent key: the smaller of the two orientations.
        record = flows[min(forward, backward)]

        record['packets'] += 1
        record['bytes'] += len(packet)

        if record['first_seen'] is None:
            record['first_seen'] = when
        else:
            record['first_seen'] = min(record['first_seen'], when)

        if record['last_seen'] is None:
            record['last_seen'] = when
        else:
            record['last_seen'] = max(record['last_seen'], when)

    return flows


def write_flows_to_csv(flow_dict, csv_path):
    """
    Export the extracted flow data into a CSV file.

    Args:
        flow_dict: Mapping of flow key tuples
            (src_ip, src_port, dst_ip, dst_port, protocol) to stats dicts
            with 'packets', 'bytes', 'first_seen' and 'last_seen' entries.
            None (which pcap_to_flows returns when the capture cannot be
            read) is reported as a failure and no file is touched.
        csv_path: Destination path; the file is opened in 'w' mode, so an
            existing file is overwritten.

    Returns:
        None. The outcome is reported with print; exceptions raised while
        writing are caught and printed rather than propagated.
    """
    header = [
        'src_ip', 'src_port', 'dst_ip', 'dst_port', 'protocol',
        'packets', 'bytes', 'first_seen', 'last_seen', 'duration'
    ]

    # Reject missing input before opening the file: opening in 'w' mode would
    # truncate an existing CSV and leave only a header behind.
    if flow_dict is None:
        print("Failed to write CSV: no flow data to write")
        return

    try:
        with open(csv_path, mode='w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=header)
            writer.writeheader()

            for key, stats in flow_dict.items():
                first_seen = stats['first_seen']
                last_seen = stats['last_seen']
                # Compare with None explicitly: a timestamp of 0.0 is valid
                # but falsy, so a truthiness test would zero the duration.
                if first_seen is None or last_seen is None:
                    duration = 0.0
                else:
                    duration = last_seen - first_seen

                writer.writerow({
                    'src_ip': key[0],
                    'src_port': key[1],
                    'dst_ip': key[2],
                    'dst_port': key[3],
                    'protocol': key[4],
                    'packets': stats['packets'],
                    'bytes': stats['bytes'],
                    'first_seen': first_seen,
                    'last_seen': last_seen,
                    'duration': round(duration, 6)
                })
        print(f"Flows saved to {csv_path}")
    except Exception as err:
        # Deliberately best-effort: report the problem instead of raising,
        # so callers that never expected an exception keep working.
        print(f"Failed to write CSV: {err}")

In [None]:
# Aggregate the packets of http.pcap into flows; pcap_to_flows prints a
# warning and returns None if the capture cannot be read.
flows = pcap_to_flows('http.pcap')

In [None]:
# Inspect the container type returned by pcap_to_flows.
type(flows)

collections.defaultdict

In [None]:
# Write one CSV row per aggregated flow to flows.csv.
write_flows_to_csv(flows, 'flows.csv')

Flows saved to flows.csv
