## Real-time analysis

In [None]:
import time
import datetime
from collections import defaultdict
import ipaddress
from pybgpstream import BGPStream

# Define constants
COLLECTION_DURATION = 60  # 10 minutes in seconds
TARGET_ASN = '4766'

def process_update(stream):

    # Initialize counters and tracking variables
    announcements = defaultdict(int)
    withdrawals = defaultdict(int)
    prefix_as_paths = {}
    community_counts = defaultdict(int)
    med_values = []
    collector_to_prefix_count = defaultdict(int)

    # Start timer
    collection_start_time = time.time()
    interval_start_time = collection_start_time

    while True:
        # Check if the total collection duration has been exceeded
        if time.time() - collection_start_time >= COLLECTION_DURATION:
            break

        for rec in stream.records():
            for elem in rec:
                # Get timestamp and type
                elem_time = datetime.datetime.utcfromtimestamp(elem.time)
                elem_type = elem.type  # 'A' for announcements, 'W' for withdrawals

                # Fields dictionary
                fields = elem.fields

                # Prefix
                prefix = fields.get("prefix")
                if prefix is None:
                    continue

                # AS Path
                as_path_str = fields.get('as-path', "")
                as_path = as_path_str.split()

                # Peer ASN and Collector
                peer_asn = elem.peer_asn
                collector = rec.collector

                # Communities
                communities = fields.get('communities', [])

                # Validate and parse IP prefix
                try:
                    network = ipaddress.ip_network(prefix)
                except ValueError:
                    continue

                # Filtering logic within the loop
                # Filter for a specific ASN in AS Path
                if TARGET_ASN not in as_path:
                    continue

                # Counting announcements and withdrawals
                if elem_type == 'A':
                    announcements[prefix] += 1
                elif elem_type == 'W':
                    withdrawals[prefix] += 1

                # Detecting AS Path changes
                if prefix in prefix_as_paths:
                    if as_path!= prefix_as_paths[prefix]:
                        # AS path has changed
                        # Handle AS path change
                        prefix_as_paths[prefix] = as_path
                else:
                    prefix_as_paths[prefix] = as_path

                # Analyze community attributes
                for community in communities:
                    community_str = f"{community[0]}:{community[1]}"
                    community_counts[community_str] += 1

                # Calculate statistics (e.g., Average MED)
                med = fields.get('med')
                if med is not None:
                    try:
                        med_values.append(int(med))
                    except ValueError:
                        pass

                # Update collector-to-prefix count
                collector_to_prefix_count[collector] += 1

        # Print summary every minute
        current_time = time.time()
        if current_time - interval_start_time >= 60:
            print(f"Summary at {current_time}:")
            print(f"Unique Prefixes: {len(announcements)}")
            print(f"Origin ASes: {len(set(as_path for as_path in prefix_as_paths.values()))}")
            print(f"Collector-to-Prefix Count: {collector_to_prefix_count}")
            interval_start_time = current_time

def main():
    """
    Run the script.
    """
    stream = BGPStream(project="ris-live", record_type="updates")

    process_update(stream)

if __name__ == "__main__":
    main()

## Updates Summary

In [5]:
import pybgpstream
from datetime import datetime, timezone
import os
import statistics
import re

def process_bgp_updates(directory, target_asn, from_time_str, until_time_str):
    # Parse the time window strings into datetime objects
    from_time = datetime.strptime(from_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
    until_time = datetime.strptime(until_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)

    # Regular expression pattern to match filenames and extract timestamp
    pattern = r'^updates\.(\d{8})\.(\d{4})\.gz$'

    # List to store update counts
    update_counts = []

    # Iterate over each file in the specified directory
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".gz"):
                # Match the filename with the pattern
                match = re.match(pattern, file)
                if match:
                    date_str = match.group(1)  # YYYYMMDD
                    time_str = match.group(2)  # HHMM

                    # Combine date and time strings
                    file_timestamp_str = date_str + time_str

                    # Convert file timestamp to datetime object
                    file_time = datetime.strptime(file_timestamp_str, "%Y%m%d%H%M").replace(tzinfo=timezone.utc)

                    # Check if file time is within the desired time window
                    if file_time < from_time or file_time > until_time:
                        # Skip files outside the time window
                        continue

                    # Proceed to process the file
                    file_path = os.path.join(root, file)

                    # Initialize a new BGPStream instance for each file
                    stream = pybgpstream.BGPStream(data_interface="singlefile")

                    # Set the file for BGPStream to process
                    stream.set_data_interface_option("singlefile", "upd-file", file_path)

                    count = 0

                    for rec in stream.records():
                        for elem in rec:
                            # Get the element timestamp and set timezone to UTC
                            elem_time = datetime.utcfromtimestamp(elem.time).replace(tzinfo=timezone.utc)

                            # Filter elements outside the time window
                            if elem_time < from_time or elem_time > until_time:
                                continue

                            elem_type = elem.type  # 'A' for announcements, 'W' for withdrawals
                            fields = elem.fields
                            as_path = fields.get("as-path", "").split()

                            # Filter for target ASN in the AS path
                            if target_asn in as_path:
                                if elem_type in {'A', 'W'}:  # Count only announcements and withdrawals
                                    count += 1

                    # Store the count after processing each file
                    update_counts.append(count)
                else:
                    # If filename doesn't match pattern, skip it
                    continue

    # Calculate min, max, and median
    min_updates = min(update_counts) if update_counts else 0
    max_updates = max(update_counts) if update_counts else 0
    median_updates = statistics.median(update_counts) if update_counts else 0

    # Print the summary results
    print(f"Summary for AS{target_asn}:")
    print(f"Minimum updates: {min_updates}")
    print(f"Maximum updates: {max_updates}")
    print(f"Median updates: {median_updates}")

if __name__ == "__main__":
    # Define the directory and target ASN
    directory = "/home/hb/ris_bgp_updates/2024/10/rrc00"
    target_asn = "3356"  # Filter by specific ASN in AS path

    # Define the time window
    from_time_str = "2024-10-28 13:00:00"
    until_time_str = "2024-10-28 13:15:00"

    process_bgp_updates(directory, target_asn=target_asn, from_time_str=from_time_str, until_time_str=until_time_str)

Summary for AS3356:
Minimum updates: 150
Maximum updates: 58569
Median updates: 29359.5


## Unique prefixes

In [None]:
import pybgpstream
from datetime import datetime
from collections import defaultdict

def summarize_prefixes_and_origins():
    # Define the time range
    from_time = "2024-10-28 13:00:00"
    until_time = "2024-10-28 13:15:00"

    # Initialize BGPStream
    stream = pybgpstream.BGPStream(
        from_time=from_time,
        until_time=until_time,
        record_type="updates"
    )

    # Data structures to store unique prefixes and origin AS changes
    unique_prefixes = set()
    origin_as_changes = defaultdict(set)

    # Iterate over records and elements
    for rec in stream.records():
        for elem in rec:
            # Extract element information
            elem_type = elem.type  # 'A' for announcements, 'W' for withdrawals
            fields = elem.fields
            prefix = fields.get("prefix")
            if prefix is None:
                continue  # Skip elements without prefix information

            # Extract AS path and check if AS4766 is in the AS path
            as_path_str = fields.get('as-path', "")
            as_path = as_path_str.split()
            if '4766' not in as_path:
                continue  # Only process paths containing AS4766

            # Process announcements for tracking unique prefixes and origin AS changes
            if elem_type == 'A':
                unique_prefixes.add(prefix)  # Track unique prefixes

                # Track changes in origin AS for the prefix
                origin_as = as_path[-1] if as_path else None
                if origin_as:
                    origin_as_changes[prefix].add(origin_as)

    # Output summary
    print(f"Total unique prefixes associated with AS4766: {len(unique_prefixes)}")
    for prefix, origin_as_set in origin_as_changes.items():
        print(f"Prefix: {prefix}")
        print(f"  Origin AS count: {len(origin_as_set)}")
        if len(origin_as_set) > 1:
            print(f"  Changes in origin AS observed: {origin_as_set}")

if __name__ == "__main__":
    summarize_prefixes_and_origins()

In [8]:
import os
import re
import pybgpstream
from datetime import datetime, timezone
from collections import defaultdict, Counter
import statistics

def process_local_bgp_updates():
    # Define the time window and target ASN
    from_time_str = "2024-10-28 13:00:00"
    until_time_str = "2024-10-28 14:00:00"
    target_asn = "4766"

    # Parse the time strings into timezone-aware datetime objects
    from_time = datetime.strptime(from_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
    until_time = datetime.strptime(until_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)

    # The directory containing BGP update files
    directory = "/home/hb/ris_bgp_updates/2024/10/rrc00"

    # Regex pattern to match filenames: updates.YYYYMMDD.HHMM.gz
    pattern = r'^updates\.(\d{8})\.(\d{4})\.(bz2|gz)$'

    # Data structures:
    #  - unique_prefixes stores each prefix encountered for a relevant announcement
    #  - origin_as_changes tracks a set of origin ASNs (for potential MOAS detection)
    unique_prefixes = set()
    origin_as_changes = defaultdict(set)

    # Traverse the directory and filter files by time window
    for root, _, files in os.walk(directory):
        for file in files:
            match = re.match(pattern, file)
            if match:
                date_str = match.group(1)  # YYYYMMDD
                time_str = match.group(2)  # HHMM

                # Convert the extracted date/time to a datetime object
                file_timestamp_str = date_str + time_str  # e.g., "20241028" + "1300" = "202410281300"
                file_time = datetime.strptime(file_timestamp_str, "%Y%m%d%H%M").replace(tzinfo=timezone.utc)

                # Check if file time is within the specified time window
                if file_time < from_time or file_time > until_time:
                    continue

                file_path = os.path.join(root, file)

                # Initialize BGPStream for local file processing
                stream = pybgpstream.BGPStream(data_interface="singlefile")
                stream.set_data_interface_option("singlefile", "upd-file", file_path)

                # Process each record and element in the file
                for rec in stream.records():
                    for elem in rec:
                        elem_time = datetime.utcfromtimestamp(elem.time).replace(tzinfo=timezone.utc)
                        if elem_time < from_time or elem_time > until_time:
                            continue

                        elem_type = elem.type  # 'A' for announcements, 'W' for withdrawals
                        fields = elem.fields
                        prefix = fields.get("prefix")
                        if prefix is None:
                            continue

                        # Check the as-path
                        as_path_str = fields.get('as-path', "")
                        as_path = as_path_str.split()

                        # Filter for target ASN in the AS path
                        # Remove if you want to detect MOAS conflicts regardless of target ASN
                        if target_asn not in as_path:
                            continue

                        # We only track announcements
                        if elem_type == 'A':
                            unique_prefixes.add(prefix)
                            origin_asn = as_path[-1] if as_path else None
                            if origin_asn:
                                origin_as_changes[prefix].add(origin_asn)

    # Print results
    print(f"Total unique prefixes associated with AS{target_asn}: {len(unique_prefixes)}")
    for prefix, origin_set in origin_as_changes.items():
        print(f"Prefix: {prefix}")
        print(f"  Origin AS count: {len(origin_set)}")
        if len(origin_set) > 1:
            print(f"  MOAS conflict observed: {origin_set}")

if __name__ == "__main__":
    process_local_bgp_updates()

Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1400.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1330.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1345.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1315.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1300.gz
Total unique prefixes associated with AS4766: 616
Prefix: 14.129.75.0/24
  Origin AS count: 1
Prefix: 14.129.73.0/24
  Origin AS count: 1
Prefix: 14.129.74.0/24
  Origin AS count: 1
Prefix: 14.129.76.0/24
  Origin AS count: 1
Prefix: 211.156.223.0/24
  Origin AS count: 1
Prefix: 211.156.221.0/24
  Origin AS count: 1
Prefix: 211.156.220.0/24
  Origin AS count: 1
Prefix: 211.156.219.0/24
  Origin AS count: 1
Prefix: 211.156.218.0/24
  Origin AS count: 1
Prefix: 211.156.217.0/24
  Origin AS count: 1
Prefix: 211.156.216.0/24
  Origin AS count: 1
Prefix: 211.156.215.0/24
  Origin AS count: 1
P

# BGP Analysis Capabilities

3. AS Path Length and Change Summary

Summarize the AS paths for each prefix associated with ASN [target ASN] over the period [start time] to [end time]. Provide minimum, maximum, and median AS path lengths and highlight any significant path changes observed in BGP updates.


In [None]:
# Import necessary libraries
import pybgpstream
from datetime import datetime
from statistics import median
from collections import defaultdict

def analyze_as_paths():
    # Define the time range
    from_time = "2024-10-28 13:00:00"
    until_time = "2024-10-28 13:15:00"

    # Initialize BGPStream
    stream = pybgpstream.BGPStream(
        from_time=from_time,
        until_time=until_time,
        record_type="updates"
    )

    # Initialize data structures
    prefix_paths = defaultdict(list)  # Store AS paths for each prefix
    path_changes = defaultdict(list)  # Track significant path changes per prefix

    # Iterate over records and elements
    for rec in stream.records():
        for elem in rec:
            # Extract element information
            elem_time = datetime.utcfromtimestamp(elem.time)
            elem_type = elem.type  # 'A' for announcements, 'W' for withdrawals
            fields = elem.fields
            prefix = fields.get("prefix")

            if prefix is None:
                continue  # Skip elements without prefix information

            # Extract AS path and check if AS4766 is in the path
            as_path_str = fields.get('as-path', "")
            as_path = as_path_str.split()
            if '4766' not in as_path:
                continue  # Only process paths that contain AS4766

            # Only process announcement messages
            if elem_type == 'A':
                path_length = len(as_path)
                prefix_paths[prefix].append((as_path, path_length, elem_time))

    # Summarize AS path lengths and detect changes
    for prefix, paths in prefix_paths.items():
        path_lengths = [length for _, length, _ in paths]
        min_length = min(path_lengths)
        max_length = max(path_lengths)
        median_length = median(path_lengths)

        # Detect significant AS path changes
        previous_path = None
        for as_path, length, time in paths:
            if previous_path and as_path != previous_path:
                path_changes[prefix].append((time, previous_path, as_path))
            previous_path = as_path

        # Output the summary
        print(f"Prefix: {prefix}")
        print(f"  Minimum AS path length: {min_length}")
        print(f"  Maximum AS path length: {max_length}")
        print(f"  Median AS path length: {median_length}")

        if path_changes[prefix]:
            print("  Significant AS path changes observed:")
            for change_time, old_path, new_path in path_changes[prefix]:
                print(f"    - At {change_time}: {old_path} -> {new_path}")

if __name__ == "__main__":
    analyze_as_paths()

In [1]:
import os
import re
import pybgpstream
from datetime import datetime, timezone
from statistics import median
from collections import defaultdict

def analyze_local_as_paths():
    from_time_str = "2024-10-28 13:00:00"
    until_time_str = "2024-10-28 13:15:00"
    target_asn = "4766"

    from_time = datetime.strptime(from_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
    until_time = datetime.strptime(until_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)

    directory = "/home/hb/ris_bgp_updates/2024/10/rrc00"
    pattern = r'^updates\.(\d{8})\.(\d{4})\.(bz2|gz)$'

    prefix_paths = defaultdict(list)
    path_changes = defaultdict(list)

    for root, _, files in os.walk(directory):
        for file in files:
            match = re.match(pattern, file)
            if match:
                date_str = match.group(1)
                time_str = match.group(2)
                file_timestamp_str = date_str + time_str
                file_time = datetime.strptime(file_timestamp_str, "%Y%m%d%H%M").replace(tzinfo=timezone.utc)
                if file_time < from_time or file_time > until_time:
                    continue

                file_path = os.path.join(root, file)
                print(f"Processing file: {file_path}")
                stream = pybgpstream.BGPStream(data_interface="singlefile")
                stream.set_data_interface_option("singlefile", "upd-file", file_path)

                for rec in stream.records():
                    for elem in rec:
                        elem_time = datetime.utcfromtimestamp(elem.time).replace(tzinfo=timezone.utc)
                        if elem_time < from_time or elem_time > until_time:
                            continue
                        fields = elem.fields
                        prefix = fields.get("prefix")
                        if not prefix:
                            continue
                        as_path_str = fields.get('as-path', "")
                        as_path = as_path_str.split()
                        if target_asn not in as_path:
                            continue
                        if elem.type == 'A':
                            path_len = len(as_path)
                            prefix_paths[prefix].append((as_path, path_len, elem_time))

    for prefix, paths in prefix_paths.items():
        lengths = [length for _, length, _ in paths]
        min_len = min(lengths)
        max_len = max(lengths)
        med_len = median(lengths)

        prev_path = None
        for as_path, length, time in paths:
            if prev_path and as_path != prev_path:
                path_changes[prefix].append((time, prev_path, as_path))
            prev_path = as_path

        print(f"Prefix: {prefix}")
        print(f"  Min AS path length: {min_len}")
        print(f"  Max AS path length: {max_len}")
        print(f"  Median AS path length: {med_len}")
        if path_changes[prefix]:
            print("  Significant AS path changes observed:")
            for t, old_path, new_path in path_changes[prefix]:
                print(f"    - At {t}: {old_path} -> {new_path}")

if __name__ == "__main__":
    analyze_local_as_paths()

Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1315.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1300.gz
Prefix: 14.55.70.0/23
  Min AS path length: 6
  Max AS path length: 6
  Median AS path length: 6.0
Prefix: 14.55.72.0/23
  Min AS path length: 6
  Max AS path length: 6
  Median AS path length: 6.0
Prefix: 1.34.184.0/24
  Min AS path length: 7
  Max AS path length: 7
  Median AS path length: 7
Prefix: 211.52.3.0/24
  Min AS path length: 5
  Max AS path length: 5
  Median AS path length: 5
Prefix: 220.67.208.0/24
  Min AS path length: 6
  Max AS path length: 6
  Median AS path length: 6
Prefix: 220.69.200.0/24
  Min AS path length: 6
  Max AS path length: 6
  Median AS path length: 6
Prefix: 220.95.160.0/20
  Min AS path length: 5
  Max AS path length: 5
  Median AS path length: 5
Prefix: 211.104.115.0/24
  Min AS path length: 6
  Max AS path length: 6
  Median AS path length: 6
Prefix: 168.126.80.0/24
  Min AS path length: 6
 

# BGP Analysis Capabilities

4. Prefix Stability Analysis

Identify prefixes associated with ASN [target ASN] that show high update frequency (announcements and withdrawals) within the period [start time] to [end time]. Summarize the prefixes, update counts, and provide an assessment of their stability.

In [None]:
# Import necessary libraries
import pybgpstream
from datetime import datetime
from collections import defaultdict

def analyze_high_update_frequency():
    # Define the time range
    from_time = "2024-10-28 13:00:00"
    until_time = "2024-10-28 13:15:00"

    # Initialize BGPStream
    stream = pybgpstream.BGPStream(
        from_time=from_time,
        until_time=until_time,
        record_type="updates"
    )

    # Data structures to track update counts
    update_counts = defaultdict(int)

    # Iterate over records and elements
    for rec in stream.records():
        for elem in rec:
            # Extract element information
            elem_time = datetime.utcfromtimestamp(elem.time)
            elem_type = elem.type  # 'A' for announcements, 'W' for withdrawals
            fields = elem.fields
            prefix = fields.get("prefix")
            if prefix is None:
                continue  # Skip elements without prefix information

            # Extract AS path and check if AS4766 is in the AS path
            as_path_str = fields.get('as-path', "")
            as_path = as_path_str.split()
            if '4766' not in as_path:
                continue  # Only process paths containing AS4766

            # Count announcements and withdrawals for each prefix
            update_counts[prefix] += 1

    # Assess prefix stability based on update frequency
    print("Prefixes associated with AS4766 and their update frequency:")
    for prefix, count in update_counts.items():
        stability = "Unstable" if count > 10 else "Stable"  # Define threshold for stability
        print(f"Prefix: {prefix}, Update Count: {count}, Stability: {stability}")

if __name__ == "__main__":
    analyze_high_update_frequency()

In [2]:
import os
import re
import pybgpstream
from datetime import datetime, timezone
from collections import defaultdict

def analyze_local_high_update_frequency():
    # Define the time window and target ASN
    from_time_str = "2024-10-28 13:00:00"
    until_time_str = "2024-10-28 13:15:00"
    target_asn = "4766"

    # Parse into timezone-aware datetime objects
    from_time = datetime.strptime(from_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
    until_time = datetime.strptime(until_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)

    # Directory containing BGP update files
    directory = "/home/hb/ris_bgp_updates/2024/10/rrc00"

    # Regex pattern for matching filenames: updates.YYYYMMDD.HHMM.gz (or .bz2)
    pattern = r'^updates\.(\d{8})\.(\d{4})\.(bz2|gz)$'

    # Data structure to track update counts per prefix
    update_counts = defaultdict(int)

    # Traverse the directory and filter files by time window
    for root, _, files in os.walk(directory):
        for file in files:
            match = re.match(pattern, file)
            if match:
                date_str = match.group(1)  # YYYYMMDD
                time_str = match.group(2)  # HHMM
                file_timestamp_str = date_str + time_str
                file_time = datetime.strptime(file_timestamp_str, "%Y%m%d%H%M").replace(tzinfo=timezone.utc)
                if file_time < from_time or file_time > until_time:
                    continue

                file_path = os.path.join(root, file)
                print(f"Processing file: {file_path}")
                stream = pybgpstream.BGPStream(data_interface="singlefile")
                stream.set_data_interface_option("singlefile", "upd-file", file_path)

                # Iterate over records in this file
                for rec in stream.records():
                    for elem in rec:
                        elem_time = datetime.utcfromtimestamp(elem.time).replace(tzinfo=timezone.utc)
                        if elem_time < from_time or elem_time > until_time:
                            continue

                        elem_type = elem.type  # 'A' (announcement), 'W' (withdrawal)
                        fields = elem.fields
                        prefix = fields.get("prefix")
                        if not prefix:
                            continue

                        as_path_str = fields.get('as-path', "")
                        as_path = as_path_str.split()
                        if target_asn not in as_path:
                            continue

                        # Count updates for each prefix referencing target_asn
                        if elem_type in {'A', 'W'}:
                            update_counts[prefix] += 1

    # Assess prefix stability based on update frequency
    print("\nPrefixes referencing AS4766 and their update frequency:")
    for prefix, count in update_counts.items():
        stability = "Unstable" if count > 10 else "Stable"
        print(f"  Prefix: {prefix}, Update Count: {count}, Stability: {stability}")

if __name__ == "__main__":
    analyze_local_high_update_frequency()

Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1315.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1300.gz

Prefixes referencing AS4766 and their update frequency:
  Prefix: 14.55.70.0/23, Update Count: 2, Stability: Stable
  Prefix: 14.55.72.0/23, Update Count: 2, Stability: Stable
  Prefix: 1.34.184.0/24, Update Count: 1, Stability: Stable
  Prefix: 211.52.3.0/24, Update Count: 1, Stability: Stable
  Prefix: 220.67.208.0/24, Update Count: 1, Stability: Stable
  Prefix: 220.69.200.0/24, Update Count: 1, Stability: Stable
  Prefix: 220.95.160.0/20, Update Count: 1, Stability: Stable
  Prefix: 211.104.115.0/24, Update Count: 1, Stability: Stable
  Prefix: 168.126.80.0/24, Update Count: 1, Stability: Stable
  Prefix: 218.146.146.0/24, Update Count: 1, Stability: Stable
  Prefix: 14.206.54.0/24, Update Count: 1, Stability: Stable
  Prefix: 101.250.141.0/24, Update Count: 1, Stability: Stable
  Prefix: 117.16.52.0/23, Update Count: 1, 

# BGP Analysis Capabilities

5. Community and MED Analysis

Analyze the range of MED values and the most common community tags associated with BGP update messages for ASN [target ASN] from [start time] to [end time]. Provide a summary of average MED values and frequently observed community tags.

In [4]:
import pybgpstream
from datetime import datetime
from collections import defaultdict, Counter
import statistics

def main():
    from_time = "2024-10-28 13:00:00"
    until_time = "2024-10-28 14:00:00"
    collectors = ["rrc00"]

    # Initialize BGPStream
    stream = pybgpstream.BGPStream(
        from_time=from_time,
        until_time=until_time,
        record_type="updates",
        collectors=collectors
    )

    target_asn = '4766'

    med_values = []
    community_tags = []

    # Iterate over records and elements
    for rec in stream.records():
        for elem in rec:
            fields = elem.fields
            elem_type = elem.type  # 'A' for announcements, 'W' for withdrawals

            as_path_str = fields.get('as-path', "")
            as_path = as_path_str.split()

            # Filter for AS4766 in the AS path
            if target_asn not in as_path:
                continue

            # Extract MED value
            med_value = fields.get('med')
            if med_value is not None:
                med_values.append(int(med_value))

            # Extract community tags
            communities = fields.get('communities', [])
            for community in communities:
                community_tags.append(community)

    # Calculate MED statistics
    if med_values:
        min_med = min(med_values)
        max_med = max(med_values)
        average_med = statistics.mean(med_values)
        print(f"MED Value Range: {min_med} - {max_med}")
        print(f"Average MED Value: {average_med}")
    else:
        print("No MED values found.")

    # Calculate most common community tags
    if community_tags:
        community_counter = Counter(community_tags)
        most_common_communities = community_counter.most_common(10)  # Top 10
        print("Most Common Community Tags:")
        for community, count in most_common_communities:
            print(f"  {community} (observed {count} times)")
    else:
        print("No community tags found.")

if __name__ == "__main__":
    main()

No MED values found.
Most Common Community Tags:
  34549:100 (observed 3074 times)
  174:22013 (observed 2998 times)
  34549:174 (observed 2994 times)
  174:21001 (observed 2671 times)
  174:21000 (observed 327 times)
  1299:35000 (observed 77 times)
  34549:1299 (observed 70 times)
  24482:2 (observed 61 times)
  24482:200 (observed 61 times)
  48858:1399 (observed 47 times)


In [5]:
import os
import re
import pybgpstream
import statistics
from datetime import datetime, timezone
from collections import defaultdict, Counter

def analyze_local_med_community():
    from_time_str = "2024-10-28 13:00:00"
    until_time_str = "2024-10-28 14:00:00"
    target_asn = "4766"

    from_time = datetime.strptime(from_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
    until_time = datetime.strptime(until_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)

    directory = "/home/hb/ris_bgp_updates/2024/10/rrc00"
    pattern = r'^updates\.(\d{8})\.(\d{4})\.(bz2|gz)$'

    med_values = []
    community_tags = []

    for root, _, files in os.walk(directory):
        for file in files:
            match = re.match(pattern, file)
            if match:
                date_str = match.group(1)
                time_str = match.group(2)
                file_timestamp_str = date_str + time_str
                file_time = datetime.strptime(file_timestamp_str, "%Y%m%d%H%M").replace(tzinfo=timezone.utc)

                if file_time < from_time or file_time > until_time:
                    continue

                file_path = os.path.join(root, file)
                print(f"Processing file: {file_path}")
                stream = pybgpstream.BGPStream(data_interface="singlefile")
                stream.set_data_interface_option("singlefile", "upd-file", file_path)

                for rec in stream.records():
                    for elem in rec:
                        elem_time = datetime.utcfromtimestamp(elem.time).replace(tzinfo=timezone.utc)
                        if elem_time < from_time or elem_time > until_time:
                            continue

                        fields = elem.fields
                        as_path_str = fields.get('as-path', "")
                        as_path = as_path_str.split()
                        if target_asn not in as_path:
                            continue

                        med_value = fields.get('med')
                        if med_value is not None:
                            try:
                                med_values.append(int(med_value))
                            except ValueError:
                                pass

                        communities = fields.get('communities', [])
                        for c in communities:
                            community_tags.append(c)

    if med_values:
        min_med = min(med_values)
        max_med = max(med_values)
        avg_med = statistics.mean(med_values)
        print(f"MED Value Range: {min_med} - {max_med}")
        print(f"Average MED Value: {avg_med}")
    else:
        print("No MED values found.")

    if community_tags:
        community_counter = Counter(community_tags)
        most_common_communities = community_counter.most_common(10)
        print("Most Common Community Tags:")
        for community, count in most_common_communities:
            print(f"  {community} (observed {count} times)")
    else:
        print("No community tags found.")

if __name__ == "__main__":
    analyze_local_med_community()

Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1400.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1330.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1345.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1315.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1300.gz
No MED values found.
Most Common Community Tags:
  34549:100 (observed 557 times)
  34549:174 (observed 549 times)
  174:22013 (observed 549 times)
  174:21001 (observed 503 times)
  174:21000 (observed 46 times)
  24482:11050 (observed 12 times)
  9002:64819 (observed 12 times)
  24482:11052 (observed 12 times)
  24482:2 (observed 12 times)
  24482:65116 (observed 12 times)


# BGP Analysis Capabilities

6. Update Count and Periodic Changes

Summarize the count of BGP update messages (announcements and withdrawals) for ASN [target ASN] between [start time] and [end time]. Provide total counts as well as breakdowns by 5, 30, and 60-minute intervals.

# BGP Analysis Capabilities

7. Route Flapping Analysis

Identify prefixes associated with ASN [target ASN] that exhibit flapping behavior within the period [start time] to [end time]. Summarize the prefixes, the number of flaps, and the duration between consecutive flaps.


In [1]:
import pybgpstream
import pandas as pd
from datetime import datetime

# Define parameters
asn = "4766"
start_time = "2024-10-28 13:00:00"
end_time = "2024-10-28 13:15:00"

# Configure BGPStream
stream = pybgpstream.BGPStream(
    from_time=start_time,
    until_time=end_time,
    record_type="updates",
    filter=f"origin {asn}"
)

# Initialize a list to store events
events = []

# Collect data
for rec in stream.records():
    for elem in rec:
        if elem.type in ['A', 'W']:
            prefix = elem.fields.get('prefix')
            timestamp = datetime.utcfromtimestamp(elem.time)
            event_type = elem.type  # 'A' for announcement, 'W' for withdrawal
            events.append({'prefix': prefix, 'timestamp': timestamp, 'event_type': event_type})

# Create DataFrame
df = pd.DataFrame(events)

# Identify flapping prefixes
flapping_info = []

for prefix, group in df.groupby('prefix'):
    group = group.sort_values('timestamp')
    # Detect changes in event_type (from A to W or W to A)
    group['change'] = group['event_type'].shift() != group['event_type']
    flap_events = group[group['change']]
    num_flaps = len(flap_events)
    if num_flaps > 1:
        # Calculate durations between consecutive flaps
        durations = flap_events['timestamp'].diff().dt.total_seconds().dropna()
        avg_duration = durations.mean()
        flapping_info.append({
            'prefix': prefix,
            'number_of_flaps': num_flaps,
            'average_duration_between_flaps_seconds': avg_duration
        })

# Create a DataFrame for flapping prefixes
flap_df = pd.DataFrame(flapping_info)

# Display the results
if not flap_df.empty:
    print("Flapping prefixes associated with ASN 4766:")
    print(flap_df)
else:
    print("No flapping prefixes detected for ASN 4766 in the given time period.")

No flapping prefixes detected for ASN 4766 in the given time period.


In [7]:
import os
import re
import pybgpstream
import pandas as pd
from datetime import datetime, timezone
from collections import defaultdict
import statistics

def analyze_local_flapping():
    from_time_str = "2024-10-28 13:00:00"
    until_time_str = "2024-10-29 13:00:00"
    target_asn = "4766"

    from_time = datetime.strptime(from_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
    until_time = datetime.strptime(until_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)

    directory = "/home/hb/ris_bgp_updates/2024/10/rrc00"
    pattern = r'^updates\.(\d{8})\.(\d{4})\.(bz2|gz)$'

    events = []

    for root, _, files in os.walk(directory):
        for file in files:
            match = re.match(pattern, file)
            if match:
                date_str = match.group(1)
                time_str = match.group(2)
                file_timestamp_str = date_str + time_str
                file_time = datetime.strptime(file_timestamp_str, "%Y%m%d%H%M").replace(tzinfo=timezone.utc)

                if file_time < from_time or file_time > until_time:
                    continue

                file_path = os.path.join(root, file)
                print(f"Processing file: {file_path}")
                stream = pybgpstream.BGPStream(data_interface="singlefile")
                stream.set_data_interface_option("singlefile", "upd-file", file_path)

                for rec in stream.records():
                    for elem in rec:
                        elem_time = datetime.utcfromtimestamp(elem.time).replace(tzinfo=timezone.utc)
                        if elem_time < from_time or elem_time > until_time:
                            continue

                        fields = elem.fields
                        prefix = fields.get("prefix")
                        if not prefix:
                            continue

                        elem_type = elem.type
                        as_path_str = fields.get('as-path', "")
                        as_path = as_path_str.split()

                        # Filter for origin = target_asn (last in as_path)
                        if as_path and as_path[-1] == target_asn:
                            if elem_type in ('A', 'W'):
                                events.append({
                                    'prefix': prefix,
                                    'timestamp': elem_time,
                                    'event_type': elem_type
                                })

    df = pd.DataFrame(events)
    if df.empty:
        print(f"No updates found for origin {target_asn} in the given time period.")
        return

    flapping_info = []

    for prefix, group in df.groupby('prefix'):
        group = group.sort_values('timestamp')
        group['change'] = group['event_type'].shift() != group['event_type']
        flap_events = group[group['change']]
        num_flaps = len(flap_events)

        if num_flaps > 1:
            durations = flap_events['timestamp'].diff().dt.total_seconds().dropna()
            avg_duration = durations.mean() if not durations.empty else 0
            flapping_info.append({
                'prefix': prefix,
                'number_of_flaps': num_flaps,
                'average_duration_between_flaps_seconds': avg_duration
            })

    if not flapping_info:
        print(f"No flapping prefixes detected for ASN {target_asn} in the given time period.")
        return

    flap_df = pd.DataFrame(flapping_info)
    print(f"Flapping prefixes for origin {target_asn}:")
    print(flap_df)

if __name__ == "__main__":
    analyze_local_flapping()

Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241029.1300.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1945.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241029.1145.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1730.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1900.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241029.0800.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.2300.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241029.0545.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241029.0445.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241029.0230.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241029.0700.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1400.gz
Processing file: /home/hb/ri

# BGP Analysis Capabilities

8. Anomaly Detection - Prefix Announcement

Identify any prefixes announced by ASN [target ASN] that exhibit anomalous announcement patterns during [start time] to [end time]. Summarize the prefixes, number of announcements, and rate of change compared to historical data.

In [2]:
import pybgpstream
import pandas as pd
from datetime import datetime, timedelta

# Define parameters
asn = "4766"
current_start_time = "2024-10-28 13:00:00"
current_end_time = "2024-10-28 13:15:00"

# Define historical period (e.g., one week before)
historical_start_time = (datetime.strptime(current_start_time, "%Y-%m-%d %H:%M:%S") - timedelta(weeks=54)).strftime("%Y-%m-%d %H:%M:%S")
historical_end_time = (datetime.strptime(current_end_time, "%Y-%m-%d %H:%M:%S") - timedelta(weeks=54)).strftime("%Y-%m-%d %H:%M:%S")

def collect_announcements(asn, start_time, end_time):
    stream = pybgpstream.BGPStream(
        from_time=start_time,
        until_time=end_time,
        record_type="updates",
        filter=f"origin {asn}"
    )
    prefixes = []
    for rec in stream.records():
        for elem in rec:
            if elem.type == 'A':
                prefix = elem.fields.get('prefix')
                prefixes.append(prefix)
    return pd.Series(prefixes).value_counts()

# Collect current and historical announcements
current_announcements = collect_announcements(asn, current_start_time, current_end_time)
historical_announcements = collect_announcements(asn, historical_start_time, historical_end_time)

# Combine and compare
df = pd.DataFrame({
    'current_count': current_announcements,
    'historical_count': historical_announcements
}).fillna(0)

df['rate_of_change'] = ((df['current_count'] - df['historical_count']) / df['historical_count'].replace(0, 1)) * 100

# Identify anomalous prefixes (e.g., rate of change > 50%)
anomalous_prefixes = df[df['rate_of_change'].abs() > 50]

# Display the results
if not anomalous_prefixes.empty:
    print("Anomalous prefixes announced by ASN 4766:")
    print(anomalous_prefixes)
else:
    print("No anomalous prefix announcements detected for ASN 4766 in the given time period.")


Anomalous prefixes announced by ASN 4766:
                 current_count  historical_count  rate_of_change
1.100.0.0/14               2.0                 1           100.0
1.104.0.0/14               2.0                 1           100.0
1.108.0.0/14               2.0                 1           100.0
1.231.122.0/24             3.0                 1           200.0
1.231.13.0/24              2.0                 1           100.0
...                        ...               ...             ...
61.97.160.0/21             3.0                 1           200.0
61.97.168.0/21             3.0                 1           200.0
74.124.196.0/24            2.0                 1           100.0
92.112.57.0/24             2.0                 1           100.0
92.112.61.0/24             2.0                 1           100.0

[984 rows x 3 columns]


# BGP Analysis Capabilities

9. Anomaly Detection - AS Path Changes

Detect unusual AS path changes for prefixes associated with ASN [target ASN] from [start time] to [end time]. Summarize the prefixes, number of path changes, and frequency of these changes.

In [3]:
import pybgpstream
import pandas as pd

# Define parameters
asn = "4766"
start_time = "2024-10-28 13:00:00"
end_time = "2024-10-28 13:15:00"

# Configure BGPStream
stream = pybgpstream.BGPStream(
    from_time=start_time,
    until_time=end_time,
    record_type="updates",
    filter=f"origin {asn}"
)

# Initialize a dictionary to store AS paths per prefix
prefix_as_paths = {}

# Collect data
for rec in stream.records():
    for elem in rec:
        if elem.type == 'A':
            prefix = elem.fields.get('prefix')
            as_path = elem.fields.get('as-path', '')
            if prefix not in prefix_as_paths:
                prefix_as_paths[prefix] = set()
            prefix_as_paths[prefix].add(as_path)

# Analyze AS path changes
as_path_changes = []

for prefix, paths in prefix_as_paths.items():
    num_paths = len(paths)
    if num_paths > 1:
        as_path_changes.append({
            'prefix': prefix,
            'number_of_path_changes': num_paths,
            'as_paths': paths
        })

# Convert to DataFrame
df_as_changes = pd.DataFrame(as_path_changes)

# Display the results
if not df_as_changes.empty:
    print("Prefixes with AS path changes for ASN 4766:")
    print(df_as_changes)
else:
    print("No unusual AS path changes detected for ASN 4766 in the given time period.")


Prefixes with AS path changes for ASN 4766:
              prefix  number_of_path_changes  \
0    2400:0:611::/48                       5   
1    2400:0:810::/44                       7   
2    2400:0:820::/44                       7   
3    2400:0:830::/44                       7   
4    2400:0:840::/44                       7   
..               ...                     ...   
672  211.38.114.0/24                       2   
673   211.51.48.0/24                       2   
674  211.33.209.0/24                       2   
675   210.179.0.0/21                       2   
676   210.113.7.0/24                       2   

                                              as_paths  
0    {59890 3356 4766, 140731 137990 2914 4766, 134...  
1    {207841 137409 174 4766, 59890 3356 4766, 1407...  
2    {207841 137409 174 4766, 59890 3356 4766, 1407...  
3    {207841 137409 174 4766, 59890 3356 4766, 1407...  
4    {207841 137409 174 4766, 59890 3356 4766, 1407...  
..                                   

In [10]:
import os
import re
import pybgpstream
import pandas as pd
from datetime import datetime, timezone
from collections import defaultdict

def analyze_local_as_path_changes():
    # Define the time window and target ASN
    from_time_str = "2024-10-28 13:00:00"
    until_time_str = "2024-10-28 13:15:00"
    target_asn = "4766"

    # Parse time window
    from_time = datetime.strptime(from_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
    until_time = datetime.strptime(until_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)

    # Directory containing BGP update files
    directory = "/home/hb/ris_bgp_updates/2024/10/rrc00"
    pattern = r'^updates\.(\d{8})\.(\d{4})\.(bz2|gz)$'

    prefix_as_paths = defaultdict(set)

    for root, _, files in os.walk(directory):
        for file in files:
            match = re.match(pattern, file)
            if match:
                date_str = match.group(1)
                time_str = match.group(2)
                file_timestamp_str = date_str + time_str
                file_time = datetime.strptime(file_timestamp_str, "%Y%m%d%H%M").replace(tzinfo=timezone.utc)

                if file_time < from_time or file_time > until_time:
                    continue

                file_path = os.path.join(root, file)
                stream = pybgpstream.BGPStream(data_interface="singlefile")
                stream.set_data_interface_option("singlefile", "upd-file", file_path)

                for rec in stream.records():
                    for elem in rec:
                        elem_time = datetime.utcfromtimestamp(elem.time).replace(tzinfo=timezone.utc)
                        if elem_time < from_time or elem_time > until_time:
                            continue
                        if elem.type == 'A':
                            fields = elem.fields
                            prefix = fields.get('prefix')
                            if not prefix:
                                continue

                            as_path_str = fields.get('as-path', '')
                            as_path = as_path_str.split()
                            prefix_as_paths[prefix].add(as_path_str)

    as_path_changes = []
    for prefix, path_set in prefix_as_paths.items():
        if len(path_set) > 1:
            as_path_changes.append({
                'prefix': prefix,
                'number_of_path_changes': len(path_set),
                'as_paths': list(path_set)
            })

    df_as_changes = pd.DataFrame(as_path_changes)
    if not df_as_changes.empty:
        print(f"Prefixes with AS path changes for ASN {target_asn}:")
        print(df_as_changes)
    else:
        print(f"No unusual AS path changes detected for ASN {target_asn} in the given time period.")

if __name__ == "__main__":
    analyze_local_as_path_changes()

Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1315.gz
Processing file: /home/hb/ris_bgp_updates/2024/10/rrc00/updates.20241028.1300.gz
Prefixes with AS path changes for ASN 4766:
                  prefix  number_of_path_changes  \
0        223.196.46.0/24                     116   
1        200.115.69.0/24                      12   
2         2804:8330::/32                      55   
3       169.145.140.0/23                      97   
4      2a02:ac80:40::/48                     286   
...                  ...                     ...   
20905   162.247.202.0/24                       2   
20906   162.247.200.0/24                       2   
20907   162.247.206.0/24                       2   
20908   192.245.167.0/24                       2   
20909    204.117.91.0/24                       2   

                                                as_paths  
0      [3333 1103 3257 174 55644 55644 55644 55644 55...  
1      [58057 34549 6461 52320 269194 270885 270885

# BGP Analysis Capabilities

10. Anomaly Detection - Potential Hijacking

Identify suspected BGP hijacking incidents involving ASN [target ASN] or its prefixes from [start time] to [end time]. Summarize the prefixes, unauthorized ASNs, and frequency of suspected hijacks.


Define time window around the incident
start_time = "2018-04-24 10:00:00"  # UTC
end_time = "2018-04-24 12:00:00"    # UTC

Target prefix and expected origin ASN
target_prefix = "205.251.192.0/23"
expected_asn = "16509"


Identify suspected BGP hijacking incidents involving AS16509 or its prefixes (177.93.174.0/23, 138.59.238.0/23, 177.93.168.0/23) from April 24 10:00 to 12:00, 2018. Summarize the prefixes, unauthorized ASNs, and frequency of suspected hijacks.

In [2]:
import pybgpstream
import pandas as pd
from datetime import datetime

# Define parameters
asn = "16509"
start_time = "2018-04-24 10:00:00"
end_time = "2018-04-24 12:00:00"

# Define known prefixes of ASN 16509
known_prefixes = set([
    "177.93.174.0/23",
    "138.59.238.0/23",
    "177.93.168.0/23",
    # Add more known prefixes for ASN 16509
])

# Initialize BGPStream
stream = pybgpstream.BGPStream(
    from_time=start_time,
    until_time=end_time,
    collectors=["rrc00"],
    record_type="updates",
)

# Initialize a dictionary to store unauthorized announcements
hijack_attempts = {}

# Collect data
for rec in stream.records():
    for elem in rec:
        if elem.type == 'A':
            prefix = elem.fields.get('prefix')
            as_path = elem.fields.get('as-path', '')
            as_path_list = as_path.strip().split()
            origin_asn = as_path_list[-1] if as_path_list else None

            # Check if the prefix is known and the origin ASN is not the target ASN
            if prefix in known_prefixes and origin_asn != asn:
                if prefix not in hijack_attempts:
                    hijack_attempts[prefix] = {'unauthorized_asns': set(), 'count': 0}
                hijack_attempts[prefix]['unauthorized_asns'].add(origin_asn)
                hijack_attempts[prefix]['count'] += 1

# Prepare the results
results = []
for prefix, data in hijack_attempts.items():
    results.append({
        'prefix': prefix,
        'unauthorized_asns': list(data['unauthorized_asns']),
        'frequency_of_suspected_hijacks': data['count']
    })

# Convert to DataFrame
df_hijacks = pd.DataFrame(results)

# Display the results
if not df_hijacks.empty:
    print(f"Suspected BGP hijacking incidents involving ASN {asn}:")
    print(df_hijacks)
else:
    print(f"No suspected hijacking incidents detected for ASN {asn} in the given time period.")

Suspected BGP hijacking incidents involving ASN 16509:
            prefix unauthorized_asns  frequency_of_suspected_hijacks
0  177.93.168.0/23          [263154]                              11
1  177.93.174.0/23          [263154]                              11
2  138.59.238.0/23          [263154]                               9


In [11]:
import os
import re
import pybgpstream
import pandas as pd
from datetime import datetime, timezone
from collections import defaultdict

def analyze_local_hijacking():
    """
    Process locally stored BGP update files to detect suspected hijacking incidents 
    by comparing known prefixes of a target ASN against the observed origin ASNs.
    """

    # Define parameters
    asn = "16509"
    from_time_str = "2018-04-24 10:00:00"
    until_time_str = "2018-04-24 12:00:00"

    # Known prefixes for ASN 16509
    known_prefixes = {
        "177.93.174.0/23",
        "138.59.238.0/23",
        "177.93.168.0/23",
        # Add more known prefixes if needed
    }

    # Parse the time window strings into datetime objects (UTC)
    from_time = datetime.strptime(from_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
    until_time = datetime.strptime(until_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)

    # Directory containing BGP update files
    directory = "/home/hb/ris_bgp_updates/2018/04/rrc00"

    # Regex pattern to match filenames: updates.YYYYMMDD.HHMM.gz or .bz2
    pattern = r'^updates\.(\d{8})\.(\d{4})\.(bz2|gz)$'

    # Dictionary to store unauthorized announcements
    hijack_attempts = defaultdict(lambda: {"unauthorized_asns": set(), "count": 0})

    for root, _, files in os.walk(directory):
        for file in files:
            match = re.match(pattern, file)
            if match:
                date_str = match.group(1)  # YYYYMMDD
                time_str = match.group(2)  # HHMM
                file_timestamp_str = date_str + time_str
                file_time = datetime.strptime(file_timestamp_str, "%Y%m%d%H%M").replace(tzinfo=timezone.utc)

                # Skip files outside the time window
                if file_time < from_time or file_time > until_time:
                    continue

                file_path = os.path.join(root, file)
                print(f"Processing file: {file_path}")

                # Initialize BGPStream for local file processing
                stream = pybgpstream.BGPStream(data_interface="singlefile")
                stream.set_data_interface_option("singlefile", "upd-file", file_path)

                # Collect data
                for rec in stream.records():
                    for elem in rec:
                        # Filter by element timestamp
                        elem_time = datetime.utcfromtimestamp(elem.time).replace(tzinfo=timezone.utc)
                        if elem_time < from_time or elem_time > until_time:
                            continue

                        if elem.type == "A":  # Only process announcements
                            fields = elem.fields
                            prefix = fields.get("prefix")
                            if not prefix:
                                continue

                            as_path_str = fields.get("as-path", "")
                            as_path_list = as_path_str.strip().split()
                            origin_asn = as_path_list[-1] if as_path_list else None

                            # Check if prefix is known and origin ASN differs from the target ASN
                            if prefix in known_prefixes and origin_asn != asn:
                                hijack_attempts[prefix]["unauthorized_asns"].add(origin_asn)
                                hijack_attempts[prefix]["count"] += 1

    # Prepare results
    results = []
    for prefix, data in hijack_attempts.items():
        results.append({
            "prefix": prefix,
            "unauthorized_asns": list(data["unauthorized_asns"]),
            "frequency_of_suspected_hijacks": data["count"]
        })

    df_hijacks = pd.DataFrame(results)
    if not df_hijacks.empty:
        print(f"Suspected BGP hijacking incidents involving ASN {asn}:")
        print(df_hijacks)
    else:
        print(f"No suspected hijacking incidents detected for ASN {asn} in the given time period.")

if __name__ == "__main__":
    analyze_local_hijacking()

Processing file: /home/hb/ris_bgp_updates/2018/04/rrc00/updates.20180424.1045.gz
Processing file: /home/hb/ris_bgp_updates/2018/04/rrc00/updates.20180424.1145.gz
Suspected BGP hijacking incidents involving ASN 16509:
            prefix unauthorized_asns  frequency_of_suspected_hijacks
0  177.93.168.0/23          [263154]                               1
1  138.59.238.0/23          [263154]                               1
2  177.93.174.0/23          [263154]                               1
