# Prompt
You are tasked with writing a Python script that reads BGP update messages from a CSV file and performs various BGP data analysis. The script should adhere to the following guidelines:

- Script Structure:
Include a __main__ block or a usage example to demonstrate how to run the script.
Use pandas for data manipulation and analysis.
Implement time-based operations to process data over specified durations.

- Data Parsing and Preprocessing:
Read the CSV file using pandas. The file is expected to contain the following columns:
timestamp: UNIX timestamp or datetime string of the update message.
prefix: The IP prefix being announced or withdrawn.
as_path: The AS path in the update message, represented as a string of ASNs separated by spaces (e.g., "3356 1299 45102").
type: The type of update message (A for announcement, W for withdrawal).
Other relevant fields as necessary (e.g., peer_asn, communities).
Convert timestamps to pandas datetime format.

Split the as_path into a list of ASNs and calculate AS path length.

Handle missing or malformed data gracefully, with appropriate error handling.
import pandas as pd

def read_bgp_updates(csv_file_path):
    """Load BGP update messages from a CSV file into a cleaned DataFrame.

    The ``timestamp`` column may hold UNIX epoch seconds, datetime strings,
    or a mix of both. Every value is converted to a timezone-naive pandas
    datetime expressed in UTC. Rows whose timestamp cannot be parsed, or that
    lack a ``prefix``, ``as_path`` or ``type`` value, are dropped.

    NOTE(review): withdrawals exported with an empty ``as_path`` are dropped
    by the missing-value filter as well -- confirm that this is intended.

    Args:
        csv_file_path: Path or file-like object accepted by
            ``pandas.read_csv``.

    Returns:
        A DataFrame with the CSV columns plus ``as_path_list`` (list of ASN
        strings) and ``as_path_length`` (number of ASNs in the path).

    Raises:
        KeyError: If a required column is absent from the CSV.
    """
    required_columns = ['timestamp', 'prefix', 'as_path', 'type']

    # Read as_path as text: a column where every path is a single ASN would
    # otherwise be parsed as numbers and break the string split below.
    df = pd.read_csv(csv_file_path, dtype={'as_path': str})

    missing_columns = [name for name in required_columns if name not in df.columns]
    if missing_columns:
        raise KeyError(
            f"CSV is missing required column(s): {', '.join(missing_columns)}"
        )

    # First interpret values as epoch seconds; anything non-numeric becomes NaN.
    raw_timestamps = df['timestamp']
    epoch_seconds = pd.to_numeric(raw_timestamps, errors='coerce')
    timestamps = pd.to_datetime(epoch_seconds, unit='s', errors='coerce')

    # Then parse the remaining non-numeric values as datetime strings.
    # utc=True followed by tz_localize(None) normalises them to naive UTC so
    # they are comparable with the epoch-derived values.
    text_rows = epoch_seconds.isna() & raw_timestamps.notna()
    if text_rows.any():
        from_text = pd.to_datetime(
            raw_timestamps[text_rows].astype(str), errors='coerce', utc=True
        ).dt.tz_localize(None)
        timestamps = timestamps.fillna(from_text)
    df['timestamp'] = timestamps

    df = df.dropna(subset=required_columns).copy()
    # str.split() with no separator also discards leading/trailing whitespace.
    df['as_path_list'] = df['as_path'].str.split()
    df['as_path_length'] = df['as_path_list'].apply(len)
    return df

- Summary of BGP Data:

Compute total counts of update messages.
Calculate average counts per time period (e.g., 5, 30, 60 minutes, hourly, daily).
Determine the number of unique ASes and prefixes.
Calculate the average AS path length.
def compute_summary_statistics(df):
    """Print headline statistics for a parsed BGP update DataFrame.

    Reads the ``type``, ``prefix``, ``as_path_list`` and ``as_path_length``
    columns and prints the update totals (overall, announcements,
    withdrawals), the number of distinct ASNs and prefixes, and the mean AS
    path length. Nothing is returned.

    Args:
        df: DataFrame containing the columns listed above.
    """
    message_kinds = df['type']
    announcement_total = (message_kinds == 'A').sum()
    withdrawal_total = (message_kinds == 'W').sum()

    # Flatten the per-row ASN lists so each ASN is counted once overall.
    flattened_ases = df['as_path_list'].explode()
    distinct_as_total = flattened_ases.nunique()
    distinct_prefix_total = df['prefix'].nunique()
    mean_path_length = df['as_path_length'].mean()

    report_lines = (
        "Summary of BGP Data:",
        f"Total Updates: {len(df)}",
        f" - Announcements: {announcement_total}",
        f" - Withdrawals: {withdrawal_total}",
        f"Unique ASes: {distinct_as_total}",
        f"Unique Prefixes: {distinct_prefix_total}",
        f"Average AS Path Length: {mean_path_length:.2f}",
    )
    for line in report_lines:
        print(line)

- Timeline Information:

Generate statistics over time periods for routing table changes.
Produce counts of update messages over time, overall and for specific ASes.
Visualize trends using plots.
import matplotlib.pyplot as plt

def updates_over_time(df, interval='5min'):
    """Count update messages per time bin, overall and by message type.

    The input frame is left untouched. The timestamp may be supplied either
    as a ``timestamp`` column or as the frame's datetime index.

    Args:
        df: DataFrame with a ``type`` column and timestamps as described.
        interval: pandas offset alias giving the bin width. The default
            ``'5min'`` is the non-deprecated spelling of ``'5T'``.

    Returns:
        Tuple ``(updates_count, updates_announcements, updates_withdrawals)``
        of Series indexed by bin start. The announcement and withdrawal
        series share the total's index, with 0 for bins that have none.
    """
    # set_index without inplace returns a new frame, so the caller's
    # DataFrame keeps its 'timestamp' column.
    indexed = df.set_index('timestamp') if 'timestamp' in df.columns else df
    message_kinds = indexed['type']

    updates_count = message_kinds.resample(interval).count()

    def _count_kind(kind):
        # Resampling a subset only spans that subset's own time range, so
        # align it with the overall bins and fill the gaps with zero.
        per_bin = message_kinds[message_kinds == kind].resample(interval).count()
        return per_bin.reindex(updates_count.index, fill_value=0)

    updates_announcements = _count_kind('A')
    updates_withdrawals = _count_kind('W')
    return updates_count, updates_announcements, updates_withdrawals

def plot_updates_over_time(updates_count, updates_announcements, updates_withdrawals):
    """Draw total, announcement and withdrawal counts on one line chart.

    Each argument is plotted as index (x axis) against values (y axis) on a
    single 12x6 figure, which is then displayed with ``plt.show()``.

    Args:
        updates_count: Series of total update counts per time bin.
        updates_announcements: Series of announcement counts per time bin.
        updates_withdrawals: Series of withdrawal counts per time bin.
    """
    labelled_series = (
        (updates_count, 'Total Updates'),
        (updates_announcements, 'Announcements'),
        (updates_withdrawals, 'Withdrawals'),
    )

    plt.figure(figsize=(12, 6))
    for series, legend_label in labelled_series:
        plt.plot(series.index, series.values, label=legend_label)

    plt.title('BGP Updates Over Time')
    plt.xlabel('Time')
    plt.ylabel('Number of Updates')
    plt.legend()
    plt.tight_layout()
    plt.show()

- Event Detection:
Detect route flapping (frequent changes in route announcements/withdrawals).
Identify abrupt changes in announcements and withdrawals.
Flag potential anomalies.
def detect_route_flapping(df, flapping_threshold=5, time_window='1h'):
    """Report prefixes whose update type flips too often within a time window.

    A "transition" is an update whose ``type`` differs from the previous
    update for the same prefix, in timestamp order. The first update seen for
    a prefix has no predecessor and is not counted. A prefix is reported when
    the number of transitions inside any trailing ``time_window`` exceeds
    ``flapping_threshold``.

    The input frame is not modified. The timestamp may be supplied either as
    a ``timestamp`` column or as the frame's index.

    Args:
        df: DataFrame with ``prefix`` and ``type`` columns plus timestamps.
        flapping_threshold: Transition count that must be exceeded.
        time_window: pandas offset alias for the trailing window. The default
            ``'1h'`` is the non-deprecated spelling of ``'1H'``.

    Returns:
        List of flapping prefixes, sorted by prefix. The same list is printed.
    """
    frame = df if 'timestamp' in df.columns else df.reset_index()

    # shift() and time-based rolling both need rows in chronological order
    # within each prefix.
    events = (
        frame[['prefix', 'type', 'timestamp']]
        .dropna()
        .sort_values(['prefix', 'timestamp'])
    )

    previous_type = events.groupby('prefix')['type'].shift()
    # notna() excludes the first row of each prefix, which would otherwise
    # compare as "different" from its missing predecessor.
    is_transition = previous_type.notna() & (previous_type != events['type'])
    transitions = events.loc[is_transition, ['prefix', 'timestamp']].assign(change=1)

    flapping_prefixes = []
    if not transitions.empty:
        # Roll over the numeric marker column only, per prefix, on a
        # datetime index so the offset window is measured in time.
        windowed_counts = (
            transitions.set_index('timestamp')
            .groupby('prefix')['change']
            .rolling(time_window)
            .sum()
        )
        exceeded = windowed_counts[windowed_counts > flapping_threshold]
        flapping_prefixes = exceeded.index.get_level_values(0).unique().tolist()

    print("Detected Route Flapping Prefixes:")
    for prefix in flapping_prefixes:
        print(f" - {prefix}")
    return flapping_prefixes

def detect_abrupt_changes(df, interval='5min', std_multiplier=3, window=3):
    """Flag time bins whose update count spikes above the recent baseline.

    Updates are counted per ``interval``. For each bin, the mean and standard
    deviation of the ``window`` bins *preceding* it form the baseline, and
    the bin is flagged when its count exceeds the baseline mean by more than
    ``std_multiplier`` standard deviations. Only increases are flagged.

    The current bin is deliberately kept out of its own baseline: inside an
    n-sample window a value can deviate from the window mean by at most
    (n - 1) / sqrt(n) sample standard deviations (about 1.15 for n = 3), so a
    multiplier of 3 could never be exceeded.

    The input frame is not modified. The timestamp may be supplied either as
    a ``timestamp`` column or as the frame's datetime index.

    Args:
        df: DataFrame with a ``type`` column plus timestamps.
        interval: pandas offset alias giving the bin width. The default
            ``'5min'`` is the non-deprecated spelling of ``'5T'``.
        std_multiplier: Number of baseline standard deviations to exceed.
        window: Number of preceding bins that make up the baseline.

    Returns:
        Series of the flagged bins' counts, indexed by bin start. The bin
        start times are also printed.
    """
    indexed = df.set_index('timestamp') if 'timestamp' in df.columns else df
    updates_count = indexed['type'].resample(interval).count()

    # shift(1) moves every count one bin later, so the rolling window that
    # ends at bin t covers bins t-window .. t-1 only.
    baseline = updates_count.shift(1).rolling(window=window)
    rolling_mean = baseline.mean()
    rolling_std = baseline.std()

    # Bins without a full baseline produce NaN; comparisons with NaN are
    # False, so those bins are never flagged.
    anomalies = updates_count[(updates_count - rolling_mean) > std_multiplier * rolling_std]
    print("Abrupt Changes Detected at:")
    for time in anomalies.index:
        print(f" - {time}")
    return anomalies

- Specific Queries for Deep Analysis:
Implement functions that allow users to ask specific questions and receive answers based on the data.
def top_prefixes_by_announcements(df, top_n=10, time_window='1h'):
    """Rank prefixes by announcement count in the most recent time window.

    The window ends at the latest timestamp present in ``df`` (of any update
    type) and reaches back ``time_window`` from there. Only rows with type
    ``'A'`` are counted.

    The input frame is not modified. The timestamp may be supplied either as
    a ``timestamp`` column or as the frame's index.

    Args:
        df: DataFrame with ``prefix`` and ``type`` columns plus timestamps.
        top_n: Maximum number of prefixes to report.
        time_window: Window length in a form accepted by ``pandas.Timedelta``.
            The default ``'1h'`` is the non-deprecated spelling of ``'1H'``.

    Returns:
        Series mapping prefix to announcement count, highest first, with at
        most ``top_n`` entries. The same ranking is printed.
    """
    frame = df if 'timestamp' in df.columns else df.reset_index()

    recent_time = frame['timestamp'].max()
    start_time = recent_time - pd.Timedelta(time_window)
    in_window = (frame['timestamp'] >= start_time) & (frame['type'] == 'A')

    prefix_counts = frame.loc[in_window, 'prefix'].value_counts().head(top_n)
    print(f"Top {top_n} Prefixes by Announcements in the Last {time_window}:")
    for prefix, count in prefix_counts.items():
        print(f" - {prefix}: {count} announcements")
    return prefix_counts

- Putting It All Together:

Main function to run the analyses.
def main(csv_file_path='bgp_updates.csv'):
    """Run every analysis step on a BGP updates CSV file.

    Loads the file, prints summary statistics, plots update counts over
    time, runs the flapping and abrupt-change detectors, and prints the top
    announced prefixes. If the file cannot be loaded, or no valid rows
    remain, a message is printed and the function returns early.

    Args:
        csv_file_path: Path to the CSV file of BGP updates.
    """
    try:
        df = read_bgp_updates(csv_file_path)
    except (OSError, KeyError, ValueError) as exc:
        # OSError covers a missing or unreadable file. KeyError and
        # ValueError cover missing columns and pandas parsing failures.
        print(f"Could not load BGP updates from {csv_file_path!r}: {exc}")
        return

    if df.empty:
        print(f"No valid BGP updates found in {csv_file_path!r}.")
        return

    compute_summary_statistics(df)

    # Timeline analysis. A copy is passed because updates_over_time may
    # re-index the frame it receives, and the later steps still need the
    # 'timestamp' column.
    updates_count, updates_announcements, updates_withdrawals = updates_over_time(
        df.copy(), interval='5min'
    )
    plot_updates_over_time(updates_count, updates_announcements, updates_withdrawals)

    # Event detection. The abrupt-change detector resamples by time, so it
    # is given a timestamp-indexed view of the data.
    detect_route_flapping(df)
    detect_abrupt_changes(df.set_index('timestamp'))

    # Deep analysis queries
    top_prefixes_by_announcements(df)


if __name__ == '__main__':
    import sys

    # Usage: python <script> [path/to/bgp_updates.csv]
    if len(sys.argv) > 1:
        main(sys.argv[1])
    else:
        main()

