In [15]:
# ripe-atlas measurement-search --search us --type ping --af 4 --started-after 2023-05-01 --ids-only --limit 5200 > measurement_ids_only_US.csv

# ripe-atlas measurement-search --search us --type ping --af 4 --started-after 2023-03-01 --status stopped --limit 10000 --ids-only > measurement_ids_only_US.csv

In [13]:
# Load the measurement IDs, one per line, from the file produced by
# `ripe-atlas measurement-search ... --ids-only`.
# Blank lines (e.g. a trailing newline at the end of the file) are skipped so
# that an empty string is never treated as a measurement ID downstream.
with open('measurement_ids_only_US.csv') as f:
    ping_measurement_ids = [line.strip() for line in f if line.strip()]
print(len(ping_measurement_ids))

10000


In [14]:
import csv, statistics, subprocess
from tqdm import tqdm


def _parse_ping_line(line):
    """Parse one result line of `ripe-atlas report` output for a ping measurement.

    The parsing expects a line shaped like
    ``<size> from <dst_ip> via probe #<probe_id> (<src_ip>): ... times=<a> ms, <b> ms, ...``
    (this shape is inferred from the splitting logic; confirm against the
    ripe-atlas output format).

    Returns:
        ``(src_prb_id, src_ip, dst_ip, size_field, median_ping)`` or ``None``
        when the line is not a probe result or has fewer than two valid samples.

    Raises:
        ValueError / IndexError: if the line contains the probe marker but does
        not match the expected shape (the caller counts these as line errors).
    """
    if 'via probe #' not in line:
        return None

    dest, src = line.split('via probe #')
    src_prb_id, src_ip = src.split()[:2]
    src_ip = src_ip.split('):')[0].strip('(')
    size_field, dst_ip = dest.split('from')
    dst_ip = dst_ip.strip()

    raw_times = line.split('times=')[1].split(', ')
    # Convert to floats BEFORE ordering: sorting the raw strings is
    # lexicographic ('10.5 ms' < '9.2 ms') and picks the wrong sample.
    samples = [float(t.split()[0]) for t in raw_times if t.strip() != 'None ms']
    if len(samples) <= 1:
        return None

    # True numeric median, valid for any number of samples (not just three).
    return src_prb_id, src_ip, dst_ip, size_field, statistics.median(samples)


# dict of set of ips per probe id
ip_probes_mapping = {}
measurement_id_errors = 0
with open('ping_stats_only_US.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['measurement_id', 'src_ip', 'dst_ip', 'bytes', 'times'])

    for measurement_id in tqdm(ping_measurement_ids, desc="Processing items"):
        try:
            command = ['ripe-atlas', 'report', str(measurement_id)]
            process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
            output, error_output = process.communicate()

            # Surface a failed CLI call instead of silently treating it as
            # "no results"; whatever was printed on stdout is still parsed.
            if process.returncode != 0:
                print(f"ripe-atlas exited with code {process.returncode} for measurement ID {measurement_id}: {error_output.strip()}")
                measurement_id_errors += 1

            # Extract source IP, destination IP, and median ping time from output
            lines = output.strip().split('\n')
            line_processing_errors = 0

            # The first line of the report is skipped, as in the original cell.
            for line in lines[1:]:
                try:
                    parsed = _parse_ping_line(line)
                    if parsed is None:
                        continue
                    src_prb_id, src_ip, dst_ip, size_field, median_ping = parsed

                    # Write the data to the CSV file
                    writer.writerow([measurement_id, src_ip, dst_ip, size_field, median_ping])
                    ip_probes_mapping.setdefault(src_prb_id, set()).add(src_ip)
                except Exception as e:
                    print(f"Error processing line: {line} (Error: {e})")
                    line_processing_errors += 1
            if line_processing_errors > 0:
                print(f"Processed measurement ID {measurement_id} with {line_processing_errors} errors out of {len(lines)} lines")

        except Exception as e:
            print(f"Error processing measurement ID {measurement_id} (Error: {e})")
            measurement_id_errors += 1
    if measurement_id_errors > 0:
        print(f"Processed {len(ping_measurement_ids)} measurement IDs with {measurement_id_errors} measurement ID errors")

Processing items: 100%|██████████| 10000/10000 [3:06:02<00:00,  1.12s/it]  


In [16]:
import csv
# Persist the probe-id -> source-IP associations collected while parsing the
# ping reports, one (IP, probe id) pair per CSV row.
csv_file = 'ip_probes_only_US.csv'
with open(csv_file, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['IP Address', 'Probe ID'])
    for probe_id, probe_ips in tqdm(ip_probes_mapping.items()):
        writer.writerows([ip, probe_id] for ip in probe_ips)

print(f"IP to Probe ID mapping saved to {csv_file}")


100%|██████████| 8013/8013 [00:00<00:00, 178970.01it/s]

IP to Probe ID mapping saved to ip_probes_only_US.csv





----

------

In [17]:
import csv
# Column positions within a row of ping_stats_only_US.csv
MEASUREMENT_ID, SRC_IP, DST_IP, BYTES, TIMES = range(5)

# matrix[a][b] -> list of one-way latency samples observed between a and b
matrix = {}

# NOTE: neither counter is incremented anywhere in this cell; the first one is
# only printed at the end.
dst_probe_not_found_cnt = 0
dst_probe_mismatch_cnt = 0

with open('ping_stats_only_US.csv', 'r') as csvfile:
    reader = csv.reader(csvfile)
    next(reader)  # skip the header row
    for row in tqdm(reader):
        try:
            measurement_id = row[MEASUREMENT_ID]
            src_ip, dst_ip = row[SRC_IP], row[DST_IP]
            latency = float(row[TIMES])
            # Parsed but not used below; a malformed value still rejects the row.
            bytes_transferred = float(row[BYTES].split()[0])

            # Halve the measured time to approximate one direction; the payload
            # size is not normalised for.
            one_way_latency = latency / 2

            # Record the sample in both directions so the matrix is symmetric.
            matrix.setdefault(src_ip, {}).setdefault(dst_ip, []).append(one_way_latency)
            matrix.setdefault(dst_ip, {}).setdefault(src_ip, []).append(one_way_latency)
        except Exception as e:
            print(f"Error processing row: {row} (Error: {e})")

print(f"dst_probe_not_found_cnt: {dst_probe_not_found_cnt}")


4212727it [00:07, 598230.83it/s]

dst_probe_not_found_cnt: 0





In [18]:
# Number of distinct endpoints (top-level keys) in the latency matrix.
len(matrix)

16756

In [20]:
import math
# Every key of `matrix` becomes one row and one column of the square matrix.
all_probes = set(matrix)
print(f"Number of probes: {len(all_probes)}")

probe_list = sorted(all_probes)
matrix_size = len(probe_list)

# Row 0 is the header: a corner label followed by every probe identifier.
square_matrix = [['Probe'] + list(probe_list)]

for i, src_probe in tqdm(enumerate(probe_list)):
    # probe_list is derived from matrix's keys, so this lookup always succeeds.
    neighbours = matrix[src_probe]
    row = [src_probe]  # column 0 repeats the probe identifier
    for dst_probe in probe_list:
        if src_probe == dst_probe:
            cell = 0
        elif dst_probe in neighbours:
            samples = neighbours[dst_probe]
            cell = sum(samples) / len(samples)  # mean of all recorded samples
        else:
            cell = math.inf  # no measurement between this pair
        row.append(cell)
    square_matrix.append(row)

output_file = 'ping_matrix_only_US.csv'
with open(output_file, 'w', newline='') as csvfile:
    csv.writer(csvfile).writerows(square_matrix)

print(f"The square matrix has been written to {output_file}")

Number of probes: 16756


16756it [00:56, 295.44it/s]


The square matrix has been written to ping_matrix_only_US.csv


In [38]:
# Drop the empty-string entry by value rather than by position, so re-running
# this cell cannot remove a real probe.
probe_list = [probe for probe in probe_list if probe != '']

In [39]:
# Number of entries currently in probe_list.
len(probe_list)

16755

In [4]:
import csv
# Read the previously written matrix back into memory; every cell, including
# the numeric ones, comes back as a string.
input_file = 'ping_matrix_only_US.csv'

with open(input_file, 'r') as csvfile:
    square_matrix = list(csv.reader(csvfile))

In [6]:
# The header row holds the corner label first, then one identifier per column.
_corner_label, *probe_list = square_matrix[0]


In [7]:
# Number of probe columns taken from the matrix header row.
len(probe_list)

16756

In [12]:
# Show the two entries at positions 1873 and 1874 of probe_list.
probe_list[1873:1875]

['142.251.167.128', '142.251.208.112']

In [20]:
# Subset of probes (positions 1873 up to, but not including, 5000) used as the
# source probes of the analysis loop below.
outer_probe_list = probe_list[1873:5000]

In [18]:
from tqdm import tqdm
import math

In [21]:
# Find detours: triples (src, mid, dst) where going src -> mid -> dst is faster
# than the direct src -> dst latency, and write them to a CSV file.
#
# Two details matter for correctness:
#   * outer_probe_list is a slice of probe_list, so the position of a source
#     probe inside that slice is NOT its row in square_matrix. The row is
#     looked up by probe identifier instead.
#   * square_matrix may have been reloaded from CSV, in which case every cell
#     is a string. Cells are converted with float() before being added or
#     compared; float() also accepts the 'inf' marker and real numbers.
probe_index = {probe: idx for idx, probe in enumerate(probe_list)}
INF = float('inf')

with open('ping_analysis_result_1873_onwards.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for src_probe in tqdm(outer_probe_list):
        i = probe_index[src_probe]
        src_row = square_matrix[i + 1]

        # The set of usable intermediate probes depends only on the source,
        # so compute it once instead of once per destination.
        reachable_mids = []
        for k, mid_probe in enumerate(probe_list):
            if k == i:
                continue
            src_mid_latency = float(src_row[k + 1])
            if src_mid_latency != INF:
                reachable_mids.append((k, mid_probe, src_mid_latency))

        for j, dst_probe in enumerate(probe_list):
            if i == j:
                continue
            # May be inf when no direct measurement exists; any finite detour
            # then compares as shorter, as the comparison was originally written.
            src_dst_latency = float(src_row[j + 1])
            for k, mid_probe, src_mid_latency in reachable_mids:
                if k == j:
                    continue
                mid_dst_latency = float(square_matrix[k + 1][j + 1])
                if mid_dst_latency == INF:
                    continue
                if src_mid_latency + mid_dst_latency < src_dst_latency:
                    writer.writerow([src_probe, mid_probe, dst_probe, src_mid_latency, mid_dst_latency, src_dst_latency])

 71%|███████   | 2223/3127 [21:05:07<8:37:58, 34.38s/it]   