In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime
import tools
import re

In [None]:
# Directory whose data the rest of the notebook analyses, resolved by the project helper
# (presumably the most recent run directory — confirm in tools.get_latest_dir).
directory = tools.get_latest_dir()

In [None]:
# Load the data found in `directory`; resampling and alignment are switched off,
# interpolation is switched on.
dfs = tools.get_all_data(directory, resample=False, align=False, interpolate=True)
# Time bounds passed as start_time/end_time to every tools.plot_data call in this notebook.
start, end = tools.get_start_and_end(dfs)
events = tools.get_all_events(dfs)
info = tools.read_info(directory)
# Bare last expression: shows the info object as the cell output.
info

In [None]:
# Missing files per proxy = files sent by the server minus files received by that proxy.
missing_files_df = tools.get_metrics_by_events(dfs, ['multicast_files_sent', 'multicast_files_received'])
# Carry the last known value forward; anything before the first sample becomes 0.
# `.ffill()` replaces `fillna(method='ffill')`, which is deprecated in recent pandas.
missing_files_df = missing_files_df.ffill().fillna(0)
missing_files_df = missing_files_df.apply(np.floor)

# Add one '<proxy> missing_files' column for each proxy's multicast_files_received column.
sent_column = 'server_multicast multicast_files_sent'
multicast_files_received_pattern = re.compile(r'^proxy_multicast_\d+ multicast_files_received$')
# Snapshot the column labels first: the loop body adds new columns to the frame.
for column in list(missing_files_df.columns):
    if multicast_files_received_pattern.match(column):
        # 'proxy_multicast_<id> multicast_files_received' -> '<id>'
        proxy_id = column.split(' ')[0].split('_')[-1]
        missing_files_column = f'proxy_{proxy_id} missing_files'
        missing_files_df[missing_files_column] = missing_files_df[sent_column] - missing_files_df[column]
tools.plot_data(missing_files_df, title='Missing files', start_time=start, end_time=end, figsize=(30,7))

In [None]:
# Cumulative emit_missing_symbols counters per proxy, plus their step-to-step increments.
emissions_df = tools.get_metrics_by_events(dfs, ['emit_missing_symbols'])
# `.ffill()` replaces `fillna(method='ffill')`, which is deprecated in recent pandas.
emissions_df = emissions_df.ffill().fillna(0)
emissions_df = emissions_df.apply(np.floor)

emit_missing_symbols_pattern = re.compile(r'^proxy_multicast_\d+ emit_missing_symbols$')
# Snapshot the column labels first: the loop body adds the ' diff' columns to the frame.
for column in list(emissions_df.columns):
    if emit_missing_symbols_pattern.match(column):
        emissions_df[column + ' diff'] = emissions_df[column].diff()
# diff() leaves NaN in the first row of each new column; fill it so the plot starts at 0.
emissions_df = emissions_df.ffill().fillna(0)

tools.plot_data(emissions_df, title='Missing files emissions', start_time=start, end_time=end, figsize=(30,7))

In [None]:
def _to_numeric_if_possible(series):
    """Return `series` converted to a numeric dtype, or unchanged if it cannot be converted.

    Explicit replacement for the deprecated `pd.to_numeric(..., errors='ignore')`.
    """
    try:
        return pd.to_numeric(series)
    except (ValueError, TypeError):
        return series


# Per-proxy increments of emit_missing_symbols (the ' diff' columns of emissions_df).
diff_pattern = re.compile(r'^proxy_multicast_\d+ emit_missing_symbols diff$')
emissions_diff_columns = [column for column in emissions_df.columns if diff_pattern.match(column)]
recovery_latency_df = emissions_df[emissions_diff_columns]

# Select the per-proxy missing_files columns of missing_files_df and join them on the index.
missing_files_pattern = re.compile(r'^proxy_\d+ missing_files$')
missing_files_columns = [column for column in missing_files_df.columns if missing_files_pattern.match(column)]
recovery_latency_df = recovery_latency_df.join(missing_files_df[missing_files_columns])

recovery_latency_df = recovery_latency_df.sort_values(by='timestamp')
recovery_latency_df = recovery_latency_df.apply(_to_numeric_if_possible)
# Fill the gaps the join introduced: interpolate between samples, then carry values
# forward and zero whatever is still missing. Assignments replace the former
# `inplace=True` / `fillna(method='ffill')` calls.
recovery_latency_df = recovery_latency_df.interpolate()
recovery_latency_df = recovery_latency_df.ffill().fillna(0)
recovery_latency_df = recovery_latency_df.apply(np.floor)

tools.plot_data(recovery_latency_df, title='Recovery latency', start_time=start, end_time=end, figsize=(30,7))

Note: the time between an increase in emit_missing_symbols and the subsequent decrease in missing_files grows with latency. This is expected: higher latency slows down not only multicast delivery but also the recovery of missing symbols over HTTP.

In [None]:
# fetcher_latency metric divided by 1e9 and rounded to 3 decimals
# (presumably nanoseconds -> seconds, matching the plot title — confirm the metric's unit).
raw_fetcher_latency = tools.get_metrics_by_events(dfs, ['fetcher_latency'])
fetcher_latency = raw_fetcher_latency.div(1000000000).round(3)
# Mask out zero samples (they become NaN), so they are skipped by the means below.
fetcher_latency = fetcher_latency.where(fetcher_latency != 0)
tools.plot_data(fetcher_latency, title='Recovery latency in seconds', start_time=start, end_time=end, figsize=(30,7))

# Mean per column, then the mean of those per-column means.
means = fetcher_latency.mean().round(3)
print(means)
overall_mean = means.mean().round(3)
print(f'Average Recovery latency of all proxies: {overall_mean}')

In [None]:
# Processing time of the HTTP server for partial requests.
# Stored under its own name so the `fetcher_latency` frame from the recovery-latency
# cell is not overwritten with a different metric.
# NOTE(review): the divisor here is 1e6 while the fetcher_latency cell divides by 1e9,
# yet both titles say "seconds" — confirm the unit of partial_processing_duration.
partial_processing_duration = tools.get_metrics_by_events(dfs, ['partial_processing_duration']).div(1000000).round(3)
tools.plot_data(partial_processing_duration, title='Processing time of http server for partial requests in seconds', start_time=start, end_time=end, figsize=(30,7))
# Mean per column, shown as the cell output.
partial_processing_duration.mean().round(3)