In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import datetime
import tools

In [None]:
# Log files this notebook has been used with:
#   'experiments.log'    -> Normal local tests
#   'experiments_E.log'  -> manual virtual wall tests
#   'experiments_0.log'  -> Virtual wall tests
# The file that is actually analysed is picked from the batch list below.

interval = 25
batch_count = 12
# One log file per batch; the suffix is the batch number multiplied by `interval`
experiment_batches = ['experiments_' + str(offset) + '.log' for offset in range(0, batch_count * interval, interval)]
print(experiment_batches)

# Batches are selected 1-based
selected_batch = 1
filename = experiment_batches[selected_batch - 1]
print(filename)


In [None]:
# Load the experiments for the selected log file through the project's `tools` module.
# The entry layout is defined there; later cells read the 'id', 'directory', 'info' and 'dfs' keys.
experiments = tools.get_all_experiments(filename)
# [experiment['directory'] for experiment in experiments]
#len(experiments)

In [None]:
# Search for the experiment with the lowest id, and remember the dir that belongs to it.
# min() is used instead of a hard-coded upper bound so arbitrarily large ids are handled;
# on ties the first experiment with the lowest id wins.
lowest_experiment = min(experiments, key=lambda experiment: int(experiment['id']), default=None)
lowest_id = int(lowest_experiment['id']) if lowest_experiment is not None else None
# With no experiments at all the threshold stays 0, which keeps the (empty) list unchanged
lowest_dir = int(lowest_experiment['directory']) if lowest_experiment is not None else 0

# dir is an integer as a string, we want to filter out all the experiments that have a lower dir
experiments = [experiment for experiment in experiments if int(experiment['directory']) >= lowest_dir]

len(experiments)

#experiments[0]['dfs']['server_http']

In [None]:
def reset_id_index(experiments):
    """Rebase the experiment ids in place so that the lowest id becomes '0'.

    Every entry's 'id' is parsed as an integer, reduced by the smallest id in
    the list and written back as a string to both ``entry['id']`` and
    ``entry['info']['id']``. An empty list is left untouched.

    Returns None; the dictionaries inside `experiments` are modified.
    """
    if len(experiments) < 1:
        return

    # Smallest id over all entries
    offset = min(int(entry['id']) for entry in experiments)

    # Shift every id down by that offset
    for entry in experiments:
        rebased = str(int(entry['id']) - offset)
        entry['id'] = rebased
        entry['info']['id'] = rebased

# Rebase the ids of the loaded experiments in place (lowest id becomes '0')
reset_id_index(experiments)

In [None]:
def take_window(df, column):
    """Trim a metrics frame to the part where its values are still changing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame of metric values.
    column : str
        Not used by the implementation; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        `df` without rows containing NaN and without rows in which every
        column is 0, cut off after the last row whose value differs from the
        previous row. Empty when no row survives the first filter.
    """
    # Remove rows with NaN and rows that are entirely 0 (this removes the leading rows)
    df = df.dropna().loc[(df != 0).any(axis=1)]
    if len(df) == 0:
        return df

    # Row-to-row differences; the first row's diff is NaN, which compares != 0 and is therefore kept
    df_diff = df.diff()
    # Keep only the rows where something changed
    df_diff = df_diff.loc[(df_diff != 0).any(axis=1)]
    # Guard the frame that is indexed below (the original re-checked `df` here)
    if len(df_diff) == 0:
        return df

    # Label of the last row that still changed
    last_index = df_diff.index[-1]
    # Drop every row after that label (label slicing is inclusive)
    return df.loc[:last_index]

def get_latency(experiments, should_round=False):
    """Average, per experiment, how long sent multicast files stay ahead of requested segments.

    For every experiment the 'multicast_files_sent' and 'segments_requested'
    metrics are fetched via ``tools.get_metrics_by_events``. Gaps are forward
    filled, remaining NaN become 0 and all values are floored. A span starts
    at the first row where (files sent - segments requested) is positive and
    ends at the next row where it is not. Spans that are not shorter than
    ``seg_dur + 1`` seconds are discarded; the rest are averaged.

    Parameters
    ----------
    experiments : iterable of dict
        Entries providing 'id', 'dfs' and ``info['seg_dur']``.
    should_round : bool
        Accepted but not used by the implementation.

    Returns
    -------
    dict
        Maps experiment id to ``{'id', 'event': 'total_latency', 'avg'}``.
        'avg' is the mean span duration, or the integer 0 when no span is
        left. Experiments without data or without both expected columns are
        left out.
    """
    requested_events = ['multicast_files_sent', 'segments_requested']
    sent_column = "server_multicast multicast_files_sent"
    requested_column = "client_1_1 segments_requested"
    latencies = {}

    for experiment in experiments:
        metrics = tools.get_metrics_by_events(
            experiment['dfs'],
            requested_events,
            interpolate=False,
        )
        if metrics.size == 0:
            continue
        # Both columns are needed to compute the difference below
        if sent_column not in metrics.columns or requested_column not in metrics.columns:
            continue

        # Forward fill gaps, zero whatever is still NaN, then floor every value
        metrics = metrics.ffill().fillna(0).apply(np.floor)
        metrics['in_transmission'] = metrics[sent_column] - metrics[requested_column]

        # Collect (start, end) pairs of consecutive rows with a positive difference
        spans = []
        span_open = False
        span_start = None
        for timestamp, outstanding in metrics['in_transmission'].items():
            if outstanding > 0:
                if not span_open:
                    span_start = timestamp
                    span_open = True
            elif span_open:
                spans.append((span_start, timestamp))
                span_open = False

        # Segment duration (seconds) plus a 1 second margin is the upper bound for a valid span
        limit = datetime.timedelta(seconds=int(experiment['info']['seg_dur']) + 1)
        durations = [stop - start for start, stop in spans]
        durations = [duration for duration in durations if duration < limit]

        if len(durations) > 0:
            mean_duration = sum(durations, datetime.timedelta(0)) / len(durations)
        else:
            mean_duration = 0

        latencies[experiment['id']] = {
            'id': experiment['id'],
            'event': 'total_latency',
            'avg': mean_duration,
        }

    return latencies


def get_values(experiments, event, should_round=False):
    """Summarise one metric per experiment over its active window.

    The metric is fetched with ``tools.get_metrics_by_events``, its columns
    are prefixed with the experiment id and the frame is trimmed with
    ``take_window``. Experiments without data, or with an empty window, are
    skipped.

    Parameters
    ----------
    experiments : iterable of dict
        Entries providing 'id' and 'dfs'.
    event : str
        Name of the metric to summarise.
    should_round : bool
        When True the numeric statistics are rounded to 3 decimals.

    Returns
    -------
    dict
        Maps experiment id to a dict with 'id', 'event', 'first', 'last',
        'sum', 'max', 'diff' (last - first), 'avg' (sum / number of rows),
        'start_index' and 'end_index' (first and last index label of the window).
    """
    def _fmt(number):
        # Optional rounding shared by all statistics
        return round(number, 3) if should_round else number

    summaries = {}
    for experiment in experiments:
        metrics = tools.get_metrics_by_events(
            experiment['dfs'],
            [event],
            interpolate=False,
        )
        if metrics.size == 0:
            continue

        metrics.columns = [experiment['id'] + ' ' + name for name in metrics.columns]
        window = take_window(metrics, event)
        if len(window) == 0:
            continue

        # First and last row of the window; .item() expects a single value per row
        head = window.iloc[0]
        tail = window.iloc[-1]
        first_value = head.item()
        last_value = tail.item()
        total = window.sum().item()
        peak = window.max().item()
        delta = (tail - head).item()
        mean = total / len(window)

        summaries[experiment['id']] = {
            'id': experiment['id'],
            'event': event,
            'first': _fmt(first_value),
            'last': _fmt(last_value),
            'sum': _fmt(total),
            'max': _fmt(peak),
            'diff': _fmt(delta),
            'avg': _fmt(mean),
            'start_index': window.index[0],
            'end_index': window.index[-1],
        }

    return summaries


def get_MBps(experiments, event, start_end_times=None, time_window=None):
    """Compute the average throughput of a byte counter per experiment.

    The counter is fetched with ``tools.get_metrics_by_events`` and trimmed
    with ``take_window``. The number of bytes is the difference between the
    last and the first value of that window.

    Parameters
    ----------
    experiments : iterable of dict
        Entries providing 'id' and 'dfs'.
    event : str
        Name of the byte-counter metric.
    start_end_times : dict, optional
        Maps experiment id to ``{'start_time', 'end_time'}``; when an entry
        exists it replaces the window's own first/last index for the duration.
    time_window : number, optional
        When given and > 0 this fixed duration (seconds) is used instead of
        the measured one.

    Returns
    -------
    dict
        Maps experiment id to ``{'id', 'event', 'time', 'bytes', 'Mbps'}``.
        'Mbps' is 8 times the MB/s value after rounding to 3 decimals.
        Experiments without data, with an empty window, or with a duration
        that is not positive (which would divide by zero) are left out.
    """
    dfs_found = {}
    for experiment in experiments:
        df = tools.get_metrics_by_events(
            experiment['dfs'],
            [event],
            interpolate=False,
        )
        if df.size == 0:
            continue

        df.columns = [experiment['id'] + ' ' + col for col in df.columns]
        df_window = take_window(df, event)
        if len(df_window) == 0:
            continue

        start_time = df_window.index[0]
        end_time = df_window.index[-1]

        # Externally supplied boundaries take precedence over the window's own
        if start_end_times is not None:
            times = start_end_times.get(experiment['id'])
            if times is not None:
                start_time = times['start_time']
                end_time = times['end_time']

        # A fixed time window overrides the measured duration
        if time_window is not None and time_window > 0:
            time_diff = time_window
        else:
            time_diff = (end_time - start_time).total_seconds()

        # A single-row window (or reversed boundaries) has no usable duration
        if time_diff <= 0:
            continue

        # Bytes transferred between the first and last row of the window
        value_diff = (df_window.iloc[-1] - df_window.iloc[0]).item()
        # Megabytes per second, rounded to 3 decimals
        MBps = round(value_diff / time_diff / 1000_000, 3)

        dfs_found[experiment['id']] = {
            'id': experiment['id'],
            'event': event,
            'time': time_diff,
            'bytes': value_diff,
            'Mbps': MBps * 8,
        }

    return dfs_found


def convert_to_df(dictionaries, column_name, value_name='value'):
    """Turn a dict of per-experiment result dicts into a one-column DataFrame.

    Parameters
    ----------
    dictionaries : dict
        Maps a key to a result dict containing at least 'id', 'event' and
        the field named by `value_name`.
    column_name : str
        Name given to the column of the event found in the first result.
    value_name : str
        Field of each result dict that supplies the cell values.

    Returns
    -------
    pandas.DataFrame
        Indexed by integer 'id' in ascending order, with missing values
        replaced by 0. An empty frame is returned for empty input.
    """
    records = pd.DataFrame(dictionaries).T
    if len(records) < 1:
        return records

    # One row per id, one column per event
    wide = records.pivot(index='id', columns='event', values=value_name).reset_index()
    # Drop the 'event' label from the column axis and zero the gaps
    wide = wide.rename_axis(None, axis=1).fillna(0)
    wide['id'] = wide['id'].astype(int)
    wide = wide.set_index('id')

    # The event of the first result decides which column is renamed
    first_event = next(iter(dictionaries.values()))['event']
    renamed = wide.rename(columns={first_event: column_name})
    return renamed.sort_index(ascending=True)

In [None]:
# Active window of the client and of the multicast sender, per experiment id
start_end_times_client = get_values(experiments, 'client_running')
start_end_times_mc = get_values(experiments, 'is_multicasting')
start_end_times = {}
greatest_window = 0
for time_client in start_end_times_client.values():
    start_time = time_client['start_index']
    end_time = time_client['end_index']

    # get_values keys its result by experiment id, so the matching multicast entry is a direct lookup
    time_mc = start_end_times_mc.get(time_client['id'])
    # If multicasting started before the client did, the window starts at the multicast start
    if time_mc is not None and time_mc['start_index'] < start_time:
        start_time = time_mc['start_index']

    window = (end_time - start_time).total_seconds()

    greatest_window = max(greatest_window, window)

    start_end_times[time_client['id']] = {
        'id': time_client['id'],
        'start_time': start_time,
        'end_time': end_time,
    }

print(f"Greatest window: {greatest_window}")

In [None]:
# Start from the largest window measured in the previous cell ...
time_window = greatest_window
# ... which is then unconditionally replaced by this fixed duration
time_window = 96 # Total video length in seconds

In [None]:
# Throughput per experiment on the 'mc' and 'uc' interfaces (presumably multicast / unicast - confirm).
# Because time_window is passed, get_MBps divides by that fixed duration instead of the measured one.
dict_mc = get_MBps(experiments, 'total_bytes_mc_interface', time_window=time_window)
dict_uc = get_MBps(experiments, 'total_bytes_uc_interface', time_window=time_window)

In [None]:
# Base table: one row per experiment id with the MC and UC throughput side by side
df_mc = convert_to_df(dict_mc, 'MC Mbps', 'Mbps')
df_uc = convert_to_df(dict_uc, 'UC Mbps', 'Mbps')

merged_df = pd.concat([df_mc, df_uc], axis=1).sort_index(ascending=True)

# The `if False:` blocks below are optional extra columns; change a condition to True to enable one.

# Optional: raw byte totals
if False:
    df_mc_bytes = convert_to_df(dict_mc, 'total MC [B]', 'bytes')
    df_uc_bytes = convert_to_df(dict_uc, 'total UC [B]', 'bytes')

    merged_df = pd.concat([merged_df, df_mc_bytes, df_uc_bytes], axis=1).sort_index(ascending=True)

# Optional: duration used for the throughput calculation
if False:
    df_mc_time = convert_to_df(dict_mc, 'total MC [s]', 'time')
    df_uc_time = convert_to_df(dict_uc, 'total UC [s]', 'time')

    merged_df = pd.concat([merged_df, df_mc_time, df_uc_time], axis=1).sort_index(ascending=True)

# Optional: last value of the file byte counters
if False:
    dict_mc_files = get_values(experiments, 'total_file_bytes_mc')
    dict_uc_files = get_values(experiments, 'total_file_bytes_uc')
    df_mc_files = convert_to_df(dict_mc_files, 'files MC [B]', 'last')
    df_uc_files = convert_to_df(dict_uc_files, 'files UC [B]', 'last')

    merged_df = pd.concat([merged_df, df_mc_files, df_uc_files], axis=1).sort_index(ascending=True)

# Optional: FDT / partial byte counters.
# NOTE(review): these two frames are built but never concatenated into merged_df - confirm whether that is intended.
if False:
    dict_uc_fdt = get_values(experiments, 'total_fdt_bytes_uc')
    dict_uc_partial = get_values(experiments, 'total_partial_bytes_uc')
    df_uc_fdt = convert_to_df(dict_uc_fdt, 'fdt UC [B]', 'last')
    df_uc_partial = convert_to_df(dict_uc_partial, 'partial UC [B]', 'last')

# Optional: average of 'alc_percentage_to_retrieve', scaled by 100
if False:
    dict_percentage_to_retrieve = get_values(experiments, 'alc_percentage_to_retrieve', should_round=True)
    # Go over the dict and multiply the values by 100
    for key, value in dict_percentage_to_retrieve.items():
        value['avg'] = value['avg'] * 100
    df_percentage_to_retrieve = convert_to_df(dict_percentage_to_retrieve, 'ALCs missing [%]', 'avg')

    merged_df = pd.concat([merged_df, df_percentage_to_retrieve], axis=1).sort_index(ascending=True)

# Optional: average latency computed by get_latency
if False:
    dict_files_sent = get_latency(experiments, should_round=True)
    df_files_sent = convert_to_df(dict_files_sent, 'Total latency', 'avg')
    print(df_files_sent)

    merged_df = pd.concat([merged_df, df_files_sent], axis=1).sort_index(ascending=True)

def _get_experiment_info(experiments, experiment_id, key, default=0):
    """Return ``info[key]`` of the first experiment whose 'id' equals ``str(experiment_id)``.

    `default` is returned when no experiment has that id.
    """
    wanted_id = str(experiment_id)
    for experiment in experiments:
        if experiment['id'] == wanted_id:
            return experiment['info'][key]
    return default


def get_loss_from_experiments(experiments, experiment_id):
    """Return ``info['loss']`` of the experiment with the given id, or 0 if there is none."""
    return _get_experiment_info(experiments, experiment_id, 'loss')


def get_timestamp_from_experiments(experiments, experiment_id):
    """Return ``info['dir']`` of the experiment with the given id, or 0 if there is none."""
    return _get_experiment_info(experiments, experiment_id, 'dir')


def get_clients_from_experiments(experiments, experiment_id):
    """Return ``info['n_clients']`` of the experiment with the given id, or 0 if there is none."""
    return _get_experiment_info(experiments, experiment_id, 'n_clients')


def get_proxies_from_experiments(experiments, experiment_id):
    """Return ``info['n_subnets']`` of the experiment with the given id, or 0 if there is none."""
    return _get_experiment_info(experiments, experiment_id, 'n_subnets')

if len(merged_df) == 0:
    print("No data found")
else:
    # Rows with an odd id keep their column names, rows with an even id become the '(FEC)' columns
    odd_id_df = merged_df.loc[merged_df.index % 2 == 1]
    even_id_df = merged_df.loc[merged_df.index % 2 == 0]

    # The first even row is also put at the start of the odd frame (it stays in even_id_df as well).
    # Guarded so that a result set without any even id does not raise an IndexError.
    if len(even_id_df) > 0:
        first_row = even_id_df.iloc[0]
        odd_id_df = pd.concat([pd.DataFrame([first_row]), odd_id_df])

    # Append ' (FEC)' to the even columns (on a new frame, not on a slice of merged_df)
    even_id_df = even_id_df.add_suffix(' (FEC)')
    # Decrease the index of even_id_df by 1 so every even id lines up with the odd id before it
    even_id_df.index = even_id_df.index - 1
    # Set the row with index -1 (former id 0) back to index 0
    even_id_df = even_id_df.rename(index={-1: 0})

    # Combine the two DataFrames side by side
    merged_df = pd.concat([odd_id_df, even_id_df], axis=1).sort_index(ascending=True)

    # Metadata is looked up per row of the final, sorted index so it can never be shifted
    # relative to its row; ids that do not belong to any experiment get '?'.
    known_ids = {str(experiment['id']) for experiment in experiments}

    def _info_column(getter):
        """Apply one of the get_*_from_experiments lookups to every id in merged_df's index."""
        return [
            getter(experiments, experiment_id) if str(experiment_id) in known_ids else '?'
            for experiment_id in merged_df.index
        ]

    losses = _info_column(get_loss_from_experiments)
    timestamps = _info_column(get_timestamp_from_experiments)
    clients = _info_column(get_clients_from_experiments)
    proxies = _info_column(get_proxies_from_experiments)

    # Each insert goes to position 0, so the final order is: MC, P, CPP, timestamp, MC loss %
    merged_df.insert(0, 'MC loss %', losses)
    merged_df.insert(0, 'timestamp', timestamps)
    merged_df.insert(0, 'CPP', clients)
    merged_df.insert(0, 'P', proxies)

    # All the experiments are multicast, except for the first one
    merged_df.insert(0, 'MC', 1)
    merged_df.iloc[0, merged_df.columns.get_loc('MC')] = 0
    # Set the value of 'MC loss %' to 100 for the first row (column located by name, not by position)
    merged_df.iloc[0, merged_df.columns.get_loc('MC loss %')] = 100

In [None]:
# Display the combined table
merged_df

In [None]:
# Display the 'info' dict of the first experiment (raises IndexError when no experiment is loaded)
experiments[0]['info']

In [None]:
# Save merged_df to csv, ask for the output name first.
# The answer is kept in its own variable so that the log `filename` chosen in the
# configuration cell is not overwritten (re-running the load cell would otherwise read the wrong file).
csv_name = input("Filename: ").strip()
# If input was not cancelled (empty or whitespace only), save the file
if csv_name != '':
    # Replace nan first with 0
    temp_df = merged_df.fillna(0)
    # index=False: the experiment-id index is not written to the file
    temp_df.to_csv(f'./{csv_name}.csv', index=False, header=True)

In [None]:
from cycler import cycler
from scipy import interpolate

def plot(data, y_axis='Mbps', title='Bandwidth', x_axis='Loss'):
    """Plot every column of `data` against its index as a line with point markers.

    Parameters
    ----------
    data : pandas.DataFrame
        The index supplies the x values and every column one series; both
        are converted to float.
    y_axis : str
        Label of the y axis.
    title : str
        Title of the figure.
    x_axis : str
        Label of the x axis.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. Nothing is drawn for an
        empty frame.
    """
    if data.empty:
        return

    fig, ax = plt.subplots(figsize=(30,7))
    x_min = float(data.index[0])
    # A string as last index label cannot be used as a limit; fall back to a width of 1
    x_max = float(data.index[-1]) if not isinstance(data.index[-1], str) else x_min + 1

    x_data = data.index.to_numpy().astype(float)
    # Connect the points in ascending x order; straight segments between the
    # measured points are the linear interpolation of the data
    order = np.argsort(x_data, kind='stable')

    for column in data.columns:
        y_data = data[column].to_numpy().astype(float)
        # Let matplotlib pick the next colour of the cycle and reuse it for the markers
        (line,) = ax.plot(x_data[order], y_data[order], alpha=0.5, label=column, linewidth=2.5)
        ax.scatter(x_data, y_data, marker='o', color=line.get_color(), linewidth=2.5)

    ax.set_xlim(x_min, x_max)  # Set the x-axis limits based on data range
    ax.set_title(title)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.legend()
    ax.grid()

    plt.show()

if len(merged_df) > 0:
    # Bandwidth per loss value. The first row (the run that is marked as not multicast) is skipped.
    # Every step returns a new frame, so nothing is modified in place on a slice of merged_df.
    df_copy = (
        merged_df[['MC loss %', 'MC Mbps', 'UC Mbps','MC Mbps (FEC)','UC Mbps (FEC)']]
        .iloc[1:]
        .set_index('MC loss %')
        # replace nan with 0
        .fillna(0)
    )
    # Literal replacement; regex=False is passed explicitly because the default differs between pandas versions
    df_copy.columns = df_copy.columns.str.replace(' Mbps', '', regex=False)

    plot(df_copy)

In [None]:
# FEC overhead in percent, relative to the run without FEC: (with_fec - without_fec) / without_fec * 100.
# Missing values were replaced by 0 when df_copy was built; a baseline of 0 has no defined
# overhead, so it becomes NaN here instead of being divided by.
mc_baseline = df_copy['MC'].astype(float).replace(0, np.nan)
uc_baseline = df_copy['UC'].astype(float).replace(0, np.nan)
df_copy['MC FEC overhead'] = ((df_copy['MC (FEC)'].astype(float) - mc_baseline) / mc_baseline) * 100
df_copy['UC FEC overhead'] = ((df_copy['UC (FEC)'].astype(float) - uc_baseline) / uc_baseline) * 100

plot(df_copy[['MC FEC overhead', 'UC FEC overhead']], y_axis='%')


In [None]:
# Display the two overhead columns computed in the previous cell
df_copy[['MC FEC overhead', 'UC FEC overhead']]