In [82]:
%matplotlib notebook
import logging
import os
import math
import multiprocessing
import json
import re

from scipy.stats import t
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import natsort

In [83]:
use_markers = False
experiment_type = "cv2x"

use_line_types = False
image_format = "png"
figure_store = "../data/figures/"

overall_now="12:00:00"
confidence_intervals = False

## Runner for overall job

In [84]:
def generate_graphs(result_folders, now):

    print("Beginning graphing of result file: {}".format(result_folders))

    if not config["processed-result-dir"]:
        config["processed-result-dir"] = prepare_results(result_folders, now)

    for graph_title in results_["graph-configurations"]:
        print("Graphing configuration: {}".format(graph_title))
        folders_for_comparison = []
        configurations = []
        for configuration in results_["graph-configurations"][graph_title]:
            for folder in config["processed-result-dir"]:
                config_name = folder.split("/")[-1][:-20]
                if configuration == config_name:
                    folders_for_comparison.append(folder)
                    configurations.append(configuration)

        for graph in results_["graphs"]:
            if graph in ["PDR-SCI", "PDR-TB", "IPG"]:
                distance_graph(folders_for_comparison, graph, graph_title, configurations, now)
            elif graph == "CBR":
                cbr_graph(folders_for_comparison, graph, graph_title, configurations, now)

## Results Preparation stage

In [85]:
def prepare_results(result_folders, now):

    num_processes = config["parallel_processes"]
    if num_processes > multiprocessing.cpu_count():
        print("Too many processes, going to revert to total - 1")
        num_processes = multiprocessing.cpu_count() - 1

    processed_results = []
    for folder in result_folders:
        config_name = folder.split("/")[-1][:-20]
        print("Results for config: {}".format(config_name))
        folder_results = []
        files = natsort.natsorted(os.listdir(folder))

        filtered_files = []
        for i in range(len(files)):
            # Ensures we don't load files passed by accident
            if ".csv" in files[i]:
                filtered_files.append("{}/{}".format(folder, files[i]))

        i = 0
        while i < len(filtered_files):
            if len(filtered_files) < num_processes:
                num_processes = len(filtered_files)
            pool = multiprocessing.Pool(processes=num_processes)

            folder_results.append(pool.starmap(generate_results, zip(filtered_files[i: i + num_processes])))

            pool.close()
            pool.join()

            i += num_processes

        folder_results = [y for x in folder_results for y in x]
        # Go through each of the available stats and write them out to a csv file.
        output_csv_dir = "/Users/brianmccarthy/git_repos/results-analysis/data/processed_data/{}/{}-{}".format(
            os.getcwd(), experiment_type,config_name, now)
        
        return folder_results

        os.makedirs(output_csv_dir, exist_ok=True)

        # Shortcut ensures we get the stats from the parsed results
        for stat in folder_results[0]:
            if stat == "CBR":
                across_run_results_cbr(folder_results, output_csv_dir)
            else:
                across_run_results(folder_results, stat, output_csv_dir, "Distance")

        processed_results.append(output_csv_dir)

    print("Folders processed: {}".format(processed_results))
    return processed_results

In [164]:
def generate_results(output_csv):

    print("Generating results for file: {}".format(output_csv))

    results = {}

    pdr_sci_agg = pd.DataFrame()
    pdr_tb_agg = pd.DataFrame()
    # pdr_tb_ignore_sci_agg = pd.DataFrame()
    ipg_agg = pd.DataFrame()
    cbr_agg = pd.DataFrame()
    unsensed_errors = pd.DataFrame()
    hd_errors = pd.DataFrame()
    prop_errors = pd.DataFrame()
    interference_errors = pd.DataFrame()

    error_dfs = {}
    # Need a new for loop through all the errors and adding them as a stat distance
    for error in results_["errors"]:
        error_dfs[error] = pd.DataFrame()
    
    for chunk in pd.read_csv(output_csv, chunksize=10 ** 6):
        
        # CBR calculation doesn't aggregate the same way as the above so dealt with separately
        cbr_df = chunk[["Time", "cbr"]]
        cbr_df = cbr_df[cbr_df["cbr"] > -1]

        if cbr_agg.empty:
            cbr_agg = cbr_df
        else:
            cbr_agg = cbr_agg.append(cbr_df)
            

        # SCI PDR calculation
        pdr_sci_agg = stat_distance(pdr_sci_agg, chunk, "sciDecoded", "txRxDistanceSCI", True)

        # TB PDR calculation
        pdr_tb_agg = stat_distance(pdr_tb_agg, chunk, "tbDecoded", "txRxDistanceTB", True)

        # pdr_tb_ignore_sci_agg = self.stat_distance(pdr_tb_agg, chunk, "tbDecodedIgnoreSCI", "txRxDistanceTB", True)

        # IPG calculation
        ipg_agg = stat_distance(ipg_agg, chunk, "interPacketDelay", "txRxDistanceTB", False)

        chunk = chunk[chunk["tbReceived"] != -1]
        for error in error_dfs:
            if "sci" in error[0:3]:
                error_dfs[error] = stat_distance(error_dfs[error], chunk, error, "txRxDistanceSCI", True)
            else:
                error_dfs[error] = stat_distance(error_dfs[error], chunk, error, "txRxDistanceTB", True)

    results["PDR-SCI"] = pdr_sci_agg
    results["PDR-TB"] = pdr_tb_agg
    # results["PDR-IGNORE-SCI"] = pdr_tb_ignore_sci_agg
    results["IPG"] = ipg_agg
    results["CBR"] = cbr_agg

    for key, df in zip(["unsensed_errors", "hd_errors", "prop_errors", "interference_errors"],
                       [unsensed_errors, hd_errors, prop_errors, interference_errors]):
        for error in results_[key]:
            if df.empty:
                df = error_dfs[error]
            else:
                # Combine mean errors
                df["mean"] = df["mean"] + error_dfs[error]["mean"]

        results[key] = df

    return results

In [151]:
def stat_distance(agg_df, df, stat, distance, percentage):

    # Reduce the size of the DF to what we're interested in.
    distance_df = df[df[stat].notnull()]
    distance_df = distance_df[(distance_df["posX"] > 0) & (distance_df["posX"] < 2000)]
    distance_df = distance_df[["Time", "NodeID", stat, distance]]
    distance_df = distance_df[distance_df[stat] > -1]
    distance_df = distance_df.rename(columns={"Time": "Time", "NodeID": "NodeID", stat: stat, distance: "Distance"})

    # Only interested in max 500m simply as it's not all that relevant to go further.
    # Note that going to the max distance of the file can cause issues with how they are parsed.
    max_distance = min(530, distance_df["Distance"].max())

    # Get the mean, std, count for each distance
    distance_df = distance_df.groupby(
        pd.cut(distance_df["Distance"], np.arange(0, max_distance, 10))).agg(
        {stat: [np.mean, "count"]})

    # Remove over head column
    distance_df.columns = distance_df.columns.droplevel()

    if percentage:
        distance_df = distance_df.apply(lambda x: x * 100, axis=1)

    if agg_df.empty:
        agg_df = distance_df
    else:
        # combine_chunks
        agg_df = pd.merge(agg_df, distance_df, on="Distance", how='outer')
        agg_df = agg_df.apply(combine_line, axis=1, result_type='expand')
        agg_df = agg_df.rename({0: "mean", 1: "count"}, axis='columns')

    return agg_df

In [152]:
def combine_line(line):
    mean_a = line["mean_x"]
    count_a = line["count_x"]

    mean_b = line["mean_y"]
    count_b = line["count_y"]

    if np.isnan(mean_a) and np.isnan(mean_b):
        return [mean_a, count_a]
    elif np.isnan(mean_a) and not np.isnan(mean_b):
        return [mean_b, count_b]
    elif np.isnan(mean_b) and not np.isnan(mean_a):
        return [mean_a, count_a]
    else:
        ex_a = mean_a * count_a
        ex_b = mean_b * count_b

        tx = ex_a + ex_b
        tn = count_a + count_b

        overall_mean = tx / tn
        overall_count = tn

        return [overall_mean, overall_count]

In [153]:
def across_run_results(results, stat, output_csv_dir, merge_col):
    
    df = pd.DataFrame()
    print("Statistic of interest: {}".format(stat))
    for i in range(len(results)):
        if df.empty:
            df = results[i][stat]
        else:
            df = pd.merge(df, results[i][stat], how='outer', on=merge_col,
                          suffixes=(i, i + 1),
                          copy=True, indicator=False)

    mean_cols = df.filter(regex='mean').columns

    n = len(mean_cols) - 1
    t_value = t.ppf(p, n)

    df = df.apply(combine_runs, axis=1, result_type='expand', args=(mean_cols, t_value,))
    df = df.rename({0: "Mean", 1: "Confidence-Interval"}, axis='columns')
    df.to_csv("{}/{}.csv".format(output_csv_dir, stat))


In [154]:
def across_run_results_cbr(results, output_csv_dir):
    earliest_time = float("inf")
    latest_time = -float("inf")

    raw_cbr_df = pd.DataFrame()
    for folder in results:

        start_time = folder["CBR"]["Time"].min()
        if start_time < earliest_time:
            earliest_time = start_time

        end_time = folder["CBR"]["Time"].max()
        if end_time > latest_time:
            latest_time = end_time

        if raw_cbr_df.empty:
            raw_cbr_df = folder["CBR"]
        else:
            raw_cbr_df.append(folder["CBR"])

    print("Earliest time: {}s Latest time: {}s".format(earliest_time, latest_time))

    cbr_df = pd.DataFrame(columns=["Mean", "Time", "Confidence-Interval"])
    last_time = earliest_time
    for i in np.arange(earliest_time, latest_time, 0.1):
        subsection_df = pd.DataFrame()
        for folder in results:
            df = folder["CBR"]
            if subsection_df.empty:
                subsection_df = df[(df["Time"] < i) & (df["Time"] >= last_time) & (df["cbr"].notnull())]
            else:
                subsection_df.append(df[(df["Time"] < i) & (df["Time"] >= last_time) & (df["cbr"].notnull())])

        last_time = i

        cbr_df = cbr_df.append({"Mean": subsection_df["cbr"].mean(),
                                "Time": (i + last_time) / 2,
                                "Confidence-Interval": subsection_df["cbr"].std()
                                }, ignore_index=True)

    cbr_df.to_csv("{}/CBR.csv".format(output_csv_dir), index=False)
    raw_cbr_df.to_csv("{}/raw-CBR.csv".format(output_csv_dir), index=False)

In [155]:
def combine_runs(line, mean_cols, t_value):
    means = []
    for mean in mean_cols:
        means.append(line[mean])

    n = len(means)

    # Average Across runs
    xBar = sum(means) / n

    # Deviation between runs and average
    deviation = []
    for mean in means:
        deviation.append((mean - xBar) ** 2)
    s2 = sum(deviation) / (n - 1)

    # Confidence interval
    ci = t_value * math.sqrt(s2 / n)

    return [xBar, ci]

### Graphing stage

In [156]:
def distance_graph(folders, graph_type, graph_title, graph_info, now):
    means = []
    cis = []
    distances = []
    for folder in folders:
        df = pd.read_csv("{}/{}.csv".format(folder, graph_type))
        means.append(list(df["Mean"]))
        if confidence_intervals:
            cis.append(list(df["Confidence-Interval"]))
        distances = (list(range(0, df.shape[0] * 10, 10)))
        
    graph_info["means"] = means
    graph_info["cis"] = cis

    if graph_type in ["PDR-SCI", "PDR-TB"]:
        dist_graph(distances, graph_info, "{}-{}".format(graph_title, graph_type),
                   ylabel="Packet Delivery Rate %", now=now, confidence_intervals=cis, show=False, store=True)

In [157]:
def cbr_graph(folders, graph_type, graph_title, graph_info, now):
    # Might change this to time based graph but CBR is fine for now
    times = []
    cbrs = []
    cis = []
    for folder in folders:
        cbr = []
        time = []
        ci = []
        df = pd.read_csv("{}/CBR.csv".format(folder))
        times.append(list(df["Time"]))
        cbrs.append(list(df["cbr"]))
        if confidence_intervals:
            cis.append(list(df["Confidence-Interval"]))
        

    graph_info["means"] = cbrs
    graph_info["times"] = times
    graph_info["cis"] = cis

    cbr_plot(graph_info, "{}-{}".format(graph_title, graph_type), now=now,
             confidence_intervals=confidence_intervals, show=True, store=False)

In [158]:
def dist_graph(distances, graph_info, plot_name, ylabel, now, legend_pos="lower left",
               confidence_intervals=None, show=True, store=False):
    fig, ax = plt.subplots()

    for i in range(len(graph_info["config_name"])):
        if confidence_intervals:
            ax.errorbar(distances, means[i], yerr=confidence_intervals[i], label=labels[i])
        else:
            ax.plot(distances, graph_info["means"][i], label=graph_info["labels"][i],
                    fillstyle="none", marker=graph_info["markers"][i], markevery=5,
                    color=graph_info["colors"][i], linestyle=graph_info["linestyles"][i])

    ax.set(xlabel='Distance (m)', ylabel=ylabel)
    ax.legend(loc=legend_pos)
    ax.tick_params(direction='in')
    
    ax.set_xlim([0, 500])
    ax.set_ylim([0, 100])
    plt.xticks(np.arange(0, (max(distances) + 1), step=50))
    plt.yticks(np.arange(0, 101, step=10))
    plt.grid(b=True, alpha=0.5)

    if show:
        fig.show()
        
    print("{}/{}-{}".format(figure_store, plot_name, now))

    if store:
        fig.savefig("{}/{}-{}.png".format(figure_store, plot_name, now), dpi=300)
    plt.close(fig)

In [159]:
def cbr_plot(graph_info, plot_name, now, confidence_intervals=None, show=True, store=False):

    fig, ax = plt.subplots()

    for i in range(len(graph_info["config_name"])):
        if confidence_intervals:
            ax.errorbar(graph_info["times"][i], graph_info["means"][i], yerr=confidence_intervals[i],
                        label=graph_info["labels"][i],
                        fillstyle="none", color=graph_info["colors"][i], linestyle=graph_info["linestyles"][i])
        else:
            ax.plot(graph_info["times"][i], graph_info["means"][i], label=graph_info["labels"][i],
                    marker=graph_info["markers"][i], markevery=5, fillstyle="none",
                    color=graph_info["colors"][i], linestyle=graph_info["linestyles"][i])

    ax.legend(loc='upper left')
    ax.set(xlabel='Time (s)', ylabel='Channel Busy Ratio %')
    ax.tick_params(direction='in')

    ax.set_ylim([0, 100])
    plt.yticks(np.arange(0, 101, step=10))
    plt.grid(b=True, alpha=0.5)

    if show:
        fig.show()

    if store:
        fig.savefig("{}/{}-{}.png".format(figure_store, plot_name, now), dpi=400)

In [160]:
def errors_dist(distances, decoded, decoded_labels, errors, error_labels, plot_name):
    # TODO: Update to allow such graphing to be automatically configured.

    fig, ax = plt.subplots()

    if use_markers:
        for i in range(len(decoded)):
            ax.plot(distances, decoded[i], label=decoded_labels[i], marker=markers[i], markevery=3)

            for j in range(len(errors[i])):
                ax.plot(distances, errors[i][j], label=error_labels[i][j], marker=markers[i + j])

    elif use_line_types:
        for i in range(len(decoded)):
            ax.plot(distances, decoded[i], label=decoded_labels[i])

            for j in range(len(errors[i])):
                ax.plot(distances, errors[i][j], label=error_labels[i][j])

    else:
        for i in range(len(decoded)):
            ax.plot(distances, decoded[i], label=decoded_labels[i])

            for j in range(len(errors[i])):
                ax.plot(distances, errors[i][j], label=error_labels[i][j])

    ax.legend(loc='center left')

    ax.set(xlabel='Distance (m)', ylabel='Packet Delivery Rate (PDR) %')
    ax.grid()

    ax.set_ylim([0, 1])
    plt.yticks(np.arange(0, 1.1, step=.1))

    ax.set_xlim([0, (max(distances) + 1)])
    plt.xticks(np.arange(0, (max(distances) + 1), step=50))

    fig.savefig("{}/{}-{}.png".format(figure_store, plot_name, now), dpi=300)
    plt.close(fig)

In [161]:
config_file = "/Users/brianmccarthy/git_repos/results-analysis/configs/cv2x.json"
with open(config_file) as config_json:
    config = json.load(config_json)[experiment_type]
results_ = config["results"]
p = results_["confidence-interval"]
now="00_00_00"

In [162]:
folder_results = prepare_results(["/Users/brianmccarthy/git_repos/results-analysis/data/parsed_data/cv2x/Fast-2020-05-28-23_03_07"], now)

Results for config: Fast
Generating results for file: /Users/brianmccarthy/git_repos/results-analysis/data/parsed_data/cv2x/Fast-2020-05-28-23_03_07/run-2.csv
Generating results for file: /Users/brianmccarthy/git_repos/results-analysis/data/parsed_data/cv2x/Fast-2020-05-28-23_03_07/run-1.csv
Generating results for file: /Users/brianmccarthy/git_repos/results-analysis/data/parsed_data/cv2x/Fast-2020-05-28-23_03_07/run-4.csv
Generating results for file: /Users/brianmccarthy/git_repos/results-analysis/data/parsed_data/cv2x/Fast-2020-05-28-23_03_07/run-3.csv
Generating results for file: /Users/brianmccarthy/git_repos/results-analysis/data/parsed_data/cv2x/Fast-2020-05-28-23_03_07/run-5.csv
Min Time: 500.004s Max Time: 507.588s in this chunk
Min Time: 500.005s Max Time: 507.628s in this chunk
Min Time: 500.004s Max Time: 507.557s in this chunk
Min Time: 500.006s Max Time: 507.585s in this chunk
Min Time: 500.001s Max Time: 507.524s in this chunk
Min Time: 507.59s Max Time: 511.999s in this 

In [163]:
print(folder_results[0]["CBR"].min())
print(folder_results[0]["CBR"].max())

Time    500.006
cbr       0.000
dtype: float64
Time    507.585000
cbr       0.537415
dtype: float64
