In [1]:
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from matplotlib.ticker import FormatStrFormatter

from scipy.stats import t
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import natsort

import os
import math
import multiprocessing
import json

In [2]:
def generate_graphs(result_folders, now):
    """Draw every configured graph for the given result folders.

    Reads the module-level ``config`` and ``__results`` dictionaries. When
    ``config["processed-result-dir"]`` is falsy, the raw folders are first processed
    with ``prepare_results`` and the returned list is stored back into ``config``.

    For each entry of ``__results["graph-configurations"]`` the processed folders whose
    last path component, minus its final 20 characters, equals one of the entry's
    configuration names are collected and handed to the plotting function matching
    each name in ``__results["graphs"]``.

    Args:
        result_folders: raw result folders, forwarded to ``prepare_results``.
        now: timestamp string forwarded to ``prepare_results`` and the plot functions.
    """
    print("Beginning graphing of result files: {}".format(result_folders))

    if not config["processed-result-dir"]:
        config["processed-result-dir"] = prepare_results(result_folders, now)

    distance_based = ("PDR-SCI", "PDR-TB", "IPG")

    for graph_title, wanted_configurations in __results["graph-configurations"].items():
        print("Graphing configuration: {}".format(graph_title))

        # Pair each requested configuration with every processed folder named after it.
        matches = [
            (folder, configuration)
            for configuration in wanted_configurations
            for folder in config["processed-result-dir"]
            if configuration == folder.split("/")[-1][:-20]
        ]
        folders_for_comparison = [folder for folder, _ in matches]
        configurations = [configuration for _, configuration in matches]

        print("Now going to the graphing stage")

        for graph in __results["graphs"]:
            if graph == "CBR":
                cbr_graph(folders_for_comparison, graph, graph_title, configurations, now)
            elif graph == "Errors":
                errors_dist(folders_for_comparison, graph, graph_title, configurations, now)
            elif graph in distance_based:
                distance_graph(folders_for_comparison, graph, graph_title, configurations, now)

In [3]:
def prepare_results(result_folders, now):
    """Parse each raw result folder in parallel and write per-statistic summary CSVs.

    Every file in a folder whose name contains ".csv" is handed to
    ``generate_results`` in a worker process. Each statistic found in the returned
    dictionaries is then combined across files by ``across_run_results``, which writes
    one CSV per statistic into the folder's output directory.

    Reads the module-level ``config["parallel_processes"]`` and ``experiment_type``.

    Args:
        result_folders: iterable of folder paths; the last path component minus its
            final 20 characters is used as the configuration name.
        now: string appended to the output directory name.

    Returns:
        List of output directories, one per folder that contained at least one CSV.
    """
    # Cap the worker count once; it is never mutated afterwards, so a folder with few
    # files cannot reduce the parallelism available to the folders that follow it.
    max_processes = config["parallel_processes"]
    available_cpus = multiprocessing.cpu_count()
    if max_processes > available_cpus:
        print("Too many processes, going to revert to total - 1")
        # Never drop to zero workers on a single-core machine.
        max_processes = max(1, available_cpus - 1)

    processed_results = []
    for folder in result_folders:
        config_name = folder.split("/")[-1][:-20]
        print("Results for config: {}".format(config_name))

        # Ensures we don't load files passed by accident
        filtered_files = [
            "{}/{}".format(folder, name)
            for name in natsort.natsorted(os.listdir(folder))
            if ".csv" in name
        ]

        if not filtered_files:
            # Nothing to aggregate; indexing the first result below would fail.
            print("No csv files found in {}, skipping".format(folder))
            continue

        # A single pool processes all files; map() preserves the input order.
        workers = min(max_processes, len(filtered_files))
        with multiprocessing.Pool(processes=workers) as pool:
            folder_results = pool.map(generate_results, filtered_files)

        # Go through each of the available stats and write them out to a csv file.
        output_csv_dir = "{}/data/processed_data/{}/{}-{}".format("/hdd/results-analysis", experiment_type,
                                                                  config_name, now)

        os.makedirs(output_csv_dir, exist_ok=True)

        # Shortcut ensures we get the stats from the parsed results
        for stat in folder_results[0]:
            merge_col = "Time" if stat == "CBR" else "Distance"
            across_run_results(folder_results, stat, output_csv_dir, merge_col)

        processed_results.append(output_csv_dir)

    print("Folders processed: {}".format(processed_results))
    return processed_results

In [4]:
def _merge_error_group(error_dfs, error_names):
    """Fold the per-error tables named in ``error_names`` into a single table.

    The first table is taken as-is (as long as the accumulator is empty); every further
    table is outer-merged on "Distance" and each row is collapsed back to a single
    mean/count pair by ``combine_line``.
    """
    merged = pd.DataFrame()
    for name in error_names:
        if merged.empty:
            merged = error_dfs[name]
        else:
            merged = pd.merge(merged, error_dfs[name], on="Distance", how='outer')
            merged = merged.apply(combine_line, axis=1, result_type='expand')
            merged = merged.rename({0: "mean", 1: "count"}, axis='columns')
    return merged


def generate_results(output_csv):
    """Compute all per-file statistics for one parsed result CSV.

    The file is read in chunks of 10**6 rows. Distance-binned statistics are
    accumulated chunk by chunk with ``stat_distance``; the channel busy ratio is
    grouped by "Time" per chunk and the per-chunk tables are concatenated.

    Reads the module-level ``__results`` dictionary for the error column names
    ("errors") and the four error groupings.

    Args:
        output_csv: path of the CSV file to process.

    Returns:
        Dict mapping "PDR-SCI", "PDR-TB", "IPG", "CBR", "unsensed_errors",
        "hd_errors", "prop_errors" and "interference_errors" to DataFrames.
    """
    print("Generating results for file: {}".format(output_csv))

    pdr_sci_agg = pd.DataFrame()
    pdr_tb_agg = pd.DataFrame()
    ipg_agg = pd.DataFrame()
    cbr_parts = []

    # One accumulator per individual error column; grouped after the file is read.
    error_dfs = {error: pd.DataFrame() for error in __results["errors"]}

    for chunk in pd.read_csv(output_csv, chunksize=10 ** 6):

        # SCI PDR calculation
        pdr_sci_agg = stat_distance(pdr_sci_agg, chunk, "sciDecoded", "txRxDistanceSCI", True)

        # TB PDR calculation
        pdr_tb_agg = stat_distance(pdr_tb_agg, chunk, "tbDecoded", "txRxDistanceTB", True)

        # IPG calculation
        ipg_agg = stat_distance(ipg_agg, chunk, "interPacketDelay", "txRxDistanceTB", False)

        # CBR calculation doesn't aggregate the same way as the above so dealt with separately
        cbr_df = chunk.loc[chunk["cbr"].notnull(), ["Time", "cbr"]]
        # String aggregator names avoid the deprecated numpy-callable aliases.
        cbr_df = cbr_df.groupby("Time").agg({"cbr": ["mean", "std", "count"]})
        cbr_df.columns = cbr_df.columns.droplevel()
        # Express the ratio as a percentage; "count" is a sample count and is left unscaled.
        cbr_df[["mean", "std"]] = cbr_df[["mean", "std"]] * 100
        cbr_parts.append(cbr_df)

        # Error statistics only consider rows where tbReceived is not -1.
        received = chunk[chunk["tbReceived"] != -1]
        for error in error_dfs:
            distance_col = "txRxDistanceSCI" if error.startswith("sci") else "txRxDistanceTB"
            error_dfs[error] = stat_distance(error_dfs[error], received, error, distance_col, True)

    # DataFrame.append no longer exists in pandas 2.x; concatenate the chunk tables once.
    # NOTE(review): a Time value that straddles a chunk boundary yields two rows here,
    # exactly as the previous append-based code did — confirm this is acceptable.
    populated_parts = [part for part in cbr_parts if not part.empty]
    if populated_parts:
        cbr_agg = pd.concat(populated_parts)
    elif cbr_parts:
        cbr_agg = cbr_parts[-1]
    else:
        cbr_agg = pd.DataFrame()

    results = {
        "PDR-SCI": pdr_sci_agg,
        "PDR-TB": pdr_tb_agg,
        "IPG": ipg_agg,
        "CBR": cbr_agg,
    }

    for group in ("unsensed_errors", "hd_errors", "prop_errors", "interference_errors"):
        results[group] = _merge_error_group(error_dfs, __results[group])

    return results

In [34]:
def stat_distance(agg_df, df, stat, distance, percentage):
    """Bin ``stat`` by distance for one chunk and fold it into a running aggregate.

    Rows are kept when ``stat`` is present and greater than -1 and ``posX`` lies
    strictly between 1500 and 3500. The kept rows are grouped into 25-wide distance
    bins starting at 0 and the mean and sample count of ``stat`` are taken per bin.

    Args:
        agg_df: aggregate from previous chunks (an empty DataFrame on the first call).
        df: chunk of parsed results; must contain "Time", "NodeID", "posX", ``stat``
            and ``distance`` columns.
        stat: name of the column to aggregate.
        distance: name of the column holding the distance to bin on.
        percentage: when true, the per-bin mean is multiplied by 100.

    Returns:
        DataFrame indexed by distance bin with "mean" and "count" columns. When
        ``agg_df`` is not empty the chunk is merged into it with ``combine_line``.
    """
    # Reduce the size of the DF to what we're interested in. Both filters are applied
    # together; previously the not-null filter was overwritten by the position filter.
    # NOTE(review): the 1500-3500 posX window presumably excludes edge effects — confirm.
    in_window = (df["posX"] > 1500) & (df["posX"] < 3500)
    has_stat = df[stat].notnull() & (df[stat] > -1)
    distance_df = df.loc[in_window & has_stat, ["Time", "NodeID", stat, distance]]
    distance_df = distance_df.rename(columns={distance: "Distance"})

    # Only interested in max 500m simply as it's not all that relevant to go further.
    # Note that going to the max distance of the file can cause issues with how they are parsed.
    max_distance = min(525, distance_df["Distance"].max())
    bin_edges = np.arange(0, max_distance, 25)

    # Mean and count per distance bin. observed=False keeps empty bins on every pandas
    # version, and the string aggregator avoids the deprecated np.mean alias.
    distance_df = distance_df.groupby(
        pd.cut(distance_df["Distance"], bin_edges), observed=False).agg(
        {stat: ["mean", "count"]})

    # Remove over head column
    distance_df.columns = distance_df.columns.droplevel()

    if percentage:
        # Only the mean is a ratio; the count must stay a true sample count because
        # combine_line uses it as the weight when merging chunks.
        distance_df["mean"] = distance_df["mean"] * 100

    if agg_df.empty:
        agg_df = distance_df
    else:
        # combine_chunks
        agg_df = pd.merge(agg_df, distance_df, on="Distance", how='outer')
        agg_df = agg_df.apply(combine_line, axis=1, result_type='expand')
        agg_df = agg_df.rename({0: "mean", 1: "count"}, axis='columns')

    return agg_df

In [15]:
def combine_line(line):
    """Collapse one merged row holding two mean/count pairs into a single pair.

    ``line`` must provide "mean_x", "count_x", "mean_y" and "count_y". If the right
    mean is NaN the left pair is returned unchanged (also when both are NaN); if only
    the left mean is NaN the right pair is returned. Otherwise the result is the
    count-weighted mean and the summed count.

    Returns:
        A two-element list ``[mean, count]``.
    """
    left_mean, left_count = line["mean_x"], line["count_x"]
    right_mean, right_count = line["mean_y"], line["count_y"]

    # Nothing usable on the right: keep the left side, whatever it holds.
    if np.isnan(right_mean):
        return [left_mean, left_count]

    # Only the right side carries data.
    if np.isnan(left_mean):
        return [right_mean, right_count]

    # Both sides carry data: weight each mean by its sample count.
    weighted_total = left_mean * left_count + right_mean * right_count
    sample_total = left_count + right_count
    return [weighted_total / sample_total, sample_total]

In [16]:
def across_run_results(results, stat, output_csv_dir, merge_col):
    """Combine one statistic across all runs and write it to ``<stat>.csv``.

    The per-run tables are outer-merged on ``merge_col``. Every column whose name
    contains "mean" is then reduced per row by ``combine_runs`` into the across-run
    mean and a confidence-interval half-width. The t multiplier is
    ``t.ppf(p, n)`` with ``p`` read from module level and ``n`` the number of mean
    columns minus one.

    Args:
        results: list of per-run dictionaries as returned by ``generate_results``.
        stat: key of the statistic to combine.
        output_csv_dir: directory that receives ``<stat>.csv``.
        merge_col: name of the column or index level to merge the runs on.
    """
    print("Statistic of interest: {}".format(stat))

    df = pd.DataFrame()
    for run_index, run in enumerate(results):
        # Give every run its own column names up front so merges never collide.
        # This replaces integer merge suffixes, which only produced unique names
        # because name collisions happened to alternate between successive merges.
        run_df = run[stat].rename(
            columns=lambda name, idx=run_index: name if name == merge_col else "{}_{}".format(name, idx))
        if df.empty:
            df = run_df
        else:
            df = pd.merge(df, run_df, how='outer', on=merge_col)

    mean_cols = df.filter(regex='mean').columns

    # Degrees of freedom for the t distribution: number of runs minus one.
    n = len(mean_cols) - 1
    t_value = t.ppf(p, n)

    df = df.apply(combine_runs, axis=1, result_type='expand', args=(mean_cols, t_value,))
    df = df.rename({0: "Mean", 1: "Confidence-Interval"}, axis='columns')
    df.to_csv("{}/{}.csv".format(output_csv_dir, stat))

In [17]:
def combine_runs(line, mean_cols, t_value):
    """Reduce the per-run means of one row to an overall mean and a CI half-width.

    Args:
        line: row-like object indexable by the names in ``mean_cols``.
        mean_cols: names of the columns holding one mean per run.
        t_value: multiplier applied to the standard error of the mean.

    Returns:
        ``[mean, ci]`` where ``ci = t_value * sqrt(s2 / n)`` and ``s2`` is the sample
        variance (n - 1 denominator) of the run means. With a single run the interval
        is undefined and NaN is returned for it; with no runs both values are NaN.
    """
    means = [line[col] for col in mean_cols]
    n = len(means)

    if n == 0:
        # No runs at all: nothing to average.
        return [float("nan"), float("nan")]

    # Average Across runs
    xBar = sum(means) / n

    if n < 2:
        # The sample variance needs at least two runs; avoid dividing by zero.
        return [xBar, float("nan")]

    # Deviation between runs and average
    s2 = sum((mean - xBar) ** 2 for mean in means) / (n - 1)

    # Confidence interval
    ci = t_value * math.sqrt(s2 / n)

    return [xBar, ci]

In [18]:
### Graphing utilities

def distance_graph(folders, graph, comparison, configurations, now):
    """Load ``<graph>.csv`` from each folder and plot the curves against distance.

    The "Mean" column of every file becomes one curve; the "Confidence-Interval"
    column is collected as well when the module-level ``confidence_intervals`` flag
    is truthy. The x values are multiples of 25 derived from the row count of the
    last file read. Only "PDR-SCI", "PDR-TB" and "IPG" produce a figure.

    Args:
        folders: processed result folders to read from.
        graph: statistic name, used for the file name and the plot title suffix.
        comparison: prefix of the plot name.
        configurations: one label per folder, used in the legend.
        now: timestamp string forwarded to ``dist_graph``.
    """
    means, cis, distances = [], [], []

    for folder, _ in zip(folders, configurations):
        summary = pd.read_csv("{}/{}.csv".format(folder, graph))
        means.append(list(summary["Mean"]))
        if confidence_intervals:
            cis.append(list(summary["Confidence-Interval"]))
        distances = list(range(0, summary.shape[0] * 25, 25))

    plot_name = "{}-{}".format(comparison, graph)

    if graph == "IPG":
        dist_graph(means, distances, configurations, plot_name,
                   ylabel="Inter-Packet Gap (ms)", now=now, legend_pos="upper left",
                   confidence_intervals=cis, show=False, store=True)
    elif graph in ("PDR-SCI", "PDR-TB"):
        dist_graph(means, distances, configurations, plot_name,
                   ylabel="Packet Delivery Rate %", now=now,
                   confidence_intervals=cis, show=False, store=True, percentage=True)

def cbr_graph(folders, graph, comparison, configurations, now):
    """Load ``CBR.csv`` from each folder and plot channel busy ratio over time.

    Each file contributes its "Time" and "Mean" columns; the "Confidence-Interval"
    column is collected too when the module-level ``confidence_intervals`` flag is
    truthy. The figure is stored (not shown) by ``cbr_plot``.

    Args:
        folders: processed result folders to read from.
        graph: statistic name, used as the plot name suffix.
        comparison: prefix of the plot name.
        configurations: one label per folder, used in the legend.
        now: timestamp string forwarded to ``cbr_plot``.
    """
    # Might change this to time based graph but CBR is fine for now
    times, cbr, cis = [], [], []

    for folder, _ in zip(folders, configurations):
        summary = pd.read_csv("{}/CBR.csv".format(folder))
        times.append(list(summary["Time"]))
        cbr.append(list(summary["Mean"]))
        if confidence_intervals:
            cis.append(list(summary["Confidence-Interval"]))

    plot_name = "{}-{}".format(comparison, graph)
    cbr_plot(cbr, times, plot_name, configurations, now=now,
             confidence_intervals=cis, show=False, store=True)

def dist_graph(means, distances, labels, plot_name, ylabel, now, legend_pos="lower left",
               confidence_intervals=None, show=True, store=False, percentage=False):
    """Plot one curve per entry of ``means`` against a shared distance axis.

    Args:
        means: list of y-value sequences, one per curve.
        distances: x values shared by all curves.
        labels: legend label for each curve.
        plot_name: first part of the stored file name.
        ylabel: label of the y axis.
        now: timestamp string appended to the stored file name.
        legend_pos: matplotlib legend location.
        confidence_intervals: per-curve error-bar sizes; when falsy, plain lines are drawn.
        show: call ``fig.show()`` when true.
        store: save a 300 dpi PNG under the module-level ``figure_store`` when true.
        percentage: fix the y axis to 0-100 with ticks every 10 when true.
    """
    fig, ax = plt.subplots()

    for index, series in enumerate(means):
        if not confidence_intervals:
            ax.plot(distances, series, label=labels[index])
        else:
            ax.errorbar(distances, series, yerr=confidence_intervals[index], label=labels[index])

    ax.set(xlabel='Distance (m)', ylabel=ylabel)
    ax.legend(loc=legend_pos)
    ax.tick_params(direction='in')

    # The x axis runs just past the largest distance, with a tick every 50.
    x_upper = max(distances) + 1
    ax.set_xlim([0, x_upper])
    ax.set_xticks(np.arange(0, x_upper, step=50))

    if percentage:
        ax.set_ylim([0, 100])
        ax.set_yticks(np.arange(0, 101, step=10))

    if show:
        fig.show()

    if store:
        target = "{}/{}-{}.png".format(figure_store, plot_name, now)
        fig.savefig(target, dpi=300)

    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

def cbr_plot(cbr, times, plot_name, labels, now, confidence_intervals=None, show=True, store=False):
    """Plot channel busy ratio curves over time, one per entry of ``cbr``.

    Args:
        cbr: list of y-value sequences, one per curve.
        times: list of x-value sequences, parallel to ``cbr``.
        plot_name: first part of the stored file name.
        labels: legend label for each curve.
        now: timestamp string appended to the stored file name.
        confidence_intervals: per-curve error-bar sizes; when falsy, plain lines are drawn.
        show: call ``fig.show()`` when true.
        store: save a 300 dpi PNG under the module-level ``figure_store`` when true.
    """
    fig, ax = plt.subplots()

    for index, (x_values, y_values) in enumerate(zip(times, cbr)):
        if not confidence_intervals:
            ax.plot(x_values, y_values, label=labels[index])
        else:
            ax.errorbar(x_values, y_values, yerr=confidence_intervals[index], label=labels[index])

    ax.legend(loc='upper left')
    ax.set(xlabel='Time (s)', ylabel='Channel Busy Ratio %')
    ax.tick_params(direction='in')

    # The ratio is plotted as a percentage: fixed 0-100 axis with a tick every 10.
    ax.set_ylim([0, 100])
    ax.set_yticks(np.arange(0, 101, step=10))

    if show:
        fig.show()

    if store:
        target = "{}/{}-{}.png".format(figure_store, plot_name, now)
        fig.savefig(target, dpi=300)

    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [19]:
def errors_dist(folders, graph, comparison, configurations, now):
    """Plot the four grouped error statistics of every folder against distance.

    For each folder the files ``unsensed_errors.csv``, ``hd_errors.csv``,
    ``prop_errors.csv`` and ``interference_errors.csv`` are read; each contributes one
    curve labelled ``<configuration>-<error group>``. Confidence intervals are
    collected when the module-level ``confidence_intervals`` flag is truthy. The x
    values are multiples of 25 derived from the row count of the last file read.

    Args:
        folders: processed result folders to read from.
        graph: statistic name, used as the plot name suffix.
        comparison: prefix of the plot name.
        configurations: one label per folder.
        now: timestamp string forwarded to ``dist_graph``.
    """
    # TODO: Update to allow such graphing to be automatically configured.
    error_groups = ("unsensed_errors", "hd_errors", "prop_errors", "interference_errors")

    means, cis, distances, labels = [], [], [], []

    for folder, config_label in zip(folders, configurations):
        for error in error_groups:
            summary = pd.read_csv("{}/{}.csv".format(folder, error))
            means.append(list(summary["Mean"]))
            if confidence_intervals:
                cis.append(list(summary["Confidence-Interval"]))
            distances = list(range(0, summary.shape[0] * 25, 25))
            labels.append("{}-{}".format(config_label, error))

    plot_name = "{}-{}".format(comparison, graph)
    dist_graph(means, distances, labels, plot_name,
               ylabel="Error Probability %", now=now, confidence_intervals=cis,
               show=False, store=True, percentage=True, legend_pos="upper left")

### Setup

In [20]:
# Plot styling switches and the directory figures are written to.
use_markers = False
use_line_types = False
image_format = "png"
figure_store = "/hdd/results-analysis/data/figures"

# Marker styles: the string codes followed by the integer codes 0 through 11.
markers = [".", "o", "v", "^", "<", ">", "1", "2", "3", "4", "8", "s", "p", "P", "*", "h", "H", "+",
           "x", "X", "D", "d", "|", "_"] + list(range(12))

# The JSON file holds one section per experiment type; only this one is loaded.
experiment_type = "mcs7"
config_path = "/hdd/results-analysis/configs/mcs7.json"

with open(config_path) as json_file:
    config = json.load(json_file)[experiment_type]

# Settings the analysis and graphing functions read from module level.
__results = config["results"]
p = __results["confidence-interval"]
confidence_intervals = __results["graph-confidence-interval"]

In [21]:
# Run the whole pipeline on the parsed result folders listed in the loaded config.
# The second argument is the timestamp string that gets appended to the processed-data
# directory names and to the stored figure file names.
generate_graphs(config["parsed-result-dir"], "00_00_00-01-01-2020")

Beginning graphing of result files: ['/hdd/results-analysis/data/parsed_data/mcs7/MCS7-1vpm-20dbm-2020-04-14-00_06_11']
Results for config: MCS7-1vpm-20dbm
Generating results for file: /hdd/results-analysis/data/parsed_data/mcs7/MCS7-1vpm-20dbm-2020-04-14-00_06_11/run-1.csv
Generating results for file: /hdd/results-analysis/data/parsed_data/mcs7/MCS7-1vpm-20dbm-2020-04-14-00_06_11/run-2.csv
Generating results for file: /hdd/results-analysis/data/parsed_data/mcs7/MCS7-1vpm-20dbm-2020-04-14-00_06_11/run-3.csv
Generating results for file: /hdd/results-analysis/data/parsed_data/mcs7/MCS7-1vpm-20dbm-2020-04-14-00_06_11/run-4.csv


TypeError: Cannot perform 'rand_' with a dtyped [float64] array and scalar of type [bool]