## Visualizing the results
- This file handles the visualization of the results. 
- The results data are first imported from the results folder.
- It is then processed to prepare for plotting.
- The plotting results in pdf files ready for the report, and png files that are a bit more explanatory for reference.

In [90]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


# Font sizes (and LaTeX text rendering) chosen so the figures match the latex report
plt.rcParams.update({
    'axes.titlesize': 9,
    'axes.labelsize': 9,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'legend.fontsize': 8,
    'text.usetex': True,
})


def read_and_preppare_data(algorithm):
    '''Load the timing results of one algorithm and aggregate them.

    Reads the pickled dataframe ``../results/<algorithm>_timing.pkl``,
    adds a ``time_per_run`` column (``time`` divided by ``n_ar``) and
    averages all numeric columns per ``(list_type, list_size)`` group.
    The standard deviation of ``time_per_run`` within each group is added
    as the column ``std_dev``.

    Parameters
    ----------
    algorithm : str
        Name used to build the file name of the pickled timing data.

    Returns
    -------
    pandas.DataFrame
        One row per ``(list_type, list_size)`` combination, with
        ``list_type`` and ``list_size`` as ordinary columns, the group
        means of the numeric columns, and ``std_dev``. ``std_dev`` is NaN
        for groups that contain a single measurement.
    '''
    # NOTE: unpickling executes arbitrary code; only load result files that
    # were produced by this project.
    data = pd.read_pickle("../results/{}_timing.pkl".format(algorithm))
    # presumably n_ar is the number of executions timed together -- verify
    # against the benchmark that writes the pickle
    data['time_per_run'] = data['time'] / data['n_ar']

    # Group once and reuse the grouping for both statistics
    per_case = data.groupby(['list_type', 'list_size'])
    # numeric_only=True keeps the aggregation from failing on non-numeric
    # columns, which newer pandas versions no longer drop silently
    stat_data = per_case.mean(numeric_only=True)
    stat_data['std_dev'] = per_case['time_per_run'].std()
    return stat_data.reset_index()

def plot_data(stat_data, algorithm):
    '''Plot the mean time per run against list size for every list type.

    One line with standard-deviation error bars is drawn per value of
    ``list_type`` on log-log axes. Two files are written to
    ``../results/plots/`` (the folder is created if it is missing):

    * ``<algorithm>.pdf`` with the generic axis labels ``$x$`` / ``$y$``
      and neither legend nor title, intended for the report.
    * ``<algorithm>_with_titles.png`` which additionally has a legend,
      descriptive axis labels and the algorithm name as title.

    Parameters
    ----------
    stat_data : pandas.DataFrame
        Must provide the columns ``list_type``, ``list_size``,
        ``time_per_run`` and ``std_dev``.
    algorithm : str
        Used for the output file names and as the title of the png.
    '''
    import os  # local import: only needed to ensure the output folder exists

    os.makedirs("../results/plots", exist_ok=True)

    # figsize is given in inches; 84 and 55 are divided by 25.4 (mm per inch)
    fig = plt.figure(figsize=(84/25.4, 55/25.4))
    ax = fig.add_subplot(1, 1, 1)

    # One colour map per list type; three shades are sampled from each map,
    # the middle one is used for the line and the darkest for the error bars.
    color_maps = ['BuPu', 'Blues', 'Oranges']
    shade_levels = np.linspace(0.1, 0.6, 3)

    # Plotting each of the cases, aka best, average and worst case
    for index, (name, group) in enumerate(stat_data.groupby('list_type')):
        # Wrap around so more list types than colour maps do not raise
        color_map = plt.get_cmap(color_maps[index % len(color_maps)])
        _, line_color, error_color = color_map(shade_levels)
        ax.plot(group['list_size'], group['time_per_run'], marker='o',
                linestyle='-', ms=5, label=name, color=line_color)
        ax.errorbar(group['list_size'], group['time_per_run'],
                    yerr=group['std_dev'], fmt='o', capsize=5,
                    color=error_color)

    # Log scale on both axes to better visualize the data
    ax.set_yscale('log')
    ax.set_xscale('log')

    # Report version: generic labels, saved before legend and title are added
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')
    fig.savefig("../results/plots/{}.pdf".format(algorithm), bbox_inches='tight')

    # Reference version: same figure with explanatory decorations
    ax.legend(loc='upper left')
    ax.set_xlabel('List Size')
    ax.set_ylabel('Average Execution Time per Run')
    ax.set_title(algorithm)
    fig.savefig("../results/plots/{}_with_titles.png".format(algorithm))

    

In [91]:
# Sorting algorithms whose timing results are visualized
algorithm_list = ["insert_sort", "merge_sort", "quick_sort"]
# For every algorithm: aggregate its timing data, then write its figures
for algorithm in algorithm_list:
    stat_data = read_and_preppare_data(algorithm)
    plot_data(stat_data=stat_data, algorithm=algorithm)