# GREW-MRS Project

## Run Full-Suite Simulation Trial

The run script is the main entrypoint for this project. It has a number of options that can be dynamically set for a wide range of simulation scenarios. These are available for reference by passing the help flag as a parameter.

In [None]:
# Print the run script's help text by passing the -h flag (IPython shell escape).
!./run.sh -h

Let's create a dataset with the following settings:
- Target Coverage Percentage of 90%
- Across all three path planning algorithms/strategies.
- Independently seeding 3 random child trials per parent iteration.
- With a target/plant value range of 4.

In [None]:
# Launch a run through the project run script (IPython shell escape).
# `sudo -S` reads the password from stdin, which is redirected from a local file named `pass`.
# NOTE(review): the settings listed in the text above mention all three algorithms, but this
# command passes -a "aco" only -- confirm the intended flags against `./run.sh -h`.
# NOTE(review): a sudo password kept in a plain-text file is a risk; make sure `pass` is never committed.
!sudo -S ./run.sh -t "0.90" -a "aco" -s 1 -n "100" -v < pass

## Parse and Pre-process Simulation Dataset

We will use the independent Student's t-test to determine whether the difference between our **UAV-MRS** simulation datasets is significant. We first import the necessary Python packages.

In [2]:
import pandas as pd
from scipy import stats
from math import sqrt
from scipy.stats import ttest_ind
from scipy.stats import t
import numpy as np
# NOTE(review): this binds the top-level matplotlib package to the name `plt`, which by
# convention refers to matplotlib.pyplot -- confirm which one was intended.
import matplotlib as plt
# NOTE(review): duplicate of the `sqrt` import a few lines up.
from math import sqrt

Convert the csv dataset into hdf5 format.

In [None]:
# Number of CSV rows handed to pandas per chunk when streaming the dataset.
chunksize = 10 ** 4
# Raw simulation output to convert.
filename = 'sample/data_0.csv'
# Column names of the dataset, in order.
# NOTE(review): `headers` is not passed to the read_csv call that follows this
# block, so the CSV is expected to carry its own header row -- confirm.
headers = ['Type','TargetNum','TargetThresh','Step','Completed',
         'X','Y','Z','RtMProb','RtLProb','MinimumHold','LaunchStep',
         'InitialRtMProb','RtMDelta','InitialRtLProb','RtLDelta',
         'MinimumRest','InitialMinimumHold','MaximumHold','GlobalReach',
         'ProximityThresh','Attitude','SwarmParticles','SwarmSelfTrust',
         'SwarmPastTrust','SwarmGlobalTrust','SwarmAnts','MappingMean',
         'MappingStdDev','MappingSeed','RtMMin','RtMMax','RtMSeed','RtLMin',
         'RtLMax','RtLSeed','ACOSeed','TaskCompletedMin','TaskCompletedMax',
         'TaskCompletedSeed','TargetShuffleMin','TargetShuffleMax',
         'TargetShuffleSeed','NaiveMapping','VStep','HStep','ArgosSeed']

# Per-column dtypes, chosen as narrow as possible to keep the chunks small.
# `np.bytes_` replaces the former `np.string_` alias: both names refer to the
# same type on NumPy 1.x, but `np.string_` was removed in NumPy 2.0.
# NOTE(review): several seed/step columns are uint8 (max 255) and most reals
# are float16 -- confirm the simulator never emits values outside those ranges.
datatypes={
    'Type':np.bytes_,'TargetNum':np.uint8,'TargetThresh':np.uint8,'Step':np.uint32,'Completed':np.uint8,
    'X':np.float16,'Y':np.float16,'Z':np.float16,'RtMProb':np.float16,'RtLProb':np.float16,'MinimumHold':np.uint8,'LaunchStep':np.uint8,
    'InitialRtMProb':np.float16,'RtMDelta':np.float16,'InitialRtLProb':np.float16,'RtLDelta':np.float16,
    'MinimumRest':np.uint8,'InitialMinimumHold':np.uint8,'MaximumHold':np.uint8,'GlobalReach':np.float16,
    'ProximityThresh':np.float16,'Attitude':np.float16,'SwarmParticles':np.uint8,'SwarmSelfTrust':np.float16,
    'SwarmPastTrust':np.float16,'SwarmGlobalTrust':np.float16,'SwarmAnts':np.uint8,'MappingMean':np.float16,
    'MappingStdDev':np.float16,'MappingSeed':np.uint8,'RtMMin':np.uint8,'RtMMax':np.uint8,'RtMSeed':np.uint16,'RtLMin':np.uint8,
    'RtLMax':np.uint8,'RtLSeed':np.uint8,'ACOSeed':np.uint8,'TaskCompletedMin':np.uint8,'TaskCompletedMax':np.uint8,
    'TaskCompletedSeed':np.uint8,'TargetShuffleMin':np.uint8,'TargetShuffleMax':np.uint8,
    'TargetShuffleSeed':np.uint8,'NaiveMapping':np.bytes_,'VStep':np.float16,'HStep':np.float16,'ArgosSeed':np.uint8
}

def saveAsHDF(chunk):
    """Append the rows of ``chunk`` to one HDF5 file per algorithm type.

    Rows whose 'Type' column equals 'pso', 'aco' or 'lawn' are appended
    (table format, key 'data') to sample/pso.h5, sample/aco.h5 and
    sample/lawn.h5 respectively. Rows with any other 'Type' are not written.
    """
    destinations = (
        ('pso', 'sample/pso.h5'),
        ('aco', 'sample/aco.h5'),
        ('lawn', 'sample/lawn.h5'),
    )
    for algorithm, h5_path in destinations:
        matching_rows = chunk.loc[chunk['Type'] == algorithm]
        # mode='a' with append=True grows the existing table instead of replacing it.
        matching_rows.to_hdf(h5_path, key = 'data', mode ='a', format='table', append = True)

# Stream the CSV in pieces of `chunksize` rows so the whole dataset never has to
# sit in memory at once, and append each piece to the per-algorithm HDF5 files.
csv_reader = pd.read_csv(filename, chunksize=chunksize, dtype=datatypes)
for chunk in csv_reader:
    saveAsHDF(chunk)

We then load our categorised hdf5 datasets piecemeal and compute their means.

In [None]:
# Compute the pso means first
# NOTE(review): the conversion cell writes sample/pso.h5, whereas this reads
# sample/preprocessed/pso.h5 -- presumably the file is moved by hand; confirm.
pso_f = pd.read_hdf('sample/preprocessed/pso.h5', 'data')

# Keep only the rows where the completed count equals the target threshold.
# A boolean mask replaces the former groupby('TargetNum').apply(...) filter,
# which depended on the grouping column being passed to the applied function
# (deprecated in recent pandas). The stable sort by TargetNum reproduces the row
# order that filter produced, so head(1) below selects the same rows.
pso_target_thresh = pso_f[pso_f['Completed'] == pso_f['TargetThresh']].sort_values('TargetNum', kind='mergesort')

# First matching row per ArgosSeed, then the mean Step for each TargetNum.
# NOTE(review): assumes each ArgosSeed identifies a single trial -- TODO confirm.
pso_step_means = pso_target_thresh.groupby('ArgosSeed').head(1).groupby('TargetNum')['Step'].mean()

# Release the large intermediate frames before the next dataset is loaded.
del pso_f, pso_target_thresh

print(pso_step_means)

In [None]:
# Compute the aco means
# NOTE(review): the conversion cell writes sample/aco.h5, whereas this reads
# sample/preprocessed/aco.h5 -- presumably the file is moved by hand; confirm.
aco_f = pd.read_hdf('sample/preprocessed/aco.h5', 'data')

# Keep only the rows where the completed count equals the target threshold.
# A boolean mask replaces the former groupby('TargetNum').apply(...) filter,
# which depended on the grouping column being passed to the applied function
# (deprecated in recent pandas). The stable sort by TargetNum reproduces the row
# order that filter produced, so head(1) below selects the same rows.
aco_target_thresh = aco_f[aco_f['Completed'] == aco_f['TargetThresh']].sort_values('TargetNum', kind='mergesort')

# First matching row per ArgosSeed, then the mean Step for each TargetNum.
# NOTE(review): assumes each ArgosSeed identifies a single trial -- TODO confirm.
aco_step_means = aco_target_thresh.groupby('ArgosSeed').head(1).groupby('TargetNum')['Step'].mean()

# Release the large intermediate frames before the next dataset is loaded.
del aco_f, aco_target_thresh

print(aco_step_means)

In [None]:
# Compute the lawns means
# NOTE(review): this reads sample/lawn.h5 while the pso and aco cells read from
# sample/preprocessed/ -- confirm which location is intended.
lawn_f = pd.read_hdf('sample/lawn.h5', 'data')

# Keep only the rows where the completed count equals the target threshold.
# A boolean mask replaces the former groupby('TargetNum').apply(...) filter,
# which depended on the grouping column being passed to the applied function
# (deprecated in recent pandas). The stable sort by TargetNum reproduces the row
# order that filter produced, so head(1) below selects the same rows.
lawn_target_thresh = lawn_f[lawn_f['Completed'] == lawn_f['TargetThresh']].sort_values('TargetNum', kind='mergesort')

# First matching row per ArgosSeed, then the mean Step for each TargetNum.
# NOTE(review): assumes each ArgosSeed identifies a single trial -- TODO confirm.
lawn_step_means = lawn_target_thresh.groupby('ArgosSeed').head(1).groupby('TargetNum')['Step'].mean()

# Release the large intermediate frames once the means are computed.
del lawn_f, lawn_target_thresh

print(lawn_step_means)

Aggregate all the computed means.

In [None]:
# Combine the three per-TargetNum mean series into one frame aligned on
# TargetNum, then keep only the TargetNum values for which all three
# algorithms have a mean.
means = pd.DataFrame({
    'pso': pso_step_means,
    'aco': aco_step_means,
    'lawn': lawn_step_means,
}).dropna()

# Persist the aggregated means under the key 'means'. The context manager
# closes the HDF5 file once the write is done; previously the store was left open.
# A saved copy can be reloaded later with `backup['means']` on a reopened store.
with pd.HDFStore('sample/backup.h5') as backup:
    backup['means'] = means
print(means)

Generate summary plots

In [None]:
%matplotlib notebook

# Line plot of the mean steps per target number, one line per algorithm
# (pso in red, aco in green, lawn in blue, following the column order).
m_plot = means.plot(style=['r', 'g', 'b'])
m_plot_fig = m_plot.get_figure()

# Label the axes and title before the figure is written out.
m_plot.set_ylabel('Time to Target Threshold')
m_plot.set_xlabel('Plant Target Number')
m_plot.set_title('Mean Algorithm Performance')

m_plot_fig.savefig('thesis/images/means_line_plot.png', bbox_inches='tight')

In [None]:
%matplotlib notebook 

# Box plot of the per-target-number PSO step means.
pso_step_mean_plot = pso_step_means.plot.box()
# Title spelling fixed ('Performanc e' -> 'Performance') to match the ACO and Lawn plots.
pso_step_mean_plot.set_title('Distribution of PSO Performance')
pso_step_mean_plot.set_ylabel('Time to Target Threshold')
pso_step_mean_fig = pso_step_mean_plot.get_figure()
pso_step_mean_fig.savefig('thesis/images/pso_mean_box_plot.png',bbox_inches='tight')

In [None]:
%matplotlib notebook

# Box plot of the per-target-number ACO step means.
aco_step_mean_plot = aco_step_means.plot.box()
aco_step_mean_fig = aco_step_mean_plot.get_figure()

# Annotate the axes, then write the figure to the thesis image folder.
aco_step_mean_plot.set_ylabel('Time to Target Threshold')
aco_step_mean_plot.set_title('Distribution of ACO Performance')
aco_step_mean_fig.savefig('thesis/images/aco_mean_box_plot.png', bbox_inches='tight')

In [None]:
%matplotlib notebook

# Box plot of the per-target-number lawn step means.
lawn_step_mean_plot = lawn_step_means.plot.box()
lawn_step_mean_fig = lawn_step_mean_plot.get_figure()

# Annotate the axes, then write the figure to the thesis image folder.
lawn_step_mean_plot.set_ylabel('Time to Target Threshold')
lawn_step_mean_plot.set_title('Distribution of Lawn Performance')
lawn_step_mean_fig.savefig('thesis/images/lawn_mean_box_plot.png', bbox_inches='tight')

In [None]:
%matplotlib notebook

# Side-by-side box plots of the step means, one box per column of `means`.
mean_plot = means.plot.box()
mean_fig = mean_plot.get_figure()

# Annotate the axes, then write the figure to the thesis image folder.
mean_plot.set_ylabel('Time to Target Threshold')
mean_plot.set_title('Performance Distributions')
mean_fig.savefig('thesis/images/all_means_box_plot.png', bbox_inches='tight')

## Perform Significance Testing

If the means backup file already exists, we can load that instead

In [3]:
# Load the previously aggregated means from the HDF5 backup file.
# No key is given, so read_hdf relies on the file holding a single object.
means_backup_path = 'sample/preprocessed/backup.h5'
means = pd.read_hdf(means_backup_path)
print(means)

                    pso           aco          lawn
TargetNum                                          
2            407.750000    407.250000   1568.000000
3            741.333333    749.250000   7124.500000
4            723.500000    748.250000   4703.500000
5            774.000000    774.000000   4392.666667
12          4277.500000   4607.250000  59524.000000
13          4567.500000   4903.000000  55488.666667
14          4929.500000   4917.000000  62760.666667
15          4898.666667   4887.666667  55574.000000
16          6384.000000   6388.000000  62564.666667
17          6490.666667   7362.500000  67697.666667
18          4734.000000   4784.000000  60794.500000
19          6814.250000   5789.666667  61050.333333
20          5080.000000   5653.500000  60168.000000
21          5693.000000   5460.500000  57930.000000
22          5367.333333   5236.500000  64139.333333
23          5520.000000   5619.000000  65296.000000
24          5912.333333   5374.500000  62720.500000
25          

Perform a t-test on the generated means.

In [None]:
# Independent two-sample t-tests for every pair of algorithms, using SciPy's
# default settings. Element 0 of each result is later reported as the t-value
# and element 1 as the p-value.
ind_t_test_PA = ttest_ind(means['pso'], means['aco'])
ind_t_test_PL = ttest_ind(means['pso'], means['lawn'])
ind_t_test_AL = ttest_ind(means['aco'], means['lawn'])

We have a p-value of ~0.9 between aco and pso, indicating that there is no statistically significant difference between the two means. More interestingly, though, we obtain p-values of < 0.05 when comparing either pso or aco against the lawn means. This indicates statistical significance: we can reject the null hypothesis that these means are equal.

Compute the mean difference between the distributions

In [None]:
# Grand mean of each algorithm's column of per-target-number means.
mean_pso, mean_aco, mean_lawn = (means[algo].mean() for algo in ('pso', 'aco', 'lawn'))

# Pairwise differences between the grand means (first minus second).
diff_mean_PA = mean_pso - mean_aco
diff_mean_PL = mean_pso - mean_lawn
diff_mean_AL = mean_aco - mean_lawn

Compute the 95% confidence interval of the mean sets using the Margin of Error

In [None]:
# Get the sample sizes
NP = means.pso.size
NA = means.aco.size
NL = means.lawn.size

# Compute the degrees of freedom for each pooled two-sample comparison (n1 + n2 - 2)
DF_PA = NP + NA - 2
DF_PL = NP + NL - 2
DF_AL = NA + NL - 2

# Compute our t-values: the 97.5% quantile gives a two-sided 95% interval.
# A one-element list is passed, so each result is a one-element array.
t_PA = t.ppf([0.975], DF_PA)
t_PL = t.ppf([0.975], DF_PL)
t_AL = t.ppf([0.975], DF_AL)

# Compute the standard deviations of the samples
std_pso = means.pso.std()
std_aco = means.aco.std()
std_lawn = means.lawn.std()

# Compute the pooled standard deviations between the samples.
# The lawn term is weighted by (NL - 1); it was previously weighted by
# (NA - 1), a copy-paste slip that is only harmless while NA == NL.
std_PA = sqrt(((NP - 1)*(std_pso)**2 + (NA - 1)*(std_aco)**2) / DF_PA)
std_PL = sqrt(((NP - 1)*(std_pso)**2 + (NL - 1)*(std_lawn)**2) / DF_PL)
std_AL = sqrt(((NA - 1)*(std_aco)**2 + (NL - 1)*(std_lawn)**2) / DF_AL)

# Compute our Margin of Errors (one-element arrays, because the t-values are)
MoE_PA = t_PA * std_PA * sqrt(1/NP + 1/NA)
MoE_PL = t_PL * std_PL * sqrt(1/NP + 1/NL)
MoE_AL = t_AL * std_AL * sqrt(1/NA + 1/NL)

# Take the scalar out of each margin so the interval bounds print as plain
# numbers rather than as bracketed one-element arrays.
moe_pa, moe_pl, moe_al = (float(moe[0]) for moe in (MoE_PA, MoE_PL, MoE_AL))

print('The results of the independent t-tests are: \nPSO:ACO -> tt-value = {:4.3f} tp-value = {:4.3f} \nPSO:LAWN -> tt-value = {:4.3f} tp-value = {:4.3f} \nACO:LAWN -> tt-value = {:4.3f} tp-value = {:4.3f}'.format(ind_t_test_PA[0],ind_t_test_PA[1],ind_t_test_PL[0],ind_t_test_PL[1],ind_t_test_AL[0],ind_t_test_AL[1]))
print ('\nThe difference between groups is \nPSO:ACO {:3.1f} [{} to {}] (mean [95% CI]) \nPSO:LAWN {:3.1f} [{} to {}] (mean [95% CI]) \nACO:LAWN {:3.1f} [{} to {}] (mean [95% CI])'.format(
    diff_mean_PA, diff_mean_PA - moe_pa, diff_mean_PA + moe_pa,
    diff_mean_PL, diff_mean_PL - moe_pl, diff_mean_PL + moe_pl,
    diff_mean_AL, diff_mean_AL - moe_al, diff_mean_AL + moe_al))