In [54]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from PSAP.Problem import decimal_to_time

In [55]:
# ML results: keep the columns of interest, suffix them with "-ML" and
# order the rows by instance.
df_ml = (
    pd.read_excel('results/SA/ML-Results/ML_151-200ex10-001_099.xlsx')
    .loc[:, ['instance', 'number_of_movements_reached', 'median_delay', 'average_delay',
             'obj_val', 't0', 'alpha', 'predicted_delay', 'valid_solution']]
    .rename(columns={
        'number_of_movements_reached': 'movements_reached-ML',
        'median_delay': 'median_delay-ML',
        'average_delay': 'average_delay-ML',
        'obj_val': 'obj_val-ML',
        't0': 't0-ML',
        'alpha': 'alpha-ML',
        'valid_solution': 'valid_solution-ML',
        'predicted_delay': 'predicted_delay-ML',
    })
    .sort_values(by=['instance'])
)

# SA results (the "random" data): same treatment, with an "-SA" suffix.
# The SA sheet uses spaces in some headers, hence the different source names.
df_sa = (
    pd.read_excel('results/SA/ML-Results/output_150-200ex10-001-099.xlsx')
    .loc[:, ['instance', 'obj_val', 'alpha', 't0', 'median delay', 'average delay',
             'number of movements', 'valid_solution']]
    .rename(columns={
        'median delay': 'median_delay-SA',
        'average delay': 'average_delay-SA',
        'number of movements': 'movements_reached-SA',
        'obj_val': 'obj_val-SA',
        't0': 't0-SA',
        'alpha': 'alpha-SA',
        'valid_solution': 'valid_solution-SA',
    })
    .sort_values(by=['instance'])
)

# Instance table: after sorting on instance, rows 150..199 (positional) are kept;
# per the original note these are meant to be instances 151-200.
df_instances = (
    pd.read_excel('results/instanceData_200.xlsx')
    .loc[:, ['instance', 'number_of_movements']]
    .sort_values(by=['instance'])
    .iloc[150:200]
)

# Attach number_of_movements to every SA and ML row (join on 'instance').
df_ml = df_ml.merge(df_instances, on='instance')
df_sa = df_sa.merge(df_instances, on='instance')

In [56]:
# How many rows are flagged as a valid solution, per method.
n_valid_sa = df_sa['valid_solution-SA'].sum()
n_valid_ml = df_ml['valid_solution-ML'].sum()
print("valid solutions SA: ", n_valid_sa)
print("valid solutions ML: ", n_valid_ml)

# Per-row ratio of movements reached to the instance's number_of_movements.
# The ML ratio defines the index of df_results; the SA ratio is aligned to it.
df_results = (
    df_ml['movements_reached-ML'] / df_ml['number_of_movements']
).to_frame('avg_movements_reached-ML')
df_results['avg_movements_reached-SA'] = (
    df_sa['movements_reached-SA'] / df_sa['number_of_movements']
)

# Average ratio per method: column sum divided by the number of rows.
n_rows = len(df_results)
average_SA = df_results['avg_movements_reached-SA'].sum() / n_rows
average_ML = df_results['avg_movements_reached-ML'].sum() / n_rows
print("average movements reached SA: ", average_SA)
print("average movements reached ML: ", average_ML)

valid solutions SA:  392
valid solutions ML:  408
average movements reached SA:  0.9631006224084153
average movements reached ML:  0.9697388404354822


In [45]:
# All figures below are "column sum / number of rows" over the full SA and ML tables.
n_sa = len(df_sa)
n_ml = len(df_ml)

# mean of the per-row average delay
avg_delay_SA = df_sa['average_delay-SA'].sum() / n_sa
avg_delay_ML = df_ml['average_delay-ML'].sum() / n_ml
print("average delay SA: ", decimal_to_time(avg_delay_SA))
print("average delay ML: ", decimal_to_time(avg_delay_ML))

# mean of the per-row median delay
median_delay_SA = df_sa['median_delay-SA'].sum() / n_sa
median_delay_ML = df_ml['median_delay-ML'].sum() / n_ml
print("average of median delay SA: ", decimal_to_time(median_delay_SA))
print("average median delay ML: ", decimal_to_time(median_delay_ML))

# mean objective value
# CAVEAT (author's note): the objval was not saved consistently. The ML runs store
# the objval of the failed solution, while the SA runs store the objval of the
# last valid solution.
objval_SA = df_sa['obj_val-SA'].sum() / n_sa
objval_ML = df_ml['obj_val-ML'].sum() / n_ml
print("average objval SA: ", objval_SA)
print("average objval ML: ", objval_ML)



average delay SA:  0:51
average delay ML:  0:48
average of median delay SA:  0:42
average median delay ML:  0:39
average objval SA:  87.24916666666667
average objval ML:  83.15333333333334


In [57]:
# Paired SA-vs-ML comparison, restricted to rows where BOTH methods produced a
# valid solution. The pairing is positional: pandas aligns the Series below on
# the index, so row i of df_sa is compared with row i of df_ml.
# Guard against silent misalignment: if the two tables do not list the same
# instance in every row, the differences would mix different instances.
if len(df_sa) != len(df_ml) or (df_sa['instance'].to_numpy() != df_ml['instance'].to_numpy()).any():
    warnings.warn(
        "SA and ML rows are not aligned on 'instance'; "
        "the paired differences below are unreliable"
    )

# copy each method's validity flag onto the other frame (aligned on the index)
df_sa['valid_solution-ML'] = df_ml['valid_solution-ML']
df_ml['valid_solution-SA'] = df_sa['valid_solution-SA']

# now only look at the rows that are solved by both SA and ML
df_sa_solved = df_sa[(df_sa['valid_solution-SA'] == 1) & (df_sa['valid_solution-ML'] == 1)]
df_ml_solved = df_ml[(df_ml['valid_solution-ML'] == 1) & (df_ml['valid_solution-SA'] == 1)]

# calculate the amount of valid solutions for SA and ML (both counts should be equal)
print("valid solutions SA: ", df_sa_solved['valid_solution-SA'].sum())
print("valid solutions ML: ", df_ml_solved['valid_solution-ML'].sum())

# per-row differences, SA minus ML (a positive value means the ML value is lower)
# objval_diff is computed but not reported below
objval_diff = df_sa_solved['obj_val-SA'] - df_ml_solved['obj_val-ML']
avg_diff = df_sa_solved['average_delay-SA'] - df_ml_solved['average_delay-ML']
median_diff = df_sa_solved['median_delay-SA'] - df_ml_solved['median_delay-ML']

# mean and sample standard deviation of the average-delay and median-delay differences
print("average and stddev: (", avg_diff.mean(), avg_diff.std(), ")")
print("median and stddev: (", median_diff.mean(), median_diff.std(), ")")


valid solutions SA:  382
valid solutions ML:  382
average and stddev: ( 0.056340157563702455 0.2971838602373348 )
median and stddev: ( 0.051047120418848096 0.35574175476373787 )


In [58]:
# now we do the same for other parameter ranges; first, the reasonable range
# ML results: keep the columns of interest, suffix them with "-ML" and
# order the rows by instance.
df_ml = (
    pd.read_excel('results/SA/ML-Results/ML_151-200ex10-020_099.xlsx')
    .loc[:, ['instance', 'number_of_movements_reached', 'median_delay', 'average_delay',
             'obj_val', 't0', 'alpha', 'predicted_delay', 'valid_solution']]
    .rename(columns={
        'number_of_movements_reached': 'movements_reached-ML',
        'median_delay': 'median_delay-ML',
        'average_delay': 'average_delay-ML',
        'obj_val': 'obj_val-ML',
        't0': 't0-ML',
        'alpha': 'alpha-ML',
        'valid_solution': 'valid_solution-ML',
        'predicted_delay': 'predicted_delay-ML',
    })
    .sort_values(by=['instance'])
)

# SA results (the "random" data): same treatment, with an "-SA" suffix.
df_sa = (
    pd.read_excel('results/SA/ML-Results/output_151-200ex10-020-099.xlsx')
    .loc[:, ['instance', 'obj_val', 'alpha', 't0', 'median delay', 'average delay',
             'number of movements', 'valid_solution']]
    .rename(columns={
        'median delay': 'median_delay-SA',
        'average delay': 'average_delay-SA',
        'number of movements': 'movements_reached-SA',
        'obj_val': 'obj_val-SA',
        't0': 't0-SA',
        'alpha': 'alpha-SA',
        'valid_solution': 'valid_solution-SA',
    })
    .sort_values(by=['instance'])
)

# Instance table: after sorting on instance, rows 150..199 (positional) are kept;
# per the original note these are meant to be instances 151-200.
df_instances = (
    pd.read_excel('results/instanceData_200.xlsx')
    .loc[:, ['instance', 'number_of_movements']]
    .sort_values(by=['instance'])
    .iloc[150:200]
)

# Attach number_of_movements to every SA and ML row (join on 'instance').
df_ml = df_ml.merge(df_instances, on='instance')
df_sa = df_sa.merge(df_instances, on='instance')



In [48]:
# How many rows of the currently loaded SA / ML tables are flagged as a valid solution.
n_valid_sa = df_sa['valid_solution-SA'].sum()
n_valid_ml = df_ml['valid_solution-ML'].sum()
print("valid solutions SA: ", n_valid_sa)
print("valid solutions ML: ", n_valid_ml)


valid solutions SA:  407
valid solutions ML:  401


In [49]:
# All figures below are "column sum / number of rows" over the full SA and ML tables.
n_sa = len(df_sa)
n_ml = len(df_ml)

# mean of the per-row average delay
avg_delay_SA = df_sa['average_delay-SA'].sum() / n_sa
avg_delay_ML = df_ml['average_delay-ML'].sum() / n_ml
print("average delay SA: ", decimal_to_time(avg_delay_SA))
print("average delay ML: ", decimal_to_time(avg_delay_ML))

# mean of the per-row median delay
median_delay_SA = df_sa['median_delay-SA'].sum() / n_sa
median_delay_ML = df_ml['median_delay-ML'].sum() / n_ml
print("average of median delay SA: ", decimal_to_time(median_delay_SA))
print("average median delay ML: ", decimal_to_time(median_delay_ML))

# mean objective value
# CAVEAT (author's note): the objval was not saved consistently. The ML runs store
# the objval of the failed solution, while the SA runs store the objval of the
# last valid solution.
objval_SA = df_sa['obj_val-SA'].sum() / n_sa
objval_ML = df_ml['obj_val-ML'].sum() / n_ml
print("average objval SA: ", objval_SA)
print("average objval ML: ", objval_ML)


average delay SA:  0:49
average delay ML:  0:48
average of median delay SA:  0:40
average median delay ML:  0:40
average objval SA:  85.83916666666667
average objval ML:  84.1925


In [59]:
# Paired SA-vs-ML comparison, restricted to rows where BOTH methods produced a
# valid solution. The pairing is positional: pandas aligns the Series below on
# the index, so row i of df_sa is compared with row i of df_ml.
# Guard against silent misalignment: if the two tables do not list the same
# instance in every row, the differences would mix different instances.
if len(df_sa) != len(df_ml) or (df_sa['instance'].to_numpy() != df_ml['instance'].to_numpy()).any():
    warnings.warn(
        "SA and ML rows are not aligned on 'instance'; "
        "the paired differences below are unreliable"
    )

# copy each method's validity flag onto the other frame (aligned on the index)
df_sa['valid_solution-ML'] = df_ml['valid_solution-ML']
df_ml['valid_solution-SA'] = df_sa['valid_solution-SA']

# now only look at the rows that are solved by both SA and ML
df_sa_solved = df_sa[(df_sa['valid_solution-SA'] == 1) & (df_sa['valid_solution-ML'] == 1)]
df_ml_solved = df_ml[(df_ml['valid_solution-ML'] == 1) & (df_ml['valid_solution-SA'] == 1)]

# calculate the amount of valid solutions for SA and ML (both counts should be equal)
print("valid solutions SA: ", df_sa_solved['valid_solution-SA'].sum())
print("valid solutions ML: ", df_ml_solved['valid_solution-ML'].sum())

# per-row differences, SA minus ML (a positive value means the ML value is lower)
avg_diff = df_sa_solved['average_delay-SA'] - df_ml_solved['average_delay-ML']
median_diff = df_sa_solved['median_delay-SA'] - df_ml_solved['median_delay-ML']

# mean and sample standard deviation of the average-delay and median-delay differences
print("average and stddev: (", avg_diff.mean(), avg_diff.std(), ")")
print("median and stddev: (", median_diff.mean(), median_diff.std(), ")")

valid solutions SA:  389
valid solutions ML:  389
average and stddev: ( 0.03349071278318259 0.2922943132249796 )
median and stddev: ( 0.03245501285347037 0.3447036985515387 )


In [60]:
# lastly, for the training range
# The ML file is used exactly as loaded (no column selection or renaming here).
# NOTE(review): later cells read 'valid_solution-ML', 'average_delay-ML', ... from it,
# so this file is expected to already carry the "-ML" column names; confirm.
df_ml = pd.read_excel('results/SA/ML-Results/ML151-200ex10-040-099-check.xlsx')

# SA results (the "random" data) come from a larger results file.
df_sa = (
    pd.read_excel('results/SA/100ex20-SA200/output_100e_200-101to200.xlsx')
    .sort_values(by=['instance'])
    # positional rows 1000..1999 of the instance-sorted table
    # (original note: only keep instances 151-200)
    .iloc[1000:2000]
    # keep half of the data: only the rows whose index label is even
    .loc[lambda runs: runs.index % 2 == 0]
    # suffix the SA columns with "-SA"
    .rename(columns={
        'median delay': 'median_delay-SA',
        'average delay': 'average_delay-SA',
        'number of movements': 'movements_reached-SA',
        'obj_val': 'obj_val-SA',
        't0': 't0-SA',
        'alpha': 'alpha-SA',
        'valid_solution': 'valid_solution-SA',
    })
)

# Instance table: after sorting on instance, rows 150..199 (positional) are kept;
# per the original note these are meant to be instances 151-200.
df_instances = (
    pd.read_excel('results/instanceData_200.xlsx')
    .loc[:, ['instance', 'number_of_movements']]
    .sort_values(by=['instance'])
    .iloc[150:200]
)

# Attach number_of_movements to every SA and ML row (join on 'instance').
df_ml = df_ml.merge(df_instances, on='instance')
df_sa = df_sa.merge(df_instances, on='instance')


In [61]:
# calculate the amount of valid solutions for SA and ML
n_valid_sa = df_sa['valid_solution-SA'].sum()
n_valid_ml = df_ml['valid_solution-ML'].sum()
print("valid solutions SA: ", n_valid_sa)
print("valid solutions ML: ", n_valid_ml)


valid solutions SA:  393
valid solutions ML:  384


In [62]:
# Paired SA-vs-ML comparison, restricted to rows where BOTH methods produced a
# valid solution. The pairing is positional: pandas aligns the Series below on
# the index, so row i of df_sa is compared with row i of df_ml.
# Guard against silent misalignment: if the two tables do not list the same
# instance in every row, the differences would mix different instances.
if len(df_sa) != len(df_ml) or (df_sa['instance'].to_numpy() != df_ml['instance'].to_numpy()).any():
    warnings.warn(
        "SA and ML rows are not aligned on 'instance'; "
        "the paired differences below are unreliable"
    )

# copy each method's validity flag onto the other frame (aligned on the index)
df_sa['valid_solution-ML'] = df_ml['valid_solution-ML']
df_ml['valid_solution-SA'] = df_sa['valid_solution-SA']

# now only look at the rows that are solved by both SA and ML
df_sa_solved = df_sa[(df_sa['valid_solution-SA'] == 1) & (df_sa['valid_solution-ML'] == 1)]
df_ml_solved = df_ml[(df_ml['valid_solution-ML'] == 1) & (df_ml['valid_solution-SA'] == 1)]

# calculate the amount of valid solutions for SA and ML (both counts should be equal)
print("valid solutions SA: ", df_sa_solved['valid_solution-SA'].sum())
print("valid solutions ML: ", df_ml_solved['valid_solution-ML'].sum())

# per-row differences, SA minus ML (a positive value means the ML value is lower)
avg_diff = df_sa_solved['average_delay-SA'] - df_ml_solved['average_delay-ML']
median_diff = df_sa_solved['median_delay-SA'] - df_ml_solved['median_delay-ML']

# mean and sample standard deviation of the average-delay and median-delay differences
print("average and stddev: (", avg_diff.mean(), avg_diff.std(), ")")
print("median and stddev: (", median_diff.mean(), median_diff.std(), ")")

valid solutions SA:  365
valid solutions ML:  365
average and stddev: ( 0.024114946738449217 0.30021876972977135 )
median and stddev: ( 0.02203196347031975 0.35134045233134503 )
