# Assessment of the performance of the RA and PC-TSP models
## Based on computational time on various datasets


### Plan:

- Load all functions from the project modules, along with all dependencies
- Load all datasets for the analysis
- Run each algorithm in a loop, recording ONLY the time taken by the optimization algorithm itself
- Save the timings into a DataFrame
- Show the timings in a table (with the dataset details, objective values, and time taken by each algorithm)
- Convert the table into LaTeX code for the report

In [2]:
from Data import *
from pctstp import *
from tiaor import *
import os

In [1]:
# List the .xlsx files in the Data folder.
# The same directory name is used for listing and for building the paths so
# that the lookup also works on case-sensitive filesystems. The listing is
# sorted because os.listdir returns entries in arbitrary order.
data_dir = "Data"
files = sorted(os.listdir(data_dir))
filepaths = [os.path.join(data_dir, file) for file in files if file.endswith('.xlsx')]
print(filepaths)

# Make dictionaries to store the data for each file, keyed by the file name
# up to its first dot (e.g. "Dataset1.xlsx" -> "Dataset1")
ra_data = {}
pctsp_data = {}

# Load the data from each file, once per model
for filepath in filepaths:
    file_name = os.path.basename(filepath).split('.')[0]

    # Inputs for the RA model
    pairs, altruistic_donors, nodes, edges = get_data(filepath)
    ra_data[file_name] = {
        "pairs": pairs,
        "altruistic_donors": altruistic_donors,
        "nodes": nodes,
        "edges": edges,
    }

    # Inputs for the PC-TSP model (get_data returns two extra values, stored
    # under "G" and "all_cycles", when called with pc_tsp=True)
    (
        G_pctsp,
        pairs_pctsp,
        altruistic_donors_pctsp,
        nodes_pctsp,
        edges_pctsp,
        all_cycles_pctsp,
    ) = get_data(filepath, pc_tsp=True)
    pctsp_data[file_name] = {
        "G": G_pctsp,
        "pairs": pairs_pctsp,
        "altruistic_donors": altruistic_donors_pctsp,
        "nodes": nodes_pctsp,
        "edges": edges_pctsp,
        "all_cycles": all_cycles_pctsp,
    }


NameError: name 'os' is not defined

In [None]:
# Sanity check: look up one loaded dataset by name and print its contents
example_key = 'Dataset1'
check_dataset = ra_data[example_key]
print(check_dataset)


{'pairs': [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 29, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 57, 58, 59, 60, 61, 62, 64, 65, 66, 67, 68, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 142, 144, 146, 148, 149, 150, 151, 153, 154, 155, 156, 157, 159, 160, 162, 163, 164, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 181, 182, 183, 184, 185, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 213, 214, 216, 217, 218, 219, 220, 221, 222, 223, 224, 226, 228, 229, 230, 232, 233], 'altruistic_donors': [256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 26

In [None]:
# Load existing results if they exist, otherwise start a fresh table.
# In both cases the table ends up with one row per loaded dataset, so every
# dataset that is benchmarked later has a row for its timings to be written to.
results_file = 'results.csv'
result_columns = ['Dataset', 'NDDs', 'Pairs', 'Edges', 'RA', 'PCTSP', 'Objective Value RA', 'Objective Value PCTSP']

# Size information for every loaded dataset (timings and objective values
# are filled in when the algorithms are run)
dataset_info = pd.DataFrame(
    [
        {
            'Dataset': name,
            'NDDs': len(data['altruistic_donors']),
            'Pairs': len(data['pairs']),
            'Edges': len(data['edges']),
        }
        for name, data in ra_data.items()
    ],
    columns=['Dataset', 'NDDs', 'Pairs', 'Edges'],
)

if os.path.exists(results_file):
    results = pd.read_csv(results_file)
    # Add rows for datasets that were loaded but are not in the saved file;
    # columns missing from the new rows are filled with NaN by concat.
    missing = dataset_info[~dataset_info['Dataset'].isin(results['Dataset'])]
    if not missing.empty:
        results = pd.concat([results, missing], ignore_index=True)
else:
    # Timing and objective-value columns start out empty (NaN)
    results = dataset_info.reindex(columns=result_columns)

# Run the RA and PCTSP algorithms on each dataset and save the results

# Datasets that failed in earlier runs; they are skipped entirely
failed_datasets = {
    'Dataset16', 'Dataset17', 'Dataset18', 'Dataset20', 'Dataset22',
    'Dataset25', 'Dataset3', 'Dataset5', 'Dataset7', 'Dataset9',
}

for dataset in ra_data:
    if dataset in failed_datasets:
        continue

    # Skip datasets whose row already has both timings filled in
    row_mask = results['Dataset'] == dataset
    if row_mask.any() and results.loc[row_mask, ['RA', 'PCTSP']].notnull().values.all():
        print(f"Skipping {dataset} as it has already been processed.")
        continue

    ra_inputs = ra_data[dataset]
    pctsp_inputs = pctsp_data[dataset]

    print(f"Running on {dataset}")
    # Each call returns the objective value and the time taken, among other values
    opt_val_ra, _, time_taken_ra, *rest = ra(
        ra_inputs['pairs'],
        ra_inputs['altruistic_donors'],
        ra_inputs['edges'],
        noisy=0,
    )
    opt_val_pctsp, _, _, time_taken_pctsp = pctsp(
        pctsp_inputs['G'],
        pctsp_inputs['pairs'],
        pctsp_inputs['altruistic_donors'],
        pctsp_inputs['nodes'],
        pctsp_inputs['edges'],
        pctsp_inputs['all_cycles'],
        noisy=0,
    )

    results.loc[row_mask, ['RA', 'PCTSP', 'Objective Value RA', 'Objective Value PCTSP']] = [time_taken_ra, time_taken_pctsp, opt_val_ra, opt_val_pctsp]

    # Save after every dataset so that a failure on a later dataset does not
    # discard the runs that already finished
    results.to_csv(results_file, index=False)

# Save the updated results to the CSV file
results.to_csv(results_file, index=False)

Skipping Dataset1 as it has already been processed.
Skipping Dataset10 as it has already been processed.
Skipping Dataset11 as it has already been processed.
Skipping Dataset12 as it has already been processed.
Skipping Dataset13 as it has already been processed.
Skipping Dataset14 as it has already been processed.
Skipping Dataset15 as it has already been processed.
Skipping Dataset19 as it has already been processed.
Skipping Dataset2 as it has already been processed.
Skipping Dataset21 as it has already been processed.
Skipping Dataset23 as it has already been processed.
Skipping Dataset24 as it has already been processed.
Skipping Dataset4 as it has already been processed.
Skipping Dataset6 as it has already been processed.
Skipping Dataset8 as it has already been processed.


In [None]:
# Run this if you get new results from the benchmarking cell above.
# Writes the in-memory `results` table to `results_file` as CSV,
# leaving out the DataFrame index column.
results.to_csv(results_file, index=False)