In [49]:
import glob
import re
import math

In [64]:
# Fleet sizes: transform_dataset() writes one MSOP file per entry for every source dataset.
vehicle_numbers = [2, 3, 4]
# T-tags iterated by create_datasets(); each one selects the source files matching "*T<value>*".
t_values = [60, 80, 100]
# Output folder of the generated files; create_msop_dataset() appends "/" + <name> + ".msop".
msop_folder_path = "./MSOP_datasets/over_100/"
# Alternative output folder, currently disabled:
#msop_folder_path = "./MSOP_datasets/all/"
# Folder holding the source SOP datasets; the dataset-generation cells glob this location for input files.
sop_folder_path = "./SOP_non_random_datasets/"
# Text appended to the source COMMENT line of every generated MSOP file.
msop_comment_add = "SOP::[From Archetti, Carrabs, Cerulli. The set orienteering problem. (2018).]"

In [51]:
def transform_dataset(dataset_path, t_value):
    """Turn one SOP dataset file into one MSOP file per configured fleet size.

    The source file is split into its sections once, the sections are
    validated, and the result is reused for every entry of the module-level
    ``vehicle_numbers`` list.

    Args:
        dataset_path: Path of the SOP dataset file to read.
        t_value: Time-budget percentage forwarded to ``create_msop_dataset``.

    Raises:
        Exception: Propagated from ``split_sop_file`` / ``check_lines`` when
            the source file does not have the expected section layout.
    """
    parsed_sections = SOPDatasetLines()
    split_sop_file(dataset_path, parsed_sections)
    parsed_sections.check_lines()

    for fleet_size in vehicle_numbers:
        create_msop_dataset(dataset_path, fleet_size, parsed_sections, t_value)

In [62]:
def create_msop_dataset(dataset_path, vehicle_number, sop_lines, t_value):
    """Write one MSOP dataset file derived from an already parsed SOP dataset.

    The output keeps the SOP sections unchanged except for three things: the
    NAME line carries the MSOP name, a VEHICLES line is inserted after it, and
    TMAX is replaced by the per-vehicle budget. The module-level
    ``msop_comment_add`` text is appended to the source COMMENT line.

    Args:
        dataset_path: Path of the source SOP file; only used to derive the
            output name via ``transform_name``.
        vehicle_number: Number of vehicles the budget is divided among.
        sop_lines: Object exposing the ``*_line`` attributes filled by
            ``split_sop_file`` (comment, type, dimension, tmax, start_set,
            end_set, sets, edge_weight_type, node_coord_section,
            gtsp_set_section).
        t_value: Budget percentage. For 60, 80 and 100 the source TMAX is only
            divided by ``vehicle_number``; for any other value it is
            additionally scaled by ``t_value / 100``.

    Raises:
        ValueError: If the last token of the TMAX line is not an integer.
        OSError: If the output file cannot be opened for writing.

    NOTE(review): the output path depends only on ``dataset_path`` and
    ``vehicle_number``, so calls that differ only in ``t_value`` write to the
    same file and the later call replaces the earlier one.
    """
    msop_dataset_name = transform_name(dataset_path, vehicle_number)

    # === COMMENT ===
    # Drop the single trailing newline (only if it is really there) so the
    # extra text lands on the same line as the original comment.
    source_comment = sop_lines.comment_line
    if source_comment.endswith("\n"):
        source_comment = source_comment[:-1]
    msop_comment_line = " ".join([source_comment, msop_comment_add]) + "\n"

    # === TMAX ===
    # The budget is the last whitespace-separated token of the TMAX line;
    # split() without arguments tolerates trailing or repeated whitespace.
    sop_tmax = int(sop_lines.tmax_line.split()[-1])
    if t_value in (60, 80, 100):
        msop_tmax = math.ceil(sop_tmax / vehicle_number)
    else:
        msop_tmax = math.ceil(sop_tmax / vehicle_number * t_value / 100)

    # Everything is assembled before the file is opened, so a parsing error
    # above cannot leave a half-written output file behind.
    msop_parts = [
        "NAME: " + msop_dataset_name + "\n",
        "VEHICLES: " + str(vehicle_number) + "\n",
        msop_comment_line,
        sop_lines.type_line,
        sop_lines.dimension_line,
        "TMAX: " + str(msop_tmax) + "\n",
        sop_lines.start_set_line,
        sop_lines.end_set_line,
        sop_lines.sets_line,
        sop_lines.edge_weight_type_line,
        sop_lines.node_coord_section_line,
        sop_lines.gtsp_set_section_line,
    ]

    # The context manager closes the file even if writing fails.
    with open(msop_folder_path + "/" + msop_dataset_name + ".msop", "w") as msop_f:
        msop_f.writelines(msop_parts)

In [61]:
def get_text_till_substring(text, delimeters):
    """Cut ``text`` in front of the first occurrence of a delimiter.

    Args:
        text: The string to cut.
        delimeters: A regular-expression pattern, or a list of patterns that
            are combined as alternatives with ``|``.

    Returns:
        A ``(head, tail)`` tuple. ``head`` is everything before the first
        match and ``tail`` is the rest, starting with the matched delimiter.
        When nothing matches, ``head`` is the whole text and ``tail`` is
        empty.
    """
    pattern = "|".join(delimeters) if isinstance(delimeters, list) else delimeters
    first_hit = re.search(pattern, text)
    cut = len(text) if first_hit is None else first_hit.start()
    return text[:cut], text[cut:]

In [54]:
def transform_name(dataset_path, vehicle_number):
    """Build the MSOP dataset name from a SOP dataset path.

    The file name is expected to look like ``<instance>_<time>_<profit>.<ext>``;
    the result is ``<instance>_<time>_<profit>_v<vehicle_number>``.

    Args:
        dataset_path: Path of the SOP file. Both ``/`` and ``\\`` are accepted
            as separators, with any number of directory components.
        vehicle_number: Number of vehicles, appended as the ``v<N>`` suffix.

    Returns:
        The MSOP dataset name without directory or file extension.

    Raises:
        ValueError: If the file name has fewer than three ``_``-separated parts.
    """
    # Normalise separators so the result does not depend on the platform that
    # produced the path, then keep only the last path component.
    file_name = dataset_path.replace("\\", "/").rsplit("/", 1)[-1]
    # Strip the extension, whatever its length.
    dataset_name = file_name.rsplit(".", 1)[0]
    # Split from the right so an instance name containing "_" stays intact.
    gtsp_dataset_name, time_pct, profit_type = dataset_name.rsplit("_", 2)
    return "_".join([gtsp_dataset_name, time_pct, profit_type, "v" + str(vehicle_number)])

In [55]:
class SOPDatasetLines:
    """Holder for the text sections of one SOP dataset file.

    The ``*_line`` attributes are not created here; they are assigned from
    outside (in this file by ``split_sop_file``) before ``check_lines`` is
    called.
    """

    def __init__(self):
        pass

    def check_lines(self):
        """Verify that every section starts with its expected keyword.

        Sections are checked in a fixed order and the first mismatch aborts
        the check.

        Raises:
            Exception: With a message naming the first offending section.
            AttributeError: If a section attribute has not been assigned.
        """
        # (attribute, required prefix, error message), in checking order.
        expectations = (
            ("name_line", "NAME", "Name line error."),
            ("type_line", "TYPE", "Type line error."),
            ("comment_line", "COMMENT", "Comment line error."),
            ("dimension_line", "DIMENSION", "Dimension line error."),
            ("tmax_line", "TMAX", "Tmax line error."),
            ("start_set_line", "START_SET", "Start set line error."),
            ("end_set_line", "END_SET", "End set line error."),
            ("sets_line", "SETS", "Sets line error."),
            ("edge_weight_type_line", "EDGE_WEIGHT_TYPE", "Edge weight type line error."),
            ("node_coord_section_line", "NODE_COORD_SECTION", "Node coord section line error."),
            ("gtsp_set_section_line", "GTSP_SET_SECTION", "GTSP set section line error."),
        )
        for attribute, keyword, message in expectations:
            if not getattr(self, attribute).startswith(keyword):
                raise Exception(message)

In [56]:
def split_sop_file(dataset_path, sop_lines):
    """Read an SOP dataset file and store its sections on ``sop_lines``.

    The file text is cut in front of each section keyword, in a fixed order:
    NAME, then TYPE and COMMENT (either order), DIMENSION, TMAX, START_SET,
    END_SET, SETS, EDGE_WEIGHT_TYPE, NODE_COORD_SECTION and GTSP_SET_SECTION.
    Each piece is stored on the matching ``*_line`` attribute and starts with
    its own keyword. ``name_line`` is the exception: it receives whatever
    precedes the first TYPE/COMMENT keyword.

    Keywords are located by ``get_text_till_substring``, which finds the first
    occurrence anywhere in the remaining text, not only at a line start.

    Args:
        dataset_path: Path of the SOP file to read.
        sop_lines: Object that receives the ``*_line`` attributes.

    Raises:
        Exception: If neither TYPE nor COMMENT follows the name section.
        OSError: If the file cannot be opened.
    """
    # Read everything up front; the context manager closes the file even if
    # reading fails.
    with open(dataset_path, "r") as sop_f:
        remained_text = sop_f.read()

    # === NAME ===
    sop_lines.name_line, remained_text = get_text_till_substring(remained_text, ["TYPE", "COMMENT"])

    # === TYPE, COMMENT === (accepted in either order)
    if remained_text.startswith("TYPE"):
        header_sections = [("type_line", "COMMENT"), ("comment_line", "DIMENSION")]
    elif remained_text.startswith("COMMENT"):
        header_sections = [("comment_line", "TYPE"), ("type_line", "DIMENSION")]
    else:
        raise Exception("Neither type nor comment in the second line.")

    # (attribute to fill, keyword that starts the NEXT section)
    sections = header_sections + [
        ("dimension_line", "TMAX"),
        ("tmax_line", "START_SET"),
        ("start_set_line", "END_SET"),
        ("end_set_line", "SETS"),
        ("sets_line", "EDGE_WEIGHT_TYPE"),
        ("edge_weight_type_line", "NODE_COORD_SECTION"),
        ("node_coord_section_line", "GTSP_SET_SECTION"),
    ]
    for attribute, next_keyword in sections:
        section_text, remained_text = get_text_till_substring(remained_text, next_keyword)
        setattr(sop_lines, attribute, section_text)

    # === GTSP_SET_SECTION === (everything that is left)
    sop_lines.gtsp_set_section_line = remained_text

In [57]:
def create_datasets():
    """Generate MSOP datasets for every T-tag listed in ``t_values``.

    For each value, every file in ``sop_folder_path`` whose name contains
    ``T<value>`` is passed to ``transform_dataset`` together with that value.
    """
    for t_value in t_values:
        # Build the pattern from the configured source folder instead of
        # repeating the path, so changing sop_folder_path takes effect here.
        pattern = f"{sop_folder_path}*T{t_value}*"
        for sop_dataset in glob.glob(pattern):
            transform_dataset(sop_dataset, t_value)

In [65]:
# Budget percentages above 100 that are derived from the T100 source datasets.
over_100_t_values = [120, 140, 160, 180, 200]
def create_datasets_over_100():
    """Generate MSOP datasets for the budgets in ``over_100_t_values``.

    Every file in ``sop_folder_path`` whose name contains ``T100`` is passed
    to ``transform_dataset`` once per budget value.

    NOTE(review): the output file name built downstream depends only on the
    source path and the vehicle number, not on ``t_value``. Each pass of the
    outer loop therefore appears to overwrite the files written by the
    previous pass -- confirm whether the budget should be part of the output
    name or folder.
    """
    # The source file list does not depend on t_value, so it is collected
    # once, from the configured source folder.
    sop_datasets_to_transform = glob.glob(sop_folder_path + "*T100*")
    for t_value in over_100_t_values:
        for sop_dataset in sop_datasets_to_transform:
            transform_dataset(sop_dataset, t_value)

In [20]:
# Generate the MSOP files for the budgets listed in over_100_t_values.
# NOTE(review): the AttributeError output recorded below this cell mentions
# str.removeprefix, which the current get_text_till_substring only has as a
# commented-out line -- the traceback looks stale; re-run to confirm.
create_datasets_over_100()

AttributeError: 'str' object has no attribute 'removeprefix'

In [63]:
# Generate the MSOP files for every T-tag in t_values (set to [60, 80, 100] in the configuration cell).
create_datasets()