In [None]:
import json
from os import listdir
from pathlib import Path

In [None]:
def open_sm_file(sm_file_path):
    """Read a text file and return its content as a list of lines.

    Parameters
    ----------
    sm_file_path : str or path-like
        Location of the file to read.

    Returns
    -------
    list of str
        The file content split with ``str.splitlines`` (line terminators
        are not included in the returned strings).
    """
    with open(sm_file_path, "r") as handle:
        return handle.read().splitlines()

In [None]:
def get_resource_max(data):
    """Extract the resource availabilities from the lines of an .sm file.

    The values are taken from the line located two positions below the
    line that is exactly ``"RESOURCEAVAILABILITIES:"`` (the line in
    between is skipped).

    Parameters
    ----------
    data : list of str
        Lines of the file.

    Returns
    -------
    list of int
        The whitespace-separated integers found on the values line.

    Raises
    ------
    ValueError
        If the header line is missing or a value is not an integer.
    IndexError
        If there are fewer than two lines after the header.
    """
    header_position = data.index("RESOURCEAVAILABILITIES:")
    values_line = data[header_position + 2]
    return list(map(int, values_line.split()))

In [None]:
def get_n_jobs_full(data):
    """Return the job count declared in the header of an .sm file.

    The count is the last whitespace-separated token of the first line
    that starts with ``"jobs (incl. supersource/sink ):"``.

    Parameters
    ----------
    data : list of str
        Lines of the file.

    Returns
    -------
    int
        The declared number of jobs.

    Raises
    ------
    ValueError
        If no line starts with the expected prefix, or if the last token
        of the matching line is not an integer.
    """
    prefix = "jobs (incl. supersource/sink ):"
    for line in data:
        if line.startswith(prefix):
            return int(line.split()[-1])
    # Without this explicit failure the function used to fall through and
    # parse whatever the last line of the file happened to be.
    raise ValueError(f"no line starting with {prefix!r} found")

In [None]:
def get_graph_edges(data, n_jobs_full):
    """Build the list of precedence edges from the lines of an .sm file.

    The table starts two lines below the line that is exactly
    ``"PRECEDENCE RELATIONS:"`` and ``n_jobs_full`` rows are read. In each
    row the first integer is the parent job and every integer from the
    fourth column onwards is one of its successors; the second and third
    columns are ignored.

    Parameters
    ----------
    data : list of str
        Lines of the file.
    n_jobs_full : int
        Number of table rows to read.

    Returns
    -------
    list of tuple of (str, str)
        ``(parent, successor)`` pairs in file order, with jobs labelled
        ``"#<number>"``.
    """
    first_row = data.index("PRECEDENCE RELATIONS:") + 2
    table_lines = data[first_row:first_row + n_jobs_full]

    # Going through int() validates every token and normalises the label.
    labelled_rows = [
        [f"#{int(token)}" for token in line.split()]
        for line in table_lines
    ]

    graph_edges = []
    for labels in labelled_rows:
        parent = labels[0]
        graph_edges.extend((parent, child) for child in labels[3:])
    return graph_edges

In [None]:
def get_requests(data, n_jobs_full):
    """Read per-job durations and resource requests from an .sm file.

    The table starts three lines below the line that is exactly
    ``"REQUESTS/DURATIONS:"`` and ``n_jobs_full`` rows are read. In each
    row the first integer is the job number, the third is the duration and
    everything from the fourth column onwards is the resource request; the
    second column is ignored.

    Parameters
    ----------
    data : list of str
        Lines of the file.
    n_jobs_full : int
        Number of table rows to read.

    Returns
    -------
    tuple of (dict, dict)
        ``(durations, resources)``, both keyed by ``"#<job number>"``;
        durations map to an int, resources to a list of int.
    """
    first_row = data.index("REQUESTS/DURATIONS:") + 3
    table = [
        [int(cell) for cell in line.split()]
        for line in data[first_row:first_row + n_jobs_full]
    ]

    durations = {}
    resources = {}
    for record in table:
        job_name = f"#{record[0]}"
        durations[job_name] = record[2]
        resources[job_name] = record[3:]
    return (durations, resources)

In [None]:
def parse_sm_file(from_folder, file, to_folder):
    """Convert one .sm file to JSON and return the parsed content.

    The file ``from_folder / file`` is read and the result is written to
    ``to_folder`` under the same name with its last extension replaced by
    ``.json``. The output folder is created if it does not exist.

    Parameters
    ----------
    from_folder : str or path-like
        Folder containing the input file.
    file : str or path-like
        Name of the input file, relative to ``from_folder``.
    to_folder : str or path-like
        Folder receiving the JSON file.

    Returns
    -------
    dict
        Keys ``"n_jobs_full"``, ``"n_resources"``, ``"resource_pool"`` and
        ``"jobs"``. Each job is a dict with ``name``, ``duration``,
        ``resources``, ``successors`` and ``predecessors``.

    Raises
    ------
    AssertionError
        If a job's resource request does not have one entry per resource.
    """
    data = open_sm_file(Path(from_folder) / file)

    n_jobs_full = get_n_jobs_full(data)
    resource_max = get_resource_max(data)
    n_resources = len(resource_max)

    graph_edges = get_graph_edges(data, n_jobs_full)
    durations, resources = get_requests(data, n_jobs_full)

    # Index the edges once per direction instead of rescanning the whole
    # edge list for every job; edge order is preserved inside each list.
    successors_of = {}
    predecessors_of = {}
    for parent, child in graph_edges:
        successors_of.setdefault(parent, []).append(child)
        predecessors_of.setdefault(child, []).append(parent)

    jobs = []
    for i in range(1, n_jobs_full + 1):
        name = f"#{i}"
        job_data = dict(
            name=name,
            duration=durations[name],
            resources=resources[name],
            successors=successors_of.get(name, []),
            predecessors=predecessors_of.get(name, []),
        )
        jobs.append(job_data)

    assert len(jobs) == n_jobs_full
    assert all(len(r) == n_resources for r in resources.values())

    result = {}
    result["n_jobs_full"] = n_jobs_full
    result["n_resources"] = n_resources
    result["resource_pool"] = resource_max
    result["jobs"] = jobs

    # Put each job's inner lists on a single line: with indent=2 their
    # items sit at 8 spaces and their closing bracket at 6 spaces.
    json_string = json.dumps(result, indent=2)
    json_string = json_string.replace("\n        ", "")
    json_string = json_string.replace("\n      ]", "]")

    # Replace only the last extension, so names containing extra dots are
    # not truncated (and distinct inputs cannot collapse onto one output).
    save_path = Path(to_folder) / Path(file).with_suffix(".json")
    save_path.parent.mkdir(parents=True, exist_ok=True)
    with open(save_path, "w") as json_file:
        json_file.write(json_string)
    return result

In [None]:
# Convert a single instance file; the returned dict is kept in `s`.
s = parse_sm_file("j120_sm", "j1201_1.sm", "j120_json")

In [None]:
# Convert every .sm file of the source folder to JSON.
from_folder = "j120_sm"
from_filder = from_folder  # original misspelled name, kept in case another cell still uses it
to_folder = "j120_json"

# listdir() returns every directory entry in arbitrary order, so keep only
# the .sm files (skipping e.g. subdirectories or stray files) and sort them
# to make the run deterministic.
files_to_parse = sorted(
    entry for entry in listdir(from_folder) if entry.lower().endswith(".sm")
)
for file_name in files_to_parse:
    parse_sm_file(from_folder, file_name, to_folder)