In [12]:
from importlist import *

In [13]:
# Example usage:

# points_count = 100
# to_plot = False
# noise = 0.1
# dataset_types = ["circles", "moons", "blobs"]
# dataset_type = dataset_types[2]
# dataset_sizes = [100, 500, 1000, 3000, 5000, 10000, 15000,20000, 30000 , 50000]
# dataset_types = ["circles", "moons", "blobs"]
# noise = 0.1
# no_centres = 1
# filename = "mst_results.json"

In [14]:
import json
import time as tm
from tabulate import tabulate


# Function to append results to JSON file
def append_results_to_json(result, filename):
    with open(filename, "a") as f:
        json.dump(result, f, indent=4)
        f.write("\n")  # Add a newline to separate entries


# Function to generate and evaluate MST algorithms
def evaluate_mst_algorithms(
    dataset_type, points_count, noise, no_centres, filename, runs=1
):
    all_results = []
    for run in range(runs):
        points = generate_dataset(
            dataset_type=dataset_type,
            points_count=points_count,
            noise=noise,
            no_centres=no_centres,
            to_plot=False,
        )
        mst_builder = MST(points)

        results = {
            "dataset_type": dataset_type,
            "points_count": points_count,
            "no_centres": no_centres,
            "run": run + 1,
        }

        # Measure runtime for K-MSTree
        start_time = tm.time()
        mst_weight, edge_count, final_graph = mst_builder.apply_mst(
            algorithm="kmistree", to_plot=False
        )
        end_time = tm.time()
        kmistree_runtime = end_time - start_time
        results["kmistree"] = {
            "weight": mst_weight,
            "edge_count": edge_count,
            "runtime": kmistree_runtime,
        }

        # Measure runtime for K-MST
        start_time = tm.time()
        mst_weight, edge_count, final_graph = mst_builder.apply_mst(
            algorithm="kmist", to_plot=False
        )
        end_time = tm.time()
        kmist_runtime = end_time - start_time
        results["kmist"] = {
            "weight": mst_weight,
            "edge_count": edge_count,
            "runtime": kmist_runtime,
        }

        # Measure runtime for Prim's MST
        start_time = tm.time()
        prim_weight, prim_edge_count, final_graph = mst_builder.apply_mst(
            algorithm="prim", to_plot=False
        )
        end_time = tm.time()
        prim_runtime = end_time - start_time
        results["prim"] = {
            "weight": prim_weight,
            "edge_count": prim_edge_count,
            "runtime": prim_runtime,
        }

        # Measure runtime for FMST
        start_time = tm.time()
        mst_weight, edge_count, final_graph = mst_builder.apply_mst(
            algorithm="fmst", to_plot=False
        )
        end_time = tm.time()
        fmst_runtime = end_time - start_time
        results["fmst"] = {
            "weight": mst_weight,
            "edge_count": edge_count,
            "runtime": fmst_runtime,
        }

        # Calculate errors compared to Prim's MST
        for key in ["kmistree", "kmist", "fmst"]:
            results[key]["weight_error"] = abs(
                results[key]["weight"] - results["prim"]["weight"]
            )
            results[key]["edge_count_error"] = (
                results["prim"]["edge_count"] / results[key]["edge_count"]
            )
            results[key]["runtime_error"] = (
                results["prim"]["runtime"] / results[key]["runtime"]
            )

        all_results.append(results)
        append_results_to_json(results, filename)

    return all_results


# Parameters
dataset_type = "blobs"
points_count = 10000
noise = 0.1
no_centres_list = [1, 2,  5 ,10 , 20 , 50 , 100 ,200 ,  500 ,  1000 ,2000 ,  5000 , 10000]  # Different number of centers
filename = "mst_results_10000_blobs.json"

# Run experiments for different numbers of centers
all_experiment_results = []
for no_centres in no_centres_list:
    experiment_results = evaluate_mst_algorithms(
        dataset_type, points_count, noise, no_centres, filename
    )
    all_experiment_results.extend(experiment_results)

# Prepare data for tabulate
table = []
headers = [
    "Dataset Type",
    "Points Count",
    "No Centres",
    "Run",
    "Algorithm",
    "Weight",
    "Edge Count",
    "Runtime (s)",
    "Weight Error",
    "Edge Count Ratio",
    "Runtime Ratio",
]

for result in all_experiment_results:
    for key, value in result.items():
        if key not in ["dataset_type", "points_count", "no_centres", "run"]:
            row = [
                result["dataset_type"],
                result["points_count"],
                result["no_centres"],
                result["run"],
                key,
                value["weight"],
                value["edge_count"],
                value["runtime"],
                value.get("weight_error", 0),
                value.get("edge_count_error", 0),
                value.get("runtime_error", 0),
            ]
            table.append(row)

# Print results using tabulate
print(tabulate(table, headers, tablefmt="grid"))

# Print results for verification
print(json.dumps(all_experiment_results, indent=4))


# Function to load results from JSON file
def load_results_from_json(filename):
    with open(filename, "r") as f:
        results = [json.loads(line.strip()) for line in f]
    return results


# Load results from JSON file
results = load_results_from_json(filename)

# Prepare data for tabulate
table = []
headers = [
    "Dataset Type",
    "Points Count",
    "No Centres",
    "Run",
    "Algorithm",
    "Weight",
    "Edge Count",
    "Runtime (s)",
    "Weight Error",
    "Edge Count Ratio",
    "Runtime Ratio",
]

for result in results:
    for key, value in result.items():
        if key not in ["dataset_type", "points_count", "no_centres", "run"]:
            row = [
                result["dataset_type"],
                result["points_count"],
                result["no_centres"],
                result["run"],
                key,
                value["weight"],
                value["edge_count"],
                value["runtime"],
                value.get("weight_error", 0),
                value.get("edge_count_error", 0),
                value.get("runtime_error", 0),
            ]
            table.append(row)

# Print results using tabulate
print(tabulate(table, headers, tablefmt="grid"))

+----------------+----------------+--------------+-------+-------------+----------+--------------+---------------+----------------+--------------------+-----------------+
| Dataset Type   |   Points Count |   No Centres |   Run | Algorithm   |   Weight |   Edge Count |   Runtime (s) |   Weight Error |   Edge Count Ratio |   Runtime Ratio |
| blobs          |          10000 |            1 |     1 | kmistree    |  313.54  |        29938 |      0.90052  |         0.02   |           3340.24  |        154.24   |
+----------------+----------------+--------------+-------+-------------+----------+--------------+---------------+----------------+--------------------+-----------------+
| blobs          |          10000 |            1 |     1 | kmist       |  314.12  |        25935 |      1.01455  |         0.6    |           3855.79  |        136.904  |
+----------------+----------------+--------------+-------+-------------+----------+--------------+---------------+----------------+--------------

JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)