In [47]:
import pandas as pd
import QSLomuto, QSHoare, QSDijkstra, QSDoublePivot
import DataGenerator
import numpy as np  
import sys

In [48]:
# Placeholder: start with an empty frame. `df` is rebound to the collected
# experiment results later in the notebook.
df = pd.DataFrame()

In [None]:
# Alternative (disabled) size sweep, kept for reference:
# sizes = [100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000, 25000, 50000, 75000, 10**5, 10**6]

# Quicksort partition-scheme classes under test.
schemes = [
    QSLomuto.QuickSortLomuto,
    QSHoare.QuickSortHoare,
    QSDoublePivot.QuickSortDoublePivot,
    QSDijkstra.QuickSortDijkstra,
]

# Input generators; each is called with the desired size.
data_types = [
    DataGenerator.identical,
    DataGenerator.sorted,
    DataGenerator.random_data,
    DataGenerator.almost_sorted,
    DataGenerator.reverse_order,
    DataGenerator.triangular,
]

# Pivot strategies passed as `pivot_type` to the Lomuto and Hoare schemes.
pivots = [
    "last",
    "random",
    "median_of_three",
    "median_of_three_random",
]


In [None]:
# Input sizes used for the main comparison of all schemes.
sizes = [
    100, 500, 1000, 5000,
    10000, 15000, 20000, 25000,
]

In [52]:
# Raise the interpreter's recursion ceiling to one million frames.
# NOTE(review): presumably needed because the quicksort implementations
# recurse deeply on worst-case inputs — confirm against the QS* modules.
sys.setrecursionlimit(10 ** 6)

In [None]:
def _run_trials(sorter_cls, data, repeats=3, **sorter_kwargs):
    """Run ``repeats`` sorts of ``data`` and return the aggregated metrics.

    For every trial a new ``sorter_cls(**sorter_kwargs)`` is created and its
    ``qsort`` method is called on ``data.copy()``. The sorter's ``exec_time``
    is printed after each trial as progress output.

    Args:
        sorter_cls: Sorter class to instantiate for each trial.
        data: Input data; only copies of it are handed to ``qsort``.
        repeats: Number of trials to run.
        **sorter_kwargs: Keyword arguments forwarded to ``sorter_cls``.

    Returns:
        dict with the mean, minimum and maximum ``exec_time`` (``time``,
        ``time_min``, ``time_max``) and the mean ``swaps``, ``comparisons``
        and ``memory_used`` truncated to ``int``.
    """
    metrics = {"time": [], "swaps": [], "comparisons": [], "memory_used": []}

    for _ in range(repeats):
        qs = sorter_cls(**sorter_kwargs)
        qs.qsort(data.copy())
        print(qs.exec_time)

        metrics["time"].append(qs.exec_time)
        metrics["swaps"].append(qs.swaps)
        metrics["comparisons"].append(qs.comparisons)
        metrics["memory_used"].append(qs.memory_used)

    return {
        "time": np.mean(metrics["time"]),
        "time_min": min(metrics["time"]),
        "time_max": max(metrics["time"]),
        "swaps": int(np.mean(metrics["swaps"])),
        "comparisons": int(np.mean(metrics["comparisons"])),
        "memory_used": int(np.mean(metrics["memory_used"])),
    }


results = []

for data_gen_func in data_types:
    data_type_name = data_gen_func.__name__
    for size in sizes:
        # One dataset per (generator, size); every scheme sorts a copy of it.
        data = data_gen_func(size)
        for scheme in schemes:
            scheme_name = scheme.__name__

            if scheme_name in ["QuickSortLomuto", "QuickSortHoare"]:
                # These schemes take a pivot strategy, so sweep all of them.
                for pivot in pivots:
                    print(data_type_name, size, scheme_name, pivot)
                    summary = _run_trials(scheme, data, pivot_type=pivot)
                    results.append({
                        "scheme": scheme_name,
                        "pivot": pivot,
                        "data_type": data_type_name,
                        "size": size,
                        **summary,
                    })
            else:
                # The remaining schemes are built without arguments; the
                # pivot column only carries a descriptive label for them.
                pivot_type = "double" if scheme_name == "QuickSortDoublePivot" else "median_of_three_random"
                print(data_type_name, size, scheme_name)
                summary = _run_trials(scheme, data)
                results.append({
                    "scheme": scheme_name,
                    "pivot": pivot_type,
                    "data_type": data_type_name,
                    "size": size,
                    **summary,
                })


identical 15000 QuickSortLomuto last
7.603539943695068
7.546799182891846
7.303220510482788
identical 15000 QuickSortLomuto random
7.31971549987793
7.6051037311553955
7.867713212966919
identical 15000 QuickSortLomuto median_of_three
8.004558801651001
8.23539924621582
7.463460445404053
identical 15000 QuickSortLomuto median_of_three_random
7.35411810874939
7.386388301849365
7.895642042160034
identical 15000 QuickSortHoare last
0.07076621055603027
0.07529187202453613
0.05826210975646973
identical 15000 QuickSortHoare random
0.06589007377624512
0.07387590408325195
0.07742595672607422
identical 15000 QuickSortHoare median_of_three
0.07716679573059082
0.06249809265136719
0.061728477478027344
identical 15000 QuickSortHoare median_of_three_random
0.08958244323730469
0.08328533172607422
0.08031153678894043
identical 15000 QuickSortDoublePivot
5.9496636390686035
5.66797399520874
6.190120697021484
identical 15000 QuickSortDijkstra
0.0009472370147705078
0.0009243488311767578
0.0009207725524902344


Result columns: scheme, pivot, data_type, size, time, time_min, time_max, swaps, comparisons, memory_used

In [54]:
# Tabulate the collected measurements: one row per dict in `results`.
df = pd.DataFrame(results)

In [None]:
# Single source for the output path, so the file that is written and the
# confirmation message below cannot drift apart.
OUTPUT_CSV = "sorting_experiments.csv"

# index=False leaves the DataFrame's row index out of the file.
df.to_csv(OUTPUT_CSV, index=False)

print("Experiment completed and results saved to '{}'".format(OUTPUT_CSV))

Experiment completed and results saved to 'sorting_experiments.csv'


In [60]:
# Configuration for the large-input run, restricted to the Dijkstra scheme.
# NOTE: this rebinds `sizes`, `schemes` and `data_types` from the first
# experiment's configuration.
sizes = [10 ** exponent for exponent in range(5, 9)]  # 10**5 through 10**8
schemes = [QSDijkstra.QuickSortDijkstra]
data_types = [
    DataGenerator.identical,
    DataGenerator.sorted,
    DataGenerator.random_data,
    DataGenerator.almost_sorted,
    DataGenerator.reverse_order,
    DataGenerator.triangular,
]

In [None]:
results_dijkstra = []

# Labels are defined in this cell instead of being read from variables left
# behind by the main experiment loop, so the cell also runs on a fresh kernel.
scheme_name = QSDijkstra.QuickSortDijkstra.__name__
pivot_type = "median_of_three_random"

for data_gen_func in data_types:
    data_type_name = data_gen_func.__name__
    for size in sizes:
        data = data_gen_func(size)
        print(data_type_name, size, scheme_name, pivot_type)
        metrics = {"time": [], "swaps": [], "comparisons": [], "memory_used": []}
        for _ in range(3):
            data_copy = data.copy()
            # A fresh sorter per trial, as in the main experiment, so counters
            # cannot carry over between runs (whether qsort resets them is not
            # visible from this notebook).
            qs = QSDijkstra.QuickSortDijkstra()
            qs.qsort(data_copy)
            print(qs.exec_time)
            metrics["time"].append(qs.exec_time)
            metrics["swaps"].append(qs.swaps)
            metrics["comparisons"].append(qs.comparisons)
            metrics["memory_used"].append(qs.memory_used)

        # One summary row per (data type, size): mean/min/max time and the
        # mean counters truncated to int.
        results_dijkstra.append({
            "scheme": scheme_name,
            "pivot": pivot_type,
            "data_type": data_type_name,
            "size": size,
            "time": np.mean(metrics["time"]),
            "time_min": min(metrics["time"]),
            "time_max": max(metrics["time"]),
            "swaps": int(np.mean(metrics["swaps"])),
            "comparisons": int(np.mean(metrics["comparisons"])),
            "memory_used": int(np.mean(metrics["memory_used"])),
        })


identical 100000 QuickSortDijkstra median_of_three_random
0.008156538009643555
0.013595104217529297
0.02052927017211914
identical 1000000 QuickSortDijkstra median_of_three_random
0.09543204307556152
0.08379888534545898
0.11656737327575684
identical 10000000 QuickSortDijkstra median_of_three_random
1.047454595565796
0.7433209419250488
0.6762449741363525
identical 100000000 QuickSortDijkstra median_of_three_random
7.192096948623657
7.325201034545898
7.2522873878479
sorted 100000 QuickSortDijkstra median_of_three_random
0.7740552425384521
0.5553083419799805
0.6094996929168701
sorted 1000000 QuickSortDijkstra median_of_three_random
6.003878355026245
5.766376495361328
5.8196351528167725
sorted 10000000 QuickSortDijkstra median_of_three_random
74.32002377510071
67.43062114715576
66.28761553764343
sorted 100000000 QuickSortDijkstra median_of_three_random
827.0138649940491
767.2608275413513
746.1222357749939
random_data 100000 QuickSortDijkstra median_of_three_random
0.20147490501403809
0.2045

In [None]:
# Tabulate the large-input measurements: one row per dict in `results_dijkstra`.
df_dijkstra = pd.DataFrame(results_dijkstra)

In [None]:
# Write the large-input results to CSV; index=False leaves the row index out.
df_dijkstra.to_csv("dijkstra_big_numbers.csv", index=False)

In [91]:
# Smoke test: sort ten generated values with the double-pivot scheme and
# verify the result instead of relying on visual inspection alone.
data = DataGenerator.random_data(10)
qs_d = QSDoublePivot.QuickSortDoublePivot()
qs_d.qsort(data)
assert list(data) == sorted(data), "QuickSortDoublePivot left the data unsorted"
data

[206, 206, 267, 325, 484, 654, 662, 674, 748, 929]