In [11]:
import os
import numpy as np
from scipy.spatial.distance import cdist

def parse_file(file_path, n=20):
    """Read coordinate instances from a whitespace-separated text file.

    Each line is split on whitespace. If the literal token ``output``
    appears, only the tokens before it are used. A line contributes an
    instance only when the remaining tokens all parse as floats and there
    are exactly ``2 * n`` of them; every other line is skipped silently.

    Args:
        file_path: Path of the text file to read.
        n: Number of nodes expected per instance (default 20).

    Returns:
        A list of numpy arrays, each of shape ``(n, 2)``, in file order.
    """
    expected_len = 2 * n
    parsed = []
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            fields = raw_line.split()
            # Keep only what precedes the "output" marker, if there is one.
            if "output" in fields:
                fields = fields[:fields.index("output")]
            try:
                values = [float(tok) for tok in fields]
            except ValueError:
                # Non-numeric token: this line is not a usable instance.
                continue
            if len(values) != expected_len:
                continue
            parsed.append(np.array(values).reshape((n, 2)))
    return parsed

def parse_file_with_optimal_tour(file_path, n=20):
    """Read (coordinates, tour) pairs from a whitespace-separated text file.

    Only lines containing the literal token ``output`` are considered.
    Tokens before ``output`` are parsed as floats (flattened x/y pairs);
    tokens after it are parsed as integers and shifted by -1, i.e. the
    file's 1-based node labels become 0-based indices.

    A line is kept only when it has exactly ``2 * n`` coordinate values and
    ``n + 1`` tour entries (presumably a closed tour that repeats the start
    node -- confirm against the dataset format). Lines with the wrong
    lengths or with a token that does not parse as a number are skipped,
    so one corrupt line no longer aborts the whole parse; this matches how
    ``parse_file`` treats malformed lines.

    Args:
        file_path: Path of the text file to read.
        n: Number of nodes expected per instance (default 20).

    Returns:
        A list of ``(coords, tour)`` tuples in file order, where ``coords``
        is a numpy array of shape ``(n, 2)`` and ``tour`` is a list of
        0-based ints.
    """
    instances = []
    with open(file_path, 'r') as f:
        for line in f:
            tokens = line.split()
            if "output" not in tokens:
                continue
            idx = tokens.index("output")
            try:
                coords_flat = [float(tok) for tok in tokens[:idx]]
                # Stored tours are 1-indexed; convert to 0-indexed.
                tour = [int(tok) - 1 for tok in tokens[idx + 1:]]
            except ValueError:
                # Malformed numeric token: skip the line instead of crashing.
                continue
            if len(coords_flat) == 2 * n and len(tour) == n + 1:
                coords = np.array(coords_flat).reshape((n, 2))
                instances.append((coords, tour))
    return instances

def save_distance_matrix_with_tour_txt(coords, tour, file_path):
    """Write an instance's pairwise distances and its tour as one text line.

    The file receives every distance ``d(i, j)`` with ``i < j`` in
    row-major order, each followed by a space, then the token ``output``,
    then each tour entry incremented by one (back to 1-based labels) and
    followed by a space, and finally a newline. Any existing file at
    ``file_path`` is overwritten.

    Args:
        coords: 2-D array of node coordinates, one row per node.
        tour: Iterable of 0-based node indices.
        file_path: Destination path of the text file.
    """
    num_nodes = coords.shape[0]
    pairwise = cdist(coords, coords)
    # Strict upper triangle, enumerated row by row: (0,1), (0,2), ..., (n-2,n-1).
    rows, cols = np.triu_indices(num_nodes, k=1)
    upper = pairwise[rows, cols]
    with open(file_path, 'w') as out:
        out.writelines(f"{dist} " for dist in upper)
        out.write("output ")
        # Store node labels 1-indexed again.
        out.writelines(f"{node + 1} " for node in tour)
        out.write("\n")

# Load every instance that comes with a tour from the 20-node dataset file.
input_path = os.path.join("dataset", "hw_2_tsp_20.txt")
instances_with_tour = parse_file_with_optimal_tour(input_path)

# Write each instance to its own file, dataset/20-<k>.txt, numbered from 1.
for i, (coords, tour) in enumerate(instances_with_tour, start=1):
    file_path = os.path.join("dataset", f"20-{i}.txt")
    save_distance_matrix_with_tour_txt(coords, tour, file_path=file_path)

In [1]:
import os
import time
import numpy as np

# Settings
node_counts = [10, 20, 30, 40, 50]
num_samples = 30000
seed = 1234

# Seed NumPy's global RNG once, so the generated data depends on the
# order of `node_counts` as well as on `seed`.
np.random.seed(seed)

for n in node_counts:
    # Make sure the output folder exists.
    inst_dir = f"data/TSP{n}/instances"
    os.makedirs(inst_dir, exist_ok=True)

    start_all = time.time()
    for idx in range(num_samples):
        # 1) Draw random coordinates in the unit square, stored as float32.
        coords = np.random.random((n, 2)).astype(np.float32)

        # 2) File name stem: the sample index zero-padded to 5 digits.
        fname = f"{idx:05d}"
        out_path = os.path.join(inst_dir, f"instance_{fname}.npz")

        # 3) Save only the coordinates.
        np.savez_compressed(out_path, coords=coords)

        # 4) Progress log after every 1000 instances.
        if not (idx + 1) % 1000:
            elapsed = time.time() - start_all
            print(f"TSP{n}: Created {idx+1}/{num_samples} instances, elapsed {elapsed:.1f}s")

    total_time = time.time() - start_all
    print(f"✔ Finished TSP{n}, generated {num_samples} instances in {total_time:.1f}s\n")

TSP10: Created 1000/30000 instances, elapsed 0.9s
TSP10: Created 2000/30000 instances, elapsed 1.8s
TSP10: Created 3000/30000 instances, elapsed 2.6s
TSP10: Created 4000/30000 instances, elapsed 3.5s
TSP10: Created 5000/30000 instances, elapsed 4.3s
TSP10: Created 6000/30000 instances, elapsed 5.0s
TSP10: Created 7000/30000 instances, elapsed 5.5s
TSP10: Created 8000/30000 instances, elapsed 6.0s
TSP10: Created 9000/30000 instances, elapsed 6.6s
TSP10: Created 10000/30000 instances, elapsed 7.1s
TSP10: Created 11000/30000 instances, elapsed 7.6s
TSP10: Created 12000/30000 instances, elapsed 8.2s
TSP10: Created 13000/30000 instances, elapsed 8.8s
TSP10: Created 14000/30000 instances, elapsed 9.3s
TSP10: Created 15000/30000 instances, elapsed 9.8s
TSP10: Created 16000/30000 instances, elapsed 10.3s
TSP10: Created 17000/30000 instances, elapsed 10.8s
TSP10: Created 18000/30000 instances, elapsed 11.3s
TSP10: Created 19000/30000 instances, elapsed 11.9s
TSP10: Created 20000/30000 instances