# TSP Data Fix Verification

This notebook verifies the fix for `load_tsp_matrix` and re-calculates path costs to determine if previous results are valid (after removing the phantom "City 0").

In [1]:
import os
import sys
import pandas as pd
import numpy as np

# Add project root to sys.path
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, "../../"))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.utils.tsp_loader import load_tsp_matrix, load_tsp_matrix_broken
from src.utils.distance import route_length_fast

## 1. Load Data with New Loader
We load `Dane_TSP_48.xlsx` using the fixed loader. If correct, the shape should be (48, 48).

In [2]:
filename = "Dane_TSP_48.xlsx"
matrix = load_tsp_matrix(filename)
print(f"Loaded matrix shape: {matrix.shape}")
assert matrix.shape == (48, 48), f"Expected (48, 48), got {matrix.shape}"

Loaded matrix shape: (48, 48)


## 2. Load Old Results
We load the results CSV containing paths with "City 0".

In [4]:
# results_path = os.path.join(project_root, "results/GRASP/2025-11-17__06-27__35633_sec__results.csv")
results_path = os.path.join(project_root, "results_new/SA/2025-12-25__10-31__3942_sec__results.csv")
df_results = pd.read_csv(results_path)

# Filter for the relevant instance
df_48 = df_results[df_results['instance'] == filename].copy()

# Get top 5 best results
df_top5 = df_48.sort_values("min_cost").head(100)
df_top5[['instance', 'min_cost', 'min_route']]

Unnamed: 0,instance,min_cost,min_route
0,Dane_TSP_48.xlsx,10628.0,6-17-43-30-37-7-0-8-39-14-11-10-12-24-13-22-2-...
1,Dane_TSP_48.xlsx,10628.0,9-23-41-4-47-38-31-20-46-19-32-45-35-29-42-16-...
2,Dane_TSP_48.xlsx,10628.0,4-47-38-31-20-46-19-32-45-35-29-42-16-26-18-36...
3,Dane_TSP_48.xlsx,10628.0,21-15-40-33-28-1-25-3-34-44-9-23-41-4-47-38-31...
4,Dane_TSP_48.xlsx,10638.0,11-10-12-24-13-22-2-21-15-40-33-28-1-25-3-34-4...
...,...,...,...
95,Dane_TSP_48.xlsx,10807.0,29-35-45-14-11-32-19-46-20-31-38-47-4-41-23-9-...
96,Dane_TSP_48.xlsx,10809.0,45-35-29-42-16-26-18-36-5-27-6-17-43-30-37-7-0...
97,Dane_TSP_48.xlsx,10811.0,10-39-2-21-15-0-7-8-37-30-43-17-6-27-5-36-18-2...
98,Dane_TSP_48.xlsx,10812.0,47-4-28-1-41-25-3-34-44-9-23-31-38-20-12-24-13...


## 3. Re-calculate Costs
For each of the top 5 paths:
1. Parse the route string.
2. Remove '0'.
3. Shift indices: `new_id = old_id - 1` (assuming 0 was the header/index 0, and 1 was the first city).
4. Compute cost on the newly loaded matrix.
5. Compare.

In [5]:
print(f"{'Original Cost':<15} | {'New Cost':<15} | {'Difference':<15} | {'Valid?':<10}")
print("-" * 65)

for idx, row in df_top5.iterrows():
    original_cost = row['min_cost']
    route_str = row['min_route']
    
    # Parse route
    route = [int(x) for x in route_str.split('-')]
    
    # Remove 0
    route_no_zero = [x for x in route if x != 0]
    
    # Adjust indices: x - 1
    # If the original file had headers, and we read them as data:
    # Header row -> index 0 (which has 0 distance to self, but potentially non-zero to others if read incorrectly)
    # City 1 -> index 1
    # City 2 -> index 2
    # ...
    # So if we strip the first row/col, Old Index 1 becomes New Index 0.
    # So transformation is: new_city = old_city - 1
    
    # Verify if indices are within range
    # Old max index should be 48 (which corresponds to New Index 47).
    # If old results contain '48', then yes.
    
    new_route = np.array([x - 1 for x in route_no_zero], dtype=np.int32)
    
    # Calculate new cost
    new_cost = route_length_fast(matrix, new_route)
    
    diff = abs(original_cost - new_cost)
    is_valid = diff < 1e-6  # Allow small float diff
    
    print(f"{original_cost:<15.2f} | {new_cost:<15.2f} | {diff:<15.2f} | {is_valid}")

Original Cost   | New Cost        | Difference      | Valid?    
-----------------------------------------------------------------
10628.00        | 37767.00        | 27139.00        | False
10628.00        | 37767.00        | 27139.00        | False
10628.00        | 37767.00        | 27139.00        | False
10628.00        | 37767.00        | 27139.00        | False
10638.00        | 38576.00        | 27938.00        | False
10638.00        | 38576.00        | 27938.00        | False
10648.00        | 37476.00        | 26828.00        | False
10648.00        | 37476.00        | 26828.00        | False
10648.00        | 37476.00        | 26828.00        | False
10648.00        | 37476.00        | 26828.00        | False
10653.00        | 38601.00        | 27948.00        | False
10653.00        | 37646.00        | 26993.00        | False
10653.00        | 38601.00        | 27948.00        | False
10658.00        | 38285.00        | 27627.00        | False
10663.00        | 38455.00   