# Normalize Dataset

If the generated path dataset is not already normalized, you can normalize it with this notebook. Normalizing the data once, before training the diffusion model, reduces training time; normalizing on the fly in the data loader slows training down.

In [13]:
import os
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt

In [3]:
# Switch to the repository root: the `PathPlanning/...` paths used below are relative to it.
# NOTE(review): the recorded output shows this failing with Errno 2 when the kernel is
# already inside the repository — confirm the intended starting directory.
%cd Diffusion_Planning

[Errno 2] No such file or directory: 'Diffusion_Planning'
/home/jason/Desktop/Jason/Diffusion/Diffusion_Planning/Modeling


In [11]:
# List the working directory to check that `PathPlanning` is present before normalizing.
!ls

Captures     Modeling	   README.md	     Results
Checkpoints  PathPlanning  requirements.txt


In [12]:
def _load_map_size(map_dir, map_id):
    """Read 'map_<map_id>.json' from map_dir and return its (width, height) in cells."""
    map_path = os.path.join(map_dir, f'map_{map_id}.json')
    with open(map_path, 'r') as f:
        map_data = json.load(f)

    # The map is a list of rows: its length is the height, a row's length the width.
    map_height = len(map_data)
    map_width = len(map_data[0]) if map_height > 0 else 0
    if map_width == 0 or map_height == 0:
        # Dividing by a zero extent would silently write inf/NaN coordinates.
        raise ValueError(f"Map file '{map_path}' is empty; cannot normalize against it.")
    return map_width, map_height


def normalize_and_save(csv_file, map_dir, output_csv, single_map=False):
    """
    Normalize path coordinates and save to a new CSV file, retaining all other columns.

    Every (x, y) point is shifted by the map centre (width // 2, height // 2) and
    divided by half the map extent (width / 2, height / 2), so points inside the map
    end up in roughly [-1, 1] on both axes. Rows are written grouped by 'pair_id'
    in order of first appearance; columns keep the order of the input CSV.

    Args:
    - csv_file (str): Path to the input CSV file. Must contain 'pair_id', 'x' and 'y'.
    - map_dir (str): Directory containing the map files, named 'map_<id>.json'.
      Each file is read as a JSON list of rows.
    - output_csv (str): Path to the output CSV file.
    - single_map (bool): If True, every path is normalized against 'map_1.json';
      otherwise each pair_id uses its own 'map_<pair_id>.json'.

    Raises:
    - ValueError: If a map file has no rows or no columns.
    """
    # Load the dataset
    path_data = pd.read_csv(csv_file)

    map_sizes = {}  # map id -> (width, height), so each map file is read only once
    normalized_groups = []

    for pair_id, pair_rows in path_data.groupby('pair_id', sort=False):
        # Keep the map id separate from pair_id: the rows must still be selected by
        # their own pair_id even when all paths share map 1.
        map_id = 1 if single_map else pair_id
        if map_id not in map_sizes:
            map_sizes[map_id] = _load_map_size(map_dir, map_id)
        map_width, map_height = map_sizes[map_id]
        center_x, center_y = map_width // 2, map_height // 2

        # Vectorised normalization; assign() returns a new frame and leaves every
        # other column untouched.
        normalized_groups.append(
            pair_rows.assign(
                x=(pair_rows['x'] - center_x) / (map_width / 2),
                y=(pair_rows['y'] - center_y) / (map_height / 2),
            )
        )

    if normalized_groups:
        normalized_df = pd.concat(normalized_groups, ignore_index=True)
    else:
        # No rows to normalize: still emit the header so the output is a valid CSV.
        normalized_df = path_data.copy()

    # Save to a new CSV
    normalized_df.to_csv(output_csv, index=False)

# Number of the dataset folder to process (PathPlanning/dataset_<dataset>).
dataset = 4

# Read the raw paths, normalize them against the map data, and write the result
# next to the input as normalized_path_data.csv.
normalize_and_save(
    f"PathPlanning/dataset_{dataset}/path_data/path_data.csv",
    f"PathPlanning/dataset_{dataset}/map_data",
    f"PathPlanning/dataset_{dataset}/path_data/normalized_path_data.csv",
    single_map=True,  # all paths share one map rather than one map per path
)

After running the cell above, you should see a `normalized_path_data.csv` file in the dataset's `path_data` directory that you can use when training your diffusion model.