In [None]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist  # scipy package

def validate_coordinates_format(coordinates):
    """Check that every entry is an "x, y" string made of two numbers.

    Args:
        coordinates: Iterable of strings, each expected to hold exactly two
            comma-separated values that ``float`` can parse.

    Raises:
        ValueError: For the first entry that does not split into exactly two
            parts, or whose parts cannot be converted to float.
    """
    for entry in coordinates:
        try:
            # unpacking into two names enforces "exactly two parts";
            # float() enforces "numbers only" - both failures are ValueError
            _x, _y = (float(piece) for piece in entry.split(","))
        except ValueError:
            raise ValueError(
                f"Invalid coordinate format: {entry}. Ensure it is in 'x, y' format with only numbers."
            )


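# Two csv files are required.
# The first (leading) column of each file must hold the locations, one "x, y"
# string per row (quote the cell so the comma is not read as a column separator).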

def load_coord(file_path):
    """Load "x, y" coordinate strings from the first column of a csv file.

    The file is read with ``pandas.read_csv`` using its defaults, so the first
    line of the file is treated as the header row and is not parsed as a
    coordinate.

    Args:
        file_path: Path of the csv file (anything ``pandas.read_csv`` accepts).

    Returns:
        A numpy array with one ``[x, y]`` row of floats per data row.

    Raises:
        ValueError: If the file has no columns, has no data rows, or contains
            an entry that is not in "x, y" format.
    """
    # load the csv and extract the first column
    df = pd.read_csv(file_path)

    # check if the leading column exists
    if df.shape[1] < 1:
        raise ValueError(f"The file {file_path} does not contain enough columns")

    # extract the first column as a list of coordinate strings
    coordinates = df.iloc[:, 0].astype(str).tolist()

    # an empty column would produce a 1-D empty array that fails later with an
    # unrelated-looking error, so report it here instead
    if not coordinates:
        raise ValueError(f"The file {file_path} does not contain any coordinates")

    # validate coordinates format (raises ValueError on the first bad entry)
    validate_coordinates_format(coordinates)

    # convert to a list of [x, y] and then to a numpy array
    return np.array([list(map(float, coord.split(","))) for coord in coordinates])

# csv files holding the two coordinate sets
file1 = "file1.csv"  # replace with csv file path
file2 = "file2.csv"  # replace with csv file path

try:
    # read both coordinate sets (file1 first, then file2)
    a1, a2 = load_coord(file1), load_coord(file2)

    # pairwise euclidean distances: rows follow a1, columns follow a2
    distance_matrix = cdist(a1, a2)

    print("Distance Matrix:")
    print(distance_matrix)

    # for every a1 point, the column index of its nearest a2 point
    closest_indices = distance_matrix.argmin(axis=1)

    # build one record per a1 point for the csv output
    results = []
    for row, (source_point, nearest) in enumerate(zip(a1, closest_indices)):
        target_point = a2[nearest]
        record = {
            "Point in a1": f"{source_point[0]}, {source_point[1]}",
            "Closest Point in a2": f"{target_point[0]}, {target_point[1]}",
            "Distance": round(distance_matrix[row, nearest], 2),
        }
        results.append(record)
        print(record)

    # write the records to a csv
    results_df = pd.DataFrame(results)
    results_df.to_csv("results.csv", index=False)
    print("Results saved to results.csv")

except ValueError as e:
    # invalid input data is reported instead of producing a traceback
    print(f"Error: {e}")
