In [None]:
from itertools import product
import math
import pandas as pd
from IPython.display import display, Markdown
# Notebook-wide settings: let pandas print up to 999 rows before truncating,
# and name the two data files the cells below read from.
pd.set_option("display.max_rows", 999)
EXAMPLE = "example.txt"
INPUT = "input.TXT"

In [None]:
def printm(in_str):
    """Render the given string as Markdown in the cell output"""
    rendered = Markdown(in_str)
    display(rendered)

In [None]:
def read_points(filename):
    """Load coordinates from a headerless, comma separated text file

    Parameters
    ----------
    filename: str or Path
        file holding one comma separated pair of coordinates per line

    Returns
    -------
    DataFrame
        One row per line of the file, with the first field in column x
        and the second field in column y
    """
    column_names = ["x", "y"]
    # The file has no header row, so the column names are supplied here.
    return pd.read_csv(filename, header=None, names=column_names, sep=",")

In [None]:
def find_corners(points):
    """Bounding box of the points, given as its two extreme corners

    Parameters
    ----------
    points: DataFrame
        Dataframe with columns corresponding to x and y coordinates

    Returns
    -------
    (lower_left, upper_right): tuple of x,y coordinates
        lower_left holds the smallest x and smallest y found in points,
        upper_right holds the largest x and largest y
    """
    xs, ys = points["x"], points["y"]
    lower_left = (xs.min(), ys.min())
    upper_right = (xs.max(), ys.max())
    return lower_left, upper_right

In [None]:
def search_space(points):
    """Enumerate every integer location inside the bounding box of the points

    Parameters
    ----------
    points: DataFrame
        Dataframe with columns corresponding to x and y coordinates

    Returns
    -------
    DataFrame
        One row per (x, y) location in the bounding box, edges included,
        plus a boolean boundary column that is True for the locations
        lying on an edge of the box
    """
    (left, bottom), (right, top) = find_corners(points)
    # +1 because range() excludes its stop value and the far edges belong to the box.
    x_values = range(left, right + 1)
    y_values = range(bottom, top + 1)
    lattice = pd.MultiIndex.from_product([x_values, y_values], names=["x", "y"])
    grid = pd.DataFrame(index=lattice).reset_index()
    on_vertical_edge = (grid["x"] == left) | (grid["x"] == right)
    on_horizontal_edge = (grid["y"] == bottom) | (grid["y"] == top)
    grid["boundary"] = on_vertical_edge | on_horizontal_edge
    return grid

In [None]:
def find_distance(df, x, y):
    """Calculate the Manhattan distance from every row of a dataframe to a point

    The distances are returned without being stored on df, so the caller's
    frame keeps exactly the columns it was passed in with.

    Parameters
    ----------
    df: DataFrame
        Dataframe with columns corresponding to x and y coordinates
    x: int
        x coordinate of the point to calculate distance to for every point in the dataframe
    y: int
        y coordinate of the point to calculate distance to for every point in the dataframe

    Returns
    -------
    pd.Series:
        Series of the same length as df, with values corresponding to the distance to the x,y coordinate
    """
    # Manhattan distance = |dx| + |dy|, computed row-wise on the two columns.
    return (df["x"] - x).abs() + (df["y"] - y).abs()


In [None]:
def solution_part1(search_space, candidates):
    """Solve the puzzle

    Calculate the Manhattan distance to each candidate for all points in the search space
    Compute the minimum distance from all the candidate points to each point in the search space
    Eliminate points that are an equal minimum distance to more than one candidate point
    Drop any candidate points that are closest to any boundary point
    Total the number of points in the search space the remaining candidates are closest to
    Return the maximum total

    Parameters
    ----------
    search_space: DataFrame
        Dataframe with x and y coordinate columns and a boolean boundary column.
        It is only read, never modified, and any other columns are ignored
    candidates: DataFrame
        Dataframe whose rows are the x, y coordinates of the candidate points

    Returns
    -------
    The largest number of search space points that are uniquely closest to a
    single remaining candidate (NaN if every candidate was dropped)
    """
    # Work from the three known columns only, so extra columns on the caller's
    # frame can never be mistaken for candidate distances.
    grid = search_space[["x", "y", "boundary"]]

    # One distance column per candidate, kept in a separate frame so the
    # candidate columns are known explicitly and the input stays untouched.
    distances = pd.DataFrame(
        {
            str((x, y)): (grid["x"] - x).abs() + (grid["y"] - y).abs()
            for x, y in candidates.values
        },
        index=grid.index,
    )

    nearest = distances.min(axis="columns")
    is_nearest = distances.eq(nearest, axis="index")

    # Keep only the points that have exactly one nearest candidate (no ties).
    uncontested = is_nearest.sum(axis="columns") == 1
    owned = is_nearest.loc[uncontested]

    # A candidate that owns any boundary point is dropped from consideration.
    edge_rows = owned.loc[grid.loc[uncontested, "boundary"]]
    finite = [col for col in owned.columns if not edge_rows[col].any()]

    return owned[finite].sum().max()

In [None]:
# Part 1 on the example file: load the points, build the grid covering their
# bounding box, and show the part 1 result as this cell's output.
points = read_points(EXAMPLE)
ssdf = search_space(points)
solution_part1(ssdf, points)

In [None]:
# Switch to the INPUT file. `points` and `ssdf` are rebound here, and these
# are the frames the part 2 cells below read from and write to.
points = read_points(INPUT)
ssdf = search_space(points)

In [None]:
def find_distance(df, x, y):
    """Manhattan distance from every row of a dataframe to a single point

    Parameters
    ----------
    df: DataFrame
        Dataframe with columns corresponding to x and y coordinates
    x: int
        x coordinate of the point the distances are measured to
    y: int
        y coordinate of the point the distances are measured to

    Returns
    -------
    pd.Series:
        Series of the same length as df, holding the distance from each row to the x,y coordinate
    """
    horizontal = df["x"].sub(x).abs()
    vertical = df["y"].sub(y).abs()
    return horizontal.add(vertical)

In [None]:
# Part 2: for every location in the search space, add up its distance to each
# of the points and store the total in the total_dist column.
ssdf["total_dist"] = sum(find_distance(ssdf, x, y) for x, y in points.values)

In [None]:
# Number of search-space locations whose summed distance is below 10,000.
int(ssdf["total_dist"].lt(10_000).sum())

**TODO:** clean up the part 2 solution; it was left as loose cells because we were pressed for time.