In [80]:
import json
import math

In [65]:
def read_data_from_file(filename):
    """
    Opens `filename` in text mode and returns its entire contents
    as a single string.
    """
    with open(filename, mode='r') as handle:
        contents = handle.read()
    return contents

In [66]:
def parse_data(data):
    """
    Parses tab-separated "zip code<TAB>borough" lines.

    Blank (whitespace-only) lines are skipped. Zip codes are converted to
    integers; boroughs are kept exactly as they appear in the input.

    Returns a tuple of:
      - a dictionary of zip code: borough pairs, ordered by zip code
        (if a zip code appears more than once, the last borough wins),
      - a sorted list of zip codes, one entry per parsed line
        (duplicates are kept),
      - a sorted list of the distinct boroughs.

    Raises ValueError if a non-blank line does not consist of exactly two
    tab-separated fields, or if the zip code field is not an integer.
    """
    zip_borough_dict = {}
    zip_codes = []
    boroughs = set()

    for line in data.splitlines():
        if not line.strip():
            # A stray empty line would otherwise fail the two-field unpack below
            continue
        zip_code, borough = line.split("\t")
        zip_code = int(zip_code)  # Convert zip code to integer
        zip_borough_dict[zip_code] = borough
        zip_codes.append(zip_code)
        boroughs.add(borough)

    # Sort zip codes
    zip_codes.sort()

    # Create a sorted dictionary by zip code (key)
    sorted_zip_borough_dict = {k: zip_borough_dict[k] for k in sorted(zip_borough_dict)}

    # Sorted so the result does not depend on set iteration order,
    # which can differ from one interpreter run to the next
    return sorted_zip_borough_dict, zip_codes, sorted(boroughs)

In [67]:
def parse_neighborhood_data(data):
    """
    Parses lines of the form "UHF, neighborhood name, zip, zip, ...".

    Fields are separated by ", " (a comma followed by a space). Blank
    (whitespace-only) lines are skipped. Zip codes are kept as strings.

    Returns a tuple of:
      - a dictionary mapping each zip code to [UHF, neighborhood name]
        (if a zip code is listed more than once, the last line wins),
      - a list of every zip code in order of appearance (duplicates kept).

    Raises IndexError if a non-blank line has fewer than two fields.
    """
    neighborhood_dict = {}
    all_zip_codes = []

    for line in data.splitlines():
        if not line.strip():
            # A blank line has no neighborhood field; skip it instead of
            # failing on parts[1]
            continue
        parts = line.split(', ')
        uhf, neighborhood = parts[0], parts[1]
        zip_codes = parts[2:]

        for zip_code in zip_codes:
            neighborhood_dict[zip_code] = [uhf, neighborhood]
            all_zip_codes.append(zip_code)

    return neighborhood_dict, all_zip_codes

In [73]:
def parse_zip_code_data(text):
    """
    Builds a nested latitude -> longitude -> zip code lookup from
    comma-separated "zip code, latitude, longitude" lines.

    Every field is stripped of surrounding whitespace and kept as a string.
    Lines that are empty or whitespace-only are ignored.
    """
    lookup = {}
    for raw_line in text.split('\n'):
        if not raw_line.strip():
            continue  # nothing to parse on a blank line
        fields = [field.strip() for field in raw_line.split(',')]
        zip_code, latitude, longitude = fields
        # Create the inner per-latitude dict on first sight, then record the zip
        lookup.setdefault(latitude, {})[longitude] = zip_code
    return lookup

In [68]:
def save_to_json(data, filename):
    """
    Serializes `data` as JSON with a 4-space indent and writes it to
    `filename`, replacing any existing file.
    """
    with open(filename, mode="w") as handle:
        json.dump(data, handle, indent=4)


In [69]:
# Build the zip code -> borough lookup from the tab-separated source file
# and persist it as JSON.
# Note: parse_data produces integer zip codes; json.dump writes dictionary
# keys as strings, so the saved file has string keys.
file_path = "all_zip.txt"
data = read_data_from_file(file_path)
zip_borough_dict, zip_codes, boroughs = parse_data(data)
save_to_json(zip_borough_dict, "zip_borough.json")

In [70]:
# Build the zip code -> [UHF, neighborhood name] lookup and persist it as JSON.
# Note: this rebinds `file_path`, `data` and `zip_codes`. After this cell
# `zip_codes` holds strings (parse_neighborhood_data does not convert them),
# not the integers returned by parse_data.
file_path = "all_uhf.txt"
data = read_data_from_file(file_path)
neighborhood_dict, zip_codes = parse_neighborhood_data(data)
save_to_json(neighborhood_dict, "neighborhood_data.json")

In [74]:
# Build the nested latitude -> longitude -> zip code lookup from the
# comma-separated coordinates file and persist it as JSON.
# Note: `file_path` is rebound here, and a later cell in this notebook
# passes `file_path` to read_data_from_file again.
file_path = "all_coordinates.txt"
file_content = read_data_from_file(file_path)
parsed_data = parse_zip_code_data(file_content)
save_to_json(parsed_data, "coordinates.json")

In [75]:
def haversine(coord1, coord2, radius=6371.0):
    """
    Great-circle distance between two points, computed with the
    haversine formula.

    Parameters:
        coord1, coord2: (latitude, longitude) pairs in decimal degrees.
        radius: radius of the sphere; the result is expressed in the same
            unit. Defaults to 6371.0, the Earth radius in kilometers.

    Returns:
        The distance along the sphere's surface, in the unit of `radius`
        (kilometers with the default).
    """
    lat1, lon1 = coord1
    lat2, lon2 = coord2

    # Convert coordinates from degrees to radians
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2

    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return radius * c

In [116]:
from scipy.spatial import KDTree

def create_zip_code_kd_tree(file_content):
    """
    Builds a k-d tree over the (latitude, longitude) points listed in
    `file_content`, together with a lookup from each point to its zip code.

    Each non-blank line is parsed as "zip code, latitude, longitude".
    Returns (tree, lookup), where lookup maps (latitude, longitude) float
    tuples to the zip code string; a repeated point keeps the last zip code.
    """
    points = []
    zip_by_point = {}

    for raw_line in file_content.split('\n'):
        if not raw_line.strip():
            continue  # ignore blank lines
        fields = [field.strip() for field in raw_line.split(',')]
        zip_code, latitude, longitude = fields
        point = (float(latitude), float(longitude))
        points.append(point)
        zip_by_point[point] = zip_code

    # Index the raw (latitude, longitude) pairs for nearest-neighbor queries
    return KDTree(points), zip_by_point

def get_closest_zip_code(tree, zip_codes, latitude, longitude):
    """
    Returns the zip code whose stored point is nearest to
    (latitude, longitude) according to the k-d tree.

    `zip_codes` must map the tree's points, as (latitude, longitude)
    tuples, to zip codes.
    """
    # query() returns (distance, index); only the index is needed
    nearest_index = tree.query((latitude, longitude))[1]
    nearest_point = tuple(tree.data[nearest_index])
    return zip_codes[nearest_point]

# Example usage: build the k-d tree from the coordinates file and look up
# the zip code nearest to a sample point.
# The path is assigned here so this cell does not depend on whichever
# earlier cell last set `file_path`.
file_path = "all_coordinates.txt"
file_content = read_data_from_file(file_path)
tree, zip_codes = create_zip_code_kd_tree(file_content)
zip_code = get_closest_zip_code(tree, zip_codes, 40.76394, -74.97903)
print(zip_code)


10988


In [113]:
# zip_code = get_closest_zip_code(zip_code_map, 40.76394, -74.97903)
# print(zip_code)

10307
