In [306]:
import pandas as pd

# Neighbor table: the CSV's unnamed first column becomes "zip" and its "0" column
# becomes "neighbors".
radial_data = pd.read_csv("radial_zipcodes.csv").rename(
    columns={'Unnamed: 0': 'zip', "0": "neighbors"}
)

# Population table: same layout, with the "0" column renamed to "population".
radial_populations = pd.read_csv("radial_populations.csv").rename(
    columns={'Unnamed: 0': 'zip', "0": "population"}
)

# Per-zip reference table; later cells read its zip, state_id, population, lat and lng columns.
uzips = pd.read_csv("uzips.csv")

In [307]:
#meta functions

import copy
import numpy as np
import bisect

def ordered_contains(ordered_list, element):
    """
    Binary-search membership test for a sorted sequence.

    Parameters
    ordered_list int[]: an ordered (ascending) list of integers
    element int: the value to look for

    Returns
    true if element is contained by ordered_list, false if it is not.
    The answer is only meaningful when ordered_list really is sorted.
    """
    position = bisect.bisect_left(ordered_list, element)
    # bisect_left returns len(ordered_list) when element is larger than every entry.
    if position >= len(ordered_list):
        return False
    return ordered_list[position] == element
    

In [309]:
#helper functions

def omit_zips(df, zips):
    """
    Remove a set of zip codes from a neighbor table.

    Parameters
    df DataFrame: a dataframe with a zip and neighbors column; each neighbors
        cell is a comma-separated string of zip codes (an empty or missing cell
        is treated as "no neighbors")
    zips number[]: zip codes to be removed, in any order

    Returns
    updated_dataframe DataFrame: a new dataframe holding only the zip and
        neighbors columns (fresh 0..n-1 index) in which the rows of the omitted
        zip codes are gone and the omitted zip codes are stripped from every
        remaining neighbors string. df itself is not modified.
    """
    # Set membership does not depend on the order of zips; a binary search here
    # would silently miss entries whenever the caller passes an unsorted list.
    excluded = {float(z) for z in zips}

    new_zips = []
    new_neighbors = []

    for zip_code, neighbors in df[["zip", "neighbors"]].itertuples(index=False, name=None):
        if zip_code in excluded:
            continue

        # A row whose neighbors were all removed earlier holds "" (or NaN after a
        # CSV round trip); float("") would raise, so blank tokens are skipped.
        tokens = [] if pd.isna(neighbors) else str(neighbors).split(",")
        kept = [t for t in tokens if t.strip() and float(t) not in excluded]

        new_zips.append(zip_code)
        new_neighbors.append(",".join(kept))

    updated_dataframe = pd.DataFrame({"zip": np.array(new_zips), "neighbors": np.array(new_neighbors)})

    return updated_dataframe

In [310]:
#Data Preprocessing

#Remove every zip code outside the contiguous US: Hawaii (HI), Puerto Rico (PR),
#Alaska (AK) and the US Virgin Islands (VI)

exclude_ids = ['HI', 'PR', 'AK', 'VI']
filtered_df = uzips[uzips['state_id'].isin(exclude_ids)]
# Keep the list sorted: ordered_contains (binary search) is only correct on
# sorted input, and the row order of uzips is not guaranteed to be by zip.
non_continental_zips = sorted(filtered_df['zip'].tolist())

radial_data = omit_zips(radial_data, non_continental_zips)

In [311]:
def get_neighbors(zip):
    """
    Return the neighbor zip codes stored for one zip code in the global radial_data.

    Parameters
    zip number: the zip code to look up (the parameter name shadows the builtin
        only inside this function; it is kept so existing callers still work)

    Returns
    float[]: the zip codes parsed from the first matching row's comma-separated
        neighbors string; an empty list when that cell is empty or missing

    Raises
    IndexError: if radial_data has no row for zip
    """
    matches = radial_data.loc[radial_data["zip"] == zip, "neighbors"]
    if matches.empty:
        raise IndexError(f"zip code {zip!r} not found in radial_data")

    raw = matches.iloc[0]
    # omit_zips can leave a row with an empty neighbors string; float("") would raise.
    if pd.isna(raw):
        return []
    return [float(token) for token in str(raw).split(",") if token.strip()]

def total_population(zips):
    """
    Sum the population column of the global uzips table over the given zip codes.

    Parameters
    zips number[]: zip codes to include

    Returns
    the sum of uzips["population"] over every row whose zip is in zips
    """
    selected = uzips['zip'].isin(zips)
    return uzips.loc[selected, "population"].sum()

def total_new_population(zip):
    """
    Population covered by a zip code's neighbors that are still present in radial_data.

    Parameters
    zip number: the zip code whose neighbor list is evaluated

    Returns
    the total_population of those neighbors of zip that still have a row in the
    global radial_data table
    """
    # Build the lookup once; rebuilding and scanning the full zip list for every
    # neighbor made this quadratic.
    remaining_zips = set(radial_data["zip"].to_list())
    intersection = [neighbor for neighbor in get_neighbors(zip) if neighbor in remaining_zips]
    return total_population(intersection)
    

In [312]:
def get_populations(startidx, endidx):
    """
    Compute total_new_population for a slice of rows of the global radial_data.

    Parameters
    startidx int: first index label to process (inclusive)
    endidx int: last index label to process (inclusive)

    Returns
    dict mapping each processed row's zip to its total_new_population.
    Rows are visited in table order; iteration stops at the first label
    greater than endidx. Each processed row is printed as progress output.
    """
    populations_by_zip = {}

    for label, record in radial_data.iterrows():
        if label >= startidx:
            if label > endidx:
                break
            code = record["zip"]
            print(label, code)
            populations_by_zip[code] = total_new_population(code)

    return populations_by_zip

In [320]:
from scipy.spatial import distance

def calculate_coefficient(longitude, latitude, centers=None):
    """
    Average distance-based coefficient of one point relative to the chosen centers.

    For every center the distance d to the point is turned into
    |(d mod 200) - 100| / 100, which is 1 at d = 0, 200, 400, ... and 0 at
    d = 100, 300, ...; the result is the mean over all centers.

    Parameters:
    longitude float: x coordinate of the point, already multiplied by 69.1
        (this is how calculate_coefficients passes it)
    latitude float: y coordinate of the point, already multiplied by 69.1
    centers DataFrame (optional): rows with "lat" and "lng" columns in raw
        degrees. When omitted, the rows of the global uzips table whose zip is
        in the global current_zipcodes list are used.

    Returns: the coefficient for the specified point; 1 when there is no center
    to compare against.
    """
    # Same factor the callers apply to the query point. It is used for both
    # axes, i.e. longitude gets no cos(latitude) correction.
    scale = 69.1
    period = 200
    half_period = period / 2

    if centers is None:
        if len(current_zipcodes) == 0:
            return 1
        centers = uzips[uzips["zip"].isin(current_zipcodes)][["lat", "lng"]]
    if len(centers) == 0:
        # No usable center: avoid dividing by zero and apply no penalty.
        return 1

    # Scale the centers exactly once so they share units with the query point.
    center_lng = centers["lng"].to_numpy(dtype=float) * scale
    center_lat = centers["lat"].to_numpy(dtype=float) * scale

    dist = np.hypot(center_lng - longitude, center_lat - latitude)
    return float(np.mean(np.abs(dist % period - half_period) / half_period))
        
        
    
#for each center
    #calculate ||the distance mod 200| - 100|
    #take the average

#return coefficient
    

In [321]:
def calculate_coefficients(zips):
    """
    Coefficient of the first row of the global uzips table whose zip is in zips.

    Despite the plural name, only one value is returned: the first matching
    row's lat/lng are multiplied by 69.1 and passed to calculate_coefficient.

    Parameters
    zips number[]: zip codes to look up (callers pass a one-element list)

    Returns
    the coefficient of the first matching row, or None when no row matches
    """
    scaled_coords = uzips[uzips["zip"].isin(zips)][["lat", "lng"]] * 69.1
    if len(scaled_coords) == 0:
        return None
    first = scaled_coords.iloc[0]
    return calculate_coefficient(first["lng"], first["lat"])

In [322]:
def update_radial_populations(deletion_values):
    """
    Rebuild the radial population table after a set of zip codes has been removed.

    Works on the merge of the global radial_data and radial_populations tables.

    Parameters
    deletion_values number[]: zip codes being removed

    Returns
    DataFrame with a zip and a population column: one row per merged row whose
    zip is not in deletion_values, with the total_population of its removed
    neighbors subtracted from its population
    """
    # Set lookups keep the per-row membership tests constant-time.
    deleted = set(deletion_values)

    zips = []
    populations = []

    merged = pd.merge(radial_data, radial_populations)
    for row_zip, neighbors, population in merged[["zip", "neighbors", "population"]].itertuples(index=False, name=None):
        if row_zip in deleted:
            continue

        # Empty or missing neighbor strings mean "no neighbors"; float("") would raise.
        tokens = [] if pd.isna(neighbors) else str(neighbors).split(",")
        removed_neighbors = [float(t) for t in tokens if t.strip() and float(t) in deleted]

        # Skip the uzips scan when nothing was removed from this row's neighbors.
        lost_population = total_population(removed_neighbors) if removed_neighbors else 0

        zips.append(row_zip)
        populations.append(population - lost_population)

    return pd.DataFrame({"zip": zips, "population": populations})
        
    #for each zip code in radial_populations
        #if it's in delete_values, delete
        #get the intersection of the zip neighbors with the deleted values
        #get the population of the intersection
        #subtract the intersection population from the total population

In [None]:
# Greedy selection: repeatedly pick the zip code with the highest adjusted radial
# population, record it, and remove its neighbors from both working tables.
current_zipcodes  = []

while len(radial_populations) > 0:
    #calculate modular coefficients
    radial_populations["coefs"] = radial_populations["zip"].apply(lambda x: calculate_coefficients([x]))
    radial_populations["adjusted"] = radial_populations["coefs"] * radial_populations["population"]

    #taking the first instance of max population because it doesn't matter if there are repeats
    #(stored as best_zip so the builtin max is not shadowed for the rest of the notebook)
    top_score = radial_populations["adjusted"].max()
    best_zip = radial_populations.loc[radial_populations["adjusted"] == top_score, "zip"].to_list()[0]

    #neighbors must be read before radial_data is rewritten below
    delete_values = get_neighbors(best_zip)
    # Always drop the chosen zip itself so every iteration shrinks the table.
    # NOTE(review): confirm whether radial_zipcodes.csv already lists each zip
    # among its own neighbors; if it does, this guard is a no-op.
    if best_zip not in delete_values:
        delete_values.append(float(best_zip))

    #update radial populations
    radial_populations = update_radial_populations(delete_values)

    #update radial data (sorted, because ordered_contains needs sorted input)
    radial_data = omit_zips(radial_data, sorted(delete_values))
    print(best_zip)
    current_zipcodes.append(best_zip)

In [None]:
#1. Master zip code tracker
#2. Calculate radial population for each zip code

#3. while we still have zipcodes
    #4. calculate punishment coefficients:
            #for each selected center, take the distance mod 200
            #and apply the function |x-100| / 100
            #i.e. |(the distance mod 200) - 100| / 100, averaged over all selected centers
    #5. Select zip code with highest radial population multiplied by the punishment coefficient
        #6. Remove neighbors from all zip code lists
        #7. Update populations
        
    