In [352]:
import pandas as pd

radial_data = pd.read_csv("radial_zipcodes.csv")
radial_data.rename(columns={'Unnamed: 0': 'zip', "0": "neighbors"}, inplace=True)

radial_populations = pd.read_csv("radial_populations.csv")
radial_populations.rename(columns={'Unnamed: 0': 'zip', "0": "population"}, inplace=True)

uzips = pd.read_csv("uzips.csv")

In [353]:
#meta functions

import copy
import numpy as np
import bisect

"""
Parameters
ordered_list int[]: an ordered list of integers
element int: an integer

Returns
true if element is contained by ordered_list, false if it does not
"""
def ordered_contains(ordered_list, element):
    index = bisect.bisect_left(ordered_list, element)
    return index < len(ordered_list) and ordered_list[index] == element
    

In [354]:
#helper functions

"""
Parameters
df DataFrame: a dataframe with a zip and neighbors column
zips int[]: a list of zipcodes to be removed 


Returns
updated_dataframe DataFrame: a copy of the dataframe with specified zip codes omitted
"""

def omit_zips(df, zips):
    new_zips = []
    new_neighbors = []
    
    for idx, row in df.iterrows():
        if row['zip'] in zips:
            continue

        new_zips.append(row['zip'])
        new_neighbors.append(",".join([x for x in row['neighbors'].split(",") if not ordered_contains(zips, float(x))]))

    updated_dataframe = pd.DataFrame({"zip": np.array(new_zips), "neighbors": np.array(new_neighbors)})

    return updated_dataframe

In [355]:
#Data Preprocessing

#Remove all Alaska, Hawaii, and Puerto Rico Zip codes

exclude_ids = ['HI', 'PR', 'AK', 'VI']
filtered_df = uzips[uzips['state_id'].isin(exclude_ids)]
non_continental_zips = filtered_df['zip'].tolist()

radial_data = omit_zips(radial_data, non_continental_zips)

In [356]:
def get_neighbors(zip):
    return [float(x) for x in radial_data[radial_data["zip"] == zip]["neighbors"].to_list()[0].split(",")]

def total_population(zips):
    return uzips[uzips['zip'].isin(zips)]["population"].sum()

def total_new_population(zip):
    neighbors = get_neighbors(zip)
    intersection = [neighbor for neighbor in neighbors if neighbor in radial_data["zip"].to_list()]
    return total_population(intersection)
    

In [357]:
def get_populations(startidx, endidx):
    radial_populations = {}
    
    for idx, zip in radial_data.iterrows():
        if idx < startidx:
            continue
        if idx > endidx:
            break
        print(idx, zip["zip"])
        radial_populations[zip["zip"]] = total_new_population(zip["zip"])
    
    return radial_populations

In [358]:
from scipy.spatial import distance

"""
Parameters:
coords int[]: coordinates in the US (miles)

Returns: the coefficient for the specified point based on currentzipcodes
"""
def calculate_coefficient(longitude, latitude):
    if len(current_zipcodes) == 0:
        return 1
    centers = uzips[uzips["zip"].isin(current_zipcodes)][["lat", "lng"]] * 69.1

    coeff = 0
    total = 0
    for idx, center in centers.iterrows():
        lng = center["lng"] * 69.1
        lat = center["lat"] * 69.1
    
        dist = distance.euclidean((longitude, latitude), (lng, lat))
        coeff += abs(abs(dist % 200) - 100)/100
        total += 1

    coeff /= total

    return coeff
        
        
    
#for each center
    #calculate ||the distance mod 200| - 100|
    #take the average

#return coefficient
    

In [359]:
def calculate_coefficients(zips):
    coordinfo = uzips[uzips["zip"].isin(zips)][["lat", "lng"]] * 69.1
    for _, row in coordinfo.iterrows():
        return calculate_coefficient(row["lng"], row["lat"])

In [360]:
def update_radial_populations(deletion_values):
    zips = []
    populations = []
    
    for idx, row  in pd.merge(radial_data, radial_populations).iterrows():
        # print(row["zip"], row["population"], row["neighbors"])
        if row["zip"] in deletion_values:
            continue

        intersection = [i for i in [float(x) for x in row['neighbors'].split(",")] if i in deletion_values]

        zips.append(row["zip"])
        populations.append(row["population"] - total_population(intersection))

    return pd.DataFrame({"zip": zips, "population": populations})
        
    #for each zip code in radial_populations
        #if it's in delete_values, delete
        #get the intersection of the zip neighbors with the deleted values
        #get the population of the intersection
        #subtract the intersection population from the total population

In [350]:
radial_populations["coefs"] = radial_populations["zip"].apply(lambda x: calculate_coefficients([x]))
radial_populations["adjusted"] = radial_populations["coefs"] * radial_populations["population"]

#taking the first instance of max population because it doesn't matter if there are repeats
max = radial_populations[radial_populations["adjusted"] == radial_populations["adjusted"].max()]["zip"].to_list()[0]
max2 = radial_populations[radial_populations["population"] == radial_populations["population"].max()]["zip"].to_list()[0]

In [351]:
radial_populations

Unnamed: 0,zip,population,coefs,adjusted
0,1001.0,9008972.0,0.489046,4.405798e+06
1,1002.0,10871185.0,0.500719,5.443404e+06
2,1003.0,10234785.0,0.486937,4.983695e+06
3,1005.0,11646593.0,0.626360,7.294957e+06
4,1007.0,11420050.0,0.544016,6.212687e+06
...,...,...,...,...
30280,99363.0,640361.0,0.500168,3.202881e+05
30281,99371.0,1186188.0,0.396768,4.706420e+05
30282,99401.0,347474.0,0.511780,1.778302e+05
30283,99402.0,430149.0,0.563685,2.424687e+05


In [None]:
current_zipcodes  = []

while len(radial_populations) > 0:
    #calculate modular coefficients
    radial_populations["coefs"] = radial_populations["zip"].apply(lambda x: calculate_coefficients([x]))
    radial_populations["adjusted"] = radial_populations["coefs"] * radial_populations["population"]
    
    #taking the first instance of max population because it doesn't matter if there are repeats
    max = radial_populations[radial_populations["adjusted"] == radial_populations["adjusted"].max()]["zip"].to_list()[0]
    
    #update radial populations
    delete_values = get_neighbors(max)
    radial_populations = update_radial_populations(delete_values)
    
    #update radial data
    radial_data = omit_zips(radial_data, get_neighbors(max))
    print(max)
    current_zipcodes.append(max)

7977.0
92059.0
95376.0
1566.0
61016.0
78950.0
20137.0
33471.0
30565.0
27247.0
76048.0
43420.0
47022.0
93311.0
98576.0
32668.0
43903.0
85132.0
80117.0
37345.0
49120.0
54769.0
62650.0
78028.0
14469.0
70452.0
37657.0
74864.0
27849.0
29911.0
19352.0
66072.0
89025.0
84340.0
38024.0
35072.0
72856.0
48433.0
47845.0
12811.0
75604.0
68067.0
78353.0
52211.0
42602.0
45696.0
71302.0
54983.0
32578.0
97343.0
4022.0
15860.0
31795.0
39745.0
33034.0
87507.0
95375.0
87940.0
24938.0
99136.0
55389.0
77331.0
65584.0
67460.0
42411.0
79511.0
28430.0
83647.0
32118.0
95428.0


In [336]:
current_zipcodes  = []

while len(radial_populations) > 0:
    #calculate modular coefficients
    radial_populations["coefs"] = radial_populations["zip"].apply(lambda x: calculate_coefficients([x]))
    radial_populations["adjusted"] = radial_populations["coefs"] * radial_populations["population"]
    
    #taking the first instance of max population because it doesn't matter if there are repeats
    max = radial_populations[radial_populations["population"] == radial_populations["population"].max()]["zip"].to_list()[0]
    
    #update radial populations
    delete_values = get_neighbors(max)
    radial_populations = update_radial_populations(delete_values)
    
    #update radial data
    radial_data = omit_zips(radial_data, get_neighbors(max))
    print(max)
    currentzipcodes.append(max)

7977.0
92780.0
60071.0
20747.0
94511.0
6247.0
33853.0
44824.0
78944.0
30572.0
75159.0
47031.0
27205.0
98533.0
34142.0
85123.0
16051.0
80109.0
49021.0
55125.0
35749.0
62640.0
27869.0
29331.0
39667.0
78675.0
14469.0
68431.0
93249.0
73061.0
31548.0
84004.0
72392.0
92332.0
31901.0
45631.0
42344.0
64756.0
13407.0
78353.0
29527.0
52586.0
47907.0
36473.0
75930.0
3832.0
24458.0
40840.0
17059.0
54950.0
97434.0
87027.0
88347.0
72950.0
99136.0
71302.0
52073.0
48747.0
39817.0
39741.0
63869.0
68062.0
65041.0
96124.0
79511.0
83647.0
30442.0
12855.0
56571.0
98256.0
73543.0
77535.0
38575.0
45302.0
92233.0
96076.0
67470.0
79025.0
24311.0
49654.0
50597.0
4411.0
83236.0
86024.0
81652.0
54876.0
19941.0
97033.0
35007.0
71646.0
74561.0
59647.0
70082.0
59874.0
28582.0
97138.0
68971.0
84710.0
58540.0
14006.0
57220.0
78014.0
32696.0
75551.0
59075.0
57782.0
82058.0
2532.0
58230.0
64642.0
76444.0
65777.0
61526.0
67844.0
59868.0
62421.0
88056.0
87420.0
93665.0
49935.0
56469.0
93561.0
99115.0
79741.0
98362.0
55952

In [None]:
#1. Master zip code tracker
#2. Calculate radial population for each zip code

#3. while we still have zipcodes
    #4. calculate punishment coefficients: 
            #average |the distance mod 200|
            #applied to the function |x-100|
            # ||the distance mod 200| - 100|
    #5. Select zip code with highest radial population multiplied by the punishment coefficient
        #6. Remove neighbors from all zip code lists
        #7. Update populations
        
    