In [85]:
import openpyxl
import pandas as pd
import geojson

In [86]:
workbook = openpyxl.load_workbook('co-est2023-pop.xlsx')
worksheet = workbook.active

In [87]:
start_row = 6
end_row = 3149
columns = ['A', 'B', 'C', 'D', 'E', 'F']

data = {col: [] for col in columns}

for row in range(start_row, end_row + 1):
    for col in columns:
        cell_value = worksheet[col + str(row)].value
        data[col].append(cell_value)

df = pd.DataFrame(data)

In [88]:
new_column_names = ['County', 'Estimates Base', '2020', '2021', '2022', '2023']
df.columns = new_column_names

In [89]:
county_names = df['County'].apply(lambda x: (x.split(',')[0][1:]).split()[0])

df['County'] = county_names
df

Unnamed: 0,County,Estimates Base,2020,2021,2022,2023
0,Autauga,58809,58915,59203,59726,60342
1,Baldwin,231768,233227,239439,246531,253507
2,Barbour,25229,24969,24533,24700,24585
3,Bibb,22301,22188,22359,21986,21868
4,Blount,59130,59107,59079,59516,59816
...,...,...,...,...,...,...
3139,Sweetwater,42271,42197,41626,41374,41249
3140,Teton,23323,23379,23605,23297,23232
3141,Uinta,20445,20457,20681,20727,20745
3142,Washakie,7679,7657,7719,7724,7710


In [90]:
with open('counties.geojson') as f:
    gj = geojson.load(f)
features = gj['features']
features
dic = {
    "County": [],
    "Coordinates": []
}
for i in range(len(features)):
    coor = features[i]['geometry']['coordinates']
    nm = features[i]['properties']['NAME']
    dic['Coordinates'].append(coor)
    dic['County'].append(nm)
cor = pd.DataFrame(dic)
cor

Unnamed: 0,County,Coordinates
0,San Francisco,"[[[-122.511983, 37.77113], [-122.465396, 37.80..."
1,Suffolk,"[[[-71.191155, 42.283059], [-71.156887, 42.330..."
2,Banner,"[[[-104.052825, 41.697954], [-103.370391, 41.6..."
3,Vance,"[[[-78.497783, 36.514477], [-78.457278, 36.541..."
4,Sherman,"[[[-102.162463, 36.500326], [-102.032339, 36.5..."
...,...,...
3215,Pike,"[[[-91.460442, 39.450722], [-91.182876, 39.598..."
3216,Adjuntas,"[[[-66.832736, 18.22799], [-66.817271, 18.2303..."
3217,Hamilton,"[[[-93.971583, 42.558139], [-93.499485, 42.557..."
3218,Carroll,"[[[-95.090851, 42.210405], [-94.858412, 42.209..."


In [92]:
duplicate_counties = df[df.duplicated(subset='County', keep=False)]['County'].unique()
suffix_count = {}
for county in duplicate_counties:
    suffix_count[county] = 1
for i, row in df.iterrows():
    county = row['County']
    if county in duplicate_counties:
        suffix = suffix_count[county]
        df.at[i, 'County'] = f"{county}{suffix}"
        suffix_count[county] += 1

d_c = cor[cor.duplicated(subset='County', keep=False)]['County'].unique()
s_c = {}
for c in d_c:
    s_c[c] = 1
for i, row in cor.iterrows():
    c = row['County']
    if c in d_c:
        s = s_c[c]
        cor.at[i, 'County'] = f"{c}{s}"
        s_c[c] += 1

In [96]:
merged_df = pd.merge(df, cor, on='County', )
merged_df

Unnamed: 0,County,Estimates Base,2020,2021,2022,2023,Coordinates
0,Autauga,58809,58915,59203,59726,60342,"[[[-86.917595, 32.664169], [-86.71339, 32.6617..."
1,Baldwin1,231768,233227,239439,246531,253507,"[[[-88.026319, 30.753358], [-87.944546, 30.827..."
2,Barbour1,25229,24969,24533,24700,24585,"[[[-85.735732, 31.624493], [-85.66623, 31.7728..."
3,Bibb1,22301,22188,22359,21986,21868,"[[[-87.421936, 33.003379], [-87.318539, 33.006..."
4,Blount1,59130,59107,59079,59516,59816,"[[[-86.963358, 33.858221], [-86.924387, 33.909..."
...,...,...,...,...,...,...,...
2923,Sweetwater,42271,42197,41626,41374,41249,"[[[-110.053708, 42.270744], [-109.496675, 42.2..."
2924,Teton3,23323,23379,23605,23297,23232,"[[[-111.398781, 43.92289], [-111.187256, 43.93..."
2925,Uinta,20445,20457,20681,20727,20745,"[[[-111.046637, 41.251627], [-111.0466, 41.360..."
2926,Washakie,7679,7657,7719,7724,7710,"[[[-108.550562, 44.168455], [-107.541198, 44.1..."
