In [1]:
import pandas as pd
import os
import geopandas as gpd
import json
from itertools import chain
import string
import folium

In [2]:
state = 'Pennsylvania'

###### Rawdata is Shapefile ######
# citydata = 'shp_bdry_mn_city_township_unorg'
# countydata = 'mn_county_boundaries'

###### Rawdata is GeoJSON ######
citydata = 'PaMunicipalities2020_01'
countydata = 'PaCounty2020_01'

output = os.path.join('geojson', state, state + '.json')

In [3]:
def conversion(inputfile):
    ## convert file to json 
    inputfile = json.loads(inputfile.to_json())
    ## display features properties as dataframe
    df = pd.json_normalize(inputfile['features'])
    return df

###### Rawdata is Shapefile ######
def shp_to_df(rawdata):
    path = os.path.join('geojson', state, rawdata)
    shp = gpd.read_file(path, driver = 'shapefile').to_crs('EPSG:4326')
    df = conversion(shp)
    return df

# **************** uncomment **********************
# df_city = shp_to_df(citydata)
# df_county = shp_to_df(countydata)
# *************************************************

###### Rawdata is GeoJSON ######
def geojson_to_df(rawdata):
    path = os.path.join('geojson', state, rawdata + '.geojson')
    geojson = gpd.read_file(path).to_crs('EPSG:4326')
    df = conversion(geojson)
    return df

# **************** uncomment **********************
df_city = geojson_to_df(citydata)
df_county = geojson_to_df(countydata)
# *************************************************

In [4]:
def extract_keyword(name):
    if name == 'type':
        return input(f'Please enter the city class name: ').strip()
    else:
        return input(f'Please enter the column storing {name} name: ').strip()

In [5]:
df_city.head()

Unnamed: 0,id,type,properties.CLASS_OF_M,properties.COUNTY,properties.FED_AID_UR,properties.FED_ID_NUM,properties.FIPS_AREA_,properties.FIPS_COUNT,properties.FIPS_MUN_C,properties.FIPS_MUN_P,properties.FIPS_NAME,properties.FIPS_SQ_MI,properties.GEOMETRY_A,properties.GEOMETRY_L,properties.GPID,properties.MSLINK,properties.MUNICIPAL1,properties.MUNICIPAL_,geometry.type,geometry.coordinates
0,0,Feature,2TWP,8,2,24-6002456,0,15,3400,5251,000000,42.8,210896800.0,95271.033189,7839,1826,ATHENS,204,Polygon,"[[[-76.55772697441479, 42.00006298590132], [-7..."
1,1,Feature,2TWP,35,4,23-6003189,0,69,140,1743,,4.4,21078240.0,24460.028459,7840,830,WAVERLY,201,Polygon,"[[[-75.71561543007037, 41.51387570821942], [-7..."
2,2,Feature,2TWP,8,1,23-6001376,0,15,11168,2143,,37.8,173067400.0,71078.128553,7841,1828,CANTON,207,Polygon,"[[[-76.88385285446502, 41.66903861309195], [-7..."
3,3,Feature,2TWP,8,1,25-1343885,0,15,15376,1196,,41.3,191412400.0,67003.726019,7844,1829,COLUMBIA,208,Polygon,"[[[-76.9117834068012, 41.89672991341941], [-76..."
4,4,Feature,BORO,35,4,24-6000604,281,69,38096,2169,SCRAN/WLKS-BARRE,0.9,3584202.0,9116.84506,7850,813,JERMYN,408,Polygon,"[[[-75.55179375820316, 41.53296083433373], [-7..."


In [6]:
###### Dataframe includes 'County Name', 'City Name', 'Type' ######

# **************** uncomment **********************
# def extract_city(df):
#     countyname = extract_keyword('county')
#     cityname = extract_keyword('city')
#     classname = extract_keyword('class')
#     citytype = extract_keyword('type')
#     df = df[df[classname] == citytype]
#     df = df[[cityname, countyname, 'geometry.coordinates']].rename(
#             columns={cityname:'City', countyname:'County', 'geometry.coordinates':'Coordinates'})   
#     ## capitalize the first letter of each word
#     df['City'] = df['City'].apply(lambda row: string.capwords(str(row)))
#     df['County'] = df['County'].apply(lambda row: string.capwords(str(row)))
#     df['State'] = state 

#     return df
# *************************************************



###### Dataframe includes 'City Name', 'Type' ######

# **************** uncomment **********************
def extract_city(df):
    cityname = extract_keyword('city')
    classname = extract_keyword('class')
    citytype = extract_keyword('type')
    df = df[df[classname] == citytype]
    df = df[[cityname, 'geometry.coordinates']].rename(
            columns={cityname:'City', 'geometry.coordinates':'Coordinates'})   
    ## capitalize the first letter of each word
    df['City'] = df['City'].apply(lambda row: string.capwords(str(row)))
    df['State'] = state  

    return df
# *************************************************



###### Dataframe includes 'City Name' ######

# **************** uncomment **********************
# def extract_city(df):
#     cityname = extract_keyword('city')
#     df = df[[cityname, 'geometry.coordinates']].rename(
#             columns={cityname:'City', 'geometry.coordinates':'Coordinates'})   
#     ## capitalize the first letter of each word
#     df['City'] = df['City'].apply(lambda row: string.capwords(str(row)))
#     df['State'] = state
    
#     return df
# *************************************************

df_city = extract_city(df_city)

Please enter the column storing city name: properties.MUNICIPAL1
Please enter the column storing class name: properties.CLASS_OF_M
Please enter the city class name: CITY


In [7]:
def round_coordinates(l, precision):
    def round_element(e):
        if isinstance(e, list):
            return round_coordinates(e, precision)
        else:
            return round(e, precision)
    return [round_element(e) for e in l]

df_city['Coordinates'] = round_coordinates(df_city['Coordinates'], 4)
df_city.head()

Unnamed: 0,City,Coordinates,State
84,Scranton,"[[[-75.6921, 41.3719], [-75.6947, 41.3735], [-...",Pennsylvania
96,Warren,"[[[-79.1188, 41.8251], [-79.1192, 41.8252], [-...",Pennsylvania
206,Bethlehem,"[[[-75.4077, 40.6471], [-75.4065, 40.6461], [-...",Pennsylvania
233,Connellsville,"[[[-79.5719, 40.0281], [-79.5725, 40.0277], [-...",Pennsylvania
242,Johnstown,"[[[-78.9515, 40.3553], [-78.9516, 40.3554], [-...",Pennsylvania


In [8]:
# create_geojson_features 
def create_geojson_features(df):
    print('> Creating GeoJSON features...')
    features = []
    geojson = {
        'type': 'FeatureCollection',
        'features': features
    }
        
    for _, row in df.iterrows():
        if type(row['Coordinates'][0][0][0]) is float:
            geometry_type = 'Polygon'
        else:
            geometry_type = 'MultiPolygon'
        feature = {
            'type': 'Feature',
            'geometry': {
                'type':geometry_type, 
                'coordinates':row['Coordinates']
            },
            'properties': {
#                 'City': row['City'],
                'County': row['County'] + ' County',
                'State': row['State']
            }
        }

        features.append(feature)
    return geojson

In [None]:
data_geojson = create_geojson_features(df_city)

In [None]:
with open(output, 'w') as txtfile:
    json.dump(data_geojson, txtfile)
print('> Creating GeoJSON file...')

In [None]:
print('> Making map...')
## change the location here to zoom to the center
m = folium.Map(location = [42.3756, -93.6397], control_scale = True, zoom_start = 5)

## check if the indexmap geojson files can be rendered properly
folium.GeoJson(data_geojson, 
               tooltip = folium.GeoJsonTooltip(fields=('City', 'County', 'State'),
               aliases=('City', 'County', 'State')),
               show = True).add_to(m)
m

In [9]:
def extract_county(df):
    ## possible county column names in the dataframe
    clist = ['properties.COUNTY', 'properties.NAME', 'properties.COUNTY_NAM', 'properties.COUNTY_NAME', 'properties.CTY_NAME']
    
    if set(df.columns).intersection(set(clist)):
        cname = ''.join(set(df.columns).intersection(set(clist)))
    else:
        cname = input('Please enter the column storing county names: ').strip()
          
    df = df[[cname, 'geometry.coordinates']].rename(
            columns={cname:'County', 'geometry.coordinates':'Coordinates'})      
    ## capitalize the first letter of each word in the county name
    df['County'] = df['County'].apply(lambda row: string.capwords(row))
    df['State'] = state
    
    return df

df_county = extract_county(df_county)

In [10]:
df_county.head()

Unnamed: 0,County,Coordinates,State
0,Butler,"[[[-80.15851259551978, 40.85509424693933], [-8...",Pennsylvania
1,Cambria,"[[[-78.53716642885084, 40.509941032143324], [-...",Pennsylvania
2,Cameron,"[[[-78.4200152934836, 41.512861569352985], [-7...",Pennsylvania
3,Carbon,"[[[-75.49629184001692, 40.98894589399537], [-7...",Pennsylvania
4,Centre,"[[[-78.36304777078917, 40.733157969872394], [-...",Pennsylvania


In [12]:
def df_to_gdf(df):
    gdf = gpd.GeoDataFrame(df, geometry = df['Coordinates'])
    gdf.crs = 'EPSG:4326'
    return gdf

gdf_city = df_to_gdf(df_city)
gdf_county = df_to_gdf(df_county)

TypeError: Input must be valid geometry objects: [[[-75.6921, 41.3719], [-75.6947, 41.3735], [-75.6948, 41.3736], [-75.6953, 41.3739], [-75.6958, 41.3742], [-75.6963, 41.3746], [-75.6968, 41.3749], [-75.6976, 41.3754], [-75.6978, 41.3755], [-75.6986, 41.376], [-75.6987, 41.3761], [-75.6989, 41.3762], [-75.6994, 41.3765], [-75.6995, 41.3766], [-75.6996, 41.3767], [-75.7001, 41.3769], [-75.7004, 41.3772], [-75.7007, 41.3773], [-75.7007, 41.3774], [-75.7008, 41.3774], [-75.7036, 41.3794], [-75.7073, 41.3829], [-75.7063, 41.3832], [-75.7054, 41.3835], [-75.7046, 41.3839], [-75.7037, 41.3842], [-75.7029, 41.3845], [-75.7021, 41.3847], [-75.7015, 41.3848], [-75.7009, 41.3849], [-75.7003, 41.385], [-75.6999, 41.3851], [-75.6993, 41.3851], [-75.6987, 41.385], [-75.6978, 41.385], [-75.6969, 41.385], [-75.696, 41.3851], [-75.6956, 41.3851], [-75.6951, 41.3853], [-75.6945, 41.3856], [-75.6941, 41.3859], [-75.6939, 41.3865], [-75.6939, 41.3867], [-75.694, 41.3872], [-75.6941, 41.3877], [-75.6941, 41.3881], [-75.6936, 41.3886], [-75.6931, 41.3891], [-75.6926, 41.3895], [-75.692, 41.3897], [-75.6915, 41.3899], [-75.6911, 41.39], [-75.6908, 41.39], [-75.6899, 41.3896], [-75.6889, 41.3893], [-75.6884, 41.3891], [-75.6877, 41.3889], [-75.6873, 41.3888], [-75.687, 41.3888], [-75.6866, 41.389], [-75.6862, 41.3893], [-75.6859, 41.3895], [-75.6856, 41.3897], [-75.6851, 41.39], [-75.6848, 41.3902], [-75.6843, 41.3906], [-75.684, 41.3908], [-75.6837, 41.3911], [-75.6834, 41.3914], [-75.6831, 41.3917], [-75.6828, 41.3919], [-75.6824, 41.3921], [-75.6818, 41.3923], [-75.6812, 41.3926], [-75.6804, 41.3929], [-75.6796, 41.3931], [-75.679, 41.3933], [-75.6786, 41.3935], [-75.6784, 41.3936], [-75.6782, 41.3936], [-75.6881, 41.3986], [-75.6891, 41.3962], [-75.7016, 41.4025], [-75.6999, 41.4045], [-75.6987, 41.4039], [-75.6969, 41.4031], [-75.6937, 41.4068], [-75.6982, 41.409], [-75.7017, 41.4106], [-75.7026, 41.411], [-75.7033, 41.4113], [-75.7057, 41.4125], [-75.7058, 41.4126], [-75.706, 41.4126], [-75.7061, 41.4127], [-75.707, 41.4131], [-75.7081, 41.4136], [-75.7099, 41.4145], [-75.71, 41.4145], [-75.7101, 41.4146], [-75.713, 41.416], [-75.7138, 41.4163], [-75.7157, 41.4172], [-75.7168, 41.4177], [-75.7169, 41.4178], [-75.72, 41.4192], [-75.7187, 41.4207], [-75.7185, 41.421], [-75.7151, 41.4248], [-75.7139, 41.4262], [-75.7075, 41.4335], [-75.7036, 41.438], [-75.6934, 41.45], [-75.6862, 41.4581], [-75.686, 41.4584], [-75.686, 41.4584], [-75.6847, 41.4599], [-75.6765, 41.4693], [-75.6623, 41.4625], [-75.6578, 41.4603], [-75.6379, 41.4503], [-75.6365, 41.4496], [-75.6223, 41.4426], [-75.6399, 41.4305], [-75.6443, 41.4254], [-75.6457, 41.4239], [-75.6453, 41.4237], [-75.6472, 41.4216], [-75.6506, 41.4184], [-75.6517, 41.4176], [-75.6483, 41.416], [-75.6179, 41.4013], [-75.6223, 41.3961], [-75.6223, 41.3961], [-75.6224, 41.396], [-75.6252, 41.3928], [-75.6328, 41.384], [-75.6341, 41.3825], [-75.6342, 41.3824], [-75.6375, 41.3831], [-75.652, 41.3661], [-75.6038, 41.3426], [-75.6823, 41.3394], [-75.6768, 41.3511], [-75.6682, 41.3694], [-75.6743, 41.372], [-75.6764, 41.3696], [-75.687, 41.3747], [-75.688, 41.3751], [-75.6891, 41.3757], [-75.6896, 41.3751], [-75.6921, 41.3719]]]

In [None]:
df_merged = gpd.sjoin(city_geojson, county_geojson, op = 'intersects', how = 'left')[['City', 'County','State']].astype(str)
df_merged