In [1]:
import geopandas as gpd


def read_zipcode_geojson():
    import geopandas as gpd
    url = 'https://raw.githubusercontent.com/OpenDataDE/State-zip-code-GeoJSON/master/me_maine_zip_codes_geo.min.json'
    gdf = gpd.read_file(url)
    gdf['ZCTA5CE10'] = gdf['ZCTA5CE10'].astype('int')
    gdf = gdf.sort_values(by=['ZCTA5CE10'])
    gdf = gdf.drop(index=[405])  # the data come from census don't have those
    gdf['zip code tabulation area'] = gdf['ZCTA5CE10']
    gdf.drop('ZCTA5CE10', axis=1)
    return gdf


def read_census_data(social, zipcode_start, zipcode_end):  # social=B19113_001E 4992 3901
    import pandas as pd
    import requests
    res = []
    # B19113_001E stands for "MEDIAN FAMILY INCOME IN THE PAST 12 MONTHS (IN 2020 INFLATION-ADJUSTED DOLLARS)"
    url = 'https://api.census.gov/data/2020/acs/acs5?get=NAME,' + \
        social+'&for=zip%20code%20tabulation%20area:*'
    response = requests.get(url)
    res = response.json()
    columns = res[0]
    df1 = pd.DataFrame(res[1:], columns=columns)
    df1['zip code tabulation area'] = df1['zip code tabulation area'].astype(
        int)
    df1[social] = df1[social].astype(int)
    df1 = df1.loc[df1['zip code tabulation area'] <= zipcode_end]
    df1 = df1.loc[df1['zip code tabulation area'] >= zipcode_start]
    return df1


def merge_census_and_geojson(sensus, geo_data):
    Maine_zipcode_level = geo_data.merge(
        sensus, on='zip code tabulation area', how='left')
    return Maine_zipcode_level


def read_county_boundary_geojson():
    Maine_County = gpd.read_file(
        'https://raw.githubusercontent.com/ds5010/broadband/main/src/county_boundaries/Maine_County_Boundary_Polygons_Dissolved_Feature.geojson')
    # drop two useless columns that we are not gonna use
    Maine_County = Maine_County.drop(columns=['created_date'])
    Maine_County = Maine_County.drop(columns=['last_edited_date'])
    return Maine_County


def set_zipcode_geojson_to_centroid(zipcdoe_gdf):
    zipcdoe_gdf['centroid_column'] = zipcdoe_gdf.centroid
    zipcdoe_gdf = zipcdoe_gdf.set_geometry('centroid_column')
    return zipcdoe_gdf


def build_county_boundary_dist(Maine_County):
    maine_county_dict = {}
    for i, row in Maine_County.iterrows():
        maine_county_dict[row['COUNTY']] = row.geometry
    return maine_county_dict


def store_zipfile_to_file_v2(county_name, maine_counties_dict):
    gdf_list = []
    for index, i in zipcdoe_gdf.iterrows():
        if i.centroid_column.within(maine_counties_dict[county_name]):
            gdf_list.append(i)
    gdf = gpd.GeoDataFrame(gdf_list)
    del gdf['centroid_column']
    gdf.set_geometry('geometry')
    gdf.to_file('./county/'+county_name+'/'+county_name+socialType+'.geojson')


def get_social_value_for_zipcode_level_data_by_main_county(socialType):
    """This function return the geodata combine with  with maine zipcode data then divide it into county
    """
    Maine_County = read_county_boundary_geojson()
    maine_counties_dict = build_county_boundary_dist(Maine_County)
    gdf = read_zipcode_geojson()
    sensus = read_census_data(socialType, 3901, 4992)
    Maine_zipcode_level = merge_census_and_geojson(sensus, gdf)
    zipcdoe_gdf = set_zipcode_geojson_to_centroid(gdf)
    for county_name in maine_counties_dict:
        store_zipfile_to_file_v2(county_name, maine_counties_dict)
# for county_name in maine_counties_dict:
#     store_zipfile_to_file_v2(county_name, maine_counties_dict)


  zipcdoe_gdf['centroid_column'] = zipcdoe_gdf.centroid


{'Androscoggin': <shapely.geometry.polygon.Polygon at 0x7fb0b1bd47c0>,
 'Aroostook': <shapely.geometry.polygon.Polygon at 0x7fb0b1bd4e50>,
 'Cumberland': <shapely.geometry.polygon.Polygon at 0x7fb0b1bd4760>,
 'Franklin': <shapely.geometry.polygon.Polygon at 0x7fb0b1bd4a60>,
 'Hancock': <shapely.geometry.polygon.Polygon at 0x7fb0b1bd47f0>,
 'Kennebec': <shapely.geometry.polygon.Polygon at 0x7fb0b1bd48e0>,
 'Knox': <shapely.geometry.polygon.Polygon at 0x7fb0b1bd4e20>,
 'Lincoln': <shapely.geometry.multipolygon.MultiPolygon at 0x7fb0b1bd4400>,
 'Oxford': <shapely.geometry.polygon.Polygon at 0x7fb0b1bd4220>,
 'Penobscot': <shapely.geometry.polygon.Polygon at 0x7fb0b1bd4160>,
 'Piscataquis': <shapely.geometry.polygon.Polygon at 0x7fb0a0aec700>,
 'Sagadahoc': <shapely.geometry.polygon.Polygon at 0x7fb0a0aec4f0>,
 'Somerset': <shapely.geometry.polygon.Polygon at 0x7fb0a0ade5e0>,
 'Waldo': <shapely.geometry.polygon.Polygon at 0x7fb0c09f1be0>,
 'Washington': <shapely.geometry.multipolygon.Multi

In [4]:
zipcdoe_gdf

Unnamed: 0,STATEFP10,ZCTA5CE10,GEOID10,CLASSFP10,MTFCC10,FUNCSTAT10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,PARTFLG10,geometry,zip code tabulation area,centroid_column
371,23,3901,2303901,B5,G6350,S,97503017,875282,+43.2993321,-070.8420772,N,"POLYGON ((-70.87201 43.36208, -70.87057 43.363...",3901,POINT (-70.84362 43.30063)
214,23,3902,2303902,B5,G6350,S,50748339,1991699,+43.2149159,-070.6288062,N,"MULTIPOLYGON (((-70.61524 43.18770, -70.61517 ...",3902,POINT (-70.63618 43.22098)
157,23,3903,2303903,B5,G6350,S,51264299,3999811,+43.1465519,-070.7743831,N,"POLYGON ((-70.74672 43.16025, -70.74652 43.159...",3903,POINT (-70.78602 43.14880)
429,23,3904,2303904,B5,G6350,S,29002836,3697625,+43.1080513,-070.7285948,N,"POLYGON ((-70.70714 43.10603, -70.70717 43.105...",3904,POINT (-70.73105 43.11059)
430,23,3905,2303905,B5,G6350,S,17021886,4634935,+43.0861935,-070.6868463,N,"MULTIPOLYGON (((-70.68561 43.05868, -70.68587 ...",3905,POINT (-70.68671 43.08647)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,23,4986,2304986,B5,G6350,S,143362761,516693,+44.5494603,-069.2112917,N,"POLYGON ((-69.29153 44.53336, -69.29165 44.533...",4986,POINT (-69.22257 44.55298)
319,23,4987,2304987,B5,G6350,S,90451120,2598957,+44.6741485,-069.2518443,N,"POLYGON ((-69.19059 44.67042, -69.19042 44.669...",4987,POINT (-69.25361 44.67914)
181,23,4988,2304988,B5,G6350,S,128937902,5118169,+44.5701476,-069.3624413,N,"POLYGON ((-69.28651 44.58616, -69.28599 44.585...",4988,POINT (-69.35974 44.59591)
79,23,4989,2304989,B5,G6350,S,114435388,9171136,+44.4279799,-069.6495078,N,"POLYGON ((-69.62351 44.48574, -69.62329 44.485...",4989,POINT (-69.64789 44.42979)
