In [2]:
from safegraph_eval import ingest
from safegraph_eval import geometry

In [3]:
# NOTE(review): absolute, machine-specific path -- point this at your own copy of the sample data.
nested_dir = '/Users/eugene/Documents/Projects/safegraph_eval_dir/sample_data/zipped_core_geo/'

# Read the sample through ingest.read_sg_shop_zipfile, asking it not to return supplemental files.
sample = ingest.read_sg_shop_zipfile(nested_dir, return_supplemental_files = False)

# Testing

In [4]:
# Run the packaged review routine on the sample with the Esri imagery basemap (prompts for input).
reviewed_df = geometry.verify_POI_geometry(sample, basemap = 'esri_imagery')


    Rate polygon from 1 to 7. 
    Add an optional comment after a comma. Example input: "5, polygon extends into roadway."
    Type "quit" to exit.
     quit


In [10]:
# Preview the first rows of the reviewed frame, including the rating and comment columns.
reviewed_df.head()

Unnamed: 0,placekey,parent_placekey,safegraph_brand_ids,location_name,brands,top_category,sub_category,naics_code,latitude,longitude,...,building_height,enclosed,phone_number,is_synthetic,includes_parking_lot,iso_country_code,geometry,POI,rating,comment
0,222-223@5sc-fgu-jsq,,SG_BRAND_60b7b54d19fca719281e76d485a141ad,Costco,Costco,"General Merchandise Stores, including Warehous...",All Other General Merchandise Stores,452319,43.185083,-88.112217,...,,False,,False,False,US,"POLYGON ((-88.11216 43.18631, -88.11217 43.186...",Neighbor,7,
1,222-222@5vg-55k-p7q,,SG_BRAND_60b7b54d19fca719281e76d485a141ad,Costco,Costco,"General Merchandise Stores, including Warehous...",All Other General Merchandise Stores,452319,38.353024,-122.716918,...,,False,,False,False,US,"POLYGON ((-122.71655 38.35321, -122.71655 38.3...",Neighbor,6,edge is weird
2,zzw-222@63v-4c7-qvf,zzy-223@63v-4c7-9mk,SG_BRAND_60b7b54d19fca719281e76d485a141ad,Costco,Costco,"General Merchandise Stores, including Warehous...",All Other General Merchandise Stores,452319,42.381605,-83.331444,...,,False,,False,False,US,"POLYGON ((-83.33133 42.38220, -83.33133 42.382...",Neighbor,7,
3,zzw-223@5z4-rxm-zfz,zzw-222@5z4-rxm-f2k,SG_BRAND_60b7b54d19fca719281e76d485a141ad,Costco,Costco,"General Merchandise Stores, including Warehous...",All Other General Merchandise Stores,452319,33.862712,-117.921914,...,,False,,False,False,US,"POLYGON ((-117.92255 33.86293, -117.92122 33.8...",Neighbor,5,
4,222-223@629-38n-k75,,SG_BRAND_60b7b54d19fca719281e76d485a141ad,Costco,Costco,"General Merchandise Stores, including Warehous...",All Other General Merchandise Stores,452319,41.709448,-72.76365,...,,False,,False,False,US,"POLYGON ((-72.76221 41.70967, -72.76220 41.709...",Neighbor,7,


# Dev

In [30]:
import pandas as pd
import geopandas as gpd
import folium
from shapely import wkt
from shapely.geometry import LineString, shape
import shapely.speedups
shapely.speedups.enable()

from IPython.display import clear_output

In [31]:
def choose_most_recent_geometry(df, geometry_col = 'polygon_wkt', date_col = 'date_range_start'):
    """Reduce ``df`` to a single row per placekey.

    If ``date_col`` is one of the columns, rows are first ordered by it with
    missing dates placed at the front, so the row kept for each placekey is
    the last one in date order. Otherwise the last occurrence of each
    placekey in the existing row order is kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'placekey' column.
    geometry_col : str
        Part of the signature but not used by this function.
    date_col : str
        Name of the column to order by when it is present.

    Returns
    -------
    pandas.DataFrame
        A new frame with duplicate placekeys removed; ordered by ``date_col``
        when that column exists, otherwise in the input order.
    """
    # Only reorder when there is a date to order by; missing dates sort first
    # so that any dated row wins over an undated one.
    if date_col in df.columns:
        candidates = df.sort_values(by = date_col, na_position = 'first')
    else:
        candidates = df

    return candidates.drop_duplicates('placekey', keep = 'last')

In [32]:
def make_geometry(df, geometry_col = 'polygon_wkt', crs = 'EPSG:4326'):
    """Turn ``df`` into a GeoDataFrame whose geometry is parsed from a WKT column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``geometry_col``.
    geometry_col : str
        Column holding WKT strings to parse into geometries.
    crs : str
        Coordinate reference system assigned to the result.

    Returns
    -------
    geopandas.GeoDataFrame
    """
    # Parse the WKT text first, then attach it as the active geometry.
    parsed_shapes = gpd.GeoSeries.from_wkt(df[geometry_col])
    return gpd.GeoDataFrame(df, geometry = parsed_shapes, crs = crs)

In [33]:
def choose_poi_and_neighbors(gdf, placekey, neighbor_radius, projection = 'EPSG:3857'):
    """Select the target POI together with the POIs that lie near it.

    Every row is labelled in a 'POI' column as 'Target' (its placekey equals
    ``placekey``) or 'Neighbor'. The target geometry is buffered by
    ``neighbor_radius`` in the projected CRS, and every row whose geometry
    intersects that buffer is returned (the target itself included).

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Frame with a 'placekey' column and a CRS set. It is not modified.
    placekey : str
        Placekey of the POI to treat as the target.
    neighbor_radius : float
        Buffer distance, expressed in the units of ``projection``.
    projection : str
        Projected CRS used for buffering and the intersection test.

    Returns
    -------
    geopandas.GeoDataFrame
        The matching rows, with the added 'POI' column, in ``gdf``'s CRS.
        Empty when no row carries ``placekey``.
    """
    # Label a copy: writing the 'POI' column onto the caller's frame would
    # leak it into every later use of that frame.
    labelled = gdf.copy()
    labelled['POI'] = 'Neighbor'
    labelled.loc[labelled['placekey'] == placekey, 'POI'] = 'Target'

    # Buffer distances are only meaningful in a projected CRS.
    projected = labelled.to_crs(projection)

    # Area around the target inside which other POIs count as neighbors.
    target_rows = projected.loc[projected['POI'] == 'Target']
    search_area = target_rows.geometry.buffer(neighbor_radius)

    # union_all() replaces the deprecated unary_union in newer geopandas;
    # fall back to the attribute on versions that predate it.
    if hasattr(search_area, 'union_all'):
        search_shape = search_area.union_all()
    else:
        search_shape = search_area.unary_union

    nearby = projected.loc[projected.intersects(search_shape)]

    # Hand the result back in the caller's original CRS.
    return nearby.to_crs(gdf.crs)
    

In [69]:
def map_poi_and_neighbors(map_df, basemap = 'ESRI_imagery'):
    """Draw the target POI and its neighbors on an interactive folium map.

    Rows sharing the same polygon and polygon attributes are collapsed into a
    single feature whose tooltip shows how many POIs share it and up to three
    of their names. The target polygon is drawn in red, the others in blue.

    Parameters
    ----------
    map_df : pandas.DataFrame or geopandas.GeoDataFrame
        Must contain the columns 'POI', 'polygon_wkt', 'location_name',
        'latitude', 'longitude', 'is_synthetic', 'polygon_class', 'enclosed'
        and 'includes_parking_lot', with at least one row whose 'POI' value
        is 'Target'.
    basemap : str
        'esri_imagery' or 'osm' (case-insensitive).

    Returns
    -------
    folium.Map

    Raises
    ------
    ValueError
        If ``basemap`` is not a supported value, or ``map_df`` has no Target row.
    """
    basemap_key = basemap.lower()
    if basemap_key not in ('esri_imagery', 'osm'):
        # Previously an unknown basemap left the map undefined and failed
        # further down with an unrelated-looking error.
        raise ValueError(f'Unsupported basemap "{basemap}"; expected "esri_imagery" or "osm".')

    # center map on the target POI, using plain floats rather than one-element Series
    target_rows = map_df.loc[map_df['POI'] == 'Target']
    if target_rows.empty:
        raise ValueError('map_df has no row with POI == "Target"; cannot center the map.')
    center = [float(target_rows['latitude'].iloc[0]), float(target_rows['longitude'].iloc[0])]

    # collapse POIs that share one polygon into a single feature
    # NOTE(review): groupby drops rows whose grouping columns hold missing
    # values by default -- confirm these attributes are always populated.
    shape_cols = ['polygon_wkt', 'is_synthetic', 'polygon_class', 'enclosed', 'includes_parking_lot']
    map_df_grouped = map_df.groupby(shape_cols)['location_name'].apply(list).reset_index(name = 'location_names')
    map_df_grouped['location_names_sample'] = map_df_grouped['location_names'].str[:3]
    map_df_grouped['total_POI_count'] = map_df_grouped['location_names'].apply(len)

    # flag the polygon belonging to the target; everything else is a neighbor
    map_df_grouped = map_df_grouped.merge(target_rows[['polygon_wkt', 'POI']], on = 'polygon_wkt', how = 'outer')
    map_df_grouped.loc[map_df_grouped['POI'].isnull(), 'POI'] = 'Neighbor'

    map_df_grouped = gpd.GeoDataFrame(map_df_grouped, geometry = gpd.GeoSeries.from_wkt(map_df_grouped['polygon_wkt']), crs = 'EPSG:4326')

    # initialize map
    if basemap_key == 'esri_imagery':
        map_ = folium.Map(location = center, zoom_start = 19)

        folium.TileLayer(
            tiles = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
            attr = 'Esri',
            name = 'Esri Satellite',
            overlay = False,
            control = True
           ).add_to(map_)

    else:  # 'osm'
        map_ = folium.Map(location = center, zoom_start = 19, tiles = 'OpenStreetMap')

    # tool tip list
    tool_tip_cols = ['total_POI_count', 'location_names_sample', 'is_synthetic', 'polygon_class', 'enclosed', 'includes_parking_lot']

    # draw map
    folium.GeoJson(
        map_df_grouped,
        style_function = lambda x: {
            'weight':0,
            'color':'red' if x['properties']['POI'] == 'Target' else 'blue',
            'fillOpacity': 0.7 if x['properties']['POI'] == 'Target' else 0.2,
        },
        tooltip = folium.features.GeoJsonTooltip(
            fields = tool_tip_cols
        )
    ).add_to(map_)

    return map_

In [70]:
def _parse_rating_input(raw, low = 1, high = 7):
    """Split a reviewer's answer into ``(rating, comment)``.

    The text before the first comma is the rating; everything after it is the
    comment, so comments may themselves contain commas.

    Raises
    ------
    ValueError
        If the rating is not an integer between ``low`` and ``high`` inclusive.
    """
    rating_text, _, comment = raw.partition(',')
    try:
        rating = int(rating_text)
    except ValueError:
        raise ValueError(f'Could not read a rating from "{raw}".') from None
    if not low <= rating <= high:
        raise ValueError(f'Rating must be between {low} and {high}, got {rating}.')
    return rating, comment.lstrip()  # remove any leading space


# main function
def verify_POI_geometry(df, placekeys = None, basemap = 'ESRI_imagery', neighbor_radius = 50, geometry_col = 'polygon_wkt'):
    """Interactively rate POI polygons against a basemap.

    For each placekey, the POI and its neighbors are drawn on a map and the
    user is asked for a rating from 1 to 7 with an optional comment. Typing
    "quit" (on its own) stops the session early. An answer that cannot be
    parsed is reported and asked for again, so earlier ratings are not lost.

    Parameters
    ----------
    df : pandas.DataFrame
        POI data with a 'placekey' column and the WKT column ``geometry_col``.
    placekeys : list of str, optional
        Placekeys to review, in order. Defaults to every placekey left after
        de-duplication.
    basemap : str
        Basemap name passed to ``map_poi_and_neighbors``.
    neighbor_radius : float
        Radius passed to ``choose_poi_and_neighbors``.
    geometry_col : str
        Name of the WKT geometry column.

    Returns
    -------
    pandas.DataFrame
        The de-duplicated rows that were rated, with added 'rating' and
        'comment' columns.
    """
    # display is only an implicit builtin inside IPython; import it so this
    # function also works when it lives in a plain module.
    from IPython.display import display

    df = choose_most_recent_geometry(df, geometry_col = geometry_col, date_col = 'date_range_start')
    gdf = make_geometry(df, geometry_col = geometry_col)

    # collected answers
    output_pks = []
    ratings = []
    comments = []

    prompt = '''
    Rate polygon from 1 to 7. 
    Add an optional comment after a comma. Example input: "5, polygon extends into roadway."
    Type "quit" to exit.
    '''

    if placekeys is None:
        placekeys = list(df['placekey'])

    stop_requested = False
    for pk in placekeys:
        map_df = choose_poi_and_neighbors(gdf, placekey = pk, neighbor_radius = neighbor_radius).reset_index(drop = True)
        display(map_poi_and_neighbors(map_df, basemap = basemap))

        answer = input(prompt)
        while True:
            # Exact match only: a substring test would also end the session on
            # comments such as "5, quite large".
            if answer.strip().lower() == 'quit':
                stop_requested = True
                break
            try:
                rating, comment = _parse_rating_input(answer)
                break
            except ValueError as problem:
                # Ask again instead of crashing and discarding earlier ratings.
                answer = input(f'{problem}\n{prompt}')
        if stop_requested:
            break

        output_pks.append(pk)
        ratings.append(rating)
        comments.append(comment)
        clear_output(wait=True)

    ratings_df = pd.DataFrame({'placekey':output_pks, 'rating':ratings, 'comment':comments})
    output = df.merge(ratings_df, on = 'placekey')
    clear_output(wait=True)

    return output

In [None]:
# Try the notebook-local implementation defined above, this time on the OpenStreetMap basemap.
reviewed_df = verify_POI_geometry(sample, basemap = 'osm')

In [None]:
# supply a dataframe of Shop data
# get the most recent geometry for each POI
# choose a POI
# find its neighbors within a radius
# plot the POI and its neighbors on a basemap
# prompt user for score and comment
# return df of examined POI with scores and comments