# Notebook to Check Intersection of Points/Polygons with Other Polygons

The objective of this notebook is to search a set of points or polygons for spatial overlap with a larger set of polygons.

This will take a long time to run on one core, and because it loops over shapely objects it is not easily parallelized. A future improvement would be to build it out with dask.

By Cascade Tuholske 2019-01-17

#### Notes
1. Points should be dilated to circles to prevent one from falling just outside the larger polygon set (see Lusaka)

### Dependencies

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry import mapping
from shapely.geometry import Polygon
from shapely.geometry import shape
import ast
from shapely.geometry import mapping
import rasterio
import copy

### Functions

Note: these functions are also kept in a .py file in the src directory

In [2]:
def load_points (file):
    """Read a csv of point coordinates and return it as a geopandas GeoDataFrame.

    Args: file = path (or file-like object) of a csv with 'lon' and 'lat' columns

    Returns: GeoDataFrame holding the csv columns plus one shapely Point per row,
             built from (lon, lat) and tagged with crs epsg:4326 (WGS84)
    """
    table = pd.read_csv(file)

    # one Point per row; each Point receives its (lon, lat) pair
    lon_lat_pairs = zip(table['lon'], table['lat'])
    point_geoms = list(map(Point, lon_lat_pairs))

    # attach the geometry and the WGS84 coordinate reference system in one step
    return gpd.GeoDataFrame(table, crs={'init': 'epsg:4326'}, geometry=point_geoms)

In [None]:
def poly_point (poly, point):
    """Find every (point, polygon) pair where the point lies within the polygon.

    Every point is tested against every polygon, so the cost grows with
    len(point) * len(poly).

    Args: poly = geo pandas polygons ... shapely objects in a 'geometry' column
          point = geo pandas points ... shapely objects in a 'geometry' column,
                  plus an 'id' column

    Returns: list of tuples (point id, point geometry, polygon index, polygon geometry),
             where both geometries are dicts produced by shapely's mapping()
    """
    return [
        (
            pt_row['id'],
            mapping(pt_row['geometry']),    # dict w/ keys : type and coordinates
            poly_idx,
            mapping(poly_row['geometry']),  # dict w/ keys : type and coordinates
        )
        for _, pt_row in point.iterrows()
        for poly_idx, poly_row in poly.iterrows()
        if pt_row['geometry'].within(poly_row['geometry'])
    ]

# Note 2018-11-30 update arr to gpd_df 

In [None]:
def point_buffer(gpd_df, raduis):
    """Replace each point geometry with a shapely buffer polygon around it.

    The frame is modified in place: the existing 'geometry' column is renamed to
    'old_geom' and a new 'geometry' column holding the buffers is added.
    Be sure to check the crs so the radius is entered in the right units.

    Args: gpd_df = geopandas df with shapely points in its 'geometry' column
          raduis = radius to dilate points by, in the units of the frame's crs
                   (the spelling is kept so existing keyword callers still work)

    Returns: the same gpd_df, now with 'old_geom' (points) and 'geometry' (buffers)
    """

    # AGU 2018-12-04 - for an unprojected (degree) crs, ~250m at the equator is
    # 250*1/(111*1000)

    gpd_df.rename(columns={'geometry':'old_geom'}, inplace=True)

    # Buffer with the argument that was passed in. The body used to read a
    # global named `radius`, which ignored the argument and raised NameError
    # whenever no such global existed.
    gpd_df['geometry'] = [point.buffer(raduis) for point in gpd_df['old_geom']]

    return gpd_df

In [None]:
def poly_overlap (point_buffer, poly_raster):
    """
    Collect every (point buffer, raster polygon) pair whose geometries intersect.

    Each buffer is tested against each polygon. Every intersecting pair yields one
    output row carrying the buffer's OSM attributes and the polygon's FID and geometry.
    The original author notes it goes faster if the smaller list goes first.

    Args: point_buffer = geopandas dilated osm points as polygons (shapely objects), with
                         columns country, osm_name, osm_type, lat, lon, osm_id, geometry
          poly_raster = polygons isolated from rasters to check intersection,
                        with columns FID and geometry

    Returns: geopandas DF with columns osm_id, FID, geometry (the polygon), country,
             osm_name, osm_type, lat, lon
    """

    # output column -> collected values; key order is the output column order
    collected = {
        'osm_id': [],
        'FID': [],
        'geometry': [],
        'country': [],
        'osm_name': [],
        'osm_type': [],
        'lat': [],
        'lon': [],
    }

    for _, buf_row in point_buffer.iterrows():
        buf_geom = buf_row['geometry']
        for _, ras_row in poly_raster.iterrows():
            if not buf_geom.intersects(ras_row['geometry']):
                continue

            # attributes of the osm point whose buffer matched
            collected['country'].append(buf_row['country'])
            collected['osm_name'].append(buf_row['osm_name'])
            collected['osm_type'].append(buf_row['osm_type'])
            collected['lat'].append(buf_row['lat'])
            collected['lon'].append(buf_row['lon'])
            collected['osm_id'].append(buf_row['osm_id'])

            # FID from the raster mask and a rebuilt copy of its polygon
            collected['FID'].append(ras_row['FID'])
            collected['geometry'].append(shape(mapping(ras_row['geometry'])))

    # put results into a geopandas df, one column at a time
    new_gpd_df = gpd.GeoDataFrame()
    for col_name, col_values in collected.items():
        new_gpd_df[col_name] = col_values

    return new_gpd_df

# Note 2018-11-30 update arr to gpd_df 
# Note 2019-04-30 updated to use osm_name instead of city/town for the OSM name

### Load Data

In [None]:
import os
# Show the current working directory: IN and OUT are plain prefixes joined to
# file names, so while they are empty strings paths resolve relative to it
os.getcwd()

In [3]:
# File Paths
# Prefixes are concatenated directly with file names (e.g. IN+'name.shp'),
# so a non-empty value needs its trailing path separator
IN = ''   # prefix the input point shapefile is read from
OUT = ''  # presumably the prefix for written outputs — confirm against the export cell

In [4]:
# Load Points
# Read the OSM point shapefile; the printed length is the number of point features
points = gpd.read_file(IN+'20190430_osm_All.shp')
print(len(points))


9813


In [6]:
# Number of loaded points whose osm_type is 'town'
len(points[points['osm_type']=='town'])

8863

In [None]:
# Load Polygons
# Read from the IN prefix set in the File Paths cell. The `temp_data` prefix used
# here before is not defined anywhere in the visible notebook and raised NameError
# on a fresh kernel.
polygons = gpd.read_file(IN+'GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0_Clip_1500c300_A.shp')
print(len(polygons))
    
#Out File Name (stem only; the export cell appends '.shp')
out = 'GHS_POP_GPW42000_GLOBE_R2015A_54009_1k_v1_0_Clip_1500c300_A_polypoints'

In [None]:
polygons.head()

#### Change CRS if needed

For GHS-Pop, transform the points into ESRI:54009 (World Mollweide) for the polygon search

In [None]:
polygons.crs

In [None]:
points.crs

In [None]:
# For GHS CRS
# Proj4-style parameters: Mollweide projection ('proj': 'moll') on the WGS84
# ellipsoid, centred on lon 0 with no false easting/northing, units in metres

crs = {'ellps': 'WGS84',
 'lon_0': 0,
 'no_defs': True,
 'proj': 'moll',
 'units': 'm',
 'x_0': 0,
 'y_0': 0}

In [None]:
# Label the polygons with the Mollweide definition. NOTE(review): assigning .crs
# only sets the metadata and does not reproject coordinates — presumably the
# shapefile is already in this projection (its file name contains 54009); confirm.
polygons.crs = crs

In [None]:
polygons.crs

In [None]:
points.head()

In [None]:
# Reproject the points into the polygons' crs so both layers share one coordinate system
points = points.to_crs(polygons.crs)

In [None]:
points.head()

#### Buffer Points

In [None]:
# AGU 2018-12-04 - radius was set in degrees, ~250m at the equator (commented-out line below)
# 2019 05 01 --> the points now share the polygons' crs, which declares units of metres,
#                so the buffer radius is given directly in metres

#radius = 250*1/(111*1000)
radius = 250

In [None]:
# Buffer every point by the `radius` constant set in the previous cell rather than
# repeating the literal 250, so the value only has to be changed in one place.
# Note that point_buffer modifies `points` in place and returns it.
osm_buffer_gdf = point_buffer(points, radius)

In [None]:
osm_buffer_gdf.head(6)

In [None]:
# keep only the buffered geometry: remove the column holding the original points

osm_buffer_gdf = osm_buffer_gdf.drop(columns=['old_geom'])
osm_buffer_gdf.head(6)

### Intersection

In [None]:
# run poly_overlap
# poly_overlap tests every buffer against every polygon, so this is the slow
# step of the notebook; the wall-clock time is printed when it finishes

import time
checkpoint = time.time()  # start of the timed section

poly_overlap_out = poly_overlap(osm_buffer_gdf, polygons)

print("elapsed time is: {}s".format(time.time()-checkpoint))
print(len(poly_overlap_out))  # one row per intersecting buffer/polygon pair

In [None]:
print(len(poly_overlap_out))

In [None]:
poly_overlap_out.tail(20)

In [None]:
# Write the matched polygons to OUT + <out stem> + '.shp'. The `erl_v2_data` prefix
# used here before is not defined anywhere in the visible notebook and raised
# NameError on a fresh kernel; OUT is the prefix declared in the File Paths cell.
poly_overlap_out.to_file(OUT+out+'.shp', driver='ESRI Shapefile')