# Notebook to Check Whether Points or Polygons Fall Inside Polygons

The objective of this notebook is to search a set of points or polygons for spatial overlap with a larger set.

By Cascade Tuholske, 2019-01-17

#### Notes
1. Points should be dilated to circles (buffered) so that a point lying just outside a polygon in the larger set is still matched (see Lusaka).

### Dependencies

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry import mapping
from shapely.geometry import Polygon
from shapely.geometry import shape
import ast
from shapely.geometry import mapping
import rasterio
import copy

### Functions

In [2]:
def load_points (file):
    """Read a csv of points and return it as a GeoDataFrame of shapely points.

    The csv must provide 'lon' and 'lat' columns; each row becomes one
    shapely Point(lon, lat). All csv columns are kept in the result.
    """
    table = pd.read_csv(file)

    # Coordinate reference system : WGS84
    wgs84 = {'init': 'epsg:4326'}

    # One shapely point per row, x = lon and y = lat
    points = list(map(Point, zip(table['lon'], table['lat'])))

    # Attach the points as the geometry of a geographic data frame
    return gpd.GeoDataFrame(table, crs=wgs84, geometry=points)

In [3]:
def point_buffer(gpd_df, raduis):
    """Make a shapely polygon buffer around every point of a data frame.

    Parameters
    ----------
    gpd_df : GeoDataFrame
        Frame whose 'geometry' column holds the points to buffer.
        It is left unmodified.
    raduis : float
        Distance handed to each point's ``buffer`` method, expressed in the
        units of the point coordinates. The parameter keeps its original
        (misspelled) name so that existing keyword callers keep working.

    Returns
    -------
    A new frame with the same rows, where the original points are stored in
    'old_geom' and the buffer polygons in 'geometry'.
    """
    # AGU 2018-12-04 - the notebook uses a radius of ~250m at the equator,
    # i.e. 250*1/(111*1000)

    # rename() without inplace returns a new frame, so the caller's frame
    # keeps its 'geometry' column and the cell can be re-run safely.
    buffered = gpd_df.rename(columns={'geometry': 'old_geom'})

    # Buffer with the argument that was passed in; previously the body read a
    # global named `radius` and silently ignored this parameter.
    buffered['geometry'] = [point.buffer(raduis) for point in buffered['old_geom']]

    return buffered

In [4]:
def poly_point (poly, point):
    """
    Collect every (point, polygon) pair in which the point lies within the polygon.

    `poly` and `point` are data frames with a 'geometry' column; `point` also
    needs an 'id' column. Every point is tested against every polygon.

    Returns a list of tuples
    (point id, point geometry as dict, polygon index, polygon geometry as dict),
    ordered by point and then by polygon. The geometry dicts come from shapely's
    `mapping` and carry the keys 'type' and 'coordinates'.
    """
    # TODO (original note): ask Ryan whether a dict would be a better return type
    return [
        (
            pt_row['id'],
            mapping(pt_row['geometry']),
            poly_idx,
            mapping(pg_row['geometry']),
        )
        for _, pt_row in point.iterrows()
        for poly_idx, pg_row in poly.iterrows()
        if pt_row['geometry'].within(pg_row['geometry'])
    ]

# Note 2018-11-30 update arr to gpd_df 

In [20]:
def poly_overlap (point_buffer, poly_raster):
    """
    Find every point buffer / polygon pair whose geometries intersect.

    Parameters
    ----------
    point_buffer : GeoDataFrame
        Buffered points. Columns read: 'geometry', 'country', 'town', 'type',
        'lat', 'lon' and 'osm_id'.
    poly_raster : GeoDataFrame
        Polygons. Columns read: 'geometry' and 'FID'.

    Returns
    -------
    GeoDataFrame with one row per intersecting pair and the columns
    'osm_id', 'FID', 'geometry' (the polygon), 'country', 'city', 'osm_type',
    'lat' and 'lon'. The frame is empty when nothing intersects.

    Notes
    -----
    Every buffer is tested against every polygon.
    Original author note: it goes faster if the smaller list goes first.
    TODO (original note): the place name is read from the 'town' column;
    this needs to change when the input holds cities instead of towns.
    """

    # One list per output column, in the order the columns should appear
    columns = {
        'osm_id': [],
        'FID': [],
        'geometry': [],
        'country': [],
        'city': [],
        'osm_type': [],
        'lat': [],
        'lon': [],
    }

    for _, buffer_row in point_buffer.iterrows():
        for _, poly_row in poly_raster.iterrows():
            if buffer_row['geometry'].intersects(poly_row['geometry']):
                columns['osm_id'].append(buffer_row['osm_id'])
                columns['FID'].append(poly_row['FID'])
                # round-trip through mapping/shape to store a fresh polygon object
                columns['geometry'].append(shape(mapping(poly_row['geometry'])))
                columns['country'].append(buffer_row['country'])
                columns['city'].append(buffer_row['town']) # Need to change town vs city
                columns['osm_type'].append(buffer_row['type'])
                columns['lat'].append(buffer_row['lat'])
                columns['lon'].append(buffer_row['lon'])

    # put results into a geopandas df
    # 'osm_type' is filled from the per-match list. Previously the last scalar
    # value was assigned, which mislabelled rows and raised NameError when
    # there were no matches at all.
    new_gpd_df = gpd.GeoDataFrame()
    for name, values in columns.items():
        new_gpd_df[name] = values

    return new_gpd_df

# Note 2018-11-30 update arr to gpd_df 

### Analysis

In [7]:
# File paths (all relative)

# data = ''
# Directory that the input files are read from and the output shapefile is written to
temp_data = '../../temp_data/'
# csv of OSM town points, read with load_points
osm_point_file = '20190114_osm_africa_towns.csv'

In [8]:
# Load the OSM town points from csv into a GeoDataFrame and report the row count
osm_point_gdf = load_points(temp_data+osm_point_file)
print(len(osm_point_gdf))

8175


In [9]:
# Preview the first six OSM points
osm_point_gdf.head(6)

Unnamed: 0,country,osm_id,town,lat,lon,geometry
0,Algeria,89369215,Tamanrasset,22.785454,5.532446,POINT (5.5324465 22.7854543)
1,Algeria,89980948,In Salah,27.195033,2.482613,POINT (2.4826132 27.1950331)
2,Algeria,252600742,Boumerdès,36.758882,3.470596,POINT (3.470596 36.758882)
3,Algeria,253167052,Thenia,36.724986,3.556935,POINT (3.556935 36.724986)
4,Algeria,253167208,Zemmouri,36.786406,3.601221,POINT (3.6012209 36.7864064)
5,Algeria,253291208,Lakhdaria,36.563944,3.596907,POINT (3.596907 36.5639442)


In [10]:
# Add column for type: city or town.
# Every row of this file is a town; poly_overlap reads this column as the OSM type.
osm_point_gdf['type'] = 'town'
osm_point_gdf.head(6)

Unnamed: 0,country,osm_id,town,lat,lon,geometry,type
0,Algeria,89369215,Tamanrasset,22.785454,5.532446,POINT (5.5324465 22.7854543),town
1,Algeria,89980948,In Salah,27.195033,2.482613,POINT (2.4826132 27.1950331),town
2,Algeria,252600742,Boumerdès,36.758882,3.470596,POINT (3.470596 36.758882),town
3,Algeria,253167052,Thenia,36.724986,3.556935,POINT (3.556935 36.724986),town
4,Algeria,253167208,Zemmouri,36.786406,3.601221,POINT (3.6012209 36.7864064),town
5,Algeria,253291208,Lakhdaria,36.563944,3.596907,POINT (3.596907 36.5639442),town


In [11]:
# AGU 2018-12-04 - radius set to ~250m at the equator
# The value is in decimal degrees: 250 m divided by ~111,000 m per degree.

radius = 250*1/(111*1000)
radius

0.0022522522522522522

In [12]:
# Turn each OSM point into a circular buffer polygon
osm_buffer_gdf = point_buffer(osm_point_gdf, radius)

In [13]:
# Sanity check: buffering should keep one row per input point
print(len(osm_buffer_gdf))

8175


In [14]:
# Preview the buffered frame: points are in 'old_geom', buffers in 'geometry'
osm_buffer_gdf.head(6)

Unnamed: 0,country,osm_id,town,lat,lon,old_geom,type,geometry
0,Algeria,89369215,Tamanrasset,22.785454,5.532446,POINT (5.5324465 22.7854543),town,"POLYGON ((5.534698752252252 22.7854543, 5.5346..."
1,Algeria,89980948,In Salah,27.195033,2.482613,POINT (2.4826132 27.1950331),town,"POLYGON ((2.484865452252253 27.1950331, 2.4848..."
2,Algeria,252600742,Boumerdès,36.758882,3.470596,POINT (3.470596 36.758882),town,"POLYGON ((3.472848252252252 36.758882, 3.47283..."
3,Algeria,253167052,Thenia,36.724986,3.556935,POINT (3.556935 36.724986),town,"POLYGON ((3.559187252252252 36.724986, 3.55917..."
4,Algeria,253167208,Zemmouri,36.786406,3.601221,POINT (3.6012209 36.7864064),town,"POLYGON ((3.603473152252253 36.7864064, 3.6034..."
5,Algeria,253291208,Lakhdaria,36.563944,3.596907,POINT (3.596907 36.5639442),town,"POLYGON ((3.599159252252252 36.5639442, 3.5991..."


In [15]:
# Drop the original point geometry so only the buffer polygons remain
osm_buffer_gdf = osm_buffer_gdf.drop(['old_geom'], axis=1)
osm_buffer_gdf.head(6)

Unnamed: 0,country,osm_id,town,lat,lon,type,geometry
0,Algeria,89369215,Tamanrasset,22.785454,5.532446,town,"POLYGON ((5.534698752252252 22.7854543, 5.5346..."
1,Algeria,89980948,In Salah,27.195033,2.482613,town,"POLYGON ((2.484865452252253 27.1950331, 2.4848..."
2,Algeria,252600742,Boumerdès,36.758882,3.470596,town,"POLYGON ((3.472848252252252 36.758882, 3.47283..."
3,Algeria,253167052,Thenia,36.724986,3.556935,town,"POLYGON ((3.559187252252252 36.724986, 3.55917..."
4,Algeria,253167208,Zemmouri,36.786406,3.601221,town,"POLYGON ((3.603473152252253 36.7864064, 3.6034..."
5,Algeria,253291208,Lakhdaria,36.563944,3.596907,town,"POLYGON ((3.599159252252252 36.5639442, 3.5991..."


In [27]:
# Load the polygon shapefile that the buffers are checked against and report the row count
poly_raster_file = 'AFR_PPP_2015_adj_v2_1500c300.shp'
raster_polygons = gpd.read_file(temp_data+poly_raster_file)
print(len(raster_polygons))

12429


In [28]:
# Preview the first six polygons (columns: FID, geometry)
raster_polygons.head(6)

Unnamed: 0,FID,geometry
0,0,"POLYGON ((18.466249 -34.283749, 18.466249 -34...."
1,1,"POLYGON ((9.857915999999999 37.34125, 9.866249..."
2,2,"POLYGON ((22.432916 -33.967083, 22.441249 -33...."
3,3,"POLYGON ((24.724582 -33.992083, 24.757916 -33...."
4,4,"POLYGON ((20.057916 -33.933749, 20.066249 -33...."
5,5,"POLYGON ((22.599582 -33.992083, 22.641249 -33...."


In [None]:
# For GHS CRS

# {'ellps': 'WGS84',
#  'lon_0': 0,
#  'no_defs': True,
#  'proj': 'moll',
#  'units': 'm',
#  'x_0': 0,
#  'y_0': 0}

In [None]:
# For GHS make FID and reset crs
# FID = list(range(len(raster_polygons)))
# raster_polygons['DN'] = FID
# raster_polygons.columns.values[0] = "FID"

In [None]:
# raster_polygons_crs = raster_polygons.to_crs({'proj':'longlat', 'ellps':'WGS84', 'datum':'WGS84'})

In [None]:
#raster_polygons_crs.head(6)

In [None]:
# test

# osm_sub = osm_buffer_gdf.iloc[0:100]
# osm_sub.head(6)

In [None]:
# poly_sub = raster_polygons.iloc[0:1000]
# poly_sub.head(6)

In [None]:
# subset test

# import time
# checkpoint = time.time()

# test = poly_overlap(osm_sub, poly_sub)

# print("elapsed time is: {}s".format(time.time()-checkpoint))

In [None]:
# len(test)

In [None]:
# test

In [29]:
# run poly_overlap on the full data sets and time it
# NOTE: every buffer is compared with every polygon (8175 x 12429 pairs);
# the recorded run took ~6950 s (just under 2 hours).

import time
checkpoint = time.time()

poly_overlap_out = poly_overlap(osm_buffer_gdf, raster_polygons)

print("elapsed time is: {}s".format(time.time()-checkpoint))

elapsed time is: 6950.059248209s


In [30]:
# Number of buffer/polygon pairs that intersect
print(len(poly_overlap_out))

3154


In [31]:
# Preview the matches: each row is a polygon plus the attributes of a town whose buffer intersects it
poly_overlap_out.head(6)

Unnamed: 0,osm_id,FID,geometry,country,city,osm_type,lat,lon
0,89369215,11918,"POLYGON ((5.499583 22.81625, 5.541249 22.81625...",Algeria,Tamanrasset,town,22.785454,5.532446
1,89980948,10871,"POLYGON ((2.474583 27.224584, 2.482916 27.2245...",Algeria,In Salah,town,27.195033,2.482613
2,252600742,242,"POLYGON ((2.999583 36.824584, 3.016249 36.8245...",Algeria,Boumerdès,town,36.758882,3.470596
3,253167052,242,"POLYGON ((2.999583 36.824584, 3.016249 36.8245...",Algeria,Thenia,town,36.724986,3.556935
4,253167208,242,"POLYGON ((2.999583 36.824584, 3.016249 36.8245...",Algeria,Zemmouri,town,36.786406,3.601221
5,253291208,804,"POLYGON ((3.557916 36.599584, 3.582916 36.5995...",Algeria,Lakhdaria,town,36.563944,3.596907


In [35]:
# Save the matched polygons and their town attributes as a shapefile in temp_data
poly_overlap_out.to_file(temp_data+'AFR_PPP_2015_adj_v2_1500c300_polyoverlap_towns.shp', driver='ESRI Shapefile')

In [34]:
# Read a saved overlap shapefile back in and count its rows.
# NOTE(review): this opens the 2000 file, not the 2015 file produced by the
# to_file cell, and this cell's execution count (34) precedes that cell's (35).
# Confirm which file was meant to be checked.
test_gpd = 'AFR_PPP_2000_adj_v2_1500c300_polyoverlap_towns.shp'
test_open = gpd.read_file(temp_data+test_gpd)
len(test_open)

2494