# OSM Raster PolyPoints

This notebook contains analysis and visualizations for:

1. Loading polygons from masked raster 
2. Loading in points from OSM
3. Associating points with polygons

Updated: 2018-11-21

2018-11-30: Ran on WP1000c600 for 2000 & 2015 using 872 OSM cities with IDs and exported .shp polygons as
20181130_africa1k_20**XX**_mask_1000c600_polypoints.shp. 739 foot prints for 2015 & 705 for 2000

### NOTE on 2018-11-30 need to re run AFTER making OSM points into circles to save Lusaka
### NOTE on 2018-11-30 DROP polygon doubles at the VERY END

### NEED TO FIND AWAY TO ASSOCIATE RASTER PIXELS AND POINTS WITH COUNTRIES Before we run giant for loop - show kelly problems in QGIS

1. Can likely clip points by polygon geometry and chunk
https://www.earthdatascience.org/courses/earth-analytics-python/spatial-data-vector-shapefiles/clip-vector-data-in-python-geopandas-shapely/

2. Can likely clip polygons by countries

https://gis.stackexchange.com/questions/168266/pyqgis-a-geometry-intersectsb-geometry-wouldnt-find-any-intersections

#### updated 2018-11-21 Loop isn't that big and a good Africa basemap has a ton of polygons ... better to chunk later

In [None]:
# Load africa countries -- 762 polygons because of islands 
# Africa_poly = gpd.read_file(outfilepath+"Africa_polys_test.shp")
# len(Africa_poly)

# Dependencies

In [29]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry import mapping
from shapely.geometry import Polygon
from shapely.geometry import shape
import ast
from shapely.geometry import mapping
import rasterio

# Load in OSM and Polygons from csv

In [2]:
# will build out folders later

# data folder git will ignore
infilepath = "/home/cascade/tana-crunch-cascade/projects/NTL/data/" # git will ignore
outfilepath = "/home/cascade/tana-crunch-cascade/projects/NTL/temp_data/" # git will not ignore - NO BIG FILES 

# Local computer 
# infilepath = '/Users/cascade/Github/NTL/data/raw/worldpop/Africa-1km-Population/'
# outfilepath = '/Users/cascade/Github/NTL/temp_data/'

In [3]:
def load_points (file):
    """ This function loads a csv of points and turns it into shapely points"""
    df = pd.read_csv(file)

    # creating a geometry column 
    geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]

    # Coordinate reference system : WGS84
    crs = {'init': 'epsg:4326'}

    # Creating a Geographic data frame 
    point_gdf = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
    
    return point_gdf

In [4]:
# Load OSM Points
osm_point_gdf = load_points(outfilepath+'20181129_osm_africa_cities.csv')

In [63]:
WP2015_poly = gpd.read_file(outfilepath+'20181130_africa1k_2015_mask_1000c600_poly.shp')

In [61]:
print(len(osm_point_gdf))
print(len(WP2000_poly))

872
19482


# Function for searching if a point is within a polygon

In [18]:
def poly_point (poly, point):
    """
    This function will check if points are inside polygons if given two gpd dataframes with points and polygons
    Returns point ids, point geometry, polygon index # and polygon geometry
    """
    
    out_arr = [] #return an array <<< ---------------- ASK RYAN IF BETTER DO USE DICT 
    
    for index_point, row_point in point.iterrows():
        for index_poly, row_poly in poly.iterrows():
            if row_point['geometry'].within(row_poly['geometry']):
                point_id = row_point['id']
                point_geom = mapping(row_point['geometry']) # makes a dict w/ keys : type and cood
                poly_id = index_poly
                poly_geom = mapping(row_poly['geometry']) # makes a dict w/ keys : type and cood
                
                out_arr.append((point_id, 
                                point_geom, 
                                poly_id, 
                                poly_geom))

    return out_arr

# Note 2018-11-30 update arr to gpd_df 

In [64]:
import time

checkpoint = time.time()

WP2015_polypoints = poly_point(WP2015_poly, osm_point_gdf)

print("elapsed time is: {}s".format(time.time()-checkpoint))

elapsed time is: 1120.166103363037s


In [71]:
WP2015_polypoints_df = gpd.GeoDataFrame(WP2015_polypoints)
WP2015_polypoints_df.head(6)
print(len(WP2000_polypoints_df))

705


### Note polypoints on WP2015 picked up more OSM city points than 2000. Ask Kelly and check

In [67]:
WP2015_polypoints_df.columns = ['OSM_ID','OSM_point','FID','poly_geometry']
WP2015_polypoints_df.head(6)

Unnamed: 0,OSM_ID,OSM_point,FID,poly_geometry
0,27564946,"{'type': 'Point', 'coordinates': (6.6082600000...",1145,"{'type': 'Polygon', 'coordinates': (((6.624583..."
1,27565103,"{'type': 'Point', 'coordinates': (-0.649297599...",2325,"{'type': 'Polygon', 'coordinates': (((-0.80875..."
2,253292160,"{'type': 'Point', 'coordinates': (4.0493919, 3...",421,"{'type': 'Polygon', 'coordinates': (((3.966249..."
3,262963217,"{'type': 'Point', 'coordinates': (6.8603492, 3...",4622,"{'type': 'Polygon', 'coordinates': (((6.816249..."
4,262964638,"{'type': 'Point', 'coordinates': (6.066102, 33...",4724,"{'type': 'Polygon', 'coordinates': (((6.066249..."
5,264573224,"{'type': 'Point', 'coordinates': (2.8808616000...",4282,"{'type': 'Polygon', 'coordinates': (((2.849583..."


In [55]:
# from shapely.geometry import shape

def arr_gpd(gpd_df, incolname, newcolname):
    """Function takes a geopandas dataframe with dicts and returns proper geometry to make shapefiles"""
    arr = []

    for i in gpd_df[incolname]:
        i = shape(i)
        arr.append((i))

    # for poly in polypoints_2000_df.iloc[:,6]:
    #     poly = shape(ast.literal_eval(poly))
    #     test.append = (poly)

    #polypoints_2020_df['poly_geom'] = polypoints_2020_df['poly_geom'].apply(ast.literal_eval())
    gpd_df[newcolname] = arr
    
    return gpd_df

In [68]:
WP2015_polypoints_df_new = arr_gpd(WP2015_polypoints_df, 'poly_geometry','geometry')

In [69]:
WP2015_polypoints_df_new.head(6)

Unnamed: 0,OSM_ID,OSM_point,FID,poly_geometry,geometry
0,27564946,"{'type': 'Point', 'coordinates': (6.6082600000...",1145,"{'type': 'Polygon', 'coordinates': (((6.624583...","POLYGON ((6.624583 36.474584, 6.641249 36.4745..."
1,27565103,"{'type': 'Point', 'coordinates': (-0.649297599...",2325,"{'type': 'Polygon', 'coordinates': (((-0.80875...","POLYGON ((-0.808751 35.774584, -0.792084 35.77..."
2,253292160,"{'type': 'Point', 'coordinates': (4.0493919, 3...",421,"{'type': 'Polygon', 'coordinates': (((3.966249...","POLYGON ((3.966249 36.76625, 3.991249 36.76625..."
3,262963217,"{'type': 'Point', 'coordinates': (6.8603492, 3...",4622,"{'type': 'Polygon', 'coordinates': (((6.816249...","POLYGON ((6.816249 33.424584, 6.824583 33.4245..."
4,262964638,"{'type': 'Point', 'coordinates': (6.066102, 33...",4724,"{'type': 'Polygon', 'coordinates': (((6.066249...","POLYGON ((6.066249 33.157917, 6.082916 33.1579..."
5,264573224,"{'type': 'Point', 'coordinates': (2.8808616000...",4282,"{'type': 'Polygon', 'coordinates': (((2.849583...","POLYGON ((2.849583 33.86625, 2.866249 33.86625..."


In [70]:
WP2015_polypoints_df.to_file(outfilepath+'20181130_africa1k_2015_mask_1000c600_polypoints.shp', driver='ESRI Shapefile')


# Old Code

In [None]:
# building a function to check if points are in poly for lists of poly and points
# needs geopandas data frame with point and poly geometry 

# def poly_point (poly, point):
#     """
#     This function will check if points are inside polygons if given two gpds with points and polygons
#     Returns city names or no list 
#     """
    
#     out_arr = [] #return an array <<< ---------------- ASK RYAN IF BETTER DO USE DICT 
    
#     for index_point, row_point in point.iterrows():
#         for index_poly, row_poly in poly.iterrows():
#             if row_point['geometry'].within(row_poly['geometry']):
#                 country = row_point['Country']
#                 city = row_point['City']
#                 point_id = row_point['Id']
#                 point_geom = mapping(row_point['geometry']) # makes a dict w/ keys : type and cood
#                 poly_id = index_poly
#                 poly_geom = mapping(row_poly['geometry']) # makes a dict w/ keys : type and cood
                
#                 out_arr.append((country, 
#                                 city, 
#                                 point_id, 
#                                 point_geom, 
#                                 poly_id, 
#                                 poly_geom))
# #             else:
# #                 test.append('no')
#     return out_arr