# Notebook to Clean Data

2019-01-21 (updated 2019-04-30)

Notebook to clean the final OSM cities and towns data:

1. Merge files
2. Drop duplicates


In [1]:
import geopandas as gpd
import pandas as pd
import fiona
import json
from shapely.geometry import Point

In [2]:
import os
# Show the current working directory; relative paths are resolved against it.
os.getcwd()

'/Users/cascade/Github/Pop-ERL/notebooks/jupyter/ERL19'

### Merge OSM Files

In [3]:
def load_points(file, lon_col='lon', lat_col='lat'):
    """Load a CSV of point locations into a GeoDataFrame.

    Parameters
    ----------
    file : str or path-like
        CSV file, read with ``pd.read_csv``.
    lon_col, lat_col : str, optional
        Names of the longitude / latitude columns
        (defaults: ``'lon'`` and ``'lat'``).

    Returns
    -------
    geopandas.GeoDataFrame
        Every CSV column plus a ``geometry`` column of shapely Points built
        as (lon, lat), with the CRS set to WGS84 (EPSG:4326).

    Raises
    ------
    KeyError
        If either coordinate column is missing from the CSV.
    """
    df = pd.read_csv(file)

    # Fail early with a message that names the file and the missing column(s).
    missing = [col for col in (lon_col, lat_col) if col not in df.columns]
    if missing:
        raise KeyError(
            "{}: missing coordinate column(s) {}".format(file, missing)
        )

    # Shapely points are (x, y), i.e. (longitude, latitude).
    geometry = [Point(xy) for xy in zip(df[lon_col], df[lat_col])]

    # Use the authority string rather than {'init': 'epsg:4326'}: the 'init'
    # dict form is deprecated in current pyproj/geopandas releases.
    return gpd.GeoDataFrame(df, crs='EPSG:4326', geometry=geometry)

In [4]:
# Directory with the OSM CSV inputs; the merged shapefile is written here too.
v2_data = '../../../temp_data/ERL19v2/'

In [5]:
# Read each OSM extract (towns file, then cities file) into its own
# GeoDataFrame: Africa-wide first, then the S_Sudan and DRC extracts.
town, city = (
    load_points(v2_data + csv_name)
    for csv_name in ('20190114_osm_africa_towns.csv',
                     '20190114_osm_africa_cities.csv')
)

townSS, citySS = (
    load_points(v2_data + csv_name)
    for csv_name in ('20190221_osm_S_Sudan_towns.csv',
                     '20190221_osm_S_Sudan_cities.csv')
)

townDRC, cityDRC = (
    load_points(v2_data + csv_name)
    for csv_name in ('20190430_osm_DRC_towns.csv',
                     '20190430_osm_DRC_cities.csv')
)

In [6]:
# Tag every row with the OSM place type of the file it came from.
for frame in (town, townDRC, townSS):
    frame['osm_type'] = 'town'

for frame in (city, cityDRC, citySS):
    frame['osm_type'] = 'city'

In [9]:
# Preview the first rows of the DRC cities table to check its columns.
cityDRC.head()

Unnamed: 0,country,osm_id,town,lat,lon,geometry,osm_type
0,Democratic Republic of the Congo,27043346,Kinshasa,-4.321706,15.312597,POINT (15.3125974 -4.3217055),city
1,Democratic Republic of the Congo,27564973,Lubumbashi,-11.664232,27.482626,POINT (27.4826264 -11.6642316),city
2,Democratic Republic of the Congo,27565034,Mbuji-Mayi,-6.125894,23.599811,POINT (23.5998111 -6.1258942),city
3,Democratic Republic of the Congo,29806457,Mbanza-Ngungu,-5.250009,14.866661,POINT (14.8666614 -5.250008599999999),city
4,Democratic Republic of the Congo,29806459,Boma,-5.85,13.05,POINT (13.05 -5.85),city


In [10]:
# Merge all extracts into one table with a common 'osm_name' column.

# cityDRC and citySS are stacked with the town frames because their name
# column is called 'town', not 'city'.
towns = pd.concat([town, townDRC, townSS, cityDRC, citySS], sort=False)

# rename() returns a new frame instead of mutating in place, so re-running
# this cell is safe; `city` is rebound so later cells still see 'osm_name'.
towns = towns.rename(columns={'town': 'osm_name'})
city = city.rename(columns={'city': 'osm_name'})

# Each source frame brings its own 0..n index; ignore_index=True gives the
# merged table unique row labels so label-based lookups return one row.
out = pd.concat([towns, city], sort=False, ignore_index=True)

# NOTE(review): the notebook header lists "drop duplicates" as a step, but
# nothing in this cell de-duplicates rows -- confirm whether that happens
# elsewhere or still needs to be added here.


In [11]:
# Spot-check a slice of the merged table (rows at positions 1000-1099).
out[1000:1100]

Unnamed: 0,country,osm_id,osm_name,lat,lon,geometry,osm_type
1000,Morocco,1885869493,Had Soualem ⵃⴰⴷ ⵙⵡⴰⵍⵎ حـد الـسـوالـم,33.422256,-7.853410,POINT (-7.853409700000001 33.422256),town
1001,Morocco,1886650445,Ifrane Atlas-Saghir ⵉⴼⵔⴰⵏ ⵏ ⴰⵟⵍⴰⵙ ⵎⵥⵥⵉⵏ يفران ...,29.218819,-9.491069,POINT (-9.491069 29.218819),town
1002,Morocco,1888060941,Tarsouat ⵜⴰⵔⵙⵡⴰⵜ تارسوات,29.584446,-9.026641,POINT (-9.026641 29.584446),town
1003,Morocco,1894017189,Sidi Boubker ⵙⵉⴷⵉ ⴱⵓⴱⴽⵔ سيدي بوبكر,34.477366,-1.734306,POINT (-1.7343057 34.4773665),town
1004,Morocco,1898652419,Sebt Guerdane ⵙⴱⵜ ⵍⴳⵔⴷⴰⵏ سبت الكردان,30.376528,-9.020829,POINT (-9.0208285 30.3765279),town
1005,Morocco,1920931046,Mechra Bel Ksiri ⵎⵛⵕⵄ ⴱⵍⵍⵇⵚⵉⵕⵉ مشرع بلقصيري,34.573416,-5.956615,POINT (-5.956615 34.57341599999999),town
1006,Morocco,1929403710,Tichla ⵜⵉⵛⵍⴰ تشلا,21.634795,-14.893889,POINT (-14.8938895 21.6347955),town
1007,Morocco,1931356549,Souira Qdima ⵎⵓⴳⴰⴹⵓⵕ ⵜⴰⵇⴱⵓⵕⵜ الصويرة القديمة,32.040222,-9.338755,POINT (-9.338754699999999 32.0402216),town
1008,Morocco,1932523192,Melga el Ouidane ⵎⵍⴳⴰ ⵍⵡⵉⴷⴰⵏ ملقى الويدان,34.557780,-3.025730,POINT (-3.0257302 34.5577801),town
1009,Morocco,1938288565,Ihaddaden ⵉⵃⴷⴷⴰⴷⵏ إحدادن,35.161815,-2.962132,POINT (-2.9621317 35.1618146),town


In [12]:
# (rows, columns) of the merged table.
out.shape

(9813, 7)

In [13]:
# Write the merged points to a Shapefile. The names include non-Latin text
# (e.g. Tifinagh and Arabic), so set the attribute encoding explicitly
# rather than relying on the driver/locale default.
out.to_file(v2_data + "20190430_osm_All.shp", driver="ESRI Shapefile",
            encoding='utf-8')

### Split 1500c300 Polygons

In [20]:
# Load the 1500c300 polygon shapefile that will be split in two below.

# Directory the polygon shapefile is read from; the two halves are written back here.
temp = '../../../temp_data/ERL19/'

WPE2016 = gpd.read_file(temp+'WPE_1KM_2016_Pop_Clip_1500c300.shp')


In [22]:
# Total number of polygons before splitting.
len(WPE2016)

30494

In [24]:
# Part A of the split: the first 15000 rows (positions 0-14999).
WPE2016_A = WPE2016.iloc[:15000]
len(WPE2016_A)

15000

In [25]:
# Part B of the split: everything from position 15000 to the end.
WPE2016_B = WPE2016.iloc[15000:]
len(WPE2016_B)

15494

In [26]:
# Last rows of part A, to check where the split boundary falls.
WPE2016_A.tail()

Unnamed: 0,FID,geometry
14995,14995,"POLYGON ((8.789949 6.345617, 8.807914999999999..."
14996,14996,"POLYGON ((37.733806 6.462398, 37.751773 6.4623..."
14997,14997,"POLYGON ((4.963107 6.363583, 4.97209 6.363583,..."
14998,14998,"POLYGON ((-5.726896 6.471381, -5.717913 6.4713..."
14999,14999,"POLYGON ((-6.589283 6.480365, -6.589283 6.4713..."


In [27]:
# First rows of part B, to check it starts right after the end of part A.
WPE2016_B.head()

Unnamed: 0,FID,geometry
15000,15000,"POLYGON ((-5.969442 6.471381, -5.942493 6.4713..."
15001,15001,"POLYGON ((8.825882 6.471381, 8.834865000000001..."
15002,15002,"POLYGON ((-2.259382 6.480365, -2.250399 6.4803..."
15003,15003,"POLYGON ((11.538807 3.533876, 11.556773 3.5338..."
15004,15004,"POLYGON ((14.871572 27.285446, 14.889539 27.28..."


In [28]:
# Save both halves next to the source shapefile, suffixed _A and _B.
split_stem = temp + 'WPE_1KM_2016_Pop_Clip_1500c300'
WPE2016_A.to_file(split_stem + '_A.shp')
WPE2016_B.to_file(split_stem + '_B.shp')

  with fiona.drivers():


In [31]:
# Display the full (unsplit) polygon table.
WPE2016

Unnamed: 0,FID,geometry
0,0,"POLYGON ((19.228423 -34.42911, 19.228423 -34.4..."
1,1,"POLYGON ((9.858949000000001 37.337643, 9.86793..."
2,2,"POLYGON ((18.878078 -34.159614, 18.878078 -34...."
3,3,"POLYGON ((19.255372 -34.40216, 19.300288 -34.4..."
4,4,"POLYGON ((18.392985 -34.150631, 18.410952 -34...."
5,5,"POLYGON ((19.435036 -34.23148, 19.435036 -34.2..."
6,6,"POLYGON ((25.588525 -33.997917, 25.588525 -34...."
7,7,"POLYGON ((18.842145 -33.997917, 18.851128 -33...."
8,8,"POLYGON ((9.742167999999999 37.283744, 9.75115..."
9,9,"POLYGON ((22.399491 -33.935034, 22.417457 -33...."
