## Data pre-processing

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import geopandas as gpd
import rasterio
from shapely.geometry import Point

### Health facilities
We obtained information on health facilities (location, name, level, etc.) from [GeoMINSA](http://www.geominsa.minsa.gob.pe:8080/geominsa/) in Excel format. Here, we read the data and create a shapefile of the health facilities.

In [2]:
# Load the health-facility table exported from GeoMINSA.
# Rows containing any missing value are dropped.
filn_in = './data/health_facility_MINSA.xls'
df = pd.read_excel(filn_in, header=0).dropna()

# Shapely/GeoPandas geometries are (x, y) = (longitude, latitude), so the
# longitude column must become 'x' and the latitude column 'y'.
# NOTE(review): assumes the 'Latitud'/'Longitud' columns in the Excel file are
# labelled correctly -- confirm against the data.
df = df.rename(columns={'Distrito': 'district',
                        'Longitud': 'x',
                        'Latitud': 'y'})

# GeoDataFrame needs a shapely geometry column: one Point(lon, lat) per facility
df['Coordinates'] = [Point(lon, lat) for lon, lat in zip(df.x, df.y)]
crs = {'init': 'epsg:4326'}  # geographic coordinates (WGS84)
gdf = gpd.GeoDataFrame(df, crs=crs, geometry='Coordinates')

# TODO: Exclude facilities with wrong geolocations (not implemented yet)

# Write ESRI shapefile (disabled by default; set the flag to True to export)
write_shapefile = False
if write_shapefile:
    filn_out = 'data/health_facility_MINSA.shp'
    gdf.to_file(filn_out, encoding='utf-8')
    print('%s is saved.' % filn_out)

### DEM

### Land Cover
Here we clip the [land cover map from the European Space Agency (ESA)'s Climate Change Initiative (CCI)](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover?tab=overview) to the Peruvian national boundary.

In [26]:
# Source product: the global land-cover NetCDF file
# C3S-LC-L4-LCCS-Map-300m-P1Y-2018-v2.1.1.nc (variable "lccs_class").
# The global layer is manually clipped using QGIS to the Peruvian national
# boundary, producing the GeoTIFF opened below.
filn_out = './data/lcss_class_peru.tif'
# Open the clipped raster inside a context manager so its file handle is
# released; previously it was leaked when `data` was rebound further down.
# Opening it here only checks that the file exists and is readable.
with rasterio.open(filn_out) as data:
    pass

# Resampling of land cover (upscaling from 300m to 1km) to be aligned with the DEM data.
# This is done manually by QGIS > Raster > Align Rasters.
filn_out = './data/lcss_class_peru_aligned.tif'
# Left open on purpose: `data` stays available to whatever runs after this cell.
data = rasterio.open(filn_out)

### Roads

In [None]:
# TODO: placeholder -- presumably the path of the roads dataset; not set yet.
filn_in = ''

### Rivers and Waterways
