In [None]:
import json
import os

import ee
import h3
import numpy as np
import overpy
import requests
import shapely.geometry

# Initialise the Earth Engine client once, before any ee.* call is made below.
# NOTE(review): presumably relies on credentials stored by an earlier
# `ee.Authenticate()` run on this machine -- confirm for fresh environments.
ee.Initialize()

In [None]:
class Metadata(object):
    """Collect descriptive metadata for one site into a flat dictionary.

    Every ``get_*`` / ``generate_*`` method looks up one property of the
    location and stores it in ``self.metadata`` under a human-readable key.
    Lookups that cannot produce a value store the sentinel ``-1`` instead.

    Attributes:
        coords: ``[lon, lat]`` of the site, in degrees.
        metadata: dict of the values collected so far.
    """

    # Approximate length of one degree of latitude, in kilometres.
    _KM_PER_DEGREE = 111.1

    # Class ids of the ALOS landforms image mapped to their descriptions.
    _LANDFORM_DESCRIPTIONS = {
        11: 'Peak/ridge (warm)',
        12: 'Peak/ridge',
        13: 'Peak/ridge (cool)',
        14: 'Mountain/divide',
        15: 'Cliff',
        21: 'Upper slope (warm)',
        22: 'Upper slope',
        23: 'Upper slope (cool)',
        24: 'Upper slope (flat)',
        31: 'Lower slope (warm)',
        32: 'Lower slope',
        33: 'Lower slope (cool)',
        34: 'Lower slope (flat)',
        41: 'Valley',
        42: 'Valley (narrow)',
    }

    # Flow-direction codes of the MERIT Hydro 'dir' band.
    _DRAINAGE_DIRECTIONS = {
        1: 'east',
        2: 'southeast',
        4: 'south',
        8: 'southwest',
        16: 'west',
        32: 'northwest',
        64: 'north',
        128: 'northeast',
        0: 'river mouth',
        -1: 'inland depression',
    }

    def __init__(self, coords):
        self.coords = coords
        self.metadata = {}

    def get_population(self, radius, year=2020):
        """
        Get total population within a circular buffer centered on the site.
        inputs:
            - radius: buffer radius in kilometers
            - year: optional year between 2000 and 2020
        returns: population as an int, or -1 if the response cannot be parsed.
                 The same value is stored under 'Population - {radius} km'.
        """
        key = f'Population - {radius} km'

        dataset = ee.ImageCollection('WorldPop/GP/100m/pop')
        dataset = dataset.filter(ee.Filter.equals('year', year))
        dataset = dataset.mosaic().select('population')
        region = ee.Geometry.Point(self.coords).buffer(radius * 1000)
        response = dataset.reduceRegions(region, ee.Reducer.sum(), scale=100).getInfo()
        try:
            population = int(response['features'][0]['properties']['sum'])
        except (KeyError, IndexError, TypeError, ValueError):
            # Missing feature / missing or null 'sum': record the sentinel.
            population = -1
        self.metadata[key] = population
        return population

    def get_waterways(self, plot=False):
        """
        Query OSM to get the waterway nearest to the site.
        inputs:
            - plot: optional arg to plot the waterways and the site
        returns:
            - distance: approximate distance to the nearest waterway in meters;
                        if no waterway is within 5000m, it will return -1.
        The same value is stored under 'Distance to Waterway (m)'.
        """
        (lon, lat) = self.coords
        api = overpy.Overpass(max_retry_count=10, retry_timeout=30)
        result = api.query(f"""
                            (
                                way
                                (around:5000,{lat},{lon})
                                [natural=water];
                                >;
                                way
                                (around:5000,{lat},{lon})
                                [waterway];
                                >;
                                way
                                (around:5000,{lat},{lon})
                                [natural=coastline];
                                >;
                                way
                                (around:5000,{lat},{lon})
                                [landuse=reservoir];
                                >;
                                );out;

                        """)

        # Distances are measured in a local plane centred on the site: one
        # degree of longitude is shorter than one degree of latitude by
        # cos(lat), so each axis is scaled to meters separately before the
        # planar distance is taken.
        meters_per_deg_lat = self._KM_PER_DEGREE * 1000
        meters_per_deg_lon = meters_per_deg_lat * float(np.cos(np.radians(lat)))
        origin = shapely.geometry.Point(0.0, 0.0)

        site_distances = []
        if plot:
            # Imported lazily so matplotlib is only needed when plotting.
            import matplotlib.pyplot as plt
            plt.figure(figsize=(6,4), dpi=150)
        for way in result.ways:
            river_coords = [[float(node.lon), float(node.lat)] for node in way.nodes]
            if not river_coords:
                continue
            local_xy = [
                [(x - lon) * meters_per_deg_lon, (y - lat) * meters_per_deg_lat]
                for x, y in river_coords
            ]
            # A LineString needs at least two vertices; a one-node way is
            # measured as a point instead of raising.
            if len(local_xy) == 1:
                shape = shapely.geometry.Point(local_xy[0])
            else:
                shape = shapely.geometry.LineString(local_xy)
            site_distances.append(origin.distance(shape))
            if plot:
                lonlat = np.array(river_coords)
                plt.plot(lonlat[:, 0], lonlat[:, 1])
        if plot:
            plt.scatter(lon, lat, c='r')
            plt.axis('equal')
            plt.show()
        if len(site_distances) > 0:
            distance = float(min(site_distances))
        else:
            print(f"No waterways found within 5 km for site at [{lon:.3f},{lat:.3f}]")
            distance = -1

        self.metadata[f'Distance to Waterway (m)'] = distance
        return distance

    def sample_gee_data(self, dataset_name):
        """
        Sample an Earth Engine image at the site.
        inputs:
            - dataset_name: Earth Engine image asset id
        returns: the properties dict of the first sampled feature,
                 or -1 if the response holds no such feature.
        """
        dataset = ee.Image(dataset_name)
        response = dataset.sampleRegions(ee.Geometry.Point(self.coords), geometries=True).getInfo()
        try:
            return response['features'][0]['properties']
        except (KeyError, IndexError, TypeError):
            return -1

    def _store_sampled_band(self, key, dataset_name, band, transform=None):
        """Sample `band` of `dataset_name` at the site and store it under `key`.

        `transform`, when given, is applied to the sampled value before it is
        stored. -1 is stored when no sample or no such band is available.
        """
        response = self.sample_gee_data(dataset_name)
        value = -1
        if response != -1 and band in response:
            value = response[band]
            if transform is not None:
                value = transform(value)
        self.metadata[key] = value

    def get_landform(self):
        """Store the landform description under 'Landform Type' (-1 if unknown)."""
        self._store_sampled_band(
            'Landform Type',
            "CSP/ERGo/1_0/Global/ALOS_landforms",
            'constant',
            # An id outside the table maps to the sentinel instead of raising.
            lambda landform_id: self._LANDFORM_DESCRIPTIONS.get(landform_id, -1),
        )

    def get_soil_bulk(self):
        """Store the sampled 'b0' band under 'Fine Earth Density (kg / m^3)'."""
        self._store_sampled_band(
            'Fine Earth Density (kg / m^3)',
            "OpenLandMap/SOL/SOL_BULKDENS-FINEEARTH_USDA-4A1H_M/v02",
            'b0',
        )

    def get_clay_content(self):
        """Store the sampled 'b0' band divided by 100 under 'Soil Clay Fraction'."""
        self._store_sampled_band(
            'Soil Clay Fraction',
            "OpenLandMap/SOL/SOL_CLAY-WFRACTION_USDA-3A1A1A_M/v02",
            'b0',
            lambda percent: percent / 100,
        )

    def get_sand_content(self):
        """Store the sampled 'b0' band divided by 100 under 'Soil Sand Fraction'."""
        self._store_sampled_band(
            'Soil Sand Fraction',
            "OpenLandMap/SOL/SOL_SAND-WFRACTION_USDA-3A1A1A_M/v02",
            'b0',
            lambda percent: percent / 100,
        )

    def get_soil_group(self):
        """Store the sampled 'grtgroup' band under 'Soil Great Group'."""
        self._store_sampled_band(
            'Soil Great Group',
            "OpenLandMap/SOL/SOL_GRTGROUP_USDA-SOILTAX-HAPLUDALFS_P/v01",
            'grtgroup',
        )

    def get_elevation(self):
        """Store the sampled 'elevation' band under 'Elevation'."""
        self._store_sampled_band('Elevation', "CGIAR/SRTM90_V4", 'elevation')

    def get_hydro_data(self):
        """
        Store drainage direction, upstream drainage area and height above
        nearest drainage for the site, read from the MERIT Hydro image.
        All three keys are set to -1 if the lookup fails for any reason.
        """
        keys = (
            'Drainage Direction',
            'Upstream Drainage Area (km^2)',
            'Height Above Nearest Drainage (m)',
        )
        dataset = ee.Image("MERIT/Hydro/v1_0_1")
        try:
            # Use the instance's own coordinates, not a notebook-level global.
            point = ee.Geometry.Point(self.coords)
            response = dataset.reduceRegions(point, ee.Reducer.mean()).getInfo()
            properties = response['features'][0]['properties']
            values = (
                self._DRAINAGE_DIRECTIONS[properties['dir']],
                properties['upa'],
                properties['hnd'],
            )
        except Exception:
            # Best effort: the request itself is inside the guard, so request
            # failures and malformed responses both fall back to the sentinel.
            values = (-1, -1, -1)
        self.metadata.update(zip(keys, values))

    def get_slope(self):
        """Store the terrain slope at the site under 'Slope (degrees)' (-1 if unavailable)."""
        dataset = ee.Terrain.slope(ee.Image("CGIAR/SRTM90_V4"))
        # Use the instance's own coordinates, not a notebook-level global.
        point = ee.Geometry.Point(self.coords)
        result = dataset.reduceRegions(point, ee.Reducer.max()).getInfo()
        try:
            slope = result['features'][0]['properties']['max']
        except (KeyError, IndexError, TypeError):
            slope = -1
        self.metadata['Slope (degrees)'] = slope

    def generate_id(self, zoom=15):
        """Store the H3 cell index of the site under 'id'.

        inputs:
            - zoom: value passed to h3 as the cell resolution
        """
        self.metadata['id'] = h3.geo_to_h3(self.coords[1], self.coords[0], zoom)

    def get_place_name(self):
        """
        Reverse-geocode the site and store the result under 'place_name'.
        -1 is stored when the request fails, the status is not 200, or the
        response carries no 'display_name'.
        """
        request_url = 'https://nominatim.geocoding.ai/reverse.php'
        params = {
            'lat': self.coords[1],
            'lon': self.coords[0],
            'zoom': 18,
            'format': 'jsonv2',
            'accept-language': 'en',
        }
        place_name = -1
        try:
            # A timeout keeps one unresponsive lookup from stalling a batch run.
            response = requests.get(request_url, params=params, timeout=30)
            if response.status_code == 200:
                place_name = response.json().get('display_name', -1)
        except (requests.RequestException, ValueError, AttributeError):
            # Network failure, non-JSON body or non-object JSON: keep the sentinel.
            place_name = -1
        self.metadata['place_name'] = place_name

    def generate_all(self):
        """Run every lookup, filling self.metadata (population at 1, 5 and 10 km)."""
        self.generate_id()
        self.get_clay_content()
        self.get_elevation()
        self.get_hydro_data()
        self.get_landform()
        self.get_sand_content()
        self.get_slope()
        self.get_soil_bulk()
        self.get_soil_group()
        self.get_waterways()
        self.get_place_name()
        for distance in [1, 5, 10]:
            self.get_population(distance)


In [None]:
# Single-site example: collect the metadata for one location and pretty-print it.
coords = [114.20615010232092, 4.567702965181374]  # [lon, lat] order, as Metadata unpacks it
metadata = Metadata(coords)
metadata.generate_all()  # runs every getter; results accumulate in metadata.metadata

# sort_keys=False keeps the keys in the order the getters inserted them.
print(json.dumps(metadata.metadata, sort_keys=False, indent=4))

# Generate metadata for a list of sites

In [None]:
import geopandas as gpd
from tqdm.notebook import tqdm
# Load the compiled sampling sites; the cells below read the point geometry
# (.x / .y) of every row in the 'geometry' column.
data = gpd.read_file('../data/sampling_locations/compiled_sites.geojson')
data.head()  # preview the first rows

In [None]:
# Build one metadata dict per site, in the same order as data['geometry'],
# so the results can later be paired with the geometries positionally.
all_metadata = []
for site in tqdm(data['geometry']):
    # NOTE(review): this rebinds the notebook-level `coords` and `metadata`
    # names on every iteration; after the loop they refer to the last site.
    coords = [site.x, site.y]  # [lon, lat]
    metadata = Metadata(coords)
    metadata.generate_all()
    all_metadata.append(metadata.metadata)

In [None]:
# zip() stops at the shorter input, so an interrupted metadata run would
# silently drop sites from the export; fail loudly instead.
assert len(all_metadata) == len(data), (
    f"expected {len(data)} metadata records, got {len(all_metadata)}"
)

# One GeoJSON Feature per site. The comprehension keeps its loop names local,
# so the notebook-level `coords` ([lon, lat] list) is not rebound to a
# shapely Point as a side effect of running this cell.
features = [
    {
        "id": site_info['id'],
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [
                site_point.x,
                site_point.y
            ]
        },
        "properties": site_info
    }
    for site_point, site_info in zip(data['geometry'], all_metadata)
]

feature_collection = {
    "type": "FeatureCollection",
    "features": features
}

import json
# Serialise the collection first, then write the text out in one call.
geojson_text = json.dumps(feature_collection, indent=4)
with open('../data/site_metadata/compiled_sites_metadata.geojson', 'w') as geojson_file:
    geojson_file.write(geojson_text)

In [None]:
import requests
# Upload the feature collection to the sites API.
endpoint = 'https://api.dev.plastic.watch.earthrise.media/sites'

# Credentials are read from the environment so they are never stored in the
# notebook or its history.
# NOTE(review): the basic-auth pair that used to be hard-coded here should be
# treated as exposed and rotated.
try:
    api_user = os.environ['PLASTIC_WATCH_API_USER']
    api_password = os.environ['PLASTIC_WATCH_API_PASSWORD']
except KeyError as missing:
    raise RuntimeError(
        f"Set the {missing.args[0]} environment variable before uploading sites"
    ) from missing

auth = requests.auth.HTTPBasicAuth(api_user, api_password)
response = requests.post(endpoint, json=feature_collection, timeout=100, auth=auth)
print(response.status_code, response.text)