In [1]:
# common
import os
BASE_DIR = os.path.dirname(os.path.realpath('__file__'))

from datetime import datetime
from io import BytesIO
from pathlib import Path

# network & protocol
import requests
from requests.exceptions import BaseHTTPError

# computational
import numpy as np
import pandas as pd

In [2]:
start_date = '2012-01-01'
end_date = '2019-12-31'
stations = [
    {
        'name': 'Neak_Luong',
        'lat': 11.26304,
        'lon': 105.28012,
    },
    {
        'name': 'Stung_Treng',
        'lat': 13.5325,
        'lon': 105.95019,
    },
    {
        'name': 'Chaktomuk',
        'lat': 11.56299,
        'lon': 104.93529,
    },
    {
        'name': 'Kompong_Kdei',
        'lat': 13.12889,
        'lon': 105.33509,
    },
    {
        'name': 'Prek_Kdam',
        'lat': 11.81117,
        'lon': 104.80678,
    },
    {
        'name': 'Kg._Thmar',
        'lat': 12.50293,
        'lon': 105.12712,
    },
    {
        'name': 'Koh_Khel',
        'lat': 11.24204,
        'lon': 105.03616,
    },
    {
        'name': 'Lumphat',
        'lat': 13.50088,
        'lon': 106.97115,
    },
    {
        'name': 'Kompong_Cham',
        'lat': 11.91099,
        'lon': 105.3841,
    },
    {
        'name': 'Phnom_Penh_Port',
        'lat': 11.57702,
        'lon': 105.919038,
    },
    {
        'name': 'Voeun_Sai',
        'lat': 13.96858,
        'lon': 106.88483,
    },
    {
        'name': 'Kratie',
        'lat': 12.48141,
        'lon': 106.01762,
    },
    {
        'name': 'Kompong_Chen',
        'lat': 12.9389,
        'lon': 105.57906,
    },
    {
        'name': 'Kompong_Cham',
        'lat': 11.91099,
        'lon': 105.3841,
    },
    {
        'name': 'Kompong_Thom',
        'lat': 12.71483,
        'lon': 104.88792,
    },
    {
        'name': 'Siempang',
        'lat': 14.11514,
        'lon': 106.38795,
    },
    {
        'name': 'Battambang',
        'lat': 13.092,
        'lon': 103.20028,
    },
    {
        'name': 'Sisophon',
        'lat': 13.58665,
        'lon': 102.97661,
    },
]

In [3]:
def is_ed_greater_than_sd(start_date, end_date):
    """
    Checks whether the end date is greater than the start date.

    Given the start date and end date in the format 'YYYY-mm-dd', this function
    returns boolean telling if the end date is greater than the start date.

    Parameters
    ----------
    start_date : str
        The string representation of the start date in the form 'YYY-mm-dd'.
    end_date : str
        The string representation of the end date in the form 'YYY-mm-dd'.
        The end_date must be greater than the start date.
    Returns
    -------
    boolean True if end_date is greater than the start_date else boolean False
    Examples
    --------
    >>> is_greater_than = is_ed_greater_than_sd('2010-01-23', '2010-10-23')
    >>> is_greater_than
        False
    >>> is_greater_than = is_ed_greater_than_sd('2010-01-23', '2010-10-26')
    >>> is_greater_than
        True
    """

    try:
        # convert string date to date object
        sd = datetime.strptime(start_date, '%Y-%m-%d')
        ed = datetime.strptime(end_date, '%Y-%m-%d')
    except Exception as e:
        print('Error occured: ' + str(e))
        print(traceback.format_exc())
        return False

    # check if the end date is greater than the start date
    if ed > sd:
        return True

    return False

In [4]:
def get_spt_prediction(lat, lon, start_date, end_date):
    """
    Gets the Prediction Data from the SPT REST API.

    Given the latitude, longitude, start date and end date, this function uses
    the REST API of the SPT to retrieve the prediction data for the location
    and filters them temporally.

    Learn more about SPT mission here:
    <https://www.arcgis.com/apps/Cascade/index.html?appid=414730116a3c4c119b80ec9d1727ab74>
    The SPT documentation can be found here:
    <https://geoglows.ecmwf.int/documentation>
    Parameters
    ----------
    latitude : float
        The latitude of the streamflow to get the prediction data for.
    longitude : float
        The latitude of the streamflow to get the prediction data for.
    start_date : str
        The string representation of the start date in the form 'YYY-mm-dd'.
    end_date : str
        The string representation of the end date in the form 'YYY-mm-dd'.
        The end_date must be greater than the start date.
    Returns
    -------
    pandas.Series class containing the prediction datasets as float with the
    datetime as the index of the Series.
    dataset.
    Examples
    --------
    >>> predictions = get_spt_prediction(40.13877778, -105.0202222, '2005-01-01', '2010-01-01')
    >>> predictions = get_spt_prediction(lat=40.13877778,
                                         lon=-105.0202222,
                                         start_date='2005-01-01',
                                         end_date='2010-01-01')
    """

    # check if the end date is greater than start date
    if not is_ed_greater_than_sd(start_date, end_date):
        # if not raise user input error with ValueError
        raise ValueError(
            """Either check the format of the dates string.
            It should be 'YYYY-mm-dd' format.
            Or the end_date must be greater than the start_date."""
        )

    # rest api url
    rest_url = 'https://geoglows.ecmwf.int/api/HistoricSimulation/'

    # define the request parameters
    request_params = dict(lat=lat, lon=lon, return_format='csv')

    # send request
    response = requests.get(rest_url, params=request_params)

    # if the request is successful
    if response.status_code == 200:
        # convert the response into pandas dataframe
        prediction = pd.read_csv(BytesIO(response.content))
    else:
        raise BaseHTTPError('Error: Server Status Code {}'.format(response.status_code))

    prediction.rename(columns={'streamflow_m^3/s': 'discharge'}, inplace=True)
    prediction.datetime = pd.to_datetime(prediction.datetime)
    prediction = prediction[(prediction.datetime >= start_date) & (prediction.datetime <= end_date)]
    prediction = prediction.groupby(prediction.datetime.dt.date).mean()
    prediction[prediction.discharge < 0] = np.nan
    prediction = prediction.dropna()
    prediction.sort_values(by=['datetime'], ascending=True, inplace=True)

    return prediction

In [6]:
Path(f'{BASE_DIR}/spt_predictions/').mkdir(parents=True, exist_ok=True)
for station in stations:
    print('------------------------')
    name = station['name']
    print(f'Station: {name}')
    prediction = get_spt_prediction(station['lat'], station['lon'], start_date, end_date)
    prediction.to_csv(f'{BASE_DIR}/spt_predictions/{name}.csv', encoding='utf-8', index=True)

------------------------
Station: Neak_Luong
------------------------
Station: Stung_Treng
------------------------
Station: Chaktomuk
------------------------
Station: Kompong_Kdei
------------------------
Station: Prek_Kdam
------------------------
Station: Kg._Thmar
------------------------
Station: Koh_Khel
------------------------
Station: Lumphat
------------------------
Station: Kompong_Cham
------------------------
Station: Phnom_Penh_Port
------------------------
Station: Voeun_Sai
------------------------
Station: Kratie
------------------------
Station: Kompong_Chen
------------------------
Station: Kompong_Cham
------------------------
Station: Kompong_Thom
------------------------
Station: Siempang
------------------------
Station: Battambang
------------------------
Station: Sisophon
