In [1]:
# common
import os
# Base directory for the data files read and written by this notebook.
# NOTE: '__file__' is deliberately a quoted string literal here, so abspath()
# resolves it against the current working directory; the two dirname() calls
# then yield the PARENT of the working directory (presumably the project
# root - confirm against where the notebook is launched from).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath('__file__')))

from datetime import datetime, timedelta
from io import BytesIO
from pathlib import Path
import json

# network & protocol
import requests
from requests.exceptions import BaseHTTPError

# computational
import numpy as np
import pandas as pd


In [2]:
# stations with no discharge to calculate rating curves
# Kg. Chhnang, Kompong Luong, 

# Time-of-day suffix appended to each station's 'start_date' / 'end_date'
# when the request parameters are built.
date_tail = 'T00:00:00.000Z'

# One entry per station. Keys:
#   name          station name, with '_' in place of spaces
#   discharge_id  time-series id, or a list of ids, for discharge
#   stage_id      time-series id, or a list of ids, for stage
#   start_date    first day of the requested period (YYYY-MM-DD)
#   end_date      last day of the requested period (YYYY-MM-DD)
# Where an id is a list, the order noted on Stung_Treng presumably applies to
# the other stations too (TODO confirm).
stations = [
    {
        'name': 'Neak_Luong',
        'discharge_id': '661d84572a544af4b7f28d9dc685d4a8',
        'stage_id': '0c55bc1fb44e4caf881226bbf8a6e62a',
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Stung_Treng',
        # [ calculated, measured ]
        'discharge_id': ['2066a06f57764afca57a6be0f3eff8a7', 'dcd4b92a9d9341859f5f1658836166f1'],
        # [ manual, telemetry ]
        'stage_id': ['000e84564d1b425ebcc2c2292ed14061', '0a723f8802e44a6bb12c08dcf1816a83'],
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Chaktomuk',
        'discharge_id': '983ed55df7e04410b23fc6d72eb8f235',
        'stage_id': ['7f84c4d851a543d1a5eccc53947e0d3c', '2c070f6a900b41e7ad19bafef374f52d'],
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Kompong_Kdei',
        'discharge_id': '2d363b26ee524761b5d2da2a0ef59613',
        'stage_id': '75ba98758e7c4eb3b105f733db150307',
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Prek_Kdam',
        'discharge_id': ['5579eb91df394552bac8c939724ba6cc', '6615982ab17b4a38a5269e5ba4ef8a4f'],
        'stage_id': ['81e259eb15f64135834fe9e388ee7fbb', '5a7037f272c745d5bcafcc16a5c2f89c'],
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Kg._Thmar',
        'discharge_id': '2575ba232eb1406a8ce460de9fd5849a',
        'stage_id': '549c7f63d6dc4392b233b2049bd23d11',
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Koh_Khel',
        'discharge_id': 'e2b360019f3f4c438766ab8ed04286aa',
        'stage_id': '64758ae8f24a46759d962ab9a2dea27a',
#         'start_date': '1990-12-31',
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Lumphat',
        'discharge_id': 'f17adb5f893a4f40924f698978906b87',
        'stage_id': ['93337ffe7838467289bf5b55401ee6f8', '6955b9585cc94524bcff184034e4afd3'],
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Kompong_Cham',
        'discharge_id': '30f15c16035f4bcfbac36fc545fb132a',
        'stage_id': 'b5da800e9c8e4346a3b8d9d6daa73e0a',
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Phnom_Penh_Port',
        'discharge_id': 'e63fa05dafea4873bd44574125a97495',
        'stage_id': 'cd94408bcf944bc88033cf05675034fd',
#         'start_date': '1989-12-31',
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Voeun_Sai',
        'discharge_id': 'eacd08c3a27e4fc5a3aa1af35792f6e3',
        'stage_id': ['d177b0f2dae547ceb9bc7c23ef2970ae', 'e821b15baedd4af2a4ecde624b3bc976'],
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Kratie',
        'discharge_id': ['17c0ffe74e224b50a9e3142fc404262b', 'ecc50c5540564a27bb9f7f147027bd9f'],
        'stage_id': ['fca7bee14dcf43a59263158f18a88509', '39b735a5de5c440cb0bc789fb9fdd544'],
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Kompong_Chen',
        'discharge_id': '0ef0b44545814f8fad8905bae66a4f11',
        'stage_id': '53359074bfcf4961866daac9fbee8292',
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Kompong_Thom',
        'discharge_id': '6b283ac2d44e476eba80325fba62b0b7',
        'stage_id': ['dae00e365f134b4bbfa6bf4162eb815c', 'aaebd95890ca49b9b5b5a82699159d6d'],
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Siempang',
        'discharge_id': '3f588063c8bd4f06a477251701ec6ab0',
        'stage_id': ['6a49fa1f6cb247d39b9e6c405205bfa3', '5794c0c5c5c045639e0efa1540466828'],
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Battambang',
        'discharge_id': '09cc6bff5c604eafb79eaf233ab63bf1',
        'stage_id': ['cba718574b1f4244a4ae2ef58806732d', '8456239a6baf4221a744886c8cb6efd3'],
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
    {
        'name': 'Sisophon',
        'discharge_id': '298b8492ba45480888b8043d2c9fa17f',
        'stage_id': ['2a8a001888a84456916ff7a89be300e7', 'f08efea6bd684203ab33c5a316e2459a'],
        'start_date': '1900-01-01',
        'end_date': '2020-12-31',
    },
]

In [3]:
# sanity check: number of station entries in the `stations` list
len(stations)

17

In [4]:
# Cross-check: the station names in the exported CSV must be exactly the
# stations configured in `stations` (whose names use '_' instead of spaces).
_stations = pd.read_csv(f'{BASE_DIR}/other_exports/1990_2019_stations.csv')
stations_name = _stations['stationname'].unique()

stations_ = []
for station in stations:
    stations_.append(station['name'].replace('_', ' '))

# displayed as the cell result: True when both name sets are identical
set(stations_) == set(stations_name)

True

In [5]:
# Request settings shared by every download.
base_url = 'https://api.mrcmekong.org/api/v1/ts/highcharts'

# The chart key can be supplied through the MRC_CHART_KEY environment variable
# so that it does not have to live in the notebook. The literal is only the
# fallback that keeps the notebook working when the variable is unset or empty.
headers = {
    'x-mrc-data-portal-chart-key': (
        os.environ.get('MRC_CHART_KEY') or '56b6f095-2243-4d73-9bcf-57600ef1f38b'
    ),
    'Cache-Control': 'no-cache',
    'Pragma': 'no-cache',
    'User-Agent': 'PostmanRuntime/7.26.10',
    'Connection': 'keep-alive',
    'Accept': '*/*',
}

In [6]:
# one shared HTTP session; get_data() sends every request through it
sess = requests.Session()

In [7]:
def get_data(url, request_param, headers, column):
    """Download one time series and return it as a cleaned DataFrame.

    The request is sent through the module-level ``sess`` session. The
    observations are read from ``response['series'][0]['data']`` and loaded
    as two-column rows (timestamp, value).

    Parameters
    ----------
    url : str
        Full URL of the series endpoint.
    request_param : dict
        Query-string parameters passed to the request.
    headers : dict
        HTTP headers sent with the request.
    column : str
        Name given to the value column of the returned frame.

    Returns
    -------
    pandas.DataFrame or None
        Frame with the columns ``datetime`` (epoch milliseconds converted to
        timestamps) and ``column``. Rows with a negative or missing value are
        removed, so the frame may be empty. ``None`` is returned, after
        printing a message, on a request failure, a non-200 status, a
        malformed payload or an empty series.
    """
    try:
        res = sess.get(url, params=request_param, headers=headers, timeout=600)
    except requests.Timeout as err:
        print(f'timeout error: {err}')
        return None
    except requests.RequestException as err:
        # This clause used to name the undefined `request`, so every
        # non-timeout failure surfaced as a NameError instead of this message.
        print(f'other error: {err}')
        return None

    if res.status_code != 200:
        print(f'server returned status code {res.status_code}')
        return None

    try:
        observations = json.loads(res.content)['series'][0]['data'] or []
    except (ValueError, KeyError, IndexError, TypeError) as err:
        # invalid JSON, or JSON without the expected series/data structure
        print(f'unexpected response payload: {err!r}')
        return None

    print(f'len(observations): {len(observations)}')
    if not len(observations):
        print('no observations')
        return None

    df = pd.DataFrame(observations, columns=['datetime', column])
    df['datetime'] = pd.to_datetime(df['datetime'], unit='ms', origin='unix')
    # Negative values are discarded (presumably missing-data sentinels - TODO
    # confirm); dropna() then removes rows with a missing timestamp or value.
    df = df[~(df[column] < 0)].dropna()
    return df

In [8]:
cols = ['datetime', 'value']


def _as_id_list(ids):
    """Return ``ids`` as a flat list: a list/tuple is copied, a single id is wrapped."""
    # The previous test `ids is not isinstance(ids, list)` compared the value
    # with a bool by identity and was therefore always true; the result was
    # only correct because numpy's flatten() undid the extra nesting.
    return list(ids) if isinstance(ids, (list, tuple)) else [ids]


# Output directory for the per-series CSV files; created once up front rather
# than once per downloaded series.
download_dir = Path(f'{BASE_DIR}/mrc_download')
download_dir.mkdir(parents=True, exist_ok=True)

# Download every discharge and stage series of every station, reduce it to
# daily means and write one CSV per series id.
for station in stations:
    print('--------------------------------')
    print(f"station {station['name']}")

    discharge_id = _as_id_list(station['discharge_id'])
    stage_id = _as_id_list(station['stage_id'])
    _ids = discharge_id + stage_id
    print(f'ids: {_ids}')
    request_param = dict(sd=f'{station["start_date"]}{date_tail}', ed=f'{station["end_date"]}{date_tail}')

    for _id in _ids:
        print(f'id: {_id}')
        df = get_data(f'{base_url}/{_id}', request_param, headers, cols[1])
        if df is not None:
            # collapse sub-daily observations to one mean value per calendar day
            df = df.groupby(df.datetime.dt.date).mean()
            df.to_csv(download_dir / f'{_id}.csv', encoding='utf-8', index=True)
        else:
            print('empty df or error')

--------------------------------
station Neak_Luong
ids: ['661d84572a544af4b7f28d9dc685d4a8', '0c55bc1fb44e4caf881226bbf8a6e62a']
id: 661d84572a544af4b7f28d9dc685d4a8
len(observations): 10452
id: 0c55bc1fb44e4caf881226bbf8a6e62a
len(observations): 38811
--------------------------------
station Stung_Treng
ids: ['2066a06f57764afca57a6be0f3eff8a7', 'dcd4b92a9d9341859f5f1658836166f1', '000e84564d1b425ebcc2c2292ed14061', '0a723f8802e44a6bb12c08dcf1816a83']
id: 2066a06f57764afca57a6be0f3eff8a7
len(observations): 40543
id: dcd4b92a9d9341859f5f1658836166f1
len(observations): 136
id: 000e84564d1b425ebcc2c2292ed14061
len(observations): 45608
id: 0a723f8802e44a6bb12c08dcf1816a83
len(observations): 305736
--------------------------------
station Chaktomuk
ids: ['983ed55df7e04410b23fc6d72eb8f235', '7f84c4d851a543d1a5eccc53947e0d3c', '2c070f6a900b41e7ad19bafef374f52d']
id: 983ed55df7e04410b23fc6d72eb8f235
len(observations): 13765
id: 7f84c4d851a543d1a5eccc53947e0d3c
len(observations): 25505
id: 2c0