In [17]:
# common
import os
# Project root = parent of the current working directory. Notebooks do not
# define __file__, so the location is derived from the working directory
# instead; this presumably expects the kernel to be started from the
# notebook's own folder -- TODO confirm.
BASE_DIR = os.path.dirname(os.getcwd())

from datetime import datetime, timedelta
import json

from io import BytesIO
from pathlib import Path

# network & protocol
import requests
from requests.exceptions import BaseHTTPError

# computational
import numpy as np
import pandas as pd

In [18]:
# Study period (both endpoints inclusive), as ISO 'YYYY-MM-DD' strings.
start_date = '1990-01-01'
end_date = '2019-12-31'
# Time-of-day/zone suffix; not used in the cells visible here.
date_tail = 'T00:00:00.000Z'

# Every calendar day of the study period as 'YYYY-MM-DD' strings.
# 'D' is the documented daily frequency alias (lowercase 'd' is deprecated in
# recent pandas releases), and date_range parses the ISO strings directly.
full_dates = (
    pd.date_range(start_date, end_date, freq='D')
    .strftime('%Y-%m-%d')
    .tolist()
)

In [19]:
# Stations to process. Each entry carries:
#   name    - station identifier, used to build file paths
#   fitting - rating-curve form: 'exp', 'poly_2' or 'poly_3'
#   skip    - boolean flag (False for every station listed here)
# Per the original note, Kg. Chhnang and Kompong Luong are not listed because
# they have no discharge from which to calculate rating curves.
stations = [
    dict(name='Battambang', skip=False, fitting='poly_3'),
    dict(name='Chaktomuk', fitting='poly_2', skip=False),
    dict(name='Kg._Thmar', fitting='poly_2', skip=False),
    dict(name='Koh_Khel', fitting='exp', skip=False),
    dict(name='Kompong_Cham', fitting='poly_3', skip=False),
    dict(name='Kompong_Chen', fitting='poly_2', skip=False),
    dict(name='Kompong_Kdei', fitting='poly_2', skip=False),
    dict(name='Kompong_Thom', fitting='poly_3', skip=False),
    dict(name='Kratie', fitting='poly_2', skip=False),
    dict(name='Lumphat', fitting='poly_2', skip=False),
    dict(name='Neak_Luong', fitting='poly_3', skip=False),
    dict(name='Phnom_Penh_Port', fitting='exp', skip=False),
    dict(name='Prek_Kdam', fitting='exp', skip=False),
    dict(name='Siempang', fitting='poly_2', skip=False),
    dict(name='Sisophon', fitting='poly_3', skip=False),
    dict(name='Stung_Treng', fitting='poly_2', skip=False),
    dict(name='Voeun_Sai', fitting='poly_2', skip=False),
]

In [20]:
# Number of calendar days in the study period, counting both endpoints
# (hence the extra 1 on top of the day difference).
count = 1 + (
    datetime.strptime(end_date, '%Y-%m-%d')
    - datetime.strptime(start_date, '%Y-%m-%d')
).days
count

10957

In [21]:
def open_load_json(file_path, eqn):
    """Load fitted rating-curve parameters from a JSON file.

    Parameters
    ----------
    file_path : str or path-like
        JSON file holding a single object with the fitted parameters.
    eqn : str
        Fitting type; selects which keys are read:
        'exp' -> ('m', 'c'), 'poly_2' -> ('a', 'b', 'c'),
        'poly_3' -> ('w', 'x', 'y', 'c').

    Returns
    -------
    tuple
        The parameter values in the key order listed above.

    Raises
    ------
    ValueError
        If ``eqn`` is not one of the supported fitting types. The file is
        read before ``eqn`` is checked, so a missing or malformed file is
        reported first.
    KeyError
        If the file lacks a key required by ``eqn``.
    """
    # The context manager closes the handle even when json.load raises.
    with open(file_path) as f:
        parameters = json.load(f)

    if eqn == 'exp':
        return parameters['m'], parameters['c']
    elif eqn == 'poly_2':
        return parameters['a'], parameters['b'], parameters['c']
    elif eqn == 'poly_3':
        return parameters['w'], parameters['x'], parameters['y'], parameters['c']
    else:
        raise ValueError('only allowed fittings are exponential (as exp), 2nd order polynomial (as poly_2), and 3rd order polynomial (as poly_3)')


In [31]:
# Build one CSV per station in mrc_observations/: rows from discharge.csv for
# the days it covers, plus discharge computed from stage.csv through the
# station's rating curve for the remaining days of the study period.
Path(f'{BASE_DIR}/mrc_observations').mkdir(parents=True, exist_ok=True)

# Stations excluded from this cell.
no_fit_stations = ['Phnom_Penh_Port', 'Prek_Kdam']

# Every day of the study period as midnight timestamps. Keeping all date
# comparisons on DatetimeIndex values avoids relying on pandas implicitly
# casting datetime.date objects or strings inside isin().
all_days = pd.to_datetime(full_dates)

for _station in stations:
    station = _station['name']
    # NOTE(review): the per-station 'skip' flag is not consulted here; only
    # no_fit_stations excludes a station.
    if station in no_fit_stations:
        continue

    print('-------------------------------------------')
    print(f'station: {station}')

    df_discharge = pd.read_csv(f'{BASE_DIR}/pre_rating_curves/{station}/discharge.csv', index_col='datetime')
    df_discharge.index = pd.to_datetime(df_discharge.index)

    df_stage = pd.read_csv(f'{BASE_DIR}/pre_rating_curves/{station}/stage.csv', index_col='datetime')
    df_stage.index = pd.to_datetime(df_stage.index)

    # Restrict both series to the study period.
    discharge_mask = (df_discharge.index >= start_date) & (df_discharge.index <= end_date)
    stage_mask = (df_stage.index >= start_date) & (df_stage.index <= end_date)

    df_discharge = df_discharge.loc[discharge_mask]
    df_stage = df_stage.loc[stage_mask]

    # Days that already have a discharge row (time of day ignored), and the
    # days of the study period that still need a value.
    observed_days = df_discharge.index.normalize()
    remaining_days = all_days[~all_days.isin(observed_days)]

    # Keep only stage rows stamped exactly on a remaining day. The copy makes
    # the column assignment below act on an independent frame rather than a
    # slice of the frame read from disk.
    df_stage = df_stage.loc[df_stage.index.isin(remaining_days)].copy()

    fit_eqn = _station['fitting']
    parameters_file = f'{BASE_DIR}/rating_curves/{station}_parameters.json'

    stage = df_stage.stage

    if fit_eqn == 'exp':
        m, c = open_load_json(parameters_file, fit_eqn)
        # exponential: Q = 10 ^ c * stage ^ m
        # (float base so that a negative integer c does not raise)
        df_stage['discharge'] = np.power(10.0, c) * np.power(stage, m)
    elif fit_eqn == 'poly_2':
        a, b, c = open_load_json(parameters_file, fit_eqn)
        # Q = a*d^2 + b*d + c
        df_stage['discharge'] = a * np.power(stage, 2) + b * stage + c
    elif fit_eqn == 'poly_3':
        w, x, y, c = open_load_json(parameters_file, fit_eqn)
        # Q = w*d^3 + x*d^2 + y*d + c
        df_stage['discharge'] = w * np.power(stage, 3) + x * np.power(stage, 2) + y * stage + c
    else:
        raise ValueError('only allowed fittings are exponential (as exp), 2nd order polynomial (as poly_2), and 3rd order polynomial (as poly_3)')

    # Drop the helper columns so that both frames share the output columns.
    # Assumes stage.csv provides 'stage' and 'manual' and discharge.csv
    # provides 'calculated' -- drop() raises KeyError otherwise.
    df_stage = df_stage.drop(['stage', 'manual'], axis=1)
    df_discharge = df_discharge.drop(['calculated'], axis=1)

    # Single write per station, with the datetime index included. (An earlier
    # extra write for fully observed stations omitted the index and was always
    # overwritten by this one.)
    df = pd.concat([df_stage, df_discharge]).sort_index()
    df.to_csv(f'{BASE_DIR}/mrc_observations/{station}.csv', encoding='utf-8', index=True)

-------------------------------------------
station: Battambang
before len(df_discharge): 2344
before len(df_stage): 10933
after len(df_discharge): 1992
after len(df_stage): 7422
-------------------------------------------
station: Chaktomuk
before len(df_discharge): 13765
before len(df_stage): 20273
after len(df_discharge): 4203
after len(df_stage): 10957
-------------------------------------------
station: Kg._Thmar
before len(df_discharge): 2556
before len(df_stage): 8826
after len(df_discharge): 2191
after len(df_stage): 8095
-------------------------------------------
station: Koh_Khel
before len(df_discharge): 3653
before len(df_stage): 11076
after len(df_discharge): 3653
after len(df_stage): 10710
-------------------------------------------
station: Kompong_Cham
before len(df_discharge): 15706
before len(df_stage): 33238
after len(df_discharge): 4748
after len(df_stage): 10957
-------------------------------------------
station: Kompong_Chen
before len(df_discharge): 1807
before