In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
from tqdm.notebook import tqdm

# Parsing

### Levels

In [11]:
# Geometry table indexed by gauge id (presumably one watershed polygon per
# gauge -- confirm). Its `new_area` column is looked up per gauge when the
# discharge is normalised further down.
geom_file = (
    gpd.read_file(
        '/workspaces/my_dissertation/geo_data/geometry/russia_ws.gpkg')
    .set_index('gauge_id')
)

In [6]:
# Per-gauge `height` table. The first CSV column has no header, so pandas
# names it 'Unnamed: 0'; it holds the gauge identifier.
heigh_bs = pd.read_csv('./data/levels/height_id.csv')
heigh_bs = heigh_bs.rename(columns={'Unnamed: 0': 'gauge_id'})
# Gauge ids are used as strings everywhere else (file names, index lookups).
heigh_bs['gauge_id'] = heigh_bs['gauge_id'].astype(str)
# De-duplicate while `gauge_id` is still a regular column:
# DataFrame.drop_duplicates() ignores the index, so running it after
# set_index() would merge *different* gauges that happen to share the same
# name and height, and would keep repeated ids whose rows differ.
heigh_bs = heigh_bs.drop_duplicates()
heigh_bs = heigh_bs.set_index('gauge_id')
# Guarantee one row per gauge id so `heigh_bs.loc[gauge, 'height']` is always
# a scalar (first record wins, as for the time-series files below).
heigh_bs = heigh_bs[~heigh_bs.index.duplicated(keep='first')]
heigh_bs

Unnamed: 0_level_0,name,height
gauge_id,Unnamed: 1_level_1,Unnamed: 2_level_1
75389,ПРОТВА - P.ПPOTBA - C.CПAC-ЗAГOPЬE,119.04
75394,ОСЕТР - P.OCETP - Д.MAPKИHO,107.00
75395,МОСКВА - P.MOCKBA - Д.БAPCУKИ,183.00
75402,МОСКВА - P.MOCKBA - Г.ЗBEHИГOPOД,132.89
75403,МОСКВА - P.MOCKBA - C.ПETPOВО-ДAЛЬHEE,127.50
...,...,...
49048,"Войница (Каарто-йоки, Терва-йоки) - P.BOЙHИЦA ...",101.17
49052,"Ухта (Ухтуа-йоки, Ухтуска) - P.УXTA - ПГT KAЛE...",114.32
49053,Р.НОРВИ-ЙОКИ - P.HOPBИ-ЙOKИ - ПГT KAЛEBAЛA,104.00
49054,Керкешь - P.KEPKEШЬ - УCTЬE,109.84


In [7]:
# Input folders; each is expected to hold one `<gauge_id>.csv` per gauge
# (the cells below read f'{...}/{gauge}.csv' from them).
discharge_storage = './data/discharge/final'
level_storage = './data/levels/final'

In [23]:
# Single-gauge preview of the discharge conversion.
# NOTE(review): `gauge` is not assigned by any cell above this one in the
# visible notebook; it appears to rely on a value left in the kernel by the
# batch loop further down -- set it explicitly before running on a fresh
# kernel.
dis_file = pd.read_csv(f'{discharge_storage}/{gauge}.csv', index_col='date')
dis_file.index = pd.to_datetime(dis_file.index)
ws_area = geom_file.loc[gauge, 'new_area']
dis_file = (
    dis_file
    # discharge * 86400 / (area * 1e3); presumably m3/s and km2 -> mm/day
    # (units are not stated in the data -- confirm).
    .assign(q_mm_day=lambda frame: (frame['discharge'] * 86400)/(ws_area * 1e3))
    .rename(columns={'discharge': 'q_cms_s'})
)
# Keep only the first record for any repeated date.
is_repeated_date = dis_file.index.duplicated(keep='first')
dis_file = dis_file.loc[~is_repeated_date]
dis_file

Unnamed: 0_level_0,q_cms_s,q_mm_day
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2008-01-01,,
2008-01-02,,
2008-01-03,,
2008-01-04,,
2008-01-05,,
...,...,...
2020-12-27,1080.0,0.508497
2020-12-28,1080.0,0.508497
2020-12-29,1080.0,0.508497
2020-12-30,1070.0,0.503788


In [19]:
# Root output folder; the processing loop below writes per-gauge CSV files
# into its `h`, `q` and `q_h` subfolders.
hydro_storage = '/workspaces/my_dissertation/geo_data/great_db/hydro_meteo'

In [24]:
def _nan_placeholder(columns):
    """Return an all-NaN daily frame covering 2008-01-01 .. 2020-12-31.

    Used when a gauge has no level or no discharge file, so that the combined
    table still contains every expected column.

    Parameters
    ----------
    columns : list of str
        Column names of the placeholder frame.

    Returns
    -------
    pandas.DataFrame
        Float frame filled with NaN, indexed by a daily DatetimeIndex
        named 'date'.
    """
    placeholder_index = pd.date_range(start='01/01/2008', end='12/31/2020',
                                      name='date')
    return pd.DataFrame(np.nan, index=placeholder_index, columns=columns)


# Create the output folders up front so a missing folder can never be
# mistaken for a missing input file inside the loop.
for _subfolder in ('h', 'q', 'q_h'):
    Path(hydro_storage, _subfolder).mkdir(parents=True, exist_ok=True)

# One height per gauge id: with repeated ids `.loc[gauge, 'height']` would
# return a Series instead of a scalar, so keep the first record only.
gauge_heights = heigh_bs['height']
gauge_heights = gauge_heights[~gauge_heights.index.duplicated(keep='first')]

for gauge, lvl_bs in gauge_heights.items():
    # ---- water level -------------------------------------------------
    # Only the read is guarded: a FileNotFoundError here means "no level
    # data for this gauge", nothing else.
    try:
        lvl_file = pd.read_csv(f'{level_storage}/{gauge}.csv',
                               index_col='date')
    except FileNotFoundError:
        # Placeholder is used for the combined table only; nothing is
        # written to the `h` folder for gauges without level data.
        lvl_file = _nan_placeholder(['lvl_sm', 'lvl_mbs'])
    else:
        lvl_file.index = pd.to_datetime(lvl_file.index)
        # Level scaled by 1e-2 (presumably cm -> m, confirm) on top of the
        # gauge `height`.
        lvl_file['lvl_mbs'] = lvl_bs + lvl_file['level'] * 1e-2
        lvl_file = lvl_file.rename(columns={'level': 'lvl_sm'})
        # Keep only the first record for any repeated date.
        lvl_file = lvl_file[~lvl_file.index.duplicated(keep='first')]
        lvl_file.to_csv(
            f'{hydro_storage}/h/{gauge}.csv')

    # ---- discharge ---------------------------------------------------
    try:
        dis_file = pd.read_csv(f'{discharge_storage}/{gauge}.csv',
                               index_col='date')
    except FileNotFoundError:
        dis_file = _nan_placeholder(['q_cms_s', 'q_mm_day'])
    else:
        dis_file.index = pd.to_datetime(dis_file.index)
        try:
            ws_area = geom_file.loc[gauge, 'new_area']
        except KeyError:
            # Gauge has no geometry record: q_mm_day becomes NaN.
            ws_area = np.nan
        # discharge * 86400 / (area * 1e3); presumably m3/s and km2 ->
        # mm/day (units are not stated in the data -- confirm).
        dis_file['q_mm_day'] = (dis_file['discharge'] * 86400)/(ws_area * 1e3)
        dis_file = dis_file.rename(columns={'discharge': 'q_cms_s'})
        dis_file = dis_file[~dis_file.index.duplicated(keep='first')]
        dis_file.to_csv(
            f'{hydro_storage}/q/{gauge}.csv')

    # ---- combined table ----------------------------------------------
    # Align both series on date and fix the column order of the output.
    res_file = pd.concat([dis_file, lvl_file], axis=1)
    res_file = res_file[['lvl_sm', 'q_cms_s', 'lvl_mbs', 'q_mm_day']]
    res_file.to_csv(
        f'{hydro_storage}/q_h/{gauge}.csv')
