In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time
import pandas as pd
import csv
import glob

def enable_download_headless(browser,
                             download_dir):
    """Tell the browser session to save downloads into ``download_dir``.

    Registers a ``send_command`` entry on the driver's command executor
    (a POST to the chromium ``send_command`` endpoint) and then uses it to
    issue ``Page.setDownloadBehavior`` with ``behavior='allow'``.

    Args:
        browser: WebDriver-like object exposing ``command_executor._commands``
            and ``execute(name, params)``.
        download_dir: Directory passed to the browser as ``downloadPath``.
    """
    # NOTE: relies on the private ``_commands`` mapping of the executor.
    endpoint = ("POST", '/session/$sessionId/chromium/send_command')
    browser.command_executor._commands["send_command"] = endpoint

    download_behavior = {'behavior': 'allow', 'downloadPath': download_dir}
    payload = {'cmd': 'Page.setDownloadBehavior', 'params': download_behavior}
    browser.execute("send_command", payload)

# Folder the exported CSV is downloaded into.
dw_path = "/home/anton/dima_experiments/esimo_loader/test_folder/"

# Fixed pauses (seconds) between the steps below.
# NOTE(review): presumably these give the portal time to render and the
# export time to finish downloading — confirm the values are still needed.
PAGE_LOAD_WAIT_S = 240
TOOLS_MENU_WAIT_S = 180
DOWNLOAD_WAIT_S = 180

# set profile for chrome
options = Options()
options.add_argument("--disable-notifications")
options.add_argument('--no-sandbox')
options.add_argument('--verbose')
options.add_experimental_option("prefs", {
    "download.default_directory": dw_path,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
})
options.add_argument('--disable-gpu')
options.add_argument('--disable-software-rasterizer')
options.add_argument('--headless')

# Resource page on the ESIMO portal that offers the CSV export.
esimo = 'http://portal.esimo.ru'
viewer = 'dataview/viewresource?resourceId=RU_RIHMI-WDC_1325_1'
website = f'{esimo}/{viewer}'

driver = webdriver.Chrome(service=Service("/usr/bin/chromedriver"),
                          options=options)
try:
    enable_download_headless(driver, dw_path)

    driver.get(website)
    time.sleep(PAGE_LOAD_WAIT_S)

    # Open the tools menu, then trigger the CSV export entry.
    driver.find_element(by=By.CSS_SELECTOR,
                        value="div.portlet-form-button.portlet-icon.icon-tools").click()
    time.sleep(TOOLS_MENU_WAIT_S)
    driver.find_element(by=By.CSS_SELECTOR,
                        value="#display-analytics-export-csv").click()
    time.sleep(DOWNLOAD_WAIT_S)
finally:
    # Always shut the browser down, even if an element lookup or click
    # fails, so no Chrome/chromedriver process is left running.
    driver.quit()




## Initial level observations

In [8]:
# saver
# Split the initial export into one CSV of daily mean water level per gauge.
initial_csv = './data.csv'

# Source column name -> short name used in the saved files.
gauge_columns = {'Платформа: идентификатор локальный': 'gauge_id',
                 'Дата и время': 'date',
                 'Уровень воды над нулем поста': 'level'}

# The header row is read separately so commas inside column names can be
# stripped. The encoding is explicit so the (Cyrillic) names are decoded the
# same way as in pd.read_csv below, regardless of the system locale.
with open(initial_csv, newline='', encoding='utf-8') as csvfile:
    data = csv.reader(csvfile, delimiter=',', quotechar='"')
    res_str = next(data)

res_str = [word.replace(',', '') for word in res_str]

# skiprows=1 skips the original header (replaced by `names`), skipfooter=1
# drops the last line of the file, and malformed rows are skipped.
file = pd.read_csv(initial_csv,
                   sep=',', names=res_str,
                   skiprows=1, skipfooter=1,
                   on_bad_lines='skip',
                   quotechar='"',
                   engine='python', encoding='utf-8')

# Mapping: gauge identifier -> index labels of its rows in `file`.
group_gauge = file.groupby(
    by='Платформа: идентификатор локальный').groups

result_folder = '/home/anton/dima_experiments/geo_data/esimo_data'

for gauge_id, loc_index in group_gauge.items():

    gauge = file.loc[loc_index][list(gauge_columns)]
    gauge = gauge.rename(columns=gauge_columns)
    gauge['date'] = pd.to_datetime(gauge['date'])
    gauge = gauge.set_index('date')
    # Aggregate the observations to one mean value per calendar day.
    res = gauge[['level']].groupby(by=pd.Grouper(freq='1d')).mean()
    res.to_csv(f'{result_folder}/{gauge_id}.csv')

## Extend with new upload from ESIMO

In [18]:
# Root folder; the cells below look for saved CSVs under
# f'{hdd}/geo_data/esimo_data'.
hdd = '/home/anton/dima_experiments'


In [20]:
# True when no CSV file is matched in the esimo_data folder.
not bool(glob.glob(f'{hdd}/geo_data/esimo_data/*.csv'))

True

In [21]:
# True when at least one CSV file is matched in the esimo_data folder.
bool(glob.glob(f'{hdd}/geo_data/esimo_data/*.csv'))

False

In [22]:
# List the CSV paths currently matched in the esimo_data folder.
glob.glob(f'{hdd}/geo_data/esimo_data/*.csv')

[]

In [12]:
# merger
# Paths of the CSV files already present in result_folder.
# NOTE(review): existed_vals is not referenced in the cells visible here —
# confirm whether it is still needed.
existed_vals = glob.glob(f'{result_folder}/*.csv')


In [13]:
# Newly downloaded export. The header row and the data rows must come from
# the SAME file, so the path is defined once and used for both reads.
# NOTE(review): './test_folder' is assumed to be the download folder relative
# to the notebook's working directory — confirm.
new_data_csv = './test_folder/data.csv'

# The header row is read separately so commas inside column names can be
# stripped. The encoding is explicit so the (Cyrillic) names are decoded the
# same way as in pd.read_csv below, regardless of the system locale.
with open(new_data_csv, newline='', encoding='utf-8') as csv_new_file:
    data = csv.reader(csv_new_file, delimiter=',', quotechar='"')
    res_str = next(data)

res_str = [word.replace(',', '') for word in res_str]

# skiprows=1 skips the original header (replaced by `names`), skipfooter=1
# drops the last line of the file, and malformed rows are skipped.
new_file = pd.read_csv(new_data_csv,
                       sep=',', names=res_str,
                       skiprows=1, skipfooter=1,
                       on_bad_lines='skip',
                       quotechar='"',
                       engine='python', encoding='utf-8')

# Mapping: gauge identifier -> index labels of its rows in `new_file`.
group_gauge = new_file.groupby(
    by='Платформа: идентификатор локальный').groups

In [14]:
# Merge the new upload into the per-gauge files: for every gauge compute the
# daily mean level, then either extend the existing CSV or create a new one.
for gauge_id, loc_index in group_gauge.items():
    target_csv = f'{result_folder}/{gauge_id}.csv'

    new_gauge = new_file.loc[loc_index, ['Платформа: идентификатор локальный',
                                         'Дата и время',
                                         'Уровень воды над нулем поста']]
    new_gauge = new_gauge.rename(
        columns={'Платформа: идентификатор локальный': 'gauge_id',
                 'Дата и время': 'date',
                 'Уровень воды над нулем поста': 'level'})
    new_gauge['date'] = pd.to_datetime(new_gauge['date'])
    new_gauge = new_gauge.set_index('date')

    daily = pd.Grouper(freq='1d')
    new_res = new_gauge[['level']].groupby(by=daily).mean()

    try:
        old_res = pd.read_csv(target_csv)
    except FileNotFoundError:
        # No file for this gauge yet: the new daily means become the file.
        new_res.to_csv(target_csv)
        continue

    old_res['date'] = pd.to_datetime(old_res['date'])
    old_res = old_res.set_index('date')

    # Values already stored win; the new upload only fills missing entries.
    res = old_res.combine_first(new_res)
    res.to_csv(target_csv)
    
