# Método para calcular la altura de la marea en un instante prefijado

#### Imports

In [3]:
import datetime
import sys
from calendar import monthrange
from io import StringIO
from time import perf_counter, process_time, sleep

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.gridspec import GridSpec
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from wordcloud import WordCloud

%matplotlib notebook

# Address of the tide-table form page; passed to extract_tides_info() below.
url = "http://www.hidro.gob.ar/oceanografia/Tmareas/Form_Tmareas.asp"

## ETL

### Paso 1: Obtengo datos de mareas del SHN (Servicio de Hidrografía Naval)

In [4]:


def _wait_for_select(driver, name, timeout=10):
    """Return a ``Select`` wrapper for the form field ``name`` once it is visible.

    The element is located *inside* the explicit wait, so the wait also covers
    the case where the field is not in the DOM yet.
    """
    element = WebDriverWait(driver, timeout).until(
        EC.visibility_of_element_located((By.NAME, name))
    )
    return Select(element)


def _option_texts(driver, name):
    """Return the visible text of every option of the form field ``name``."""
    return [option.text for option in _wait_for_select(driver, name).options]


def _first_is_higher(height_1, height_2):
    """Tell whether the first height reading is greater than the second.

    The readings are text with a decimal comma (e.g. ``'1,38'``), so they are
    converted to numbers before comparing; comparing the raw strings would
    order ``'10,20'`` below ``'9,80'``. If either reading cannot be parsed the
    plain string comparison is used as a fallback.
    """
    try:
        return float(height_1.replace(',', '.')) > float(height_2.replace(',', '.'))
    except ValueError:
        return height_1 > height_2


def _table_to_csv_rows(driver, div_num, day):
    """Serialise result table number ``div_num`` as ';'-separated text lines.

    Parameters
    ----------
    driver : selenium WebDriver already switched into the results iframe.
    div_num : 1-based position of the table's wrapper ``div`` on the page.
    day : day label to reuse for cells that come back empty before this
        table provides one of its own.

    Returns
    -------
    (str, str)
        The serialised rows and the last day label seen, so the caller can
        carry it over to the next table of the same query.
    """
    table_xpath = "//*[@class='LetraMasChica']/div/div[" + str(div_num) + "]/div/table/tbody"

    # The first two height readings decide whether the table starts on a
    # high tide (Pm) or a low tide (Bm); the rows then alternate.
    height_1 = driver.find_element(By.XPATH, table_xpath + "/tr[1]/td[3]").text
    height_2 = driver.find_element(By.XPATH, table_xpath + "/tr[2]/td[3]").text
    if _first_is_higher(height_1, height_2):
        first_tide, other_tide = 'Pm', 'Bm'
    else:
        first_tide, other_tide = 'Bm', 'Pm'

    # Table size: body rows, and cells of the first row.
    rows = len(driver.find_elements(By.XPATH, table_xpath + "/tr"))
    cols = len(driver.find_elements(By.XPATH, table_xpath + "/tr[1]/td"))

    pieces = []
    for r in range(1, rows + 1):
        for p in range(1, cols + 1):
            value = driver.find_element(
                By.XPATH, table_xpath + "/tr[" + str(r) + "]/td[" + str(p) + "]"
            ).text

            if value == '':
                # Empty cell: repeat the most recent day label instead.
                pieces.append(day + ';')
                continue

            if p == 1:
                day = value
            pieces.append(value)
            if p == 3:
                # The third cell closes the record: add the tide type
                # (odd rows share the type of the first row) and end the line.
                tide = first_tide if r % 2 != 0 else other_tide
                pieces.append(';' + tide + '\n')
            else:
                pieces.append(';')

    return ''.join(pieces), day


def extract_tides_info(url):
    """Scrape the tide tables for every year, location and month of the form.

    Opens the form at ``url`` in Chrome, reads the available options of the
    ``FAnio``, ``Localidad`` and ``FMes`` fields, submits every combination
    and parses the two result tables shown inside the page's iframe.

    Parameters
    ----------
    url : str
        Address of the form page.

    Returns
    -------
    pandas.DataFrame
        One row per tide record with the columns ``day``, ``hour(min)``,
        ``height(m)``, ``month``, ``location``, ``year`` and ``tide_type``
        (``'Pm'`` or ``'Bm'``). Row labels restart for every query. Heights
        are returned exactly as ``pd.read_csv`` parses them; no decimal
        separator is configured here. An empty frame is returned when no
        query was run.

    Raises
    ------
    Any Selenium or pandas error raised while scraping propagates to the
    caller after the browser session has been shut down; records gathered
    up to that point are not returned.
    """
    frames = []
    driver = None
    try:
        driver = webdriver.Chrome()
        driver.get(url)

        # Read the option lists of the three form fields once.
        fanio_list = _option_texts(driver, "FAnio")
        location_list = _option_texts(driver, "Localidad")
        fmes_list = _option_texts(driver, "FMes")

        # Load the records for every year, location and month.
        for fanio in fanio_list:
            # The browser has to be reopened because Chrome closes itself
            # every so often and breaks the process. quit() (rather than
            # close()) ends the whole session, not just the current window.
            driver.quit()
            driver = None  # nothing to clean up if the relaunch below fails
            driver = webdriver.Chrome()
            driver.get(url)

            # Submit button of the form.
            btn_search = driver.find_element(By.NAME, "B1")
            for location in location_list:
                for fmes in fmes_list:
                    select_FAnio = _wait_for_select(driver, "FAnio")
                    select_location = _wait_for_select(driver, "Localidad")
                    select_FMes = _wait_for_select(driver, "FMes")

                    print(fanio,location,fmes)
                    # Set the values in the form fields.
                    select_FAnio.select_by_visible_text(fanio)
                    select_location.select_by_visible_text(location)
                    select_FMes.select_by_visible_text(fmes)

                    # Run the query. implicitly_wait() is a session setting,
                    # not a pause: later lookups poll for up to 5 seconds.
                    btn_search.click()
                    driver.implicitly_wait(5)

                    iframe = driver.find_element(By.TAG_NAME, "iframe")
                    driver.switch_to.frame(iframe)

                    # Each query yields two tables. The day label is reset per
                    # query but carried from the first table into the second.
                    day = ''
                    table_chunks = []
                    for div_num in range(1, 3):
                        chunk, day = _table_to_csv_rows(driver, div_num, day)
                        table_chunks.append(chunk)

                    driver.switch_to.default_content()

                    # Load the table into a DataFrame.
                    str_table_tides = 'day;hour(min);height(m);tide_type\n' + ''.join(table_chunks)
                    tablaok = pd.read_csv(StringIO(str_table_tides), sep=';')

                    tablaok.insert(3, 'month', fmes)
                    tablaok.insert(4, 'location', location)
                    tablaok.insert(5, 'year', fanio)

                    # Collected here and concatenated once at the end, instead
                    # of re-copying the growing frame on every query.
                    frames.append(tablaok)

    finally:
        if driver is not None:
            driver.quit()

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, axis=0)



In [5]:
# Time the scrape with a wall-clock timer. process_time() only counts CPU time
# of this process, so it leaves out the time spent waiting on the browser.
start = perf_counter()
df_tides=extract_tides_info(url)
end = perf_counter()

print("El proceso demoró: " + str(end-start))

NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=122.0.6261.69)
Stacktrace:
#0 0x5624ec460793 <unknown>
#1 0x5624ec1541c6 <unknown>
#2 0x5624ec12ad7d <unknown>
#3 0x5624ec1c9c6d <unknown>
#4 0x5624ec1d058c <unknown>
#5 0x5624ec1c1398 <unknown>
#6 0x5624ec1922d3 <unknown>
#7 0x5624ec192c9e <unknown>
#8 0x5624ec4248cb <unknown>
#9 0x5624ec428745 <unknown>
#10 0x5624ec4112e1 <unknown>
#11 0x5624ec4292d2 <unknown>
#12 0x5624ec3f517f <unknown>
#13 0x5624ec44edc8 <unknown>
#14 0x5624ec44efc3 <unknown>
#15 0x5624ec45f944 <unknown>
#16 0x7f9d88c94ac3 <unknown>


In [22]:
#df_tides_ok=pd.read_csv("df_tides_ok_1.csv")
# Save the scraped table to CSV, then show its last rows as the cell output
# (tail must be called, and be the last expression, to display anything).
df_tides.to_csv("df_tides_ok_2.csv", encoding='utf-8')
df_tides.tail()

In [5]:

#df_tides.to_csv('df_tides_ok.csv', encoding='utf-8')

In [7]:
# Show the column labels of the scraped table.
df_tides.columns


Index(['day', 'hour(min)', 'height(m)', 'month', 'location', 'year'], dtype='object')

In [8]:
# Bare expression so the notebook renders the frame with its rich display
# instead of plain text.
df_tides

      day hour(min) height(m)      month        location  year
1   01:41      1,38        Pm      Enero         ATALAYA  2022
1   08:27      0,62        Bm      Enero         ATALAYA  2022
1   13:36      1,22        Pm      Enero         ATALAYA  2022
1   20:33      0,41        Bm      Enero         ATALAYA  2022
2   02:37      1,33        Pm      Enero         ATALAYA  2022
..    ...       ...       ...        ...             ...   ...
30  20:42      1,01        Pm  Diciembre  SANTA TERESITA  2024
31  02:50      0,66        Bm  Diciembre  SANTA TERESITA  2024
31  09:09      1,43        Pm  Diciembre  SANTA TERESITA  2024
31  16:19      0,55        Bm  Diciembre  SANTA TERESITA  2024
31  21:16      0,98        Pm  Diciembre  SANTA TERESITA  2024

[199006 rows x 6 columns]


In [17]:
# Show the last rows of df_tides_ok.
# NOTE(review): df_tides_ok is not assigned in any visible live cell (the only
# assignment is a commented-out pd.read_csv of "df_tides_ok_1.csv"), so this
# presumably fails on a fresh kernel — confirm which file it should be loaded from.
df_tides_ok.tail()

Unnamed: 0,day,hour(min),height(m),tide_type,month,location,year
199001,30,20:42,101,Pm,Diciembre,SANTA TERESITA,2024
199002,31,02:50,66,Bm,Diciembre,SANTA TERESITA,2024
199003,31,09:09,143,Pm,Diciembre,SANTA TERESITA,2024
199004,31,16:19,55,Bm,Diciembre,SANTA TERESITA,2024
199005,31,21:16,98,Pm,Diciembre,SANTA TERESITA,2024


1
