In [57]:
import requests
import os
import pandas as pd

# Column names obtained from CICESE files metadata. None of this files have a header
columns=["anio","mes","dia","hora","minuto","segundo",
         "id_estacion","voltaje_sistema","nivel_mar_leveltrol","nivel_mar_burbujeador",
         "sw_1","sw_2","temperatura_agua","nivel_mar_ott_rsl", "radiacion_solar",
         "direccion_viento", "magnitud_viento", "temperatura_aire","humedad_relativa",
         "presion_atmosferica","precipitacion","voltaje_estacion_met","nivel_mar_sutron"]

# df is the dataframe that will allocate all the data
df = pd.DataFrame()

# We have data from 2011 to 2021. 
for anio in ["2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021"]:
    # Define the URL of the directory containing the .dat files
    url = "http://redmar.cicese.mx/emmc/DATA/ENSM/MIN/"+anio+"/"

    # Send a GET request to the URL
    response = requests.get(url)

    # Extract the HTML content of the response
    html_content = response.content.decode('utf-8')

    # Find all the .dat file names in the HTML content
    dat_files = []
    for line in html_content.split('\n'):
        if '.dat' in line:
            filename = line.split('href="')[1][:15]
            dat_files.append(filename)

    # Create a directory to store the downloaded files
    if not os.path.exists('data'):
        os.mkdir('data')

    # Download each .dat file and save it in the data directory
    for filename in dat_files:
        file_url = url + filename
        file_path = os.path.join('data', filename)
        response = requests.get(file_url)
        with open(file_path, 'wb') as f:
            f.write(response.content)


        # Open the downloaded file and read its content
        with open(file_path, 'r') as f:
            content = f.read()


        # Read the downloaded file using pandas and concatenate it to df
        df_aux = pd.read_csv(file_path, lineterminator='\n', delim_whitespace=True, header=None)
        df = pd.concat([df,df_aux])




KeyboardInterrupt: 

In [68]:
# Rename df columns with the ones defined before
dict_columns = {}
for col, i in zip(columns, range(len(columns))):
    dict_columns[i] = col
dict_columns
df = df.rename(columns=dict_columns)

# Export csv
df.to_csv("cicese_data.csv")

In [69]:
df

Unnamed: 0,anio,mes,dia,hora,minuto,segundo,id_estacion,voltaje_sistema,nivel_mar_leveltrol,nivel_mar_burbujeador,...,nivel_mar_ott_rsl,radiacion_solar,direccion_viento,magnitud_viendo,temperatura_aire,humedad_relativa,presion_atmosferica,precipitacion,voltaje_estacion_met,nivel_mar_sutron
0,2011,1.0,1.0,0.0,1.0,0.0,101.0,12.79,1.759,2.574,...,9.999,9999.9,999.0,99.9,14.3,999.9,1018.90,999.99,99.9,9.999
1,2011,1.0,1.0,0.0,2.0,0.0,101.0,12.80,1.755,2.580,...,9.999,9999.9,999.0,99.9,14.4,999.9,1018.73,999.99,99.9,9.999
2,2011,1.0,1.0,0.0,3.0,0.0,101.0,12.79,1.766,2.569,...,9.999,9999.9,999.0,99.9,14.5,999.9,1018.76,999.99,99.9,9.999
3,2011,1.0,1.0,0.0,4.0,0.0,101.0,12.78,1.778,2.557,...,9.999,9999.9,999.0,99.9,14.6,999.9,1018.73,999.99,99.9,9.999
4,2011,1.0,1.0,0.0,5.0,0.0,101.0,12.77,1.769,2.563,...,9.999,9999.9,999.0,99.9,14.5,999.9,1018.74,999.99,99.9,9.999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,2016,10.0,1.0,23.0,55.0,0.0,101.0,14.41,1.971,1.976,...,2.672,155.7,200.0,2.2,21.0,78.3,1011.09,251.37,14.2,2.255
1436,2016,10.0,1.0,23.0,56.0,0.0,101.0,14.08,1.974,1.984,...,2.659,121.3,203.0,1.7,21.0,78.2,1011.10,251.37,14.2,2.251
1437,2016,10.0,1.0,23.0,57.0,0.0,101.0,14.53,1.978,1.988,...,2.640,226.7,200.0,1.6,21.0,78.1,1011.10,251.37,14.2,2.255
1438,2016,10.0,1.0,23.0,58.0,0.0,101.0,14.66,1.978,2.004,...,2.652,215.0,187.0,1.7,21.0,78.0,1011.10,251.37,14.2,2.261
