In [6]:
import requests
import os
import pandas as pd

# Column names obtained from CICESE files metadata. None of this files have a header
columns=["anio","mes","dia","hora","minuto","segundo",
         "id_estacion","voltaje_sistema","nivel_mar_leveltrol","nivel_mar_burbujeador",
         "sw_1","sw_2","temperatura_agua","nivel_mar_ott_rsl", "radiacion_solar",
         "direccion_viento", "magnitud_viento", "temperatura_aire","humedad_relativa",
         "presion_atmosferica","precipitacion","voltaje_estacion_met","nivel_mar_sutron"]

# df is the dataframe that will allocate all the data
df = pd.DataFrame()

# We have data from 2011 to 2021. 
for anio in ["2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021"]:
    # Define the URL of the directory containing the .dat files
    url = "http://redmar.cicese.mx/emmc/DATA/ENSM/MIN/"+anio+"/"

    # Send a GET request to the URL
    response = requests.get(url)

    # Extract the HTML content of the response
    html_content = response.content.decode('utf-8')

    # Find all the .dat file names in the HTML content
    dat_files = []
    for line in html_content.split('\n'):
        if '.dat' in line:
            filename = line.split('href="')[1][:15]
            dat_files.append(filename)

    # Create a directory to store the downloaded files
    if not os.path.exists('data'):
        os.mkdir('data')

    # Download each .dat file and save it in the data directory
    for filename in dat_files:
        try:
            file_url = url + filename
            file_path = os.path.join('data', filename)
            response = requests.get(file_url)
            with open(file_path, 'wb') as f:
                f.write(response.content)


            # Open the downloaded file and read its content
            with open(file_path, 'r') as f:
                content = f.read()


            # Read the downloaded file using pandas and concatenate it to df
            df_aux = pd.read_csv(file_path, lineterminator='\n', delim_whitespace=True, header=None)
            df = pd.concat([df,df_aux])
        except:
            print(filename, "no se agregó")




ENSM2017.dat">E no se agregó


In [7]:
# Rename df columns with the ones defined before
dict_columns = {}
for col, i in zip(columns, range(len(columns))):
    dict_columns[i] = col
dict_columns
df = df.rename(columns=dict_columns)

# Export csv
df.to_csv("cicese_data.csv")

In [None]:
# Export 1000 rows to have a preview (in Github)
df.head(1000)

In [4]:
df.columns

Index(['anio', 'mes', 'dia', 'hora', 'minuto', 'segundo', 'id_estacion',
       'voltaje_sistema', 'nivel_mar_leveltrol', 'nivel_mar_burbujeador',
       'sw_1', 'sw_2', 'temperatura_agua', 'nivel_mar_ott_rsl',
       'radiacion_solar', 'direccion_viento', 'magnitud_viento',
       'temperatura_aire', 'humedad_relativa', 'presion_atmosferica',
       'precipitacion', 'voltaje_estacion_met', 'nivel_mar_sutron'],
      dtype='object')