In [None]:
import requests
import json
import pandas as pd

def fetch_weather_data(station_id, start_date, end_date, timeout=30):
    """Fetch hourly precipitation readings for one station as a DataFrame.

    Posts a JSON query to the INAMHI ``station_data_hourly`` endpoint and
    reshapes the reply into one row per timestamp. The parsing expects the
    reply to be a list of series, each a mapping with a ``name`` and a
    ``data`` list whose entries carry ``fecha`` and ``valor``.

    Args:
        station_id: Station identifier; sent to the API as a string.
        start_date: Start of the query window, passed through unchanged
            (e.g. ``"2024-10-09"``).
        end_date: End of the query window, same format as ``start_date``.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout error instead of blocking forever.

    Returns:
        A DataFrame with a ``fecha`` datetime column plus one column per
        series name, ordered by ``fecha``. An empty DataFrame when the HTTP
        status is not 200 or the reply contains no entries.
    """
    url = "https://inamhi.gob.ec/api_rest/station_data_hourly/data"
    payload = json.dumps({
        "id_estacion": str(station_id),
        "table_names": ["017140801h"],  # Correct table name for PRECIPITACION
        "start_date": start_date,
        "end_date": end_date
    })
    headers = {'Content-Type': 'application/json'}

    print(f"Fetching data for station: {station_id}")
    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to retrieve data: {response.status_code}")
        print(f"Response: {response.text}")
        return pd.DataFrame()

    # A JSON ``null`` body is treated the same as an empty list of series.
    data = response.json() or []
    # Report the size of the reply rather than echoing the whole payload.
    print(f"Received {len(data)} measurement series for station {station_id}")

    # One record per (series, entry); each record fills only its own series
    # column, so the intermediate frame is sparse.
    records = [
        {"fecha": entry.get('fecha'), measurement['name']: entry.get('valor')}
        for measurement in data
        for entry in (measurement.get('data') or [])
    ]
    df = pd.DataFrame(records)

    # No records means no columns at all, hence no 'fecha' to convert.
    if 'fecha' not in df.columns:
        print(f"No 'fecha' column found in the response for station {station_id}")
        return pd.DataFrame()

    df['fecha'] = pd.to_datetime(df['fecha'])
    # first() keeps the first non-null value per column, which collapses the
    # sparse per-series rows into a single row for each timestamp.
    return df.groupby('fecha').first().reset_index()

# Smoke test: station 5 on the single day 2024-10-09.
station_id = 5
start_date = "2024-10-09"
end_date = "2024-10-09"

# Keyword arguments are used on purpose: this notebook defines
# fetch_weather_data twice with different positional orders, and a positional
# call would send the wrong fields if the other definition is bound in the kernel.
df_weather = fetch_weather_data(
    station_id=station_id,
    start_date=start_date,
    end_date=end_date,
)
print(df_weather)

In [5]:
import pandas as pd
import requests
import json
from datetime import datetime, timedelta

# Function to fetch weather data
def fetch_weather_data(start_date, end_date, station_id, timeout=30):
    """Fetch hourly precipitation readings for one station, raising on HTTP errors.

    Posts a JSON query to the INAMHI ``station_data_hourly`` endpoint and
    flattens the reply into one row per returned entry. The parsing expects
    the reply to be a list of series, each a mapping with a ``name`` and a
    ``data`` list whose entries carry ``fecha`` and ``valor``.

    Note the positional order: ``start_date`` and ``end_date`` come before
    ``station_id``.

    Args:
        start_date: Start of the query window in ``YYYY-MM-DD`` format.
        end_date: End of the query window in ``YYYY-MM-DD`` format.
        station_id: Station identifier; sent to the API as a string.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout error instead of blocking forever.

    Returns:
        A DataFrame with a ``fecha`` datetime column and one column per
        series name, excluding ``PRECIPITACION ACUMULADA DIARIA``. Rows are
        not merged per timestamp, so each row holds a value only for the
        series it came from. An empty DataFrame when the reply is empty or
        none of its series contains entries.

    Raises:
        Exception: If the HTTP status is anything other than 200 (status 400
            gets a dedicated message).
    """
    url = "https://inamhi.gob.ec/api_rest/station_data_hourly/data"
    payload = json.dumps({
        "id_estacion": str(station_id),
        "table_names": ["017140801h"],  # Correct table name for PRECIPITACION
        "start_date": start_date,  # Ensure this is in YYYY-MM-DD format
        "end_date": end_date       # Ensure this is in YYYY-MM-DD format
    })
    headers = {'Content-Type': 'application/json'}
    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)

    if response.status_code == 400:
        raise Exception(f"HTTP 400 - Bad Request for station {station_id}")

    if response.status_code != 200:
        raise Exception(f"Failed to retrieve data for station {station_id}: {response.status_code}")

    data = response.json()

    if not data:
        print(f"No data returned for station {station_id}.")
        return pd.DataFrame()  # Return an empty DataFrame if no data

    # One record per (series, entry); each record fills only its own series column.
    records = [
        {"fecha": entry.get('fecha'), measurement['name']: entry.get('valor')}
        for measurement in data
        for entry in (measurement.get('data') or [])
    ]

    if not records:
        # Series were listed but none held entries. A frame built from an
        # empty list has no 'fecha' column, so converting it would raise KeyError.
        print(f"No data returned for station {station_id}.")
        return pd.DataFrame()

    df = pd.DataFrame(records)

    # Timestamps that cannot be parsed become NaT and their rows are dropped.
    # NOTE(review): whether date-only strings end up as NaT depends on pandas'
    # format inference for the column -- confirm if those rows must be excluded.
    df['fecha'] = pd.to_datetime(df['fecha'], errors='coerce')
    df = df.dropna(subset=['fecha'])

    # The daily accumulated series is not wanted in the hourly output.
    df = df.drop(columns=['PRECIPITACION ACUMULADA DIARIA'], errors='ignore')

    return df

# Load the station lists from the two CSV files
epmaps_stations = pd.read_csv('EPMAPS_stations.csv')  # Update with your actual path
fonag_stations = pd.read_csv('FONAG_stations.csv')    # Update with your actual path

# Unique station IDs per source
epmaps_station_ids = epmaps_stations['id_estacion'].unique()
fonag_station_ids = fonag_stations['id_estacion'].unique()

# Combine both sources. dict.fromkeys de-duplicates while keeping first-seen
# order, so every run visits the stations in the same order (set iteration
# order carries no such guarantee).
all_station_ids = list(dict.fromkeys([*epmaps_station_ids, *fonag_station_ids]))

# Query window: the last 30 days. Computed once, from a single clock reading,
# so every station is queried for exactly the same window even if the loop
# runs across midnight.
window_end = datetime.now()
start_date = (window_end - timedelta(days=30)).strftime('%Y-%m-%d')
end_date = window_end.strftime('%Y-%m-%d')

# Fetch each station's raw data and write it to its own CSV file
for station_id in all_station_ids:
    try:
        # Keyword arguments guard against the differing positional orders of
        # the fetch_weather_data definitions in this notebook.
        raw_data = fetch_weather_data(
            start_date=start_date,
            end_date=end_date,
            station_id=station_id,
        )

        if raw_data.empty:
            print(f"No data for station {station_id}")
            continue

        raw_data['station_id'] = station_id
        raw_data.to_csv(f'station_{station_id}_raw_data.csv', index=False)

    except Exception as e:
        # Best effort: a failing station is reported and the loop moves on.
        print(f"Failed to fetch data for station {station_id}: {e}")


Failed to fetch data for station 64036: HTTP 400 - Bad Request for station 64036
No data returned for station 64422.
No data for station 64422
No data returned for station 64423.
No data for station 64423
No data returned for station 64424.
No data for station 64424
Failed to fetch data for station 64425: HTTP 400 - Bad Request for station 64425
Failed to fetch data for station 64426: HTTP 400 - Bad Request for station 64426
Failed to fetch data for station 64434: HTTP 400 - Bad Request for station 64434
Failed to fetch data for station 63795: HTTP 400 - Bad Request for station 63795
Failed to fetch data for station 63802: HTTP 400 - Bad Request for station 63802
Failed to fetch data for station 63804: HTTP 400 - Bad Request for station 63804
Failed to fetch data for station 63805: HTTP 400 - Bad Request for station 63805
No data returned for station 63806.
No data for station 63806
Failed to fetch data for station 63810: HTTP 400 - Bad Request for station 63810
No data returned for st