In [5]:
import pandas as pd
import numpy as np
import requests
from owslib.ogcapi.features import Features
from datetime import datetime, timedelta

In [29]:
# Initialize the OGC API - Features client (owslib) for https://api.weather.gc.ca/.
# NOTE(review): `oafeat` is not referenced by any code visible in this notebook
# section; the fetch functions below call the API through `requests` directly.
# Confirm whether this client is still needed.
oafeat = Features("https://api.weather.gc.ca/")

def create_date_range(start_date, end_date):
    """Return the ``DatetimeIndex`` running from ``start_date`` to ``end_date``.

    Both bounds are handed straight to ``pandas.date_range`` (inclusive, using
    its default frequency), so anything pandas can parse as a date is accepted.
    """
    date_index = pd.date_range(start_date, end_date)
    return date_index

def extract_flow_data_us(station_list, start_date, end_date):
    """Download USGS daily discharge for several stations into one table.

    One request per station is sent to the USGS NWIS daily-values service
    (``parameterCd=00060``, ``statCd=00003``). Stations that fail or return no
    records keep an all-NaN column and are left out of the station metadata.

    Parameters
    ----------
    station_list : iterable of str
        USGS site numbers; each becomes a column of the returned table.
    start_date, end_date : str
        Inclusive bounds, used both for the output date axis and as the
        ``startDT`` / ``endDT`` query parameters.

    Returns
    -------
    tuple (pandas.DataFrame, list of dict)
        The table has a ``Date`` column of ``'YYYY-MM-DD'`` strings (always
        strings, whether or not any station returned data) followed by one
        float column per station. Each dict carries ``Station_Number``,
        ``Station_Name``, ``Latitude``, ``Longitude`` and ``Drainage_Area``.
    """
    # NOTE(review): USGS parameter 00060 is reported in ft3/s, whereas the EnSim
    # writer in this file labels every column m3/s. Confirm whether a unit
    # conversion is required before mixing these values with Canadian data.
    stations = list(station_list)  # iterated twice below; tolerate generators
    dates = create_date_range(start_date, end_date)
    flows = pd.DataFrame({'Date': dates.strftime('%Y-%m-%d')})
    for station in stations:
        flows[station] = np.nan

    station_info = []
    for station in stations:
        response = requests.get(
            "https://waterservices.usgs.gov/nwis/dv/",
            params={
                'format': 'json',
                'sites': station,
                'startDT': start_date,
                'endDT': end_date,
                'parameterCd': '00060',
                'statCd': '00003',
            },
            timeout=60,  # never hang the notebook on a stalled connection
        )
        if response.status_code != 200:
            print(f"Failed to retrieve data for station: {station}")
            continue

        data = response.json()
        # The service answers with an empty "timeSeries" list (the key is still
        # present) when a site has no data, so test for content, not the key.
        time_series = (data.get('value') or {}).get('timeSeries') or []
        records = []
        if time_series:
            value_groups = time_series[0].get('values') or []
            if value_groups:
                records = value_groups[0].get('value') or []
        if not records:
            print(f"Flow data column not found for station: {station}")
            continue

        flow_data = pd.DataFrame(records)
        by_date = pd.Series(
            pd.to_numeric(flow_data['value'], errors='coerce').to_numpy(),
            index=pd.to_datetime(flow_data['dateTime']).dt.strftime('%Y-%m-%d').to_numpy(),
        )
        # Keep the last record for a repeated date, as sequential assignment would.
        by_date = by_date[~by_date.index.duplicated(keep='last')]
        # A single vectorised lookup replaces one boolean-mask scan per record.
        flows[station] = flows['Date'].map(by_date)

        # Collect station information
        site_info = time_series[0]['sourceInfo']
        drainage_area = next(
            (prop['value'] for prop in site_info['siteProperty'] if prop['name'] == 'drain_area_va'),
            None,
        )
        station_info.append({
            'Station_Number': site_info['siteCode'][0]['value'],
            'Station_Name': site_info['siteName'],
            'Latitude': site_info['geoLocation']['geogLocation']['latitude'],
            'Longitude': site_info['geoLocation']['geogLocation']['longitude'],
            'Drainage_Area': drainage_area,
        })

    return flows, station_info

def fetch_hydrometric_data_ca(station_numbers, start_date, end_date, limit=500, missing_value=-1000):
    """Download daily mean discharge from api.weather.gc.ca for several stations.

    The ``hydrometric-daily-mean`` collection is paged through with
    ``limit`` / ``offset`` until a short or empty page is returned.

    Parameters
    ----------
    station_numbers : iterable of str
        Station identifiers sent as the ``STATION_NUMBER`` filter.
    start_date, end_date : str
        Inclusive bounds, used for the output date axis and for the
        ``datetime`` query interval.
    limit : int, default 500
        Page size; must be positive.
    missing_value : number, default -1000
        Value stored for features whose ``DISCHARGE`` is null. Dates with no
        feature at all stay NaN.

    Returns
    -------
    tuple (pandas.DataFrame, list of dict)
        The table has a ``Date`` column of ``'YYYY-MM-DD'`` strings (always
        strings, whether or not any station returned data) followed by one
        column per station that returned at least one feature. Each dict
        carries ``Station_Number``, ``Station_Name``, ``Latitude``,
        ``Longitude`` and ``Drainage_Area``.

    Raises
    ------
    ValueError
        If ``limit`` is not positive (the offset would never advance).
    """
    if limit <= 0:
        raise ValueError("limit must be a positive integer")

    dates = create_date_range(start_date, end_date)
    combined_data = pd.DataFrame({'Date': dates.strftime('%Y-%m-%d')})
    station_info = []
    url = "https://api.weather.gc.ca/collections/hydrometric-daily-mean/items"

    for station_number in station_numbers:
        offset = 0
        full_data = []

        while True:
            response = requests.get(
                url,
                params={
                    'STATION_NUMBER': station_number,
                    'datetime': f"{start_date}/{end_date}",
                    'limit': limit,
                    'offset': offset,
                    'f': 'json',
                },
                timeout=60,  # never hang the notebook on a stalled connection
            )
            if response.status_code != 200:
                # Report the failure instead of mistaking an error body for
                # "no more pages"; pages fetched so far are still used.
                print(f"Failed to retrieve data for station: {station_number}")
                break

            page = response.json().get('features') or []
            full_data.extend(page)
            if len(page) < limit:
                break  # short (or empty) page: nothing left to fetch
            offset += limit

        if not full_data:
            print(f"Flow data not found for station: {station_number}")
            continue

        flow_data = pd.DataFrame({
            'Date': [feature['properties']['DATE'] for feature in full_data],
            'value': [
                missing_value if feature['properties']['DISCHARGE'] is None
                else feature['properties']['DISCHARGE']
                for feature in full_data
            ],
        })
        by_date = pd.Series(
            pd.to_numeric(flow_data['value'], errors='coerce').to_numpy(),
            index=pd.to_datetime(flow_data['Date']).dt.strftime('%Y-%m-%d').to_numpy(),
        )
        # Keep the last record for a repeated date, as sequential assignment would.
        by_date = by_date[~by_date.index.duplicated(keep='last')]
        # A single vectorised lookup replaces one boolean-mask scan per record.
        combined_data[station_number] = combined_data['Date'].map(by_date)

        # Collect station information from the first returned feature
        first_feature = full_data[0]['properties']
        geometry = full_data[0]['geometry']
        station_info.append({
            'Station_Number': first_feature['STATION_NUMBER'],
            'Station_Name': first_feature['STATION_NAME'],
            'Latitude': geometry['coordinates'][1],
            'Longitude': geometry['coordinates'][0],
            'Drainage_Area': first_feature.get('DRAINAGE_AREA_GROSS', None),
        })

    return combined_data, station_info

def write_flow_data_to_file_obstxt(file_path, flow_data, site_details):
    """Write the flow table as an "Observedstreamflow" text file.

    Layout, in order: a tab-separated title line with the first and last date,
    a counts line (stations, days, days, 24, start year, start day of year, 00),
    one metadata line per station that has an entry in ``site_details``, then
    one line per day with every station value formatted as ``12.4f``.

    Parameters
    ----------
    file_path : str or path-like
        Destination file; overwritten if it exists.
    flow_data : pandas.DataFrame
        ``Date`` must be the first column; every other column is a station.
        NaN values are written as -1.0. The caller's frame is not modified.
    site_details : list of dict
        Records with ``Station_Number``, ``Station_Name``, ``Latitude``,
        ``Longitude`` and ``Drainage_Area``; matched to columns by
        ``Station_Number`` (first match wins).

    Raises
    ------
    ValueError
        If ``flow_data`` has no rows.
    """
    if len(flow_data) == 0:
        raise ValueError("flow_data has no rows to write")

    # Replace NaN with -1.000 (returns a new frame, so the input is untouched)
    flow_data = flow_data.fillna(-1.000)

    # The header advertises the earliest date and the rows carry no date of
    # their own, so they must be emitted in chronological order.
    flow_data = flow_data.assign(Date=pd.to_datetime(flow_data['Date']))
    flow_data = flow_data.sort_values('Date', kind='mergesort')

    start_date = flow_data['Date'].min()
    end_date = flow_data['Date'].max()
    num_stations = flow_data.shape[1] - 1
    num_days = flow_data.shape[0]
    start_year = start_date.strftime('%Y')
    start_day_of_year = start_date.timetuple().tm_yday

    lines = [
        f"Observedstreamflow\t{start_date.strftime('%Y/%m/%d')}\t{end_date.strftime('%Y/%m/%d')}\n",
        f"{num_stations}  {num_days}  {num_days}  24 {start_year}  {start_day_of_year} 00\n",
    ]

    details_by_id = {}
    for item in site_details:
        details_by_id.setdefault(item["Station_Number"], item)

    # Station metadata with integer lat*60 and lon*60.
    # NOTE(review): a column without site details gets no metadata line even
    # though it is counted in num_stations and still has a data column; confirm
    # how the consumer of this file should treat that case.
    for station_id in flow_data.columns[1:]:
        station_info = details_by_id.get(station_id)
        if station_info:
            lat = station_info['Latitude']
            lon = station_info['Longitude']
            drainage_area = station_info['Drainage_Area']
            if drainage_area is None:
                drainage_area = -1.0
            station_name = station_info['Station_Name']
            lines.append(
                f"{int(lat * 60):4d} {int(lon * 60):4d} {lat:12.6f} {lon:12.6f} "
                f"{station_id:12s} {float(drainage_area):12.3f} {station_name}\n"
            )

    # Flow data with 12.4 decimal format; one matrix extraction instead of a
    # positional lookup per row.
    for row in flow_data.iloc[:, 1:].to_numpy():
        lines.append(" ".join(f"{x:12.4f}" for x in row) + "\n")

    # Everything is formatted before the file is opened, so a formatting error
    # cannot leave a truncated file behind.
    with open(file_path, "w") as file_conn:
        file_conn.writelines(lines)
            
def write_flow_data_to_file_ensim(file_path, flow_data, site_details):
    """Write the flow table as an EnSim ``tb0`` ASCII time-series file.

    The header declares one column per station (every column of ``flow_data``
    after ``Date``) with its name, units, type and location, followed by one
    data line per day. Each data line is prefixed with 22 spaces and formats
    every value as ``12.4f``.

    Parameters
    ----------
    file_path : str or path-like
        Destination file; overwritten if it exists.
    flow_data : pandas.DataFrame
        ``Date`` must be the first column (strings or timestamps); NaN values
        are written as -1.0. The caller's frame is not modified.
    site_details : list of dict
        Records with ``Station_Number``, ``Latitude`` and ``Longitude``;
        matched to columns by ``Station_Number`` (first match wins), so their
        order does not need to follow the column order.
    """
    # Replace NaN with -1.000 (returns a new frame, so the input is untouched)
    flow_data = flow_data.fillna(-1.000)

    # Rows carry no date of their own; order them so that the first row really
    # is the :StartTime written in the header.
    flow_data = flow_data.assign(Date=pd.to_datetime(flow_data['Date']))
    flow_data = flow_data.sort_values('Date', kind='mergesort')
    # Format explicitly so a Timestamp input does not leak its own time-of-day
    # text in front of the fixed " 00:00:00.00000" suffix.
    start_day = flow_data['Date'].iloc[0].strftime('%Y-%m-%d')

    columns = list(flow_data.columns[1:])
    num_columns = len(columns)

    def repeated(token):
        """Return ``token`` once per station column, space separated."""
        return ' '.join([token] * num_columns)

    details_by_id = {}
    for site in site_details:
        details_by_id.setdefault(site['Station_Number'], site)

    # Locations are looked up per column so they stay aligned with :ColumnName.
    # A column without site details gets 0.000000 as a placeholder to keep the
    # number of entries equal to the number of columns.
    locations_x = []
    locations_y = []
    for column in columns:
        site = details_by_id.get(column)
        longitude = site['Longitude'] if site else 0.0
        latitude = site['Latitude'] if site else 0.0
        locations_x.append(f"{longitude * 60:.6f}")
        locations_y.append(f"{latitude * 60:.6f}")

    # Define header components
    header = [
        "########################################",
        ":FileType tb0  ASCII  EnSim 1.0",
        "#",
        "# DataType               Time Series",
        "#",
        ":Application             EnSimHydrologic",
        ":Version                 2.1.23",
        ":WrittenBy          PythonScript",
        f":CreationDate       {datetime.now().strftime('%Y-%m-%d')}",
        "#",
        "#---------------------------------------",
        ":SourceFile                   flow_data",
        "#",
        ":Name               streamflow",
        "#",
        ":Projection         LATLONG",
        ":Ellipsoid          WGS84",
        "#",
        f":StartTime          {start_day} 00:00:00.00000",
        "#",
        ":AttributeUnits            1.0000000",
        ":DeltaT               24",
        ":RoutingDeltaT         1",
        "#",
        ":ColumnMetaData",
        f"   :ColumnUnits             {repeated('m3/s')}",
        f"   :ColumnType             {repeated('float')}",
        f"   :ColumnName           {' '.join(str(column) for column in columns)}",
        "   :ColumnLocationX    " + ' '.join(locations_x),
        "   :ColumnLocationY    " + ' '.join(locations_y),
        f"   :coeff1            {repeated('0.0000E+00')}",
        f"   :coeff2            {repeated('0.0000E+00')}",
        f"   :coeff3            {repeated('0.0000E+00')}",
        f"   :coeff4            {repeated('0.0000E+00')}",
        f"   :Value1            {repeated('1')}",
        ":EndColumnMetaData",
        ":endHeader"
    ]

    # Data lines: 22-space left padding, then equally spaced 12.4f columns.
    padding = ' ' * 22
    data_lines = [
        padding + " ".join(f"{flow:12.4f}" for flow in row) + "\n"
        for row in flow_data.iloc[:, 1:].to_numpy()
    ]

    with open(file_path, "w") as file_conn:
        file_conn.write("\n".join(header) + "\n")
        file_conn.writelines(data_lines)

In [87]:
# Example usage:
# Define sample lists of station IDs
station_ca = ["05GG001", "05AC012"]
station_us = ["05017500", "05020500"]

# Define date range
start_date = "1980-01-01"
end_date = "2018-01-10"

# Fetch data for the given stations and date range
combined_data_ca, station_info_ca = fetch_hydrometric_data_ca(station_ca, start_date, end_date)
combined_data_us, station_info_us = extract_flow_data_us(station_us, start_date, end_date)

# Normalise the join key to ISO date strings so the merge cannot fail on a
# str/datetime64 dtype mismatch between the two sources.
combined_data_ca = combined_data_ca.assign(
    Date=pd.to_datetime(combined_data_ca['Date']).dt.strftime('%Y-%m-%d')
)
combined_data_us = combined_data_us.assign(
    Date=pd.to_datetime(combined_data_us['Date']).dt.strftime('%Y-%m-%d')
)

# Combine data into a single DataFrame (one row per date, one column per station)
combined_data = pd.merge(combined_data_ca, combined_data_us, on='Date', how='outer')

# Combine station info
combined_station_info = station_info_ca + station_info_us

# Write the data to a file
write_flow_data_to_file_obstxt('output.txt', combined_data, combined_station_info)
write_flow_data_to_file_ensim('output_ensim.txt', combined_data, combined_station_info)

# Print the resulting DataFrame and station information
print(combined_data.head())
print(combined_station_info)