In [5]:
import pandas as pd
import dataretrieval.nwis as nwis
import matplotlib.pyplot as plt
import os

In [6]:
import eventdetector_ts
from eventdetector_ts.metamodel.meta_model import MetaModel

### Download USGS Gage Data
[Dataretreival documentation here](https://doi-usgs.github.io/dataretrieval-python/index.html)

In [None]:
# Define the parameters
site_ids = ["04176400", "04176500", "04174490", "01401000", "01393450", "01302020", "08074500", "07381590", "07381490", "07010000", "06439000", "06461500", "09484550", "09400815", "09058000", "12119000"]  # List of USGS site IDs
start_date = "2000-01-01"  # Start date for data retrieval
end_date = "2025-03-28"  # End date for data retrieval
parameters = ["00065"]  # Gauge height (ft)

# Loop through site IDs and download data
all_data = []
for site in site_ids:
    print(f"Fetching data for site {site}...")
    df = nwis.get_record(sites=site, service="iv", start=start_date, end=end_date, parameterCd=parameters)
    df.to_csv(f"usgs-data/{site}.csv")
    print(f"Site {site} data saved.")
    all_data.append(df)

final_df = pd.concat(all_data) # Combine data from all sites

# Save to CSV
final_df.to_csv("usgs_streamflow_data.csv")
print("Data saved to usgs_streamflow_data.csv")

Fetching data for site 04176400...
Site 04176400 data saved.
Fetching data for site 04176500...
Site 04176500 data saved.
Fetching data for site 04174490...
Site 04174490 data saved.
Fetching data for site 01401000...
Site 01401000 data saved.
Fetching data for site 01393450...
Site 01393450 data saved.
Fetching data for site 01302020...
Site 01302020 data saved.
Fetching data for site 08074500...
Site 08074500 data saved.
Fetching data for site 07381590...
Site 07381590 data saved.
Fetching data for site 07381490...
Site 07381490 data saved.
Fetching data for site 07010000...
Site 07010000 data saved.
Fetching data for site 06439000...
Site 06439000 data saved.
Fetching data for site 06461500...
Site 06461500 data saved.
Fetching data for site 09484550...
Site 09484550 data saved.
Fetching data for site 09400815...
Site 09400815 data saved.
Fetching data for site 09058000...
Site 09058000 data saved.
Fetching data for site 12119000...
Site 12119000 data saved.


### Data Visualization

In [11]:
# Loop through each CSV file in the usgs-data folder
for filename in os.listdir("usgs-data"):
    if filename.endswith(".csv"):
        print(f"Plotting {filename}")
        # Read the CSV file
        df = pd.read_csv(f"usgs-data/{filename}")

        # Assuming the CSV has a 'dateTime' column and a 'value' column for gauge height
        plt.figure(figsize=(10, 5))
        plt.plot(pd.to_datetime(df['datetime']), df['00065'], label=filename)

        # Set the title and labels
        plt.title(f"Gage Height for USGS Gage {filename.replace(".csv", "")}")
        plt.xlabel("Date")
        plt.ylabel("Gage Height (ft)")
        plt.xticks(rotation=45)

        # Save the plot
        plt.tight_layout()
        plt.savefig(f"plots/{filename.replace('.csv', '.png')}")
        plt.close()


Plotting 09484550.csv
Plotting 07381490.csv
Plotting 12119000.csv
Plotting 01393450.csv
Plotting 04174490.csv
Plotting 06461500.csv
Plotting 09400815.csv
Plotting 07381590.csv
Plotting 08074500.csv
Plotting 07010000.csv
Plotting 04176400.csv
Plotting 06439000.csv
Plotting 09058000.csv
Plotting 04176500.csv
Plotting 01302020.csv
Plotting 01401000.csv


### Testing `eventdetector-ts`

In [21]:
# Create a function to process and analyze each file
def analyze_gage_data(filename):
    # Read the CSV file
    df = pd.read_csv(f"usgs-data/{filename}")

    # Convert datetime and set as index
    df['datetime'] = pd.to_datetime(df['datetime'])
    df.set_index('datetime', inplace=True)

    # Create MetaModel instance
    model = MetaModel(output_dir="testing", dataset=df, events=df, width=50)

    print(df['00065'].to_list())

    # Fit the model to detect events in the gage height data
    events = model.fit(df['00065'].to_list())

    # Plot the results
    plt.figure(figsize=(15, 8))

    # Plot original data
    plt.plot(df.index, df['00065'], label='Gage Height', alpha=0.7)

    # Plot event boundaries
    for event in events:
        start = df.index[event.start]
        end = df.index[event.end]
        plt.axvline(x=start, color='r', linestyle='--', alpha=0.5)
        plt.axvline(x=end, color='g', linestyle='--', alpha=0.5)

    plt.title(f"Event Detection Results for Gage {filename.replace('.csv', '')}")
    plt.xlabel("Date")
    plt.ylabel("Gage Height (ft)")
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Save the plot
    plt.savefig(f"testing/{filename.replace('.csv', '_events.png')}")
    plt.close()

    return len(events)

print(analyze_gage_data("01302020.csv"))

# Process all files in the usgs-data folder
# for filename in os.listdir("usgs-data"):
#     if filename.endswith(".csv"):
#         print(f"Processing {filename}...")
#         num_events = analyze_gage_data(filename)
#         print(f"Detected {num_events} events in {filename}")

2025-03-31 00:51:53 [37m[INFO] eventdetector_ts.metamodel: checks if the index of the dataset is already in the datetime format.[0m
2025-03-31 00:51:53 [37m[INFO] eventdetector_ts.metamodel: Computing the time sampling and time unit of the dataset[0m
5
2025-03-31 00:51:53 [37m[INFO] eventdetector_ts.metamodel: {   'batch_size': 32,
    'delta': 735,
    'dropout': 0.3,
    'epochs': 256,
    'epsilon': 0.0002,
    'fill_nan': 'zeros',
    'hyperparams_cnn': (16, 64, 3, 8, 1, 2, 'relu'),
    'hyperparams_ffn': (1, 3, 64, 256, 'sigmoid'),
    'hyperparams_mm_network': (1, 32, 'sigmoid'),
    'hyperparams_rnn': (1, 2, 16, 128, 'tanh'),
    'hyperparams_transformer': (256, 4, 1, True, 'relu'),
    'last_act_func': 'sigmoid',
    'models': [('FFN', 2)],
    'pa': 5,
    'remove_overlapping_events': True,
    's_h': 0.05,
    'save_models_as_dot_format': False,
    'scaler': 'StandardScaler',
    't_max': 1102.5,
    't_r': 0.97,
    'test_size': 0.2,
    'time_window': None,
    'type_

TypeError: MetaModel.fit() takes 1 positional argument but 2 were given