In [None]:

from src.utils.execute_sql_script import execute_sql_script
from pathlib import Path
import src.utils.fetch_data as fd
import src.utils.duckdb_utils as du
import pandas as pd



In [None]:
# Execute the SQL script stored at ../db/schema.sql.
# NOTE(review): presumably this creates the tables queried below — confirm against the script.
schema_sql = Path('../db/schema.sql')
du.run_sql_file(schema_sql)

In [None]:
# Execute the SQL script stored at ../db/views.sql.
# NOTE(review): presumably this defines the vw_* views read further down — confirm against the script.
views_sql = Path('../db/views.sql')
du.run_sql_file(views_sql)

In [None]:
# Spot-check the approval-status fetch for a single hard-coded code.
# NOTE(review): '09163500' looks like a station identifier — confirm which site it refers to.
approval_check_site = '09163500'
ck = fd.fetch_approval_status(approval_check_site)

In [None]:
# Pull every row of the vw_nwis_iv_status view into a DataFrame.
# The same pattern was previously used (currently disabled) for these views:
#   vw_site_locations           -> ck_site_loc
#   vw_nwis_daily_stats_local   -> ck_daily_agg
#   vw_nwis_annual_stats_local  -> ck_annual_agg
status_view_sql = 'SELECT * FROM vw_nwis_iv_status'
with du.connect_duckdb() as con:
    ck_approved = con.execute(status_view_sql).df()

In [None]:
# Ask DuckDB for the list of tables it currently knows about.
with du.connect_duckdb() as con:
    tables = con.execute("SHOW TABLES").fetchall()

# fetchall() has already returned the rows, so the report can be printed
# after the connection block has finished.
print("Tables in DuckDB:", tables)

In [None]:
# Seed the lookup tables from the CSV snapshots under ../artifacts.
#
# Code columns are read as strings so pandas does not infer them as integers
# and silently drop leading zeros: site codes such as '09163500' are used in
# this notebook, and an integer round-trip would store them as '9163500'.
# parameter_cd already had this treatment; site_cd needs it for the same reason.
# NOTE(review): other *_cd columns may need the same dtype — confirm against the schema.
with du.connect_duckdb() as con:
    # Expose each CSV to DuckDB under a temporary name.
    con.register('site_tmp', pd.read_csv('../artifacts/site.csv', dtype={'site_cd': 'string'}))
    con.register('source_tmp', pd.read_csv('../artifacts/source.csv'))
    con.register('parameter_tmp', pd.read_csv('../artifacts/parameter.csv', dtype={'parameter_cd': 'string'}))
    con.register('site_parameter_tmp', pd.read_csv('../artifacts/site_parameter.csv'))
    con.register('usbr_site_parameter_tmp', pd.read_csv('../artifacts/usbr_site_parameter.csv'))

    # `INSERT ... SELECT *` maps columns by position, so each CSV must list its
    # columns in the same order as the target table.
    # source is loaded before site, and site/parameter before the link tables
    # (presumably so referenced rows exist first — confirm against the schema).
    con.execute("INSERT INTO source SELECT * FROM source_tmp;")
    con.execute("INSERT INTO site SELECT * FROM site_tmp;")
    con.execute("INSERT INTO parameter SELECT * FROM parameter_tmp;")
    con.execute("INSERT INTO site_parameter SELECT * FROM site_parameter_tmp;")
    con.execute("INSERT INTO usbr_site_parameter SELECT * FROM usbr_site_parameter_tmp;")

In [None]:
# Read the parameter table, ordered by parameter_id, into `result` for inspection.
parameter_sql = "SELECT * FROM parameter ORDER BY parameter_id;"
with du.connect_duckdb() as con:
    result = con.execute(parameter_sql).df()

In [None]:
# Show the column names of the source CSV snapshot.
# The frame is loaded here because no earlier cell binds `source_tmp` as a
# Python variable (before this point it is only a view name registered inside
# DuckDB), so the bare reference raised NameError on a fresh kernel.
source_tmp = pd.read_csv('../artifacts/source.csv')
print(source_tmp.columns)

In [None]:
# Build the staging frames inside this cell so it runs on a fresh kernel:
# no earlier cell binds these names in Python, so registering them directly
# raised NameError. Code columns are read as strings to keep leading zeros.
source_tmp = pd.read_csv('../artifacts/source.csv')
site_tmp = pd.read_csv('../artifacts/site.csv', dtype={'site_cd': 'string'})
parameter_tmp = pd.read_csv('../artifacts/parameter.csv', dtype={'parameter_cd': 'string'})
site_parameter_tmp = pd.read_csv('../artifacts/site_parameter.csv')
usbr_site_parameter_tmp = pd.read_csv('../artifacts/usbr_site_parameter.csv')

with du.connect_duckdb() as con:
    con.register('source_tmp', source_tmp)
    con.register('site_tmp', site_tmp)
    con.register('parameter_tmp', parameter_tmp)
    con.register('site_parameter_tmp', site_parameter_tmp)
    con.register('usbr_site_parameter_tmp', usbr_site_parameter_tmp)
    # NOTE(review): nothing is executed against these registrations in this
    # cell; presumably they disappear with the connection — confirm whether
    # the cell is still needed.
    #
    # Disabled alternative to the positional `INSERT ... SELECT *` load: the
    # same inserts with explicit column lists. Kept for reference only.
    # con.execute("""
    #             INSERT INTO source (source_id, agency_cd, agency_nm, source_cd, source_nm, source_url, source_dsc)
    #             SELECT * FROM source_tmp
    #             """)
    # con.execute("""
    #             INSERT INTO site (site_id, site_cd, site_nm, site_dsc, lat_dd, lon_dd, elev_m, site_type, hydro_area_cd, hydro_area_nm, source_id)
    #             SELECT site_id, site_cd, site_nm, site_dsc, lat_dd, lon_dd, elev_m, site_type, hydro_area_cd, hydro_area_nm, source_id
    #             FROM site_tmp
    #             """)
    # con.execute("""
    #             INSERT INTO parameter (parameter_id, parameter_cd, parameter_nm, parameter_dsc, unit_cd, unit_nm)
    #             SELECT parameter_id, parameter_cd, parameter_nm, parameter_dsc, unit_cd, unit_nm
    #             FROM parameter_tmp
    #             """)
    # con.execute("""
    #             INSERT INTO site_parameter (site_parameter_id, site_id, parameter_id, api_ingest_ind, api_ingest_notes)
    #             SELECT site_parameter_id, site_id, parameter_id, api_ingest_ind, api_ingest_notes
    #             FROM site_parameter_tmp
    #             """)
    # The original draft of the next statement reused the `source` column list
    # and selected from source_tmp; the columns below are the ones this
    # notebook inserts into usbr_site_parameter elsewhere.
    # con.execute("""
    #             INSERT INTO usbr_site_parameter (usbr_site_parameter_id, site_parameter_id, usbr_site_parameter_cd)
    #             SELECT usbr_site_parameter_id, site_parameter_id, usbr_site_parameter_cd
    #             FROM usbr_site_parameter_tmp
    #             """)

In [None]:
# Read the whole site_parameter table and preview its first rows.
with du.connect_duckdb() as con:
    result = con.execute("SELECT * FROM site_parameter;").df()

# The DataFrame is already in memory, so the preview does not need the connection.
print(result.head())

In [None]:
# Site/parameter pairs for every source except the one with source_cd = 'HDB'.
# The CTE collects the site_parameter ids whose site belongs to that source;
# the outer query keeps every site_parameter row whose id is not in that set
# and resolves the site and parameter codes.
query = """
WITH hdb_site_parameter AS (
    SELECT sp.site_parameter_id
    FROM site_parameter sp
    INNER JOIN site s ON sp.site_id = s.site_id
    INNER JOIN source so ON s.source_id = so.source_id
    WHERE so.source_cd = 'HDB'
)
SELECT sp.site_parameter_id, s.site_cd, p.parameter_cd
FROM site_parameter sp
INNER JOIN site s ON sp.site_id = s.site_id
INNER JOIN parameter p on sp.parameter_id = p.parameter_id
WHERE sp.site_parameter_id NOT IN (SELECT site_parameter_id FROM hdb_site_parameter);
"""

with du.connect_duckdb() as con:
    # Sort by site then parameter code and renumber the index from zero.
    site_param = (
        con.execute(query)
        .df()
        .sort_values(['site_cd', 'parameter_cd'])
        .reset_index(drop=True)
    )

In [None]:
with du.connect_duckdb() as con:
    # Site, parameter and link information for rows whose source has
    # agency_cd = 'USBR'. The WHERE filter removes every row whose source
    # columns are NULL, so only rows that carry a matching source survive the
    # FULL OUTER JOINs.
    result = con.execute("""
                         SELECT agency_cd, site_cd, p.parameter_cd,site_nm, hydro_area_nm, sp.parameter_id
                         FROM site AS s
                         FULL OUTER JOIN source AS so 
                            ON s.source_id = so.source_id 
                         FULL OUTER JOIN site_parameter AS sp
                            ON s.site_id = sp.site_id
                         FULL OUTER JOIN parameter AS p
                            ON sp.parameter_id = p.parameter_id
                         WHERE so.agency_cd = 'USBR';""").df()
    # Highest site_parameter id currently in use; new ids are numbered from
    # max_sp_id + 1 later in the notebook. MAX() over an empty table yields
    # NULL (None in Python), which would make that arithmetic raise TypeError,
    # so COALESCE falls back to 0 and numbering then starts at 1.
    max_sp_id = con.execute("SELECT COALESCE(MAX(site_parameter_id), 0) FROM site_parameter;").fetchone()[0]
    print(result.head())

In [None]:
# Id/code lookup frames: one for sites, one for parameters. They are merged
# on the code columns later to attach the matching id columns.
with du.connect_duckdb() as con:
    join_sites, join_params = (
        con.execute(lookup_sql).df()
        for lookup_sql in (
            "SELECT site_id, site_cd FROM site;",
            "SELECT parameter_id, parameter_cd FROM parameter;",
        )
    )

In [None]:
# Download the metadata CSV, drop exact duplicate rows and renumber the index.
df = (
    pd.read_csv("https://www.usbr.gov/uc/water/hydrodata/reservoir_data/meta.csv")
    .drop_duplicates()
    .reset_index(drop=True)
)

# Values of the metadata `site_id` column to build site/parameter rows for.
missing_site_codes = [100010, 100089, 100032, 100049, 100053]

# Metadata column name -> column name used in this notebook. The values also
# define which columns are kept and in what order.
rename_map = {
    'site_id': 'site_cd', 'site_datatype_id': 'usbr_site_parameter_cd', 'datatype_id': 'parameter_cd',
    'site_metadata.site_name': 'site_nm', 'datatype_metadata.datatype_common_name': 'parameter_nm'
}

# Keep only the selected sites, rename/select the columns, and turn both code
# columns into strings so they can be merged with the lookup frames.
df_filtered = (
    df.loc[df['site_id'].isin(missing_site_codes)]
    .rename(columns=rename_map)
    .loc[:, list(rename_map.values())]
    .astype({'site_cd': str, 'parameter_cd': str})
)

# Attach site_id and parameter_id by code.
# NOTE(review): both merges are left joins, so a code with no match in the
# lookup frames leaves a missing id in the row — verify before inserting.
missing_site_parameters = (
    df_filtered
    .merge(join_sites, on='site_cd', how='left')
    .merge(join_params, on='parameter_cd', how='left')
)

# Number the new rows consecutively after the current maximum id and flag
# every one of them for API ingest.
first_new_id = max_sp_id + 1
missing_site_parameters = missing_site_parameters.assign(
    site_parameter_id=range(first_new_id, first_new_id + len(missing_site_parameters)),
    api_ingest_ind=True,
)




In [None]:
# Append the rows of the `missing_site_parameters` frame to site_parameter,
# copying only the four listed columns.
# NOTE(review): not idempotent — running the cell again attempts to insert the same ids a second time.
insert_missing_sql = """
                INSERT INTO site_parameter (site_parameter_id, site_id, parameter_id, api_ingest_ind)
                SELECT site_parameter_id, site_id, parameter_id, api_ingest_ind
                FROM missing_site_parameters
                """
with du.connect_duckdb() as con:
    # Make the DataFrame visible to SQL under the name used in the statement.
    con.register('missing_site_parameters', missing_site_parameters)
    con.execute(insert_missing_sql)

In [None]:
# Lookup of every site_parameter id together with its site code and parameter
# code (inner joins, so only rows with both a site and a parameter appear).
site_param_lookup_sql = """
        SELECT sp.site_parameter_id, s.site_cd, p.parameter_cd
        FROM site_parameter sp
        INNER JOIN site s ON sp.site_id = s.site_id
        INNER JOIN parameter p on sp.parameter_id = p.parameter_id
        """
with du.connect_duckdb() as con:
    join_site_param = con.execute(site_param_lookup_sql).df()
    


In [None]:
# Full metadata frame with the notebook's column names; both code columns are
# converted to strings so they can be merged with the database lookup.
df_rename = (
    df.rename(columns=rename_map)
    .loc[:, list(rename_map.values())]
    .astype({'site_cd': str, 'parameter_cd': str})
)

# Keep only the site/parameter pairs that exist on both sides.
usbr_site_param = join_site_param.merge(df_rename, on=['site_cd', 'parameter_cd'], how='inner')

# Number the rows from 1, store the USBR code as a string, and keep just the
# three columns that are inserted into usbr_site_parameter.
# NOTE(review): ids restart at 1 here, whereas site_parameter ids continue
# from the current maximum — confirm usbr_site_parameter is empty before inserting.
usbr_site_param = usbr_site_param.assign(
    usbr_site_parameter_id=range(1, 1 + len(usbr_site_param)),
    usbr_site_parameter_cd=usbr_site_param['usbr_site_parameter_cd'].astype(str),
)[['usbr_site_parameter_id', 'site_parameter_id', 'usbr_site_parameter_cd']]

In [None]:
# Append the rows of the `usbr_site_param` frame to usbr_site_parameter,
# copying the three listed columns.
# NOTE(review): not idempotent — running the cell again attempts to insert the same ids a second time.
insert_usbr_sql = """
                INSERT INTO usbr_site_parameter (usbr_site_parameter_id, site_parameter_id, usbr_site_parameter_cd)
                SELECT usbr_site_parameter_id, site_parameter_id, usbr_site_parameter_cd
                FROM usbr_site_param
                """
with du.connect_duckdb() as con:
    # Make the DataFrame visible to SQL under the name used in the statement.
    con.register('usbr_site_param', usbr_site_param)
    con.execute(insert_usbr_sql)

In [None]:
# Write each table back to ../artifacts/<table>.csv with a header row.
# The tuple order is the order in which the COPY statements are issued.
tables_to_export = ('site', 'source', 'parameter', 'site_parameter', 'usbr_site_parameter')
with du.connect_duckdb() as con:
    for table_name in tables_to_export:
        con.execute(f"COPY {table_name} TO '../artifacts/{table_name}.csv' WITH (HEADER TRUE);")