# Imports

## Dependencies

In [1]:

import requests
from requests.auth import HTTPBasicAuth
import pandas as pd
from io import BytesIO
import datetime
import numpy as np
import configparser
import sys

In [5]:
limits = pd.read_csv("../../src/micromet/data/Flux_variables.csv")
limits.set_index("Name").to_dict(orient="index")

{'TIMESTAMP': {'Description': 'ISO timestamp – short format',
  'Units': 'YYYYMMDDHHMM',
  'Min': nan,
  'Max': nan},
 'TIMESTAMP_END': {'Description': 'ISO timestamp end of averaging period - short format (12-digit integer)',
  'Units': 'YYYYMMDDHHMM',
  'Min': nan,
  'Max': nan},
 'TIMESTAMP_START': {'Description': 'ISO timestamp start of averaging period - short format (12-digit integer)',
  'Units': 'YYYYMMDDHHMM',
  'Min': nan,
  'Max': nan},
 'COND_WATER': {'Description': 'Conductivity (i.e., electrical conductivity) of water',
  'Units': 'µS cm-1',
  'Min': 0.0,
  'Max': 10000.0},
 'DO': {'Description': 'Dissolved oxygen in water',
  'Units': 'µmol L-1',
  'Min': 0.0,
  'Max': nan},
 'PCH4': {'Description': 'Dissolved methane (CH4) in water',
  'Units': 'nmolCH4 mol-1',
  'Min': 0.0,
  'Max': nan},
 'PCO2': {'Description': 'Dissolved carbon dioxide (CO2) in water',
  'Units': 'µmolCO2 mol-1',
  'Min': 0.0,
  'Max': 10000.0},
 'PN2O': {'Description': 'Dissolved nitrous oxide (N2O

## Micromet

In [2]:
%load_ext autoreload  
%autoreload 2
#sys.path.append("G:/Shared drives/UGS_Flux/Data_Processing/Jupyter_Notebooks/MicroMet")
#sys.path.append("C:/Users/kladig/Documents/GitHub/MicroMet")
sys.path.append("../../src")
import micromet

# Connection info and Config

In [3]:
config = configparser.ConfigParser()

config.read('../../secrets/config.ini')

from sqlalchemy import create_engine
import urllib.parse
host = config['DEFAULT']['ip']
pw = config['DEFAULT']['pw']
user = config['DEFAULT']['login']

encoded_password = urllib.parse.quote_plus(pw)

def postconn_et(encoded_password, host='localhost',user='postgres',port='5432',db='groundwater', schema = 'groundwater'):
    connection_text = "postgresql+psycopg2://{:}:{:}@{:}:{:}/{:}?gssencmode=disable".format(user,encoded_password,host,port,db)
    return create_engine(connection_text, connect_args={'options': '-csearch_path={}'.format(schema)})


engine = postconn_et(encoded_password, host=host, user=user)

# Pull From Each Datalogger Using the CS Datalogger API

https://help.campbellsci.com/crbasic/cr1000x/Content/Info/webserverapicommands1.htm

In [None]:
downloader = micromet.station_data_pull.StationDataDownloader(config)
processor = micromet.station_data_pull.StationDataProcessor(config, engine)

# Example of downloading from a single station
raw_data, pack_size, status_code = downloader.download_from_station(station='US-UTE')
if raw_data is not None:
    print(f"Successfully downloaded {len(raw_data)} records from US-UTE.")

In [None]:
loggertype = 'eddy'
station = 'US-UTE'
station_id = micromet.station_data_pull.StationDataDownloader.get_station_id(station)

last_date = processor.get_max_date(station_id, loggertype)
print(f"Last record for {station_id} in the database: {last_date}")

raw_data, pack_size, status_code = downloader.download_from_station(
    station=station_id,
    loggertype=loggertype,
    p1=f"{pd.to_datetime(last_date, format='%Y%m%d%H%M'):%Y-%m-%d %H:%M}"
)
if raw_data is not None:
    print(f"Downloaded {len(raw_data)} new records.")

In [20]:

# Downloads data for all stations
site_folders = {#'US-UTD':'Dugout_Ranch',
                'US-UTB':'BSF',
                'US-UTJ':'Bluff',
                'US-UTW':'Wellington',
                'US-UTE':'Escalante',
                'US-UTM':'Matheson',
                'US-UTP':'Phrag',
                'US-CdM':'Cedar_Mesa',
                'US-UTV':'Desert_View_Myton',
                'US-UTL':'Pelican_Lake',
                'US-UTG':'Green_River',
                }

downloaded = {}

for site, folder in site_folders.items():
    print(f"Downloading {site} data")
    try:
        downloaddata = processor.get_station_data(micromet.station_data_pull.StationDataDownloader.get_station_id(site), 
                                                    loggertype='eddy', 
                                                    reformat=True, 
                                                    drop_soil=False,
                                                    config_path = "../../src/micromet/data/reformatter_vars.yml",
                                                    var_limits_csv = "../../src/micromet/data/extreme_values.csv",
                                                    )
        print(type(downloaddata))
        downloaded[site] = downloaddata
    except Exception as e:
        print(f"Failed to download {site}: {e}")

Downloading US-UTB data
Failed to download US-UTB: unsupported format string passed to NoneType.__format__
Downloading US-UTJ data
Failed to download US-UTJ: unsupported format string passed to NoneType.__format__
Downloading US-UTW data
Failed to download US-UTW: unsupported format string passed to NoneType.__format__
Downloading US-UTE data
Failed to download US-UTE: unsupported format string passed to NoneType.__format__
Downloading US-UTM data
Failed to download US-UTM: unsupported format string passed to NoneType.__format__
Downloading US-UTP data
Failed to download US-UTP: unsupported format string passed to NoneType.__format__
Downloading US-CdM data
Failed to download US-CdM: unsupported format string passed to NoneType.__format__
Downloading US-UTV data
Failed to download US-UTV: unsupported format string passed to NoneType.__format__
Downloading US-UTL data
Failed to download US-UTL: unsupported format string passed to NoneType.__format__
Downloading US-UTG data
Failed to dow

In [None]:
dfs = {}
for key, value in downloaded.items():
    if isinstance(value[0], pd.DataFrame):
        dfs[key] = value[0]



In [38]:
for key, value in downloaded.items():
    print(value[1])

0.185215
0.198578
0.021984
0.18012899999999998
0.19860999999999998
None
0.191742
None


In [16]:
df = pd.concat(dfs,ignore_index=False)
df.reset_index(inplace=True)
df.rename(columns={'level_0':'stationid'}, inplace=True)
df.set_index(["stationid", "datetime_start"], inplace=True)
df = df.sort_index(level=[0,1])
df

NameError: name 'dfs' is not defined

In [40]:
escalante = df.loc['US-UTE']

In [44]:
def compare_sql_to_station(
    df: pd.DataFrame,
    station: str,
    engine,
    field: str = "timestamp_end",
    loggertype: str = "eddy",
) -> pd.DataFrame:
    """
    Compare station data with SQL records and filter new entries.

    Args:
        df: Station data DataFrame
        station: Station identifier
        field: Field to compare
        loggertype: Logger type

    Returns:
        Filtered DataFrame
    """
    table = f"amflux{loggertype}"
    query = f"SELECT {field} FROM {table} WHERE stationid = '{station}';"

    exist = pd.read_sql(query, con=engine)
    existing = exist["timestamp_end"].values
    print(existing)
    return remove_existing_records(df, field, existing)

def remove_existing_records(
    df: pd.DataFrame, column_to_check: str, values_to_remove: list
) -> pd.DataFrame:
    """
    Remove existing records from DataFrame.

    Args:
        df: Input DataFrame
        column_to_check: Column name to check
        values_to_remove: Values to remove

    Returns:
        Filtered DataFrame
    """
    column_variations = [
        column_to_check,
        column_to_check.upper(),
        column_to_check.lower(),
    ]

    for col in column_variations:
        if col in df.columns:
            print(f"Column '{col}' found in DataFrame")
            remaining = df[~df[col].isin(values_to_remove)]
            print(f"{len(remaining)} records remaining after filtering")
            print(f"Removing {len(df) - len(remaining)} records")
            return remaining

    raise ValueError(f"Column '{column_to_check}' not found in DataFrame")

In [41]:
compare_sql_to_station(escalante, "US-UTE", engine, field="timestamp_end", loggertype="eddy")

Column 'TIMESTAMP_END' found in DataFrame
24 records remaining after filtering
Removing 0 records


Unnamed: 0_level_0,TIMESTAMP_START,TIMESTAMP_END,CO2,CO2_SIGMA,H2O,H2O_SIGMA,FC,FC_SSITC_TEST,LE,LE_SSITC_TEST,...,LI710_diag_1,TA_1_2_1,RH_1_2_1,TA_1_3_1,TS_2_1_1,SWC_2_1_1,CH4,FCH4,FCH4_SSITC_TEST,CH4_sig_strgth_Min
datetime_start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-05-19 12:30:00,202505191230,202505191300,405.3243,3.888222,4.319923,0.67727,-29.38211,0,304.1826,0,...,,,,,,,,,,
2025-05-19 13:00:00,202505191300,202505191330,404.484,3.980908,4.386743,0.72932,-30.97111,0,331.661,0,...,,,,,,,,,,
2025-05-19 13:30:00,202505191330,202505191400,405.0214,3.747729,4.177291,0.730526,-33.01447,0,338.768,0,...,,,,,,,,,,
2025-05-19 14:00:00,202505191400,202505191430,405.7571,2.923519,3.895212,0.687936,-26.07133,0,297.1538,0,...,,,,,,,,,,
2025-05-19 14:30:00,202505191430,202505191500,406.6148,2.931706,3.843118,0.682486,-25.41341,0,287.7615,0,...,,,,,,,,,,
2025-05-19 15:00:00,202505191500,202505191530,405.8199,3.294099,4.073086,0.740037,-24.24582,0,266.0978,0,...,,,,,,,,,,
2025-05-19 15:30:00,202505191530,202505191600,405.3223,2.915034,4.19291,0.660784,-24.48984,0,265.1537,0,...,,,,,,,,,,
2025-05-19 16:00:00,202505191600,202505191630,405.758,2.060637,3.739027,0.508589,-18.71434,0,241.3577,0,...,,,,,,,,,,
2025-05-19 16:30:00,202505191630,202505191700,406.1486,1.650086,3.692814,0.430534,-15.22368,0,205.0843,0,...,,,,,,,,,,
2025-05-19 17:00:00,202505191700,202505191730,406.9433,1.332869,3.536563,0.387017,-12.1917,0,187.474,0,...,,,,,,,,,,


In [43]:
pd.read_sql("SELECT * FROM amfluxeddy WHERE stationid = 'US-UTE' and timestamp_end > 202404010000" , con=engine)

Unnamed: 0,stationid,datetime_start,timestamp_start,timestamp_end,co2,co2_sigma,h2o,h2o_sigma,fc,fc_ssitc_test,...,p,file_no,datalogger_no,battery_voltage,co2_sig_strgth_min,h2o_sig_strgth_min,t_canopy,ch4,fch4,fch4_ssitc_test
0,US-UTE,2024-04-01 00:00:00,202404010000,202404010030,-9999.0000,-9999.000000,-9999.000000,-9999.000000,-9999.000000,2,...,-9999.0,-9999.0,-9999.0,-9999.00000,-9999.0,-9999.000000,,,,
1,US-UTE,2024-04-01 00:30:00,202404010030,202404010100,-9999.0000,-9999.000000,-9999.000000,-9999.000000,-9999.000000,2,...,-9999.0,-9999.0,-9999.0,-9999.00000,-9999.0,-9999.000000,,,,
2,US-UTE,2024-04-01 01:00:00,202404010100,202404010130,-9999.0000,-9999.000000,-9999.000000,-9999.000000,-9999.000000,2,...,-9999.0,-9999.0,-9999.0,-9999.00000,-9999.0,-9999.000000,,,,
3,US-UTE,2024-04-01 01:30:00,202404010130,202404010200,-9999.0000,-9999.000000,-9999.000000,-9999.000000,-9999.000000,2,...,-9999.0,-9999.0,-9999.0,-9999.00000,-9999.0,-9999.000000,,,,
4,US-UTE,2024-04-01 02:00:00,202404010200,202404010230,-9999.0000,-9999.000000,-9999.000000,-9999.000000,-9999.000000,2,...,-9999.0,-9999.0,-9999.0,-9999.00000,-9999.0,-9999.000000,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16153,US-UTE,2025-03-03 12:30:00,202503031230,202503031300,401.5810,1.287875,3.571206,0.507766,4.855054,0,...,0.0,12.0,21021.0,13.46658,-9999.0,0.902363,,,,
16154,US-UTE,2025-03-03 13:00:00,202503031300,202503031330,398.5066,0.641167,4.733978,0.190853,1.468835,0,...,0.0,12.0,21021.0,13.55253,-9999.0,0.901604,,,,
16155,US-UTE,2025-03-03 13:30:00,202503031330,202503031400,399.3173,1.829701,4.138739,0.266099,6.582117,2,...,0.0,12.0,21021.0,13.50868,-9999.0,0.898258,,,,
16156,US-UTE,2025-03-03 14:00:00,202503031400,202503031430,397.8987,9.102474,4.769092,0.509273,2.216126,2,...,0.0,12.0,21021.0,13.58152,-9999.0,0.816367,,,,


In [7]:


# Process data for all stations
site_folders = {#'US-UTD':'Dugout_Ranch',
                'US-UTB':'BSF',
                'US-UTJ':'Bluff',
                'US-UTW':'Wellington',
                'US-UTE':'Escalante',
                'US-UTM':'Matheson',
                'US-UTP':'Phrag',
                'US-CdM':'Cedar_Mesa',
                'US-UTV':'Desert_View_Myton',
                'US-UTL':'Pelican_Lake',
                'US-UTG':'Green_River',
                }

manager.process_station_data(site_folders,
                             config_path = "../../src/micromet/data/reformatter_vars.yml",
                             var_limits_csv = "../../src/micromet/data/extreme_values.csv",       
                             )

US-UTB
US-UTJ


OperationalError: (psycopg2.OperationalError) connection to server at "34.106.159.40", port 5432 failed: Connection timed out (0x0000274C/10060)
	Is the server running on that host and accepting TCP/IP connections?

(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [2]:
import requests
from requests.auth import HTTPBasicAuth
import pandas as pd
from io import BytesIO
import datetime
import numpy as np
import configparser
import sys

config = configparser.ConfigParser()

config.read('../../secrets/config.ini')

from sqlalchemy import create_engine
import urllib.parse
host = config['DEFAULT']['ip']
pw = config['DEFAULT']['pw']
user = config['DEFAULT']['login']

encoded_password = urllib.parse.quote_plus(pw)

def postconn_et(encoded_password, host='localhost',user='postgres',port='5432',db='groundwater', schema = 'groundwater'):
    connection_text = "postgresql+psycopg2://{:}:{:}@{:}:{:}/{:}?gssencmode=disable".format(user,encoded_password,host,port,db)
    return create_engine(connection_text, connect_args={'options': '-csearch_path={}'.format(schema)})


engine = postconn_et(encoded_password, host=host, user=user)

table = "amfluxeddy"
query = f"SELECT * FROM {table} WHERE site_id = 'US-UTD'"
df = pd.read_sql(query, con=engine)


ProgrammingError: (psycopg2.errors.UndefinedColumn) column "site_id" does not exist
LINE 1: SELECT * FROM amfluxeddy WHERE site_id = 'US-UTD'
                                       ^

[SQL: SELECT * FROM amfluxeddy WHERE site_id = 'US-UTD']
(Background on this error at: https://sqlalche.me/e/20/f405)

In [None]:
table = "amfluxeddy"
query = f"SELECT * FROM {table} where stationid='US-UTE'"
df = pd.read_sql(query, con=engine)
df = df.set_index('datetime_start')
df = df.replace(-9999.0, np.nan)
df = df[df['et']>10]


In [14]:
df['et'].tail(10)

datetime_start
2025-03-03 10:00:00    0.033212
2025-03-03 10:30:00    0.046537
2025-03-03 11:00:00    0.053870
2025-03-03 11:30:00   -0.035644
2025-03-03 12:00:00    0.035320
2025-03-03 12:30:00   -0.103900
2025-03-03 13:00:00    0.004032
2025-03-03 13:30:00   -0.004580
2025-03-03 14:00:00    0.071820
2025-03-03 14:30:00    0.081643
Name: et, dtype: float64