In [1]:
# Constrained water and energy balance estimation 
# v4.2: adapted from v4.1 by treating SCF known, G estimated offline
# v4.3: adapted from v4.2 & 3.3 to handle batch processing on OSC
# v4.4: adapted from v4.3 to update ipopt deprecation
# v4.5: moving σWRFG based on SCF 7/2/20 JLD
# v4.6: adding SDC to objective function: MD & JD: 9/28/20
# v4.7: adding "pseudo-valid prior" to starting points. remove SDC. MD: 1/7/21
# v4.8: tweak error parameters. MD & JD: 1/8/21
# v4.9 prior valid added to objective function. daily G capped in constraints. more outputs written 10/5/21
# v4.9.1 remove SWE from objective
# v4.9.2 added air temp
# v51 Converted to function but still uses textfile as input
# v52 Prototyping to pass the array directly to function [and fixed tab to 4 spaces]
# v53 To set tab to 4 spaces (by copying/pasting)
# v54 combine 9 separate textfile output in one txt file (due to file count limitation on Discover)
# v55 Fixing error due to missing days of data due to Polar nights
# v58 New updates by jack (Nov 2023)
# Jan 29, 2024: Due to error on obj function σWRFG becoming zero, fixed the minimum σWRFG to 25 
# Jun 21, 2024: No change for 1km run here. MODIS with UINT8 did not work likely because of missing due to no-data background and polar nights.

using NCDatasets
using Random
using JuMP
using Ipopt
using DelimitedFiles
using Statistics
Random.seed!(1234)  # seed for reproducibility


TaskLocalRNG()

In [2]:
"""
    checkDirectoryExists(dir_path::String)

Make sure `dir_path` exists on disk.

If the directory is missing, a notice is printed and the directory is created together
with any missing parent directories. An existing directory is left untouched.
"""
function checkDirectoryExists(dir_path::String)
    # Nothing to do when the directory is already there.
    isdir(dir_path) && return nothing

    println("Directory does not exist. Creating: $dir_path")
    return mkpath(dir_path)  # mkpath also creates intermediate directories
end


checkDirectoryExists (generic function with 1 method)

In [3]:
#function blender(i,j,SWEprior, Pprior, Gprior, SCFinst, AirT, logDir, exp_dir, mu, sigma, twindow=2)
"""
    blender(i, j, SWEprior, Pprior, Gprior, SCFinst, AirT, logDir, exp_dir, psVal, twindow=2)

Run the constrained snow mass-balance estimation for one pixel and write the result to a
text file.

# Arguments
- `i`, `j`: pixel location; only used to build the log and output file names.
- `SWEprior`: prior SWE time series [m], length `nt`.
- `Pprior`: prior precipitation time series [m], length `nt` (only `1:nt-1` is used).
- `Gprior`: prior surface heat flux [W/m2]. Currently unused.
- `SCFinst`: measured snow covered fraction, length `nt`.
- `AirT`: air temperature [K], length `nt`.
- `logDir`: directory that receives the solver log `Pix_<i>_<j>_<twindow>.txt`.
- `exp_dir`: directory that receives the output table `Pix_<i>_<j>_<twindow>.txt`.
- `psVal`: precipitation scalar multiplied onto both `Pprior` and `SWEprior`.
- `twindow`: half-window (in time steps) of the first SCF smoothing pass.

# Variable sizes
SWE and SCF are storage terms and have length `nt`. Precipitation and melt are flux terms
and have length `nt-1` inside the optimisation. In the output every column has length
`nt`; flux columns get a trailing 0.

# Output columns
`SWEhat, GmeltHat, Ghat, Phat, Pprior (scaled), Ushat, G_pv, Gmelt_pv, U_pv,
SWEprior (scaled), SCFobs`. Columns `Ghat`, `Ushat`, `G_pv`, `Gmelt_pv` and `U_pv` are
not estimated here and are filled with the no-data value -9999.

# Returns
`nothing`. All results are written to disk.
"""
function blender(i,j,SWEprior, Pprior, Gprior, SCFinst, AirT, logDir, exp_dir, psVal, twindow=2)
    # 0 handle variable sizes: fluxes live on nt-1 intervals
    nt = length(SCFinst)
    Pprior = Pprior[1:nt-1]

    # NOTE: missing SCF values are not filled here (an earlier fill-in step was disabled,
    # June 2024); the smoothing below operates on SCFinst as given.

    # 1 Smooth SCF observations
    SCFobs = smoothdata(SCFinst, twindow, nt, "median")
    # SCFobs = fix_modis(SCFinst)  # optional MODIS fix developed by Jack (Feb 04, 2025)
    SCF_smooth_season = smoothdata(SCFobs, 30, nt, "mean")  # half-window 30 (was 60 full-window)

    # 1.1 Apply the externally supplied precipitation scalar to both priors
    Pprior = psVal.*Pprior
    SWEprior = psVal.*SWEprior

    # 2 Define hyperparameters
    tmelt,tmelt_smooth,SWEmax,SWEmin_global,Meltmax,σP,σSWE,k,Melt0,L=define_hyperparameters(SCF_smooth_season, nt, Pprior, SWEprior, AirT, SCFobs)

    # 3 Solve
    m = Model(optimizer_with_attributes(Ipopt.Optimizer,"max_iter"=>5000))
    # define variables and bounds (index named `t` so it cannot be confused with pixel index `i`)
    @variable(m, SWEmin_global <= SWE[t=1:nt] <= SWEmax[t], start=SWEprior[t])
    @variable(m, Precip[t=1:nt-1]>=0. ,start=Pprior[t])
    @variable(m, 0. <= Melt[t=1:nt-1]<= Meltmax)
    @variable(m, Mcost[t=1:nt-1] >=0)
    # define constraints
    # logistic melt cost: close to 0 for small melt, approaches L once melt exceeds Melt0
    for t in 1:nt-1
        @NLconstraint(m,Mcost[t]==L/(1+exp( -k*(Melt[t]-Melt0))))
    end
    # snow mass balance between consecutive time steps
    for t in 1:nt-1
        @constraint(m,SWE[t+1]==SWE[t]+Precip[t]-Melt[t])
    end
    # define objective function: weighted misfit to the priors plus the melt cost
    @objective(m,Min,sum((Precip-Pprior).^2 ./σP.^2) + sum((SWE-SWEprior ).^2 ./ σSWE.^2) + sum(Mcost.^2))
    log_file =  "$logDir/Pix_$(i)_$(j)_$(twindow).txt"

    # solve, sending the solver's console output to the per-pixel log file
    redirect_stdio(stdout=log_file, stderr=log_file) do
        optimize!(m)
    end

    # 4 extract
    NODATAvalue = -9999
    SWEhat=JuMP.value.(SWE)
    Phat=zeros(nt,1)
    Phat[1:nt-1] = JuMP.value.(Precip)
    Melt_hat = zeros(nt,1)
    Melt_hat[1:nt-1] = JuMP.value.(Melt)

    Δt = 86400   # s/day
    ρw = 1000    # density of water
    Lf = 0.334E6 # latent heat of fusion J/kg
    GmeltHat = Melt_hat/Δt*Lf*ρw

    # columns that are not estimated in this version are written as no-data
    Ghat = ones(nt,1)*NODATAvalue
    Ushat = ones(nt,1)*NODATAvalue
    G_pv = ones(nt,1)*NODATAvalue
    U_pv = ones(nt,1)*NODATAvalue
    Gmelt_pv = ones(nt,1)*NODATAvalue
    # prior precipitation padded to nt; the trailing element is 0 like the other fluxes
    Pprint = zeros(nt,1)
    Pprint[1:nt-1] = Pprior

    # 5 output
    out_vars = hcat(SWEhat, GmeltHat, Ghat, Phat, Pprint, Ushat, G_pv, Gmelt_pv, U_pv, SWEprior, SCFobs)
    writedlm("$(exp_dir)/Pix_$(i)_$(j)_$(twindow).txt", out_vars)

    # 6 clean up: drop the model and force a collection before the next pixel
    m = nothing
    GC.gc()
    return nothing
end

# function smoothdata(SCFinst,twindow,nt,smoothfunc)
#     # println("Inside smoothdata function: $twindow, $nt, $smoothfunc")
#     SCF_smooth=zeros(nt,1)
#     for i=1:nt
#         istart = trunc(Int,i-round(twindow/2))
#         iend = trunc(Int,i+round(twindow/2))        
#         # if i < twindow || i > nt-twindow
#         if istart < 1 || iend > nt
#             SCF_smooth[i]=0  # BY?: why not keep whatever the original value was. Moreover, this is already initialized to 0 at the beginning.
#         else            
#             if smoothfunc == "mean"
#                 SCF_smooth[i] = mean(SCFinst[istart:iend])
#             elseif smoothfunc=="median"
#                 SCF_smooth[i] = median(SCFinst[istart:iend])
#             end
#         end
#     end
    
#     return SCF_smooth
# end


"""
    smoothdata(SCFinst, twindow, nt, smoothfunc)

Smooth a time series with a centred moving mean or moving median.

# Arguments
- `SCFinst`: input data to be smoothed (not modified).
- `twindow`: half-window size; `1` means one data point on either side of the current
  point is used. This is half the value of the original full-window formulation
  (i.e. 60 is now 30).
- `nt`: length of the input data.
- `smoothfunc`: `"mean"` or `"median"`.

# Returns
An `nt`×1 `Float64` matrix. The first and last `twindow` elements have no complete window
and are left at 0.

# Throws
- `ArgumentError` if `smoothfunc` is neither `"mean"` nor `"median"`.
"""
function smoothdata(SCFinst, twindow, nt, smoothfunc)
    # Pick the window statistic once instead of re-testing the string on every step.
    if smoothfunc == "mean"
        reducer = mean
    elseif smoothfunc == "median"
        reducer = median
    else
        throw(ArgumentError("smoothfunc must be \"mean\" or \"median\", got \"$smoothfunc\""))
    end

    SCF_smooth = zeros(nt, 1)
    # Start at 1+twindow (1-based indexing) so that every window is fully inside the data.
    for i in (1 + twindow):(nt - twindow)
        # With missing data, wrap the window in skipmissing(...) before reducing.
        SCF_smooth[i] = reducer(SCFinst[(i - twindow):(i + twindow)])
    end
    return SCF_smooth
end

"""
    define_uncertainty(Pprior, SWEprior, AirT, SCFobs, nt, tmelt_smooth)

Build the standard deviations used to weight the precipitation and SWE terms.

# Arguments
- `Pprior`: prior precipitation, indexed `1:nt-1`.
- `SWEprior`: prior SWE, length `nt` (not modified).
- `AirT`: air temperature in Kelvin; converted to Celsius internally.
- `SCFobs`: smoothed snow covered fraction, indexed `1:nt`.
- `nt`: number of time steps.
- `tmelt_smooth`: smoothed melt indicator, indexed `1:nt`.

# Returns
`(σP, σSWE)` where `σP` is an (`nt-1`)×1 matrix and `σSWE` has the shape of `SWEprior`.
"""
function define_uncertainty(Pprior, SWEprior, AirT, SCFobs, nt, tmelt_smooth)
    # Air temperature K -> C
    airT_celsius = AirT .- 273.15

    # ---- Precipitation uncertainty (size nt-1) ----
    RelPUnc = 0.3     # [-] relative uncertainty, applies to cumulative precipitation
    σPmin = 0.001     # floor used on days with (almost) no precipitation
    Pthresh = 0.001   # precipitation below this counts as "no precipitation"
    σP = zeros(nt - 1, 1)
    for t in 1:(nt - 1)
        σP[t] = Pprior[t] < Pthresh ? σPmin : Pprior[t] * RelPUnc
    end

    # Scale by sqrt(number of snow days): days with precipitation above the threshold
    # while the air temperature is below Tprecip_thresh.
    Tprecip_thresh = 1.5
    nsnowday = count(
        t -> Pprior[t] > Pthresh && airT_celsius[t] < Tprecip_thresh, 1:(nt - 1)
    )
    if nsnowday > 0
        σP = σP * sqrt(nsnowday)
    end

    # ---- SWE uncertainty (size nt) ----
    fSWE = 0.4
    σSWEmin = 0.01
    σSWEmax = 10
    σSWE = SWEprior * fSWE
    for t in 1:nt
        if SCFobs[t] == 0 || tmelt_smooth[t] > 0.1
            # snow-free or melting: the prior gets the maximum uncertainty
            σSWE[t] = σSWEmax
        elseif σSWE[t] < σSWEmin
            σSWE[t] = σSWEmin
        end
    end

    return σP, σSWE
end

"""
    define_hyperparameters(SCF_smooth_season, nt, Pprior, SWEprior, AirT, SCFobs)

Collect the bounds, uncertainties and melt-cost parameters used by the optimisation.

# Hyperparameters
- smoothing windows for SCF (snow on/off and melt timing): chosen by the caller
- `ΔSCFthresh`: day-to-day drop in seasonal SCF that flags a melt day
- half-window used to smooth the melt-day indicator (15)
- `SWEmax_global`, `SWEmin_global`, `Meltmax`
- `k`, `Melt0`, `L`: parameters of the logistic melt cost
- `σPmin`, `Pthresh`, `Tprecip_thresh`, `fSWE`, `σSWEmin`, `σSWEmax`: set inside
  `define_uncertainty`

# Returns
`(tmelt, tmelt_smooth, SWEmax, SWEmin_global, Meltmax, σP, σSWE, k, Melt0, L)`
"""
function define_hyperparameters(SCF_smooth_season,nt,Pprior,SWEprior,AirT, SCFobs)
    # 2.1 Melt timing: flag steps where the seasonal SCF drops faster than the threshold
    ΔSCFthresh = -0.01
    tmelt = zeros(nt, 1)
    for t in 2:nt
        drop = SCF_smooth_season[t] - SCF_smooth_season[t-1]
        if drop < ΔSCFthresh
            tmelt[t] = 1
        end
    end
    tmelt_smooth = smoothdata(tmelt, 15, nt, "mean")  # half-window 15 (was 30 full-window)

    # 2.2 Extreme / limit values
    # 2.2.1 SWE: the upper bound collapses to the lower bound wherever SCF is zero
    SWEmax_global = 5
    SWEmin_global = 1.0e-6 # 1/1000 mm. TODO (BNY): could also be used for flagged pixels (water etc.)
    SWEmax = zeros(nt, 1)
    for t in 1:nt
        SWEmax[t] = SCFobs[t] == 0 ? SWEmin_global : SWEmax_global
    end
    # 2.2.2 Melt
    Meltmax = 0.075

    # 2.3 Uncertainties of the priors
    σP, σSWE = define_uncertainty(Pprior, SWEprior, AirT, SCFobs, nt, tmelt_smooth)

    # 2.4 Melt cost function parameters
    k = 500
    Melt0 = 0.05
    L = 1

    return tmelt, tmelt_smooth, SWEmax, SWEmin_global, Meltmax, σP, σSWE, k, Melt0, L
end

"""
    fix_modis(SCF; startidx=150)

Repair spurious zero-SCF days in a MODIS snow cover fraction series (method designed by
Jack Dechow). `SCF` is modified in place and also returned.

Step 1 walks over three-day windows starting at `startidx` and computes
`ΔSCF = |SCF[i] - SCF[i+1]| + |SCF[i+1] - SCF[i+2]|`:
- `ΔSCF == 2` (SCF went 1→0→1): zero days in the window are set to 0.667
- `ΔSCF > 1.5`: zero days in the window are set to `ΔSCF / 3`
- `ΔSCF > 0.9`: zero days in the window are set to `ΔSCF / 2`

Step 2 looks for the final snow-off day: the first zero-SCF day at or after `startidx`
after which the series sums to zero (or the last zero day if no such day exists). If the
day before it has SCF above 0.49, the snow-off day is set to half of that value so the
melt series gets a one-day down-ramp.

# Arguments
- `SCF`: snow cover fraction series with a floating-point element type.
- `startidx`: first index that is examined (default 150).
"""
function fix_modis(SCF; startidx::Integer=150)
    nt = length(SCF)

    # 1 Patch isolated zero days based on the three-day variation ΔSCF
    for i in startidx:(nt - 2)
        deltaSCF = abs(SCF[i] - SCF[i+1]) + abs(SCF[i+1] - SCF[i+2])
        if deltaSCF == 2
            fillval = 0.667
        elseif deltaSCF > 1.5
            fillval = deltaSCF / 3
        elseif deltaSCF > 0.9
            fillval = deltaSCF / 2
        else
            continue
        end
        for k in i:(i + 2)
            if SCF[k] == 0
                SCF[k] = fillval
            end
        end
    end

    # 2 Find the final day of snow off
    # stopIdx stays 0 when no zero-SCF day exists at or after startidx.
    stopIdx = 0
    for i in startidx:nt
        if SCF[i] == 0
            stopIdx = i  # plain local assignment: `global` here would bypass the local
            if sum(@view SCF[i:nt]) == 0
                break  # no snow for the rest of the series
            end
        end
    end

    # 3 Add a single step-down day when the last snow day is above ~50 % cover
    if stopIdx > 1 && SCF[stopIdx-1] > 0.49
        SCF[stopIdx] = 0.5 * SCF[stopIdx-1]
    end
    return SCF
end

"""
    detPrecipScalar(SCFinst, mu, sigma, gamma, yMin, yMax, logfile)

Compute a precipitation scaling value from a logistic (tanh) function of the pixel's mean
SCF. The output lies between `yMin` and `yMax`, with the inflection point at `mu`.

# Arguments
- `SCFinst`: pixel SCF time series; its mean (`avgSCF`) is the function input.
- `mu`: mean of `avgSCF` over all pixels in the basin, i.e. the centre of the tanh.
  Must be non-negative.
- `sigma`: standard deviation of `avgSCF` over all pixels in the basin (currently unused).
- `gamma`: steepness parameter of the logistic function.
- `yMin`: minimum output value. Must be non-negative.
- `yMax`: maximum output value. Must not be smaller than `yMin`.
- `logfile`: file path or `IO` that receives a one-line record of the scalar.

# Returns
The scaling value `y`.

# Throws
- `DomainError` if `yMin < 0`, `mu < 0` or `yMin > yMax`.
"""
function detPrecipScalar(SCFinst, mu, sigma, gamma,yMin,yMax,logfile)
    # Validate inputs; the offending values are attached to the exception.
    if yMin < 0 || mu < 0
        throw(DomainError((yMin, mu), "Error! Inputs yMin and basinMu must be non-negative!"))
    end
    if yMin > yMax
        throw(DomainError((yMin, yMax), "Error! Input yMax must be greater than yMin!"))
    end

    # Average of SCFinst over the entire water year for the current pixel
    avgSCF = mean(SCFinst)

    # Logistic function: normalise tanh to [0, 1], then stretch to [yMin, yMax]
    y_norm = 0.5 * (tanh(gamma * (avgSCF - mu)) + 1)
    y = y_norm * (yMax - yMin) + yMin

    msg = " Precipitation Scalar Record; PSval = $y\n"
    write(logfile, msg)
    return y
end

detPrecipScalar (generic function with 1 method)

In [4]:
"""
    callBlenderWatershed(domainName, WY, vN, scaleFlag; rootDir=...)

Run `blender` on every pixel of a single watershed domain for one water year.

Accepted version codes `vN`:
- `v0`: run current Blender over a single watershed
- `v1`: deprecated, same output as `v0`
- `v2`: run Blender with precipitation scaling processing
- `v3`: run Blender with precipitation scaling; LIS inputs smoothed with gaussian filtering

# Arguments
- `domainName`: domain identifier used in the file names (e.g. TLM, OLY, CRB, CJR).
- `WY`: water-year label used in the paths (e.g. `"WY15"`).
- `vN`: version code, see above.
- `scaleFlag`: `1` reads the per-pixel precipitation scalar from
  `<domainName>_PrecipScalar_<WY>.nc`; any other value uses a scalar of 1 everywhere.

# Keywords
- `rootDir`: root of the run tree containing `Data/`, `Out/` and `Logs/`.

# Notes
The struct `NCData` (five `Array{Float32,3}` fields `SWE`, `Precip`, `AirT`, `SCF`, `G`)
must be defined at top level before this function is called.

JLD 5/13/25
"""
function callBlenderWatershed(domainName, WY, vN, scaleFlag;
                              rootDir="/Users/jldechow/Documents/Projects/UNC/CoReSSD/Runs")
    ## Assign directories
    DataDir = joinpath(rootDir, "Data", WY, vN, domainName * "_" * WY * "_" * vN * ".nc")
    ScaleDir = joinpath(rootDir, "Data", WY, vN, domainName * "_PrecipScalar_" * WY * ".nc")
    exp_dir = joinpath(rootDir, "Out", vN, WY, domainName)
    logDir = joinpath(rootDir, "Logs", vN, WY, domainName)

    ## Make sure the log and output directories exist; the run fails if they don't.
    checkDirectoryExists(exp_dir)
    checkDirectoryExists(logDir)

    ## Read the Blender input data into memory (keeps the pixel loop fast).
    # The do-block closes the dataset even if reading throws.
    # NOTE(review): relies on `ds[name][:]` returning the full 3-D array — confirm for the
    # installed NCDatasets version.
    varnames = ["SWE", "Precip", "AirT", "SCF", "G"]
    data = Dataset(DataDir, "r") do ds
        Dict(name => ds[name][:] for name in varnames)
    end
    ncdata = NCData(
        data["SWE"],
        data["Precip"],
        data["AirT"],
        data["SCF"],
        data["G"]
    )

    ## Get size of one of the vars for the loop
    nx, ny, nt = size(ncdata.SWE)

    ## Read in scaling data if required - otherwise use a scalar of one everywhere
    PrecipScalar = if scaleFlag == 1
        Dataset(ScaleDir, "r") do ds2
            ds2["PrecipScalar"][:]  # full 2-D array
        end
    else
        ones(Float32, nx, ny)
    end

    for i in 1:nx, j in 1:ny
        # Indexing with [i, j, :] already yields a plain vector for this pixel.
        SWEts = ncdata.SWE[i, j, :] ./ 1000
        Pts = ncdata.Precip[i, j, :] ./ 1000
        AirTts = ncdata.AirT[i, j, :] ./ 100  # presumably stored scaled by 100 — confirm
        SCFts = ncdata.SCF[i, j, :]
        Gts = ncdata.G[i, j, :]
        psVal = PrecipScalar[i, j]

        # Skip the pixel if any of the five variables is entirely NaN or the scalar is NaN
        if all(isnan, SWEts) || all(isnan, Pts) || all(isnan, AirTts) ||
           all(isnan, SCFts) || all(isnan, Gts) || isnan(psVal)
            continue
        end

        # blender expects (i, j, SWEprior, Pprior, Gprior, SCFinst, AirT, ...):
        # G goes into the Gprior slot and air temperature into the AirT slot.
        blender(i, j, SWEts, Pts, Gts, SCFts, AirTts, logDir, exp_dir, psVal)
    end
    return nothing
end

callBlenderWatershed (generic function with 1 method)

In [6]:
# Run configuration for a single watershed / water year.
domainName = "CCA";   # domain identifier used in the input/output file names
WY = "WY15"           # water-year label used in the data paths
vN = "V3"             # Blender version code used in the data paths
scaleFlag = 1;        # 1 => read the per-pixel precipitation scalar from file


# In-memory container for the five 3-D input variables read from the NetCDF file.
# callBlenderWatershed constructs it and takes (nx, ny, nt) from size(SWE).
# It has to exist at top level before callBlenderWatershed is called.
struct NCData
    SWE::Array{Float32,3}
    Precip::Array{Float32,3}
    AirT::Array{Float32,3}
    SCF::Array{Float32,3}
    G::Array{Float32,3}
end

# Run Blender over every pixel of the configured domain.
callBlenderWatershed(domainName, WY, vN, scaleFlag)