In [None]:
"""
# -------------------------------------------------------------------------
# Name:        Find MERIT coordinates
# Purpose:     uses upstream area of MERIT (UPA) and GRDC station data
#              to check and correct station location
#
# Author:      PB
#
# Created:     15/05/2022
# Copyright:   (c) PB 2022

input:  grdc_2022_10577.txt   10577 station datasets >= 10km2 upstream area or no area provided
output: grdc_MERIT_1.txt: station with new location fitted to merit UPA

No: Number from 1 ...
GRDC_No: GRDC number
lat: original latitude from GRDC metafile
lon: original longitude from GRDC metafile
newlat: corrected latitude based on MERIT UPA dataset
newlon: corrected longitude based on MERIT UPA dataset
area: provided basin area from GRDC metafile
newarea: basin area based on MERIT UPA dataset
UPS_Indicator:  min error in % from MERIT UPA to provided basin area
dist_Indicator: distance to original pour point in [unit:100m]
Indicator:  ranking criteria: UPS_Indicator + 2 x dist_indicator

# ----------------------------------------------------------------------
"""

In [1]:
import numpy as np
import pandas as pd
import rasterio

In [None]:
#----------------------------------------------
# INPUT
# MERIT Yamazaki et al 2019 - upstream area in km2

# Tab-separated station table; it is read with pandas using 'GloFAS ID' as index.
STATION_FILE = "metastation_45.txt"
# Presumably a path prefix for per-station shape files; no other visible cell uses it.
shapefolder = "../shape_glofas_3sec/shape_3sec_no_"
# Upstream-area raster; it is opened with rasterio and band 1 is read.
upsname = "../data/ups_danube_3sec.tif"

# OUTPUT
# Text file to which the station loops append one line per station.
# NOTE(review): the notebook header names different input/output files - confirm which is current.
OUTPUT_FILE = "glofas_Merit_2.txt"

In [None]:
# --------------------------------------------------------------------------------
# Grid geometry of the 3 arcsec raster
cell = 0.000833333333333333333   # cell size
invcell = 1200                   # 1 / cell
# search range in cells: 55 = around 5km
rangexy = 55

# Station table (tab-separated, indexed by GloFAS ID) with three empty
# columns that will receive the corrected position and upstream area.
stations = (
    pd.read_csv(STATION_FILE, sep='\t', index_col='GloFAS ID')
    .assign(newlat=np.nan, newlon=np.nan, newarea=np.nan)
)

In [None]:
# -----
# load upstream
print ("read ups")
# The context manager closes the dataset even when reading fails part-way,
# so the file handle is never leaked.
with rasterio.open(upsname, "r") as src:
    ups = src.read(1)           # band 1: upstream area
    transform = src.transform   # pixel -> coordinate transform
    crs = src.crs
    latlon = crs.to_epsg()      # EPSG code of the raster CRS
print ("done read ups")

In [None]:
# Row index and column index of every raster cell, each shaped like `ups`
rows, cols = np.indices(ups.shape)
# Convert pixel row/column index (row, col) to spatial coordinates (x, y)
lon, lat = rasterio.transform.xy(transform, rows, cols)

In [None]:
# Display the row and column index grids built from `ups.shape`.
rows, cols

In [None]:
# Display the x coordinates returned by rasterio.transform.xy.
lon

In [None]:
# Wrap the upstream-area array in a DataArray with dimensions named lat/lon.
# NOTE(review): `xr` is not imported in any visible cell, so this raises
# NameError on a fresh kernel unless xarray is imported elsewhere - confirm.
xr.DataArray(ups, dims=['lat', 'lon'])

In [None]:
import rioxarray as rxr

In [None]:
# Display the transform read from the upstream-area raster (src.transform).
transform

In [None]:
# Display the EPSG code of the raster CRS (`latlon` holds src.crs.to_epsg()).
latlon

In [None]:
# Drainage area reported by the data provider, one value per station.
# The station table is named `stations`; `glofas` is not defined in any visible cell.
stations['DrainingArea.km2.Provider']

In [None]:
# Column names of the station table (`stations`; `glofas` is not defined in any visible cell).
stations.columns

In [None]:
# Drop scratch names left behind by an earlier run of the station loop. On a
# fresh kernel they do not exist yet, so a plain `del` would raise NameError
# and stop "Run All"; popping from globals() is a no-op in that case.
for _leftover in ("col", "row"):
    globals().pop(_leftover, None)

In [None]:
def foo(upstream: np.ndarray, upstream_ref: float, rangexy: int = 55, col1: int = None, row1: int = None,
        penalty: float = 500, dist_weight: float = 2.0):
    """Find the raster cell near (row1, col1) whose upstream area best matches a reference.

    Every cell in a square window of ``rangexy`` cells around the start cell
    gets a score (lower is better)::

        area_error = 100 * |1 - upstream / upstream_ref|        # percent
        distance   = 0.92 * euclidean distance in cells
        distance  += penalty            where area_error > 50
        score      = area_error + dist_weight * distance

    The 0.92 factor is presumably ~92 m per 3-arcsec cell, which would put the
    distance in units of 100 m - TODO confirm.

    Parameters
    ----------
    upstream : 2-D array of upstream areas.
    upstream_ref : reference upstream area; must be > 0.
    rangexy : half-width of the search window in cells.
    col1, row1 : column / row of the start cell. Both are required; they only
        carry a ``None`` default because they follow ``rangexy`` in the
        signature.
    penalty : value added to the distance term of cells whose area error
        exceeds 50 %.
    dist_weight : weight of the distance term in the score.

    Returns
    -------
    (row, col, score) : raster indices of the best cell and its score. Ties
        resolve to the first cell in row-major order.

    Raises
    ------
    ValueError : if ``col1``/``row1`` are missing, ``upstream_ref`` is not
        positive, the window does not overlap the raster, or every cell in
        the window is NaN.
    """
    if col1 is None or row1 is None:
        raise ValueError("col1 and row1 must be provided")
    if not upstream_ref > 0:  # also rejects NaN
        raise ValueError("upstream_ref must be a positive number")

    # Clip the window to the raster: negative indices would silently wrap
    # around and indices past the edge would raise IndexError.
    nrows, ncols = upstream.shape
    row_lo, row_hi = max(row1 - rangexy, 0), min(row1 + rangexy + 1, nrows)
    col_lo, col_hi = max(col1 - rangexy, 0), min(col1 + rangexy + 1, ncols)
    if row_lo >= row_hi or col_lo >= col_hi:
        raise ValueError("search window lies outside the raster")

    window = np.asarray(upstream[row_lo:row_hi, col_lo:col_hi], dtype=float)
    upsind = 100 * np.abs(1 - window / upstream_ref)

    # Offsets are measured from the start cell, so clipping does not shift them.
    d_row = np.arange(row_lo, row_hi) - row1
    d_col = np.arange(col_lo, col_hi) - col1
    diffind = np.hypot(d_row[:, None], d_col[None, :]) * 0.92
    # if upsind > 50 the distance gets a penalty
    diffind = np.where(upsind > 50, diffind + penalty, diffind)

    ind = upsind + dist_weight * diffind

    # nanargmin skips NaN cells; it raises ValueError if all cells are NaN.
    y, x = np.unravel_index(np.nanargmin(ind), ind.shape)
    return row_lo + int(y), col_lo + int(x), float(ind[y, x])

In [None]:
# -----------------------------------
# Snap every station to the MERIT cell whose upstream area best matches the
# provider's drainage area while staying close to the reported position.
# Results go into the newlat / newlon / newarea columns of `stations`, and the
# table is written to OUTPUT_FILE once at the end, so re-running is idempotent.

# Search stages, tried in order until the best score is good enough:
# (radius [cells], penalty, distance weight, escalate if score exceeds, message on entry)
_SEARCH_STAGES = (
    (55, 500, 2.0, 50, None),
    (101, 500, 0.5, 80, "increase range"),
    (151, 1000, 0.25, None, "increase range2"),
)
# Factor applied to cell distances; presumably ~92 m per 3-arcsec cell, which
# would give the 100 m units the notebook header mentions - TODO confirm.
_CELL_DISTANCE_FACTOR = 0.92
# Cells whose area error [%] exceeds this value receive the stage penalty.
_MAX_AREA_ERROR = 50


def _best_cell(upstream, upstream_ref, row1, col1, radius, penalty, dist_weight):
    """Return (row, col, score) of the best cell around (row1, col1), or None.

    score = area error [%] + dist_weight * (scaled distance, plus `penalty`
    where the area error exceeds 50 %). The window is clipped to the raster
    and NaN cells are ignored; None is returned when every cell is NaN.
    """
    nrows, ncols = upstream.shape
    row_lo, row_hi = max(row1 - radius, 0), min(row1 + radius + 1, nrows)
    col_lo, col_hi = max(col1 - radius, 0), min(col1 + radius + 1, ncols)
    window = np.asarray(upstream[row_lo:row_hi, col_lo:col_hi], dtype=float)

    area_error = 100 * np.abs(1 - window / upstream_ref)
    # Offsets are measured from the start cell, so clipping does not shift them.
    d_row = np.arange(row_lo, row_hi) - row1
    d_col = np.arange(col_lo, col_hi) - col1
    distance = np.hypot(d_row[:, None], d_col[None, :]) * _CELL_DISTANCE_FACTOR
    distance = np.where(area_error > _MAX_AREA_ERROR, distance + penalty, distance)
    score = area_error + dist_weight * distance

    if np.isnan(score).all():
        return None
    # First minimum in row-major order, like np.where(ind == np.min(ind)).
    y, x = np.unravel_index(np.nanargmin(score), score.shape)
    return row_lo + int(y), col_lo + int(x), float(score[y, x])


# Upper-left corner of the raster, rounded to 3 decimals as in the original code.
top = round(transform[5], 3)
left = round(transform[2], 3)
nrows, ncols = ups.shape

fitted = []  # one (newlat, newlon, newarea) tuple per station, in table order
for ID, attrs in stations.iterrows():

    # reference upstream area
    upsreal = attrs['DrainingArea.km2.Provider']
    # reference coordinates (not named lat/lon, to avoid clobbering the grids)
    station_lat, station_lon = attrs[['StationLat', 'StationLon']]

    best = None
    # A station needs a positive reference area and a position inside the raster.
    if upsreal > 0 and np.isfinite(station_lat) and np.isfinite(station_lon):
        col1 = int((station_lon - left) * invcell)
        row1 = int((top - station_lat) * invcell)
        if 0 <= row1 < nrows and 0 <= col1 < ncols:
            for radius, penalty, dist_weight, escalate_above, message in _SEARCH_STAGES:
                if message is not None:
                    print (message)
                best = _best_cell(ups, upsreal, row1, col1, radius, penalty, dist_weight)
                # Stop when this was the last stage or the score is acceptable.
                if best is None or escalate_above is None or not best[2] > escalate_above:
                    break

    if best is None:
        print (f"{ID}\tskipped (no reference area, outside raster or no valid cells)")
        fitted.append((np.nan, np.nan, np.nan))
        continue

    j, i, _ = best
    # centre of the selected 3 sec cell
    yy = top - (j + 0.5) / invcell
    xx = left + (i + 0.5) / invcell
    ups2 = ups[j, i]

    s = str(ID) + "\t"
    s = s + f"{yy:.5f}" + "\t" + f"{xx:.5f}" + "\t" + f"{ups2:.0f}"+ "\t"
    s = s + f"{station_lat:.5f}"+ "\t" +f"{station_lon:.5f}" + "\t"+f"{upsreal:.0f}"
    print (s)

    fitted.append((round(yy, 6), round(xx, 6), round(float(ups2))))

# Positional assignment, so duplicate station IDs in the index are harmless.
stations[['newlat', 'newlon', 'newarea']] = np.array(fitted, dtype=float).reshape(-1, 3)
stations.to_csv(OUTPUT_FILE, sep='\t')

print ('done')

In [None]:
# -----------------------------------
# Legacy text-based version of the station loop.
#
# For every station it searches a square window around the reported position
# for the cell that minimises
#     area error [%] + weight * (0.92 * distance in cells [+ penalty])
# and appends the original station line plus new lat / lon / area to OUTPUT_FILE.
# The search escalates: 55 cells (weight 2), then 101 cells (weight 0.5) if the
# best score is > 50, then 151 cells (weight 0.25, penalty 1000) if still > 80.
#
# NOTE(review): `glofas` must be a list of tab-separated text lines here. No
# visible cell creates it (the reader that did is commented out near the top
# of the notebook), so on a fresh kernel this cell raises NameError.
# NOTE(review): `diff`, `col`, `row`, `ups1` and `upsups` are assigned but never
# read in this cell.
# NOTE(review): windows that reach past the raster edge are not handled; negative
# indices wrap around and indices past the end raise IndexError.
for stationNo in range(0, len(glofas)):

    station = glofas[stationNo].split("\t")
    upsreal = float(station[6])
    # upstream area from provider

    coord = [float(station[4]), float(station[5])]
    # lat lon
    glofas_no =  station[1]


    # upper-left corner of the raster, rounded to 3 decimals
    top = round(transform[5],3)

    left = round(transform[2],3)
    col = ups.shape[1]
    row = ups.shape[0]

    # raster column / row of the reported station position
    col1 = int((coord[1] - left) * invcell)
    row1 = int((top -coord[0]) * invcell)
    ups1 = ups[row1,col1]

    # middle of the 3 sec cell
    xcentre = col1 / 1200 + left + 1 / 2400
    ycentre = top - row1 / 1200 - 1 / 2400

    # stage 1: window of 55 cells around the station
    rangexy = 55
    upsups = np.zeros((rangexy*2+1,rangexy*2+1))
    ind = np.zeros((rangexy*2+1,rangexy*2+1))
    upsind = np.zeros((rangexy*2+1,rangexy*2+1))
    diffind = np.zeros((rangexy*2+1,rangexy*2+1))

    colcol = np.arange(col1-rangexy,col1+rangexy+1)
    rowrow = np.arange(row1-rangexy,row1+rangexy+1)

    # j, i index the window; y, x index the raster
    j =0
    for y in rowrow:
        i = 0
        for x in colcol:
            # area error in percent of the provider's area
            upsind[j, i] = 100 * np.abs(1 - ups[y, x] / upsreal)
            upsups[j,i]= ups[y,x]
            diff = np.sqrt((rangexy-i)**2+(rangexy-j)**2)*0.9
            # distance from the window centre in cells, scaled by 0.92
            # (presumably ~92 m per cell, i.e. units of 100 m - TODO confirm)
            diffind[j,i] = np.sqrt((rangexy - i) ** 2 + (rangexy - j) ** 2) * 0.92
            # if upsind> 50 diff gets a penalty
            if upsind[j, i]>50:
                diffind[j, i] = diffind[j, i] + 500
            ind[j,i] = upsind[j, i] + 2 * diffind[j,i]

            i = i +1
        j = j + 1

    # first minimum in row-major order; from here on y, x index the window
    # and j, i index the raster (the roles are swapped relative to the loop)
    minxy = np.where(ind==np.min(ind))
    y=minxy[0][0]
    x=minxy[1][0]
    j = rowrow[y]
    i = colcol[x]

    #------------------------------------------------------
    # if still big error increase range
    if ind[y,x] > 50:
        print ("increase range")
        rangexy = 101
        upsups = np.zeros((rangexy*2+1,rangexy*2+1))
        ind = np.zeros((rangexy*2+1,rangexy*2+1))
        upsind = np.zeros((rangexy*2+1,rangexy*2+1))
        diffind = np.zeros((rangexy*2+1,rangexy*2+1))

        colcol = np.arange(col1-rangexy,col1+rangexy+1)
        rowrow = np.arange(row1-rangexy,row1+rangexy+1)

        j =0
        for y in rowrow:
            i = 0
            for x in colcol:
                upsind[j, i] = 100 * np.abs(1 - ups[y, x] / upsreal)
                upsups[j,i]= ups[y,x]
                diff = np.sqrt((rangexy-i)**2+(rangexy-j)**2)*0.9
                diffind[j,i] = np.sqrt((rangexy - i) ** 2 + (rangexy - j) ** 2) * 0.92
                # if upsind> 50 diff gets a penalty
                if upsind[j, i] > 50:
                    diffind[j, i] = diffind[j, i] + 500
                # distance weight is 0.5 here (2 in the first stage)
                ind[j,i] = upsind[j, i] + 0.5 * diffind[j,i]

                i = i +1
            j = j + 1

        minxy = np.where(ind==np.min(ind))
        y=minxy[0][0]
        x=minxy[1][0]
        j = rowrow[y]
        i = colcol[x]

    #-------------------------------------------------

    # ------------------------------------------------------
    # if still big error increase range
    # (this block is nested inside the `if` above, so it only runs after stage 2)
        if ind[y, x] > 80:
            print("increase range2")
            rangexy = 151
            upsups = np.zeros((rangexy * 2 + 1, rangexy * 2 + 1))
            ind = np.zeros((rangexy * 2 + 1, rangexy * 2 + 1))
            upsind = np.zeros((rangexy * 2 + 1, rangexy * 2 + 1))
            diffind = np.zeros((rangexy * 2 + 1, rangexy * 2 + 1))

            colcol = np.arange(col1 - rangexy, col1 + rangexy + 1)
            rowrow = np.arange(row1 - rangexy, row1 + rangexy + 1)

            j = 0
            for y in rowrow:
                i = 0
                for x in colcol:
                    upsind[j, i] = 100 * np.abs(1 - ups[y, x] / upsreal)
                    upsups[j, i] = ups[y, x]
                    diff = np.sqrt((rangexy - i) ** 2 + (rangexy - j) ** 2) * 0.9
                    diffind[j, i] = np.sqrt((rangexy - i) ** 2 + (rangexy - j) ** 2) * 0.92
                    # if upsind> 50 diff gets a penalty
                    # (1000 here, 500 in the two earlier stages)
                    if upsind[j, i] > 50:
                        diffind[j, i] = diffind[j, i] + 1000
                    # distance weight is 0.25 here (2 and 0.5 in the earlier stages)
                    ind[j, i] = upsind[j, i] + 0.25 * diffind[j, i]

                    i = i + 1
                j = j + 1

            minxy = np.where(ind == np.min(ind))
            y = minxy[0][0]
            x = minxy[1][0]
            j = rowrow[y]
            i = colcol[x]

    # -------------------------------------------------

    # coordinates of the selected cell centre: shift the start-cell centre by
    # the window offset of the best cell (rangexy is the window's centre index)
    yy = ycentre + (rangexy - y) * cell
    xx = xcentre - (rangexy - x) * cell
    ups2 = ups[j, i]    #-------------------------------------------------
    # progress line: station number, GloFAS number, two fields of the input line,
    # new lat / lon / area, original lat / lon / area
    s = str(stationNo)  + "\t"+ str(glofas_no) + "\t" + station[7] + "\t" + station[8] + "\t"
    s = s + f"{yy:.5f}" + "\t" + f"{xx:.5f}" + "\t" + f"{ups2:.0f}"+ "\t"
    s = s + f"{coord[0]:.5f}"+ "\t" +f"{coord[1]:.5f}" + "\t"+f"{upsreal:.0f}"
    print (s)

    #header += "\tnewlat\tnewlon\tnewarea"
    # output line: the original station line plus new lat, lon and area
    s = glofas[stationNo].rstrip()
    s = s + "\t" + f"{yy:.6f}" + "\t" + f"{xx:.6f}" + "\t" + f"{ups2:.0f}"+ "\n"
    #s = s + "\t" + str(upsind[y,x]) +"\t"+str(diffind[y,x])+"\t"+str(ind[y,x]) + "\n"
    # the file is opened in append mode, so re-running the cell adds duplicate lines
    f = open(OUTPUT_FILE, "a")
    f.write(s)
    f.close()




print ('done')