### Find CONUS2 x and y indices for USGS WTD sites

In [89]:
import numpy as np
import pandas as pd
# from Danielle_wtd import extract_station_indices_in_parflow_grid

In [90]:
# Load the USGS site metadata table, keyed by site_id, and discard the
# leftover 'Unnamed: 0' column that the CSV carries.
metadata_path = '/home/dtt2/CONUS2/Validation/WTD/metadata_WTD_dataset_national_A_val_Oct_2022.csv'
usgs_metadata = (
    pd.read_csv(metadata_path, index_col=['site_id'])
    .drop(columns=['Unnamed: 0'])
)

In [91]:
# Preview the first five sites
usgs_metadata.head(n=5)

Unnamed: 0_level_0,site_type,agency,site_name,site_id_agency,site_query_url,site_query_access_date,latitude,longitude,state,huc,...,local_time_fg,reliability_cd,gw_file_cd,nat_aqfr_cd,aqfr_cd,aqfr_type_cd,well_depth_va,hole_depth_va,depth_src_cd,project_no
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
292338095063601,groundwater well,USGS,KH-65-40-707 (Galveston),292338095063601,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,29.394167,-95.110278,TX,12040204,...,Y,C,YYNYNYYN,S100CSLLWD,112CHCT,U,870.0,1221.0,,4648-00210
292458094534206,groundwater well,USGS,KH-64-33-920 (Texas City Extensometer),292458094534206,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,29.416325,-94.895003,TX,12040204,...,Y,C,YYNYNYYN,S100CSLLWD,112CHCT,M,800.0,,S,8653-00150
292628099401401,groundwater well,USGS,YP-69-35-602 (Frio Reference Well),292628099401401,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,29.446066,-99.664774,TX,12110106,...,Y,C,YYNYNYYN,S500EDRTRN,218EDRDA,U,237.0,237.0,S,8653-CRK51
292943098354404,groundwater well,USGS,AY-68-36-132 (Z DED),292943098354404,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,29.495278,-98.595556,TX,12100301,...,Y,C,YY Y Y,S500EDRTRN,218EDRD,C,596.5,616.0,D,8653-CRN52
293252098380801,groundwater well,USGS,AY-68-27-610 (Parkwood Park),293252098380801,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,29.547778,-98.635556,TX,12100302,...,Y,C,YYNYNYNN,S500EDRTRN,218EDRDA,U,229.0,229.0,S,8653-9BI52


In [49]:
import numpy as np
from parflow.tools.io import read_pfb

def extract_station_indices_in_parflow_grid(station_stations, Latitude_pf ,Longitude_pf, buffer_degrees=5, verbose=True):
    """
    Given a dictionary of station locations, returns the row and column indices of the
    nearest grid cell within the matrices of latitude and longitude.

    :station_stations: dictionary with the station ID as key, and the lat and lon of the station as value: stations['ID']=[lat,lon]
    :Latitude_pf: 2D array containing latitude of each grid cell (must match dimensions Longitude_pf)
    :Longitude_pf: 2D array containing longitude of each grid cell (must match dimensions Latitude_pf)
    :buffer_degrees: half-width (in degrees of lat and lon) of the search window around each station.
                     Must be larger than the maximum expected distance between a station and its grid cell.
    :verbose: when True (default) print the station, its coordinates, the matched cell coordinates and indices

    :returns: a dictionary with as many entries as station_stations (same keys) and for each a
              [row, col] list of Python ints indexing the 2D lat/lon arrays
    :raises ValueError: if the lat/lon grids are not 2D arrays of identical shape, or if a station has
                        no grid cell inside its search window (station outside the domain, or NaN coordinates)
    """
    Latitude_pf = np.asarray(Latitude_pf)
    Longitude_pf = np.asarray(Longitude_pf)
    if Latitude_pf.ndim != 2 or Latitude_pf.shape != Longitude_pf.shape:
        raise ValueError(
            f'Latitude_pf and Longitude_pf must be 2D arrays of the same shape, '
            f'got {Latitude_pf.shape} and {Longitude_pf.shape}'
        )

    def distance(lat1, lon1, lat2, lon2):
        #haversine great-circle distance between lat/lon points given in degrees
        p = 0.017453292519943295  #pi/180, degrees -> radians
        hav = 0.5 - np.cos((lat2-lat1)*p)/2 + np.cos(lat1*p)*np.cos(lat2*p) * (1-np.cos((lon2-lon1)*p)) / 2
        #clip guards against rounding pushing hav marginally outside [0, 1] (would give NaN)
        #12742 = 2 * 6371, i.e. the result is in km
        return 12742 * np.arcsin(np.sqrt(np.clip(hav, 0.0, 1.0)))

    dictionary_out = {}

    #loop through stations
    for key in station_stations:
        #get latitude and longitude of current station
        curr_lat = station_stations[key][0]
        curr_lon = station_stations[key][1]

        #to speed up the process, only cells within "buffer_degrees" of the station are considered.
        #Cells with NaN coordinates (and stations with NaN coordinates) never satisfy the comparisons.
        in_window = (
            (Latitude_pf >= curr_lat - buffer_degrees)
            & (Latitude_pf <= curr_lat + buffer_degrees)
            & (Longitude_pf >= curr_lon - buffer_degrees)
            & (Longitude_pf <= curr_lon + buffer_degrees)
        )

        #row/col indices of the candidate cells (row-major order)
        window_rows, window_cols = np.nonzero(in_window)
        if window_rows.size == 0:
            raise ValueError(
                f'Station {key!r} at ({curr_lat}, {curr_lon}) has no grid cell within '
                f'{buffer_degrees} degrees; check its coordinates or increase buffer_degrees'
            )

        #find closest cell; argmin yields exactly one index, so ties resolve to the first
        #candidate in row-major order instead of producing a multi-element result
        dist_ = distance(curr_lat, curr_lon,
                         Latitude_pf[window_rows, window_cols],
                         Longitude_pf[window_rows, window_cols])
        nearest = np.argmin(dist_)
        curr_row = int(window_rows[nearest])
        curr_col = int(window_cols[nearest])

        if verbose:
            found_lat = Latitude_pf[curr_row, curr_col]
            found_lon = Longitude_pf[curr_row, curr_col]
            print(f'STATION: {key}')
            print(f'Current: {curr_lat} {curr_lon}, Found: {found_lat} {found_lon}')
            print(curr_row, curr_col)
            print(" ")

        dictionary_out[key] = [curr_row, curr_col]
    return dictionary_out



In [None]:
##CREATE DICT WITH THE LOCATIONS OF WTD (or adapt script if you have it as an array or anything else)

# Build the lookup from the `usgs_metadata` frame loaded earlier (indexed by site_id).
# Do NOT re-read the CSV into `usgs_metadata` here: without index_col the frame gets a
# RangeIndex, and the index-based join with the CONUS2 indices further down would then
# no longer line up on site_id.
#my_wtd_locations['ID']=[lat,lon]
my_wtd_locations = {
    site_id: [lat, lon]
    for site_id, lat, lon in zip(usgs_metadata.index,
                                 usgs_metadata['latitude'],
                                 usgs_metadata['longitude'])
}

# my_wtd_locations['12345']=[30,-110]
# my_wtd_locations['12346']=[35,-90]
# my_wtd_locations['12347']=[40,-75]

# Per-cell latitude / longitude of the CONUS2 grid (singleton dimensions squeezed away)
Latitude_pf = np.squeeze(read_pfb('/hydrodata/national_mapping/CONUS2/Latitude_CONUS2.pfb'))
Longitude_pf = np.squeeze(read_pfb('/hydrodata/national_mapping/CONUS2/Longitude_CONUS2.pfb'))


dictionary_of_rows_cols = extract_station_indices_in_parflow_grid(my_wtd_locations, Latitude_pf ,Longitude_pf)

#dictionary_of_rows_cols['ID']=[row,col]

### Create dataframe from dictionary containing the CONUS2 x and y indices

In [92]:
# One row per site: the dict keys become the index (named site_id) and each
# [row, col] pair is split into the CONUS_y / CONUS_x columns.
conus2_index_for_sites = pd.DataFrame.from_dict(
    dictionary_of_rows_cols,
    orient='index',
    columns=['CONUS_y', 'CONUS_x'],
).rename_axis('site_id')

In [98]:
# Write the site -> CONUS2 index table to disk, then end the cell with the preview:
# only the last expression of a cell is rendered, so head() must come last to be shown.
conus2_index_for_sites.to_csv('USGS_sites_CONUS2_indices_WTD.csv')
conus2_index_for_sites.head()

In [95]:
# Attach the CONUS_y / CONUS_x columns to the site metadata, aligning on the index
usgs_metadata_xy = usgs_metadata.join(conus2_index_for_sites, how='left')

In [96]:
# Inspect the joined table: site metadata plus the CONUS_y / CONUS_x columns added by the join
usgs_metadata_xy

Unnamed: 0_level_0,site_type,agency,site_name,site_id_agency,site_query_url,site_query_access_date,latitude,longitude,state,huc,...,gw_file_cd,nat_aqfr_cd,aqfr_cd,aqfr_type_cd,well_depth_va,hole_depth_va,depth_src_cd,project_no,CONUS_y,CONUS_x
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
292338095063601,groundwater well,USGS,KH-65-40-707 (Galveston),292338095063601,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,29.394167,-95.110278,TX,12040204,...,YYNYNYYN,S100CSLLWD,112CHCT,U,870.00,1221.0,,4648-00210,510,2391
292458094534206,groundwater well,USGS,KH-64-33-920 (Texas City Extensometer),292458094534206,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,29.416325,-94.895003,TX,12040204,...,YYNYNYYN,S100CSLLWD,112CHCT,M,800.00,,S,8653-00150,513,2412
292628099401401,groundwater well,USGS,YP-69-35-602 (Frio Reference Well),292628099401401,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,29.446066,-99.664774,TX,12110106,...,YYNYNYYN,S500EDRTRN,218EDRDA,U,237.00,237.0,S,8653-CRK51,518,1949
292943098354404,groundwater well,USGS,AY-68-36-132 (Z DED),292943098354404,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,29.495278,-98.595556,TX,12100301,...,YY Y Y,S500EDRTRN,218EDRD,C,596.50,616.0,D,8653-CRN52,521,2053
293252098380801,groundwater well,USGS,AY-68-27-610 (Parkwood Park),293252098380801,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,29.547778,-98.635556,TX,12100302,...,YYNYNYNN,S500EDRTRN,218EDRDA,U,229.00,229.0,S,8653-9BI52,527,2049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474921093144001,groundwater well,USGS,WLN01 062N23W26CDCDAB01 Togo 000061...,474921093144001,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,47.822467,-93.244069,MN,9030005,...,YYNYNYNN,N100GLCIAL,112OTSHS,U,31.66,44.0,S,MN-0022,2516,2478
475439116503401,groundwater well,USGS,53N 04W 28CAB1,475439116503401,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,47.910650,-116.843817,ID,17010305,...,YYNYNYYN,N100PCFNWB,112OTSH,U,448.56,449.0,S,ID-00-002,2695,793
480034105195401,groundwater well,USGS,26N49E13ACAB01 Wolf Point,480034105195401,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,48.009364,-105.331008,MT,10060002,...,YYNYNYNN,N300UPCTCS,211FHHC,U,180.00,180.0,S,MT076,2561,1610
480546123100901,groundwater well,USGS,30N/04W-15H07,480546123100901,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-01,48.096111,-123.169111,WA,17110020,...,YY Y,S100PGTSND,110ALVM,U,289.00,325.0,D,CRN,2843,363


In [97]:
# Save the metadata table with the CONUS2 indices appended
usgs_metadata_xy.to_csv(path_or_buf='USGS_metadata_WTD_WITH_INDICES.csv')