In [57]:
import numpy as np
import pandas as pd
import math

import xarray as xr
import parflow as pf

In [58]:
###################################
# File locations and inputs

# Path to GHCND file with list of observations and years available
ghcnd_inventory_path = "/glade/p/univ/ucsm0002/CONUS_modern/Analysis_Validation/Validation/GHCND_MetStations/ghcnd-inventory.txt"

# Path to GHCND file with list of stations 
ghcnd_stations_path = "/glade/p/univ/ucsm0002/CONUS_modern/Analysis_Validation/Validation/GHCND_MetStations/ghcnd-stations.txt"

# Water years you want included when choosing stations
# NOTE: GHCND file inventory uses calendar years, not water years
start = 2003 # start water year
end = 2005 # end water year
yrs = list(range(start,end+1))

# Path to lat/lon grid file
CONUSlatlon = latlon = np.loadtxt('/glade/p/univ/ucsm0002/CONUS2/domain_files/CONUS2.0.Final.LatLong.sa',skiprows=1)

# Output file 
# this will be a csv containing list of stations for comparison:
# station ID, lat/lon of station, station name, station elevation,
# availability of core variables - TMIN/TAVG/TMAX/PRCP/WESD/SNOW/SNWD/AWND,
# and the CONUS index, x-index and y-index for comparison cells
outfile = "./GHCND_mapped_CONUS2.csv"

print(f'Water Years to include for GHCND: {yrs}')

Water Years to include for GHCND: [2003, 2004, 2005]


In [59]:
###################################
# Read in station data

# Reading these in as fixed width 
ghcnd_inventory = pd.read_fwf(ghcnd_inventory_path,
                           names = ["ID","LATITUDE","LONGITUDE","ELEMENT","FIRSTYEAR","LASTYEAR"])

ghcnd_stations = pd.read_fwf(ghcnd_stations_path,
                            names = ["ID","LATITUDE","LONGITUDE","ELEVATION_m","STATE","NAME","GSN_FLAG","HCN_FLAG","WMO_ID"])#,comment='$') 

In [60]:
print(ghcnd_stations.shape)
ghcnd_stations

(115081, 9)


Unnamed: 0,ID,LATITUDE,LONGITUDE,ELEVATION_m,STATE,NAME,GSN_FLAG,HCN_FLAG,WMO_ID
0,ACW00011604,17.1167,-61.7833,10.1,ST JOHNS COOLIDGE FLD,,,,
1,ACW00011647,17.1333,-61.7833,19.2,ST JOHNS,,,,
2,AE000041196,25.3330,55.5170,34.0,SHARJAH INTER. AIRP,,GSN,41196.0,
3,AEM00041194,25.2550,55.3640,10.4,DUBAI INTL,,,41194.0,
4,AEM00041217,24.4330,54.6510,26.8,ABU DHABI INTL,,,41217.0,
...,...,...,...,...,...,...,...,...,...
115076,ZI000067969,21.0500,29.3670,861.0,WEST NICHOLSON,,,67969.0,
115077,ZI000067975,20.0670,30.8670,1095.0,MASVINGO,,,67975.0,
115078,ZI000067977,21.0170,31.5830,430.0,BUFFALO RANGE,,,67977.0,
115079,ZI000067983,20.2000,32.6160,1132.0,CHIPINGE,,GSN,67983.0,


In [61]:
###################################
# Dataframe to fill

# N Rows = number of unique station IDs
# N Columns = 16:
# - Station ID
# - Station Name
# - Station Lat
# - Station Lon
# - Station Elevation
# - CONUS index (PF index)
# - CONUS x index
# - CONUS y index
# - TMIN availability
# - TAVG availability
# - TMAX availability
# - PRCP availability
# - WESD availability
# - SNOW availability
# - SNWD availability
# - AWND availability

# First, to make this a bit more efficient, do a cursory look at stations
# and remove all that are outside of a bounding box containing CONUS2
maxlat = math.ceil(CONUSlatlon[:,0].max())
minlat = math.floor(CONUSlatlon[:,0].min())
maxlon = math.ceil(CONUSlatlon[:,1].max())
minlon = math.floor(CONUSlatlon[:,1].min())

out_domain1 = np.where((ghcnd_stations.LATITUDE > maxlat) | (ghcnd_stations.LATITUDE < minlat))
out_domain2 = np.where((ghcnd_stations.LONGITUDE > maxlon) | (ghcnd_stations.LONGITUDE < minlon))
out_domain = np.union1d(out_domain1[0], out_domain2[0])
# stations contained within CONUS2 bounding box
ghcnd_stations = ghcnd_stations.drop(out_domain)

In [75]:
# # Start with full list of these bounding-box stations
# # Later we will remove the ones we won't use
# dat = matrix(NA,nrow=nrow(ghcnd_stations),ncol=16)
# dat = data.frame(dat)
# names(dat) = c("ID",
#                "NAME",
#                "LATITUDE",
#                "LONGITUDE",
#                "ELEVATION_m",
#                "CON_id",
#                "CON_x",
#                "CON_y",
#                "TMIN","TAVG","TMAX",
#                "PRCP",
#                "WESD","SNOW","SNWD",
#                "AWND")
