This repository has been archived by the owner on Sep 30, 2020. It is now read-only.
/
hrrr_variable_from_pando.py
311 lines (265 loc) · 13.9 KB
/
hrrr_variable_from_pando.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# Brian Blaylock
# March 10, 2017
# updated: July 26, 2017
# updated: October 31, 2017 Happy Halloween
"""
Download a single HRRR variable from the Pando archive using cURL.
More Information at: https://hrrr.chpc.utah.edu
"""
import commands
import re
from StringIO import StringIO
from datetime import date, timedelta
import os
import urllib2
def _read_idx_lines(idxfile):
    """Return the lines of the grib2 .idx metadata file at URL `idxfile`.

    A normal request is tried first. If it fails for any reason, the request
    is retried once with SSL certificate verification switched off, because
    some urllib2 versions cannot validate the archive's certificate. Any
    error raised by the second attempt propagates to the caller.
    """
    try:
        idxpage = urllib2.urlopen(idxfile)
    except Exception:
        # NOTE(security): the retry deliberately skips certificate checks.
        # https://stackoverflow.com/questions/19268548/python-ignore-certicate-validation-urllib2/28048260#28048260
        print(">>going to ignore the ssl certificate")
        import ssl
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        idxpage = urllib2.urlopen(idxfile, context=ctx)
    try:
        return idxpage.readlines()
    finally:
        # Release the connection even if reading fails.
        idxpage.close()


def _find_byte_range(lines, variable, more_vars):
    """Locate the byte range of `variable` in the .idx `lines`.

    Returns a `(start, end)` tuple of strings, or None when no line contains
    `variable`. The match is a plain substring test, so names containing
    regex metacharacters such as parentheses are matched literally. When
    several lines match, the last one is used. `end` is the byte just before
    the field that follows the requested one(s); it is the empty string when
    the request runs to the end of the file (an open-ended range).
    """
    matches = [i for i, line in enumerate(lines) if variable in line]
    if not matches:
        return None
    first = matches[-1]
    # Each .idx line is colon separated; the second item is the start byte.
    start = lines[first].split(':')[1]
    following = first + 1 + more_vars
    if following < len(lines):
        end = str(int(lines[following].split(':')[1]) - 1)
    else:
        end = ''
    return start, end


def _curl_byte_range(outfile, byte_range, url):
    """Download `byte_range` of `url` into `outfile` with cURL.

    Returns True when cURL exits with status 0, otherwise False (including
    when the curl executable cannot be started). The command is passed as an
    argument list, so no shell is involved and paths containing spaces or
    parentheses are safe. `--fail` makes cURL report HTTP errors through its
    exit status instead of saving the server's error page.
    """
    import subprocess
    command = ['curl', '-s', '--fail', '-o', outfile,
               '--range', byte_range, url]
    try:
        return subprocess.call(command) == 0
    except OSError:
        return False


def download_HRRR_variable_from_pando(DATE, variable,
                                      hours=range(0, 24), fxx=[0],
                                      model='hrrr', field='sfc',
                                      more_vars=0,
                                      outdir='./'):
    """
    Download a partial grib2 file from the Pando archive (http://hrrr.chpc.utah.edu)
    by specifying the variable you wish to download. These single-variable
    grib2 files are about 1 MB in size. If you don't need the full grib2 file,
    retrieving only the variables you need can save you a lot of disk space.

    For every requested hour and forecast hour the function
      1) reads the grib2 .idx metadata file,
      2) finds the byte range of the requested variable, and
      3) uses cURL to download only that byte range from the full grib2 file.

    Input:
        DATE     - a python date object for the date you want to download.
        variable - a string of the variable abbreviation and level that
                   matches the line in the .idx file you want to download.
                   The string is searched for literally (substring match) in
                   each line of the .idx file to discover the byte range.
                   For example, variable='TMP:2 m'.
                   Check this URL for a sample of variable names you can match:
                   https://api.mesowest.utah.edu/archive/HRRR/oper/sfc/20170725/hrrr.t01z.wrfsfcf00.grib2.idx
                   The string should be unique enough to occur only once in
                   the .idx file; otherwise the last matching line is used.
                   For example, variable='TMP' matches TMP:500 mb,
                   TMP:700 mb, ... and only the last of them is downloaded.
                   Thus, include both the variable abbreviation and the level.
        hours    - a list of hours to download, within range(24).
        fxx      - a list of forecast hours to download, within range(19).
        model    - a string specifying the model you want to download.
                   Choose either 'hrrr', 'hrrrX', or 'hrrrAK'.
        field    - a string specifying the field to download from.
                   Choose either 'sfc' for the surface file or 'prs' for the
                   pressure file (prs includes many more variables than sfc).
        more_vars- how many additional fields, adjacent to and following the
                   requested one, to include in the download. Default is
                   zero, which downloads only the variable requested.
                   For example, variable='UGRD:10 m' with more_vars=1
                   downloads the UGRD:10 m field and the next field, which is
                   VGRD:10 m. variable='HGT:500 mb' with more_vars=4
                   downloads 'HGT:500 mb', 'TMP:500 mb', 'DPT:500 mb',
                   'UGRD:500 mb', and 'VGRD:500 mb' in the same grib2 file.
                   If the request reaches past the last field, the download
                   runs to the end of the file.
        outdir   - a string specifying the directory to save the files in.
                   It is created if it does not exist.

    Returns None. Files are saved as, e.g.,
    HRRRfromPando_20170310_h00_f00_TMP_2_m.grib2

    Raises ValueError if `model` is not one of the supported names.

    If an .idx file cannot be read or a download fails, an error is printed
    and the next hour/forecast hour is tried. If the variable is not found in
    an .idx file, an error is printed and the function returns immediately.
    """
    # Model file names are different than model directory names.
    model_dirs = {'hrrr': 'oper', 'hrrrX': 'exp', 'hrrrAK': 'alaska'}
    if model not in model_dirs:
        raise ValueError("model must be one of %s, not %r"
                         % (sorted(model_dirs), model))
    model_dir = model_dirs[model]

    # Check if the outdir exists. If not, create it.
    if not os.path.exists(outdir):
        os.makedirs(outdir)
        print("created new directory: %s" % outdir)

    # Pieces of the file names that do not change inside the loops.
    datestr = DATE.strftime('%Y%m%d')
    label = variable.replace(':', '_').replace(' ', '_')
    if more_vars != 0:
        label = '%s-and-%s' % (label, more_vars)

    # Download for all requested hours and forecast hours
    for h in hours:
        for f in fxx:
            # e.g. HRRRfromPando_20170310_h00_f00_TMP_2_m.grib2
            outfile = os.path.join(
                outdir,
                '%sfromPando_%s_h%02d_f%02d_%s.grib2'
                % (model.upper(), datestr, h, f, label))

            # The .idx metadata file lists the start byte of every field;
            # the full grib2 file is what cURL downloads a slice of.
            remote = '%s/%s/%s/%s.t%02dz.wrf%sf%02d.grib2' \
                     % (model_dir, field, datestr, model, h, field, f)
            idxfile = 'https://api.mesowest.utah.edu/archive/HRRR/' + remote + '.idx'
            pandofile = 'https://pando-rgw01.chpc.utah.edu/HRRR/' + remote

            # 1) Open the metadata URL and read the lines.
            try:
                lines = _read_idx_lines(idxfile)
            except Exception:
                print("\n ERROR!!! Does the .idx file exist: %s \n" % idxfile)
                print("If is does, then something is wrong with urllib2.urlopen")
                continue

            # 2) Find the byte range for the variable requested.
            found = _find_byte_range(lines, variable, more_vars)
            if found is None:
                print("\n ERROR!!!")
                print(" Can not retrieve %s from %s." % (variable, idxfile))
                print(" Check that your variable name matches a line in the .idx file.\n")
                return
            rangestart, rangeend = found
            print('%s byte range: %s %s' % (variable, rangestart, rangeend))

            # 3) Use cURL to download just that byte range.
            byte_range = rangestart + '-' + rangeend
            if _curl_byte_range(outfile, byte_range, pandofile):
                print('Downloaded %s \n' % outfile)
            else:
                print("\n ERROR !!! Does the grib2 file exists: %s \n" % pandofile)
# =============================================================================
# Example Usage: Modify date and variable parameters
# =============================================================================
def get_single_variable_single_day():
    """Example: download one variable (2 m temperature) for one model run date.

    Requests the analysis (f00) of every hour, 0 through 23, from the
    operational HRRR surface files and saves the results in the current
    directory.
    """
    run_date = date(2017, 3, 10)   # Model run date
    field_name = 'TMP:2 m'         # Must be part of a line in the .idx file
    all_hours = range(0, 24)
    download_HRRR_variable_from_pando(
        run_date, field_name,
        hours=all_hours, fxx=[0],
        model='hrrr', field='sfc',
        more_vars=0, outdir='./')
def get_adjacent_variable_single_day():
    """Example: download a variable plus the fields that follow it, for one day.

    Starts at 'HGT:500 mb' and passes more_vars=4, so each output file also
    holds the four fields that come next in the grib2 file. Requests the
    analysis (f00) of every hour, 0 through 23, from the operational HRRR
    surface files and saves the results in the current directory.
    """
    run_date = date(2017, 3, 10)   # Model run date
    first_field = 'HGT:500 mb'     # Must be part of a line in the .idx file
    all_hours = range(0, 24)
    download_HRRR_variable_from_pando(
        run_date, first_field,
        hours=all_hours, fxx=[0],
        model='hrrr', field='sfc',
        more_vars=4, outdir='./')
def get_single_variable_multiple_days():
    """Example: download one variable for every day in a date range.

    Edit the start date, end date (exclusive) and variable string below.
    Each day is requested for the analysis (f00) of every hour, 0 through 23,
    from the operational HRRR surface files, saved in the current directory.
    """
    # === User modify variable and date range =================================
    first_day = date(2017, 3, 10)   # Start date
    stop_day = date(2017, 3, 13)    # End date (exclusive)
    field_name = 'TMP:2 m'          # must be part of line in .idx file
    # =========================================================================

    # Step one day at a time from the start date up to, not including, the end.
    one_day = timedelta(days=1)
    current = first_day
    while current < stop_day:
        download_HRRR_variable_from_pando(current, field_name,
                                          hours=range(0, 24),
                                          fxx=[0],
                                          model='hrrr',
                                          field='sfc',
                                          outdir='./')
        current = current + one_day
def get_multiple_variables_multiple_days():
    """Example: download several variables for every day in a date range.

    Edit the start date, end date (exclusive) and the list of variable
    strings below. All days are downloaded for the first variable, then all
    days for the second, and so on. Each request covers the analysis (f00) of
    every hour, 0 through 23, from the operational HRRR surface files, saved
    in the current directory.
    """
    # === User modify variable and date range =================================
    first_day = date(2017, 3, 10)   # Start date
    stop_day = date(2017, 3, 13)    # End date (exclusive)
    # variable list (each must be part of a line in the .idx file)
    field_names = ['TMP:2 m', 'DPT:2 m', 'UGRD:10 m', 'VGRD:10 m']
    # =========================================================================

    # Every date from the start date up to, not including, the end date.
    span = (stop_day - first_day).days
    all_days = [first_day + timedelta(days=offset) for offset in range(span)]

    # One (variable, date) job per download; variables vary slowest.
    jobs = [(name, day) for name in field_names for day in all_days]
    for name, day in jobs:
        download_HRRR_variable_from_pando(day, name,
                                          hours=range(0, 24),
                                          fxx=[0],
                                          model='hrrr',
                                          field='sfc',
                                          outdir='./')
def fast_dwnld_with_multithreading():
    """Example: fast download of single-variable HRRR grib2 files with threads.

    Builds one job per (date, variable) pair from the settings below and
    feeds the jobs through a queue to 8 daemon worker threads, each of which
    calls download_HRRR_variable_from_pando for the job it receives. The
    function returns once every job has been processed.

    A job that raises an exception is reported and counted as done, so one
    failed download cannot leave the final q.join() waiting forever.
    """
    # The queue module is named `Queue` on Python 2 and `queue` on Python 3.
    try:
        from queue import Queue
    except ImportError:
        from Queue import Queue
    from threading import Thread

    def worker():
        # This is where the main download function is run.
        # Change the hour and fxx parameters here if needed.
        while True:
            # Unpack the date and variable from the item sent to this worker
            iDATE, ivar = q.get()
            try:
                download_HRRR_variable_from_pando(iDATE, ivar,
                                                  hours=range(0, 24),
                                                  fxx=[0],
                                                  model='hrrr',
                                                  field='sfc',
                                                  outdir='./')
            except Exception as err:
                # Keep the worker alive so the remaining jobs are still served.
                print("\n ERROR!!! Download of %s for %s failed: %s \n"
                      % (ivar, iDATE, err))
            finally:
                # Always mark the job finished, otherwise q.join() never returns.
                q.task_done()

    # ===== User Modify the Variables and date range ==========================
    variables = ['TMP:2 m', 'DPT:2 m']   # List of variable strings
    sDATE = date(2017, 3, 10)            # Start date
    eDATE = date(2017, 3, 13)            # End date (exclusive)
    # =========================================================================

    # Create list of dates to request
    days = (eDATE - sDATE).days
    DATES = [sDATE + timedelta(days=d) for d in range(days)]

    # Make a list of inputs to send to the worker
    input_list = [[d, v] for d in DATES for v in variables]

    # Multithreading using the worker
    num_of_threads = 8
    q = Queue()
    for _ in range(num_of_threads):
        t = Thread(target=worker)
        t.daemon = True   # do not keep the interpreter alive for idle workers
        t.start()

    # Run each item through the threads
    for item in input_list:
        q.put(item)
    q.join()   # block until all tasks are done
if __name__ == '__main__':
    # Run one of the example downloads and print how long it took.
    from datetime import datetime
    started = datetime.now()

    # Uncomment the example you want to run.
    #get_single_variable_single_day()
    get_adjacent_variable_single_day()
    #get_single_variable_multiple_days()
    #get_multiple_variables_multiple_days()
    #fast_dwnld_with_multithreading()

    elapsed = datetime.now() - started
    print(elapsed)