# Get new temperature data

The original scripts fetched temperature data from each of four data sources for every station. We now know that each station uses just one source, chosen in priority order: PRISM, then GLDAS, then GLDAS nearest. So we do not need to fetch data for every station from every source. Also, we (mostly) should not need to get GLDAS 2.0 data.

Adapted from Paul's scripts. Currently not using arcpy or postgres. 

Set CURRENT_YEAR in the following cell. Temperature data through 2021 is checked in on a branch.

# PRISM

In [1]:
import os,sys;
import requests,csv;
import datetime;
from dateutil.rrule import rrule,DAILY,YEARLY;
from time import sleep;
import numpy as np

# Year whose temperature data is being downloaded; the later cells key off it.
CURRENT_YEAR = 2021

# Start of the overall period of record (not referenced by the cells shown here).
original_start_date = datetime.datetime(year=1990, month=1, day=1)

# Inclusive date window spanning the whole of CURRENT_YEAR.
start_date, end_date = (
    datetime.date(CURRENT_YEAR, 1, 1),
    datetime.date(CURRENT_YEAR, 12, 31),
)

# PRISM downloads are written to "<cwd>/<year>_prism"; create it on first run.
target_dir = os.sep.join([os.getcwd(), f'{CURRENT_YEAR}_prism'])

if os.path.exists(target_dir) is False:
    os.mkdir(target_dir)



In [2]:
# Read the station list from the tab-delimited resource file (no arcpy needed)
# and split it into PRISM request batches (`tickers`) and stations that fall
# outside the bounding box (`gldas_stations`).

stations_file = os.sep.join([
    os.path.dirname(os.getcwd()),
    'resources',
    str(CURRENT_YEAR) + '_D4EM_PREC_updated.txt',
])
assert os.path.exists(stations_file)

print(stations_file)
with open(stations_file, 'r') as file:
    data = file.readlines()

# The first line is the column header; every other line is one station record.
split_data = [item.strip('\n').split('\t') for item in data]
header = split_data.pop(0)

# batch number -> {'lats': ..., 'lons': ..., 'names': ...}, each '|'-joined
tickers = {}

ticker_cnt = 1      # 1-based number of the batch currently being filled
ticker_size = 5     # stations per batch
ticker_max = None   # optional cap on the number of batches (None = no cap)

# Bounding box that stands in for the original script's
# where_clause "CONUSFlag = 'Y'".
top = 49.35
left = -124.78
right = -66.95
bottom = 24.74

ticker = 1          # 1-based slot of the next station inside its batch

gldas_stations = []
for row in split_data:

    name = row[0]
    lat = row[4]
    lon = row[5]

    # Stations on or outside the box are set aside for GLDAS instead.
    lat_value = float(lat)
    if lat_value <= bottom or lat_value >= top or float(lon) <= left or float(lon) >= right:
        gldas_stations.append(row)
        continue

    if ticker == 1:
        # First station of a batch starts the three '|'-separated strings.
        tickers[ticker_cnt] = {"lats": lat, "lons": lon, "names": name}
    else:
        batch = tickers[ticker_cnt]
        batch['lats'] = '|'.join((batch['lats'], lat))
        batch['lons'] = '|'.join((batch['lons'], lon))
        batch['names'] = '|'.join((batch['names'], name))

    ticker += 1
    if ticker == ticker_size + 1:
        # Batch is full: move on to the next one.
        ticker_cnt += 1
        ticker = 1

        if ticker_max is not None and ticker_cnt > ticker_max:
            break
            

c:\Users\cbarr02\OneDrive - Environmental Protection Agency (EPA)\Profile\Desktop\GitHub\Data-Processing-for-SWC-and-SWMM-CAT\resources\2021_D4EM_PREC_updated.txt


In [8]:
# Download daily tmin/tmax for every PRISM batch in `tickers`, one CSV per
# batch ("batch<k>.csv" inside target_dir).

sleeper = 1  # seconds to wait between polls of a pending request
url = 'https://prism.oregonstate.edu/explorer/dataexplorer/rpc.php'
pickup = 'https://prism.oregonstate.edu/explorer/tmp/'

# Browser-like request headers; these are probably utterly irrelevant to the
# prism servers.
headers = {
    'Host': 'prism.oregonstate.edu',
    'Connection': 'keep-alive',
    'Content-Length': '7434',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    'sec-ch-ua': '"Chromium";v="92", " Not A;Brand";v="99", "Google Chrome";v="92"',
    'sec-ch-ua-mobile': '?0',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Accept-Language': 'en-us',
    'Accept-Encoding': 'gzip, deflate',
    'Origin': 'https://www.prism.oregonstate.edu',
    'Referer': 'https://www.prism.oregonstate.edu/explorer/bulk.php',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
    'DNT': '1',
}

for k, v in tickers.items():

    # Restart hook: raise this threshold to skip batches that were already
    # downloaded before a network failure.
    if k < 0:
        continue

    print(" Processing " + str(k))

    # Each batch file is rebuilt from scratch.
    target_file = target_dir + os.sep + 'batch' + str(k) + '.csv'
    if os.path.exists(target_file):
        os.remove(target_file)

    for yr in rrule(freq=YEARLY, dtstart=start_date, until=end_date):
        year_text = yr.strftime("%Y")

        # Step 1: submit the time-series request for this batch and year.
        payload_outer = {
            'call': 'pp/daily_timeseries_mp',
            'proc': 'gridserv',
            'lons': v['lons'],
            'lats': v['lats'],
            'names': v['names'],
            'spares': '4km',
            'interp': 0,
            'stats': 'tmin tmax',
            'units': 'eng',
            'range': 'daily',
            'start': year_text + '0101',
            'end': year_text + '1231',
            'stability': 'stable',
        }
        r_outer = requests.post(url, data=payload_outer, headers=headers)
        resp_outer = r_outer.json()

        # Step 2: poll (at most 35 times) until the server reports a result.
        result_url = None
        for waiting in range(35):
            payload_inner = {
                'call': 'pp/checkup',
                'proc': 'gridserv',
                'gricket': resp_outer['gricket'],
            }
            r_inner = requests.post(url, data=payload_inner, headers=headers)
            resp_inner = r_inner.json()

            # A 'delay' key means the job is still pending, even if a
            # 'result' key is also present.
            if 'delay' not in resp_inner and 'result' in resp_inner:
                result_url = resp_inner['result']['csv']
                break

            sleep(sleeper)

        if result_url is None:
            raise Exception('never got back results for ' + str(k) + ' for ' + year_text)

        # Step 3: stream the CSV and append it to the batch file, dropping
        # the first 11 lines of each response and any empty lines.
        with open(target_file, 'ab') as f, requests.get(pickup + result_url, stream=True) as r:
            for line_no, line in enumerate(r.iter_lines(), start=1):
                if line_no > 11 and len(line) > 0:
                    f.write(line + b'\n')

 Processing 1
 Processing 2
 Processing 3
 Processing 4
 Processing 5
 Processing 6
 Processing 7
 Processing 8
 Processing 9
 Processing 10
 Processing 11
 Processing 12
 Processing 13
 Processing 14
 Processing 15
 Processing 16
 Processing 17
 Processing 18
 Processing 19
 Processing 20
 Processing 21
 Processing 22
 Processing 23
 Processing 24
 Processing 25
 Processing 26
 Processing 27
 Processing 28
 Processing 29
 Processing 30
 Processing 31
 Processing 32
 Processing 33
 Processing 34
 Processing 35
 Processing 36
 Processing 37
 Processing 38
 Processing 39
 Processing 40
 Processing 41
 Processing 42
 Processing 43
 Processing 44
 Processing 45
 Processing 46
 Processing 47
 Processing 48
 Processing 49
 Processing 50
 Processing 51
 Processing 52
 Processing 53
 Processing 54
 Processing 55
 Processing 56
 Processing 57
 Processing 58
 Processing 59
 Processing 60
 Processing 61
 Processing 62
 Processing 63
 Processing 64
 Processing 65
 Processing 66
 Processing 67
 Pro

In [3]:

# Stitch the per-batch PRISM CSVs into a single summary file, in the order the
# batches appear in `tickers`.
target_file = os.sep.join((target_dir, 'prism_summary.csv'))

# Discard any summary left over from a previous run.
if os.path.exists(target_file):
    os.remove(target_file)

with open(target_file, 'w') as outfile:
    for k in tickers:
        source_file = target_dir + os.sep + 'batch' + str(k) + '.csv'
        with open(source_file, 'r') as infile:
            outfile.writelines(infile)

# GLDAS

In [4]:
# GLDAS request window: from 03:00 on Jan 1 of CURRENT_YEAR through 00:00 on
# Jan 1 of the following year (Dec 31 plus one day).
gldas_start_date = datetime.datetime(CURRENT_YEAR, 1, 1) + datetime.timedelta(hours=3)
gldas_end_date = datetime.datetime(CURRENT_YEAR, 12, 31) + datetime.timedelta(days=1)

# File names of the two GLDAS summaries written by later cells.
gldas_target_csv = 'gldas_summary.csv'
gldas_target_near = 'gldasnear_summary.csv'

# Working directory "<cwd>/<year>_gldas" and its "raw" download folder.
gldas_target_dir = os.sep.join((os.getcwd(), str(CURRENT_YEAR) + '_gldas'))
raw_files_dir = os.sep.join((gldas_target_dir, 'raw'))

# Create the parent first, then the child, skipping whichever already exists.
for needed_dir in (gldas_target_dir, raw_files_dir):
    if not os.path.exists(needed_dir):
        os.mkdir(needed_dir)

In [89]:
# Download the GLDAS 2.1 3-hourly air temperature series (Tair_f_inst) for
# every station in `gldas_stations`, one raw text file per station.
# Existing files are left alone, so the cell can simply be re-run to retry the
# stations whose download failed.

# Last 3-hourly timestamp of CURRENT_YEAR. Seeing it in the response is the
# signal that the whole year arrived. It is derived from CURRENT_YEAR so the
# check keeps working when the year is changed.
last_timestamp = f'{CURRENT_YEAR}-12-31T21:00:00'.encode()

for row in gldas_stations:

    lat  = row[4]
    lon  = row[5]
    name = row[0]

    target_21 = raw_files_dir + os.sep + name + '_gldas21.txt'

    if os.path.exists(target_21):
        continue

    url = (
        "https://hydro1.gesdisc.eosdis.nasa.gov/daac-bin/"
        + "access/timeseries.cgi"
        + "?variable=GLDAS2:GLDAS_NOAH025_3H_v2.1:Tair_f_inst"
        + "&startDate=" + gldas_start_date.strftime("%Y-%m-%dT%H")
        + "&endDate=" + gldas_end_date.strftime("%Y-%m-%dT%H")
        + "&location=GEOM:POINT(" + str(lon) + ",%20" + str(lat) + ")"
        + "&type=asc2"
    )

    boo_good = True              # False once the response contains an ERROR: line
    got_last_timestamp = False   # True once the final timestamp has been seen

    with open(target_21, 'wb') as f:
        with requests.get(url, stream=True) as r:
            for line in r.iter_lines():
                if line[0:6] == b'ERROR:':
                    boo_good = False
                    break

                if line[0:19] == last_timestamp:
                    got_last_timestamp = True

                f.write(line + b'\n')

    # The file is closed at this point, so a failed download can be removed
    # and picked up again on the next run.
    if not boo_good:
        print("bad rods 21 for " + name + ", skipping for now")
        sleep(2)
        os.remove(target_21)

    elif not got_last_timestamp:
        print("partial data 21 received for " + name + ", skipping for now")
        sleep(2)
        os.remove(target_21)
        

%history

In [4]:
# Sanity check: the raw folder must hold exactly as many entries as there are
# stations in gldas_stations.
raw_file_count = len(os.listdir(raw_files_dir))
assert raw_file_count == len(gldas_stations)

In [29]:
# Build gldas_summary.csv from the raw per-station downloads.
# Each output row is: "station id","timestamp",temperature in degrees F.
gldas_target_file = gldas_target_dir + os.sep + gldas_target_csv
print(gldas_target_csv)

if os.path.exists(gldas_target_file):
    os.remove(gldas_target_file)

with open(gldas_target_file, 'w') as outcur:
    for row in gldas_stations:
        name = row[0]

        source_file21 = raw_files_dir + os.sep + name + '_gldas21.txt'
        print(source_file21)
        with open(source_file21, 'r') as f21:

            # Stays False until the column-header line has been passed;
            # everything before it is skipped.
            writeit21 = False
            for line in f21:

                # Lines read from a file always carry their newline, so test
                # the stripped text to really skip blank lines instead of
                # failing on the tuple unpack below.
                if writeit21 and line.strip():
                    (dtin21, tpin21) = line.split('\t')
                    dt21 = datetime.datetime.strptime(dtin21, "%Y-%m-%dT%H:%M:%S")
                    tpk21 = float(tpin21)

                    # Non-positive readings are dropped (presumably fill
                    # values - confirm against the data source).
                    if tpk21 > 0:
                        # Same arithmetic as a Kelvin -> Fahrenheit conversion.
                        tpf21 = (tpk21 - 273.15) * 1.8000 + 32.00

                        outcur.write('"' + name + '","' + str(dt21) + '",' + str(round(tpf21, 8)) + '\n')

                # Checked after the data branch so the header line itself is
                # never parsed as data.
                if line == 'Date&Time               Data\n':
                    writeit21 = True

gldas_summary.csv
c:\Users\cbarr02\OneDrive - Environmental Protection Agency (EPA)\Profile\Desktop\GitHub\Data-Processing-for-SWC-and-SWMM-CAT\temperature\2021_gldas\raw\USC00501466_gldas21.txt
c:\Users\cbarr02\OneDrive - Environmental Protection Agency (EPA)\Profile\Desktop\GitHub\Data-Processing-for-SWC-and-SWMM-CAT\temperature\2021_gldas\raw\USC00502607_gldas21.txt
c:\Users\cbarr02\OneDrive - Environmental Protection Agency (EPA)\Profile\Desktop\GitHub\Data-Processing-for-SWC-and-SWMM-CAT\temperature\2021_gldas\raw\USC00504567_gldas21.txt
c:\Users\cbarr02\OneDrive - Environmental Protection Agency (EPA)\Profile\Desktop\GitHub\Data-Processing-for-SWC-and-SWMM-CAT\temperature\2021_gldas\raw\USC00505757_gldas21.txt
c:\Users\cbarr02\OneDrive - Environmental Protection Agency (EPA)\Profile\Desktop\GitHub\Data-Processing-for-SWC-and-SWMM-CAT\temperature\2021_gldas\raw\USC00505880_gldas21.txt
c:\Users\cbarr02\OneDrive - Environmental Protection Agency (EPA)\Profile\Desktop\GitHub\Data-Pro

In [5]:
# just do a little checking (may remove later)
with open(gldas_target_dir + os.sep + gldas_target_csv, 'r') as file:
    test_gldas_data = file.readlines()

check_ids = set([item.split(',')[0].strip('"') for item in test_gldas_data])

old_temperature_dir = os.path.dirname(os.getcwd()) + os.sep + 'resources' + os.sep + 'temperature'
assert os.path.exists(old_temperature_dir)

counter = 0
gldas_near_stations = []

for x in gldas_stations:
    if x[0] not in check_ids:
        if not os.path.exists(old_temperature_dir + os.sep + x[0] + '.txt'):
            counter += 1
        else:
            gldas_near_stations.append(x)


print(len(gldas_near_stations))

raw9999_files_dir = gldas_target_dir + os.sep + 'raw9999'

if not os.path.exists(raw9999_files_dir):
    os.mkdir(raw9999_files_dir)
    

40


In [6]:
def get_ordered_pairs(stn_lat, stn_lon):
    """Return the nine 0.25-degree grid cells closest to a station.

    Closeness is the Chebyshev distance in degrees, i.e. the larger of the
    latitude and longitude offsets between the station and a cell centre.
    Cells at the same distance are ordered by longitude index first and
    latitude index second.

    Args:
        stn_lat: Station latitude in decimal degrees.
        stn_lon: Station longitude in decimal degrees.

    Returns:
        A list of nine ``(grid_lat, grid_lon)`` tuples of NumPy floats,
        nearest cell first.
    """
    # Cell centres of the grid. The stop values lie half a step past the last
    # centre so that 89.875 and 179.875 are part of the grid (np.arange
    # excludes its stop value).
    latitude = np.arange(-59.875, 90.0, 0.25)
    longitude = np.arange(-179.875, 180.0, 0.25)

    # Chebyshev distance of every cell, laid out as (longitude, latitude).
    # NOTE: longitude is not wrapped at +/-180, so cells across the
    # antimeridian are treated as far away.
    lat_gap = np.abs(latitude - stn_lat)
    lon_gap = np.abs(longitude - stn_lon)
    c = np.maximum(lon_gap[:, np.newaxis], lat_gap[np.newaxis, :])

    # A stable sort keeps equally distant cells in row-major order, so a
    # single argsort replaces repeated full-array scans per distance value.
    nearest = np.argsort(c, axis=None, kind='stable')[:9]
    lon_index, lat_index = np.unravel_index(nearest, c.shape)

    return [(latitude[j], longitude[i]) for i, j in zip(lon_index, lat_index)]

In [79]:
# Spot-check get_ordered_pairs on fixed coordinates. To try a real station
# instead, pass float(gldas_near_stations[0][4]) and
# float(gldas_near_stations[0][5]).
sample_lat, sample_lon = 41.125, 80.375
ordered_pairs = get_ordered_pairs(sample_lat, sample_lon)
print(ordered_pairs)

1439
[(41.125, 80.375), (40.875, 80.125), (41.125, 80.125), (41.375, 80.125), (40.875, 80.375), (41.375, 80.375), (40.875, 80.625), (41.125, 80.625), (41.375, 80.625)]


In [55]:
# adapting from non-near rather than from paul's code

for row in gldas_near_stations:

    # outfile = None
    
    lat  = row[4]
    lon  = row[5]
    name = row[0]

    print(name)

    # here you would want the nearest eight grid points
    counter = 0
    ordered_pairs = get_ordered_pairs(float(lat), float(lon))
    target_21 = raw9999_files_dir + os.sep + name + '_gldas21.txt'

    if not os.path.exists(target_21):
        with open(target_21,'wb') as f:
            for pair in ordered_pairs:
                lat = pair[0]
                lon = pair[1]
                dist_msg = 'grid_distance=' + str(counter) + '\n';

                target_21 = raw9999_files_dir + os.sep + name + '_gldas21.txt'
                
                url = "https://hydro1.gesdisc.eosdis.nasa.gov/daac-bin/"   \
                    + "access/timeseries.cgi"                              \
                    + "?variable=GLDAS2:GLDAS_NOAH025_3H_v2.1:Tair_f_inst" \
                    + "&startDate=" + gldas_start_date.strftime("%Y-%m-%dT%H")   \
                    + "&endDate=" + gldas_end_date.strftime("%Y-%m-%dT%H")       \
                    + "&location=GEOM:POINT(" + str(lon) + ",%20" + str(lat) + ")" \
                    + "&type=asc2"
                
                boo_good_2020 = False
                boo_good = True

                f.write(dist_msg.encode('utf-8'))
                with requests.get(
                        url
                    ,stream = True
                ) as r:
                    for line in r.iter_lines():
                        if line[0:6] == b'ERROR:':
                            boo_good = False
                            break
                            
                        if line[0:19] == b'2021-12-31T21:00:00':
                            boo_good_2020 = True
                            
                        f.write(line+'\n'.encode());

                counter += 1
                        
            if not boo_good:
                print("bad rods 21 for " + name + ", skipping for now");
                sleep(2)
                os.remove(target_21)
                
            elif not boo_good_2020:
                print("partial data 21 received for " + name + ", skipping for now");
                sleep(2)
                os.remove(target_21)




91186022534


In [58]:
# Build gldasnear_summary.csv from the raw near-station downloads.
# Each output row is:
#   "station id","timestamp",temperature in degrees F,grid_distance,lon,lat
gldas_target_near_file = gldas_target_dir + os.sep + gldas_target_near

with open(gldas_target_near_file, 'w') as outcur:
    for row in gldas_near_stations:

        name = row[0]

        source_file21 = raw9999_files_dir + os.sep + name + '_gldas21.txt'

        with open(source_file21, 'r') as f21:

            writeit21 = False       # True once the current section's column header has passed
            grid_distance = None    # rank taken from the latest "grid_distance=" line
            lat = None              # taken from the latest "lat=" line
            lon = None              # taken from the latest "lon=" line

            for line in f21:
                if line[0:14] == 'grid_distance=':
                    # Start of a new grid-cell section: stop treating lines
                    # as data until its column header has been seen.
                    grid_distance = int(line[14:])
                    writeit21 = False

                # NOTE(review): "lat=" / "lon=" lines are presumably part of
                # the downloaded response's metadata - the download cell does
                # not write them itself; confirm against a raw file.
                elif line[0:4] == 'lat=':
                    lat = float(line[4:])

                elif line[0:4] == 'lon=':
                    lon = float(line[4:])

                # Lines read from a file always carry their newline, so test
                # the stripped text to really skip blank lines instead of
                # failing on the tuple unpack below.
                elif writeit21 and line.strip():
                    (dtin21, tpin21) = line.split('\t')
                    dt21 = datetime.datetime.strptime(dtin21, "%Y-%m-%dT%H:%M:%S")
                    tpk21 = float(tpin21)

                    # Non-positive readings are dropped (presumably fill
                    # values - confirm against the data source).
                    if tpk21 > 0:

                        # Same arithmetic as a Kelvin -> Fahrenheit conversion.
                        tpf21 = (tpk21 - 273.15) * 1.8000 + 32.00

                        outcur.write('"' + name + '","' + str(dt21) + '",' + str(round(tpf21, 8))
                            + ',' + str(round(grid_distance, 2)) + ',' + str(lon) + ',' + str(lat) + '\n')

                # Checked after the data branch so the header line itself is
                # never parsed as data.
                if line == 'Date&Time               Data\n':
                    writeit21 = True

In [59]:
# Consistency checks on the near-station downloads and their summary file.
print(len(gldas_near_stations))

# 1) Every near station should have a raw file; print the ones that do not.
gldas_near_dir_files = os.listdir(raw9999_files_dir)
for i in gldas_near_stations:
    expected_name = i[0] + '_gldas21.txt'
    if expected_name not in gldas_near_dir_files:
        print(i)

assert len(gldas_near_dir_files) == len(gldas_near_stations)

# 2) Make sure the first raw file can be read.
a = gldas_near_dir_files[0]
with open(raw9999_files_dir + os.sep + a, 'r') as file:
    data = file.readlines()

# 3) Count the distinct station ids that made it into the summary.
assert os.path.exists(gldas_target_near_file)

with open(gldas_target_near_file, 'r') as file:
    csv_data = csv.reader(file)
    ids = [record[0] for record in csv_data]

unique_ids = set(ids)
print(len(unique_ids))

# 4) Print the near stations that have no row in the summary.
for i in gldas_near_stations:
    if i[0] not in unique_ids:
        print(i)


40
30
['USC00513547', 'WdmFinal', 'USC00513547.dat', '1', '21.0946', '-157.0174', 'OBSERVED', 'PREC', "'1990/01/01'", "'2020/12/31'", '31.0', '17.43', 'KAUNAKAKAI MAU 536.5']
['USC00514400', 'WdmFinal', 'USC00514400.dat', '1', '21.1804', '-157.2317', 'OBSERVED', 'PREC', "'2002/01/01'", "'2020/12/31'", '19.0', '19.13', 'KEPUHI-SHERATON 550.2']
['VQC00670480', 'WdmFinal', 'VQC00670480.dat', '1', '17.7184', '-64.795', 'OBSERVED', 'PREC', "'1990/01/01'", "'2016/12/31'", '27.0', '59.91', 'BETH UPPER NEW WORKS']
['70308025713', 'WdmFinal', '70308025713.dat', '1', '57.155', '-170.223', 'OBSERVED', 'PREC', "'1990/01/01'", "'2021/12/31'", '32.0', '22.68', 'ST PAUL ISLAND AIRPORT']
['70383525628', 'WdmFinal', '70383525628.dat', '1', '56.575', '-169.663', 'OBSERVED', 'PREC', "'2006/01/01'", "'2020/12/31'", '15.0', '24.61', 'ST. GEORGE AIRPORT']
['72201012836', 'WdmFinal', '72201012836.dat', '1', '24.557', '-81.755', 'OBSERVED', 'PREC', "'1990/01/01'", "'2021/12/31'", '32.0', '37.61', 'KEY WEST IN

In [25]:
# Append the PRISM rows of the summary to each station's temperature file as
# tab-separated records: id, year, month, day, last column, second-to-last
# column of the PRISM row.
prism_target_file  = target_dir + os.sep + 'prism_summary.csv'

with open(prism_target_file, 'r') as file:
    prism_data = file.readlines()

temperature_dir = os.path.dirname(os.path.dirname(target_dir)) + os.sep + 'resources' + os.sep + 'temperature'

# Index the summary by station id in one pass (file order is kept), so the
# per-station loop no longer rescans every PRISM row for every station.
prism_by_station = {}
for item in prism_data:
    split_prism_item = item.strip('\n').split(',')
    prism_by_station.setdefault(split_prism_item[0], []).append(split_prism_item)

for station in split_data:
    # NOTE(review): the file is opened in append mode for EVERY station, so an
    # empty file is created for stations that have no PRISM rows. This is
    # kept as is; confirm it is intended.
    with open(temperature_dir + os.sep + station[0] + '.txt', 'a') as file:
        for fields in prism_by_station.get(station[0], []):
            date_text = fields[4]   # sliced as YYYY-MM-DD
            to_file = f'{fields[0]}\t{date_text[0:4]}\t{date_text[5:7]}\t{date_text[-2:]}\t{fields[-1]}\t{fields[-2]}\n'
            file.write(to_file)


['USC00010008', 'WdmFinal', 'USC00010008.dat', '1', '31.5702', '-85.2482', 'OBSERVED', 'PREC', "'1990/01/01'", "'2017/12/31'", '28.0', '53.5', 'ABBEVILLE']
USC00010008,-85.2482,31.5702,400,2021-01-01,57.7,71.7



In [42]:
# Reduce the GLDAS summary to one record per calendar day and append it to
# each station's temperature file as tab-separated records:
# id, year, month, day, daily max, daily min.
gldas_target_file  = gldas_target_dir + os.sep + 'gldas_summary.csv'

with open(gldas_target_file, 'r') as file:
    gldas_data = file.readlines()

temperature_dir = os.path.dirname(os.path.dirname(gldas_target_dir)) + os.sep + 'resources' + os.sep + 'temperature'

# Index the summary by station id in one pass (file order is kept), so the
# per-station loop no longer rescans every summary row for every station.
gldas_by_station = {}
for item in gldas_data:
    split_gldas_item = item.strip('\n').split(',')
    station_id = split_gldas_item[0].strip('"')
    gldas_by_station.setdefault(station_id, []).append(split_gldas_item)

for station in gldas_stations:
    # calendar day -> all temperature values recorded on that day
    gldas_dict = {}
    for split_gldas_item in gldas_by_station.get(station[0], []):
        date_ = split_gldas_item[1].strip('"')
        date = datetime.datetime(int(date_[0:4]), int(date_[5:7]), int(date_[8:10]))

        # The download window ends at 00:00 on Jan 1 of the next year, and
        # that record does not belong to CURRENT_YEAR.
        if date.year > CURRENT_YEAR:
            continue

        gldas_dict.setdefault(date, []).append(float(split_gldas_item[-1]))

    with open(temperature_dir + os.sep + station[0] + '.txt', 'a') as file:
        for key, value in gldas_dict.items():
            to_file = f'{station[0]}\t{key.year}\t{key.month}\t{key.day}\t{max(value)}\t{min(value)}\n'
            file.write(to_file)



In [55]:
# Reduce the GLDAS-near summary to one record per calendar day and append it
# to each near station's temperature file as tab-separated records:
# id, year, month, day, daily max, daily min.
gldas_target_near_file = gldas_target_dir + os.sep + gldas_target_near
print(gldas_target_near_file)


with open(gldas_target_near_file, 'r') as file:
    gldas_near_data = file.readlines()

temperature_dir = os.path.dirname(os.path.dirname(gldas_target_dir)) + os.sep + 'resources' + os.sep + 'temperature'

# Index the summary by station id in one pass (file order is kept), so the
# per-station loop no longer rescans every summary row for every station.
gldas_near_by_station = {}
for item in gldas_near_data:
    split_gldas_item = item.strip('\n').split(',')
    station_id = split_gldas_item[0].strip('"')
    gldas_near_by_station.setdefault(station_id, []).append(split_gldas_item)

# Stations whose first next-year record has been reached. A station listed
# here contributes nothing more, even if it shows up again in
# gldas_near_stations.
already_processed = []
for station in gldas_near_stations:
    # calendar day -> all temperature values recorded on that day
    gldas_dict = {}

    if station[0] not in already_processed:
        for split_gldas_item in gldas_near_by_station.get(station[0], []):
            date_ = split_gldas_item[1].strip('"')
            date = datetime.datetime(int(date_[0:4]), int(date_[5:7]), int(date_[8:10]))

            # The first record dated after CURRENT_YEAR ends this station's
            # data: every later row of the station (rows from further grid
            # cells) is ignored.
            if date.year > CURRENT_YEAR:
                already_processed.append(station[0])
                break

            # Column 2 holds the temperature; the trailing columns are
            # grid_distance, lon and lat.
            gldas_dict.setdefault(date, []).append(float(split_gldas_item[2]))

    with open(temperature_dir + os.sep + station[0] + '.txt', 'a') as file:
        for key, value in gldas_dict.items():
            to_file = f'{station[0]}\t{key.year}\t{key.month}\t{key.day}\t{max(value)}\t{min(value)}\n'
            file.write(to_file)


c:\Users\cbarr02\OneDrive - Environmental Protection Agency (EPA)\Profile\Desktop\GitHub\Data-Processing-for-SWC-and-SWMM-CAT\temperature\2021_gldas\gldasnear_summary.csv
