### NCEI Station Temperature Download

Goals:

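1. For each SWC COOP-derived station, pull NCEI **daily-summaries** daily maximum/minimum temperatures (TMAX, TMIN) from 1990-01-01 through 2020-12-31.
2. For each SWC ISD-derived station, pull NCEI **global-summary-of-the-day** daily maximum/minimum temperatures (MAX, MIN) over the same period.
3. Aggregate the results into a single CSV file, plus a companion CSV of per-station record counts.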

General Statistics:

* Total Stations: 5144
* Total COOP Stations: 1851
* Total ISD Stations: 3293
* Stations with no temperature data: 796
* Stations with complete temperature data: 147


In [6]:
import csv
import datetime
import os
import sys

import arcpy
import requests

# Inclusive date range requested from NCEI for every station.
start_date = datetime.date(1990, 1, 1)
end_date = datetime.date(2020, 12, 31)

# The results geodatabase is addressed one level above the current working
# directory; downloaded files go into an "ncei" folder inside it.
results_fgdb = os.path.join(os.getcwd(), '..', 'results.gdb')
target_dir = os.path.join(os.getcwd(), 'ncei')

# target_dir is a plain filesystem folder, so create it with the standard
# library; exist_ok avoids the check-then-create race and makes re-runs safe.
os.makedirs(target_dir, exist_ok=True)

# Station table read by the download cell; report its size up front.
stations = os.path.join(results_fgdb, 'SWC_Station_Universe')
stations_cnt = arcpy.GetCount_management(stations)[0]
print("  Initial SWC Station Universe Count: " + str(stations_cnt))


  Initial SWC Station Universe Count: 5144


In [None]:
%%time

# Inputs/outputs for this cell: the station table to read, the combined
# temperature CSV to write, and a per-station record-count summary.
# NOTE: `input` shadows the builtin of the same name; the name is kept so that
# any later cell referring to it keeps working.
input  = results_fgdb + os.sep + 'SWC_Station_Universe'
# NOTE(review): unlike the stats file, this path has no ".csv" extension --
# confirm that is intended.
output = target_dir   + os.sep + 'ncei_20210805'
stats  = target_dir   + os.sep + 'ncei_20210805_stats.csv'

fldsin = [
    'StationId',
    'StationSource',
]

ncei_url = 'https://www.ncei.noaa.gov/access/services/data/v1'

# Seconds to wait on a single request before giving up on that station, so one
# stalled connection cannot hang the whole run.
request_timeout = 120

# Request parameters that depend on the station source. Both datasets are
# asked for a (max, min) temperature pair, which the parsing below expects in
# columns 3 and 4 of the response.
source_params = {
    'COOP': {
        'dataset': 'daily-summaries',
        'dataTypes': 'TMAX,TMIN',
        'units': 'standard',
    },
    'ISD': {
        'dataset': 'global-summary-of-the-day',
        'dataTypes': 'MAX,MIN',
    },
}

# Request parameters shared by every station (loop-invariant, so built once).
common_params = {
    'startDate': start_date.strftime("%Y-%m-%d"),
    'endDate': end_date.strftime("%Y-%m-%d"),
    'includeAttributes': 'false',
    'format': 'csv',
}

print("  Downloading station data from ncei between " + str(start_date) + " and " + str(end_date) + ".")
station_count = 0
skipped = []  # (station id, reason) for stations that could not be downloaded

with arcpy.da.SearchCursor(in_table=input, field_names=fldsin) as incur, \
        open(output, 'w', encoding='utf-8') as outfile, \
        open(stats, 'w', encoding='utf-8') as statfile, \
        requests.Session() as session:

    outfile.write('"STATION","DATE","TMAX","TMIN","SOURCE"\n')
    # Header matches the three values written per station below.
    statfile.write('"STATION","SOURCE","RECORD_COUNT"\n')

    for stationID, stationSource in incur:

        params = source_params.get(stationSource)
        if params is None:
            # Without this guard an unrecognised source would reuse the
            # previous station's request (or fail on the very first row).
            skipped.append((stationID, 'unrecognised source ' + repr(stationSource)))
            continue

        payload = dict(common_params, stations=stationID, **params)

        try:
            r = session.get(ncei_url, params=payload, timeout=request_timeout)
            r.raise_for_status()
        except requests.RequestException as err:
            # Keep the long-running download going; failures are reported at
            # the end instead of being parsed as if they were CSV data.
            skipped.append((stationID, str(err)))
            continue

        rec_count = 0
        text = (raw.decode('utf-8') for raw in r.iter_lines())
        rows = csv.reader(text, delimiter=",", quotechar='"')
        # Discard the header row; the default keeps an empty response body
        # from raising StopIteration.
        next(rows, None)

        for line in rows:

            # Rows are read positionally: station, date, max, min. A response
            # lacking one of the two temperature columns cannot yield a
            # complete reading, so such rows are skipped.
            if len(line) < 4:
                continue

            readingStation, readingDate, readingMax, readingMin = line[:4]

            # Only keep days where both temperatures are present.
            # NOTE(review): GSOD is understood to encode missing temperatures
            # as 9999.9 rather than blank -- confirm downstream handling.
            if readingMax != "" and readingMin != "":
                outfile.write('"' + readingStation + '","' + readingDate + '","' + readingMax + '","' + readingMin + '","' + stationSource + '"\n')
                rec_count += 1

        # Key the summary on the requested station id so stations with and
        # without returned rows are identified the same way.
        statfile.write('"' + stationID + '","' + stationSource + '",' + str(rec_count) + "\n")
        station_count += 1

for skipped_station, reason in skipped:
    print("  Skipped station " + str(skipped_station) + ": " + reason)

print("  Downloaded " + str(station_count) + " stations worth of temperature data.")
