### Jacopo Sala 
### Download full time series of HYCOM (2011-2015) in selected box as netCDF file
### GOFS 3.1: 41-layer HYCOM + NCODA Global 1/12° Reanalysis
GLBb0.08-53.X

In [3]:
import xarray as xr
import csv
import pandas as pd
import pickle
import numpy as np
import os
from datetime import date, timedelta, datetime
# from datetime import datetime as dtime
import sys
import matplotlib.pyplot as plt
import netCDF4 as nc
import os.path
import webbrowser
import time
from mpl_toolkits.basemap import Basemap
import cartopy.crs as ccrs
import matplotlib as mpl
from FUNCTIONS_HYCOM import set_regions, set_regions_tags

In [4]:
# define start and end years
start_year = 2011
end_year = 2015
var_names_download = ['water_temp', 'salinity']
var_names = ['temperature', 'salinity']

HYCOM_dir =      '/Users/jacoposala/Desktop/CU/3.RESEARCH/ARGO_analysis/TC_HYCOM/DATA/HYCOM_near_TC_53x_raw_box_NEW_BOX_EPacific/'
HYCOM_save_dir = '/Users/jacoposala/Desktop/CU/3.RESEARCH/ARGO_analysis/TC_HYCOM/DATA/HYCOM_near_TC_53x_pkl_box_Global_40S_50N/'
download_dir = '/Users/jacoposala/Downloads/'
url_prefix='http://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/YEAR'
file_prefix ='?var=VARIABLE&north=NORTH&west=WEST&east=EAST&south=SOUTH&horizStride=1&time='
file_suffix='Thh%3A00%3A00Z&vertCoord=&addLatLon=true&accept=netcdf4'

z_level_star = '0'
file_extension = '.nc4'
hycom_levels = 28 # 28
max_depth_index_sel = 28 #28 # 10 or 11 depth levels 
min_wind_knots = 64
tag_file = 'HYCOM_53X'



In [5]:
# List all the regions

# lat_max, lat_min, lon_max, lon_min for every region
regions = set_regions()
regions_tags = set_regions_tags(regions)
print(regions_tags)


['EPacific', 'WPacific_3of3', 'WPacific_1of3', 'NorthAtlantic', 'WPacific_2of3']


In [6]:
# Change number between 0 and len(regions)-1 for running different regions (listed above)
region_to_run = 0
# Set coordinates based on region_to_run
lat_min = regions[region_to_run][1]
lat_max = regions[region_to_run][0]
lon_min = regions[region_to_run][3]
lon_max = regions[region_to_run][2]


In [7]:
# Define functions

def download_model_dataset_53(date, north, south, east, west, url_prefix, file_prefix, file_suffix, f_dir, file_extension):
    date_to_download = pd.to_datetime(date)
    year = str(date_to_download)[0:4]
    month = str(date_to_download)[5:7]
    day = str(date_to_download)[8:10]
    hour = str(date_to_download)[11:13]
    date = datetime(int(year), int(month), int(day), int(hour))
    year = f"{int(date_to_download.year):04d}"
    # The year at the beginning of the URL is not always consistent with the actual year of the data being downloaded:
    # Each year "folder" includes files starts at 12 UTC on Jan 1, and goes to 9 UTC of Jan 1 of the following year
    if (date_to_download.month == 1) & (date_to_download.day == 1) & (date_to_download.hour < 12):
        year_prefix = f"{int(date_to_download.year - 1):04d}"
    else:
        year_prefix = year
    month = f"{int(date_to_download.month):02d}"
    day = f"{int(date_to_download.day):02d}"
    file_suffix_hh = file_suffix.replace('hh', hour, 1)
    file_prefix_coord = file_prefix.replace('NORTH', north, 1)
    file_prefix_coord = file_prefix_coord.replace('SOUTH', south, 1)
    file_prefix_coord = file_prefix_coord.replace('EAST', east, 1)
    file_prefix_coord = file_prefix_coord.replace('WEST', west, 1)
    url = '"' + url_prefix + file_prefix_coord + year + '-' + month + '-' + day + file_suffix_hh + '"'
    name_download = year_prefix + file_prefix_coord + year + '-' + month + '-' + day + file_suffix_hh #'/Users/jacoposala/Downloads/data_' + year + file_extension
    print(name_download)
    name_download = name_download.replace('%3A', ':', 2)
    print(url)
    print(name_download)
    # Download file
    if not os.path.exists(HYCOM_dir + file_prefix_coord + year + month + day + file_suffix_hh + file_extension):
        # Download file
        !wget {url}
        # Wait until the file is downloaded
        while not os.path.exists(name_download):
            time.sleep(1)
        print('Download done!')
        # ...then rename and move file
        if os.path.isfile(name_download):
            os.rename(name_download, HYCOM_dir + file_prefix_coord + year + month + day + file_suffix_hh + file_extension)
        else:
            print('Missing ' + name_download)
        print('Renaming or warning done!')
    # Open the file
    dset_day = xr.open_dataset(f'{f_dir}{file_prefix_coord}{year}{month}{day}{file_suffix_hh}{file_extension}')
    # Remove the file
    #if os.path.isfile(f'{f_dir}{file_prefix}{year}{month}{day}{file_suffix_hh}{file_extension}'):
    #    os.remove(f'{f_dir}{file_prefix}{year}{month}{day}{file_suffix_hh}{file_extension}')
    print('Dataset open - ready to return')
    return(dset_day)


### Now download full HYCOM time series (2011-2015) within selected box
#### Note: this might not work on jupyter, but it does work on spyder

In [8]:
# Create list of time-dates that need to be downloaded
# start = datetime(2011, 1, 1, 00, 00, 00)
start = datetime(2014, 8, 16, 15, 00, 00)
end = datetime(2014, 8, 17, 21, 00, 00)
date_generated = [start + timedelta(hours=3*x) for x in range(0, (end-start).days*8)]

date_generated

[datetime.datetime(2014, 8, 16, 15, 0),
 datetime.datetime(2014, 8, 16, 18, 0),
 datetime.datetime(2014, 8, 16, 21, 0),
 datetime.datetime(2014, 8, 17, 0, 0),
 datetime.datetime(2014, 8, 17, 3, 0),
 datetime.datetime(2014, 8, 17, 6, 0),
 datetime.datetime(2014, 8, 17, 9, 0),
 datetime.datetime(2014, 8, 17, 12, 0)]

In [9]:
### Note: files are missing for 2014-12-31 12 UTC to 2015-01-10 00 UTC (probably in 2015 folder instead of the 2014 or something like that)


In [10]:
cd /Users/jacoposala/Downloads/

/Users/jacoposala/Downloads


In [11]:
# Main loop
for x in date_generated: # loop through all unique days 
    print(x)
    for namevar_download,namevar in zip(var_names_download,var_names): # loop over variables (temp or salt)
        #for i in np.arange(len(lat_max)):
        #    ilon=lon_max[i]
        #    ilat=lat_max[i]
        ilon = lon_max
        ilat = lat_max
        # Download HYCOM data
        # The year at the beginning of the URL is not always consistent with the actual year of the data being downloaded:
        # Each year "folder" includes files starts at 12 UTC on Jan 1, and goes to 9 UTC of Jan 1 of the following year
        if ((pd.to_datetime(x)).month == 1) & ((pd.to_datetime(x)).day == 1) & ((pd.to_datetime(x)).hour < 12):
            bfr_url = url_prefix.replace('YEAR', str((pd.to_datetime(x)).year -1))
        else:
            bfr_url = url_prefix.replace('YEAR', str((pd.to_datetime(x)).year))
        bfr_prefix = file_prefix.replace('VARIABLE', str(namevar_download)) 
        bfr_dset = download_model_dataset_53(x, north = str(max(np.ceil(lat_max), np.floor(lat_max))), 
                                             south = str(min(np.ceil(lat_min), np.floor(lat_min))), 
                                             east = str(max(np.ceil(lon_max), np.floor(lon_max))),
                                             west = str(min(np.ceil(lon_min), np.floor(lon_min))),
                                             url_prefix=bfr_url, file_prefix=bfr_prefix, 
                                             file_suffix=file_suffix, f_dir=HYCOM_dir, 
                                             file_extension = file_extension)

        # Longitude is -180 + 180 already, so no need to re-project


2014-08-16 15:00:00
2014?var=water_temp&north=30.0&west=-150.0&east=-100.0&south=0.0&horizStride=1&time=2014-08-16T15%3A00%3A00Z&vertCoord=&addLatLon=true&accept=netcdf4
"http://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2014?var=water_temp&north=30.0&west=-150.0&east=-100.0&south=0.0&horizStride=1&time=2014-08-16T15%3A00%3A00Z&vertCoord=&addLatLon=true&accept=netcdf4"
2014?var=water_temp&north=30.0&west=-150.0&east=-100.0&south=0.0&horizStride=1&time=2014-08-16T15:00:00Z&vertCoord=&addLatLon=true&accept=netcdf4
--2023-04-13 22:34:32--  http://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2014?var=water_temp&north=30.0&west=-150.0&east=-100.0&south=0.0&horizStride=1&time=2014-08-16T15%3A00%3A00Z&vertCoord=&addLatLon=true&accept=netcdf4
Resolving ncss.hycom.org... 144.174.97.9
Connecting to ncss.hycom.org|144.174.97.9|:80... connected.
HTTP request sent, awaiting response... 200 200
Length: 9880750 (9.4M) [application/x-netcdf4]
Saving to: '2014?var=water_temp&north=30.0&

KeyboardInterrupt: 