In [None]:
############
## end of 2/21/2023 update:
## This notebook builds upon the V1 code by synthesizing steps to automatically download data!
## It can load data from all years / TC names, or just specified cases
############

In [3]:
# import...
from urllib.request import urlopen
import urllib
import re
import numpy as np
import os
import xarray as xr

os.chdir( "/Users/etmu9498/research/code/scripts/plotting")
import auto_flight_level_plots_new_noaa_data as plotter
import fl_mean_plots_error
import fl_mean_fields_binned


In [4]:
# step 1: save all years included in noaa's flight level database to yearfiles

# get a string holding all the folder names
urlrootstr = 'https://www.aoml.noaa.gov/ftp/hrd/data/flightlevel/'
urlrootpath =urlopen( urlrootstr)
string = urlrootpath.read().decode('utf-8')

# separate the one long string into a list of year strings
stringlist = string.split("</li>")
stringlist = stringlist[ 1:-1] # get rid of meaningless header and footer


# define a pattern of files to sort for
# this looks for files with the header '<li><a href=", with 4 numbers (representing a year), with the footer /">
pattern = re.compile('<li><a href="[0-9]{4}/">') # only look in folders with year numbers

# save valid years here
yearfiles = []
# sort for the intended files and print out valid years!
# look through all potential year strings
for i, val in enumerate( stringlist):
       
    # add this case if it passes the pattern test above!
    if re.search( pattern, val):
        match = re.search( pattern, val)
        name = val [ 14 : 18 ]
        
        yearfiles.append( name)


# option 2: manually set the year list (better for faster downloads and testing)
yearfiles = [ '2019' , '2020']

print( yearfiles)

['2019', '2020']


In [5]:
# step 2: find the unique tc names for each year

# make sure there's an empty list for every year! names will be added for each year
namefiles = []
for i in range( len( yearfiles)):
    namefiles.append( [])


# do this for every year
for yeari, yearval in enumerate( yearfiles):

    # go to the current year's folder on NOAA's website
    yearstr = 'https://www.aoml.noaa.gov/ftp/hrd/data/flightlevel/' + yearval
    yearpath =urlopen( yearstr)
    string = yearpath.read().decode('utf-8')

    # separate the one long string into a list of name strings
    stringlist = string.split("</li>")
    stringlist = stringlist[ 1:-1] # get rid of meaningless header and footer
    
    # define a pattern of files to sort for
    # this code looks for files with the header '<li><a href=", with any number of letters (representing a name), 
    # with the footer /">
    pattern = re.compile('<li><a href="[a-zA-Z]*/">') # only look in folders with year numbers

    # look through all potential name strings
    for i, val in enumerate( stringlist):

        # add this case if it passes the pattern test above!
        if re.search( pattern, val):

            # trim off name header
            name = val [ 14:]            
            # find the first /, signifying the end of the name
            for j in range( len( name)):
                if name[ j] == '/':
                    endval = j
                    break
            # trim off the footer and append the name!        
            name = name [ : endval]
            namefiles[ yeari].append( name)

    print( "Year " + yearval + " complete")

# option 2: input the TC names manually! to save on data downloading times

namefiles[ 0] = ['lorenzo'] # ['dorian', 'lorenzo'] # 2019 names
namefiles[ 1] = [] # ['delta', 'isaias', 'zeta'] # 2020 names

Year 2019 complete
Year 2020 complete


In [6]:
# print out the years / names nicely!
for yeari, yearval in enumerate( yearfiles):
    print( "Year: " + yearval)
    print( "Names: " + str( namefiles[ yeari]))

Year: 2019
Names: ['lorenzo']
Year: 2020
Names: []


In [7]:
# step 3: get filenames

# save new valid filenames here
files = []
# make sure there's an empty list for every year! dataset names from all that year's TCs will be 
# placed in the appropriate list
for i in range( len( yearfiles)):
        
    files.append( [])
    
    for j  in range( len( namefiles[ i])):
        files[i].append( [])
        
# do this for each year and each name
for yeari, yearval in enumerate( yearfiles):

    print( "Get filenames for " + yearval)
    
    for namei, nameval in enumerate( namefiles[ yeari]):
        
        print( "TC " + nameval)
        
        # go to this TC's link
        urlstr = 'https://www.aoml.noaa.gov/ftp/hrd/data/flightlevel/' + yearval + '/' + nameval + '/'
        urlpath =urlopen( urlstr)
        string = urlpath.read().decode('utf-8')


        # break down the one huge string into a list of strings.
        # separate them by the </li> character
        # cut the first and last entries off the list -> not filenames!
        stringlist = string.split("</li>")
        stringlist = stringlist[ 1:-1] # get rid of meaningless header and footer

        # define a pattern of files to sort for
        # pattern = re.compile('<li><a href=".*_AC.nc">') # include any _AC.nc file (H, I, or N)
        pattern = re.compile('<li><a href=".*[HI][12]_AC.nc">.*') # only save H or I files, not N


        # sort for the intended files and save valid names!
        for i, val in enumerate( stringlist):

            if re.search( pattern, val):
                name = val[ 14 : 30 ]
                files[yeari][namei] .append( name)
        

Get filenames for 2019
TC lorenzo
Get filenames for 2020


In [8]:
# print filenames nicely!
for yeari, yearval in enumerate( yearfiles):
    for namei, nameval in enumerate( namefiles[ yeari]):
        print( "Files for year " + yearval + ", TC " + str( namefiles[ yeari][namei]) + ":")
        print( str( files[ yeari][namei] ) + "\n")

Files for year 2019, TC lorenzo:
['20190926I1_AC.nc', '20190927H1_AC.nc', '20190927I1_AC.nc', '20190928H1_AC.nc', '20190928I1_AC.nc', '20190929H1_AC.nc', '20190929I1_AC.nc', '20190930H1_AC.nc', '20190930I1_AC.nc']



In [10]:
# step 4: download the files printed out above!
###########
# new code: save files in separate year AND tcname files!
# this will make things much more readable when working with 100+ cases
# it also matches NOAA's file structure better, which is probably a good thing!
# it will also make metadata matching easier! no overlapping dates, etc
###########


total_file_count = 0 
for yeari, yearval in enumerate( yearfiles):
    for namei, nameval in enumerate( namefiles[ yeari]):
        total_file_count += len( files[ yeari][ namei])
print( "Number of netCDF datasets to download: " + str( total_file_count))


# do this for each year and each name
for yeari, yearval in enumerate( yearfiles):

    for namei, nameval in enumerate( namefiles[ yeari]):
        print( "Saving data for TC " + nameval)

        # see if there's already a year folder availible
        os.chdir("/Users/etmu9498/research/data/in-situ-noaa-full")
        output_folder = yearval + "/" + nameval

        if not os.path.isdir( output_folder):
            os.makedirs( output_folder)
            print( 'New folder created: ' + output_folder)
        
        # go to the new folder
        os.chdir("/Users/etmu9498/research/data/in-situ-noaa-full/" + output_folder)

        # save the valid datasets!
        for i, val in enumerate( files[yeari][namei]):
            
            # urllib.request.urlretrieve( urlstr + val, val)
            # print( val[0:10] + "_" + nameval + ".nc")
            print( "file " + str( i) + " downloaded")

Number of netCDF datasets to download: 9
Saving data for TC lorenzo
20190926I1_lorenzo.nc
file 0 downloaded
20190927H1_lorenzo.nc
file 1 downloaded
20190927I1_lorenzo.nc
file 2 downloaded
20190928H1_lorenzo.nc
file 3 downloaded
20190928I1_lorenzo.nc
file 4 downloaded
20190929H1_lorenzo.nc
file 5 downloaded
20190929I1_lorenzo.nc
file 6 downloaded
20190930H1_lorenzo.nc
file 7 downloaded
20190930I1_lorenzo.nc
file 8 downloaded


In [47]:
# check that downloads look good!
os.chdir("/Users/etmu9498/research/data/in-situ-noaa-full/2019" )
data = xr.open_dataset( '20190919H1_AC.nc', decode_times=False)

In [48]:
# data

In [1]:
#################
## code taken from "2023-02-01 flight level rmws new nc datasets"
## results: the auto code works really well on the automatically downloaded data!
#################

import os
os.chdir( "/Users/etmu9498/research/code/scripts/plotting")
import auto_flight_level_plots_new_noaa_data as plotter


plotter.plot( tc='2019', ylims=True, filepaths='New')
#fl_mean_fields_binned.plot_all_eyes( tc='2019', max_v_requirement=40, filepaths='New')
#fl_mean_plots_error.make_plot( tc='2019', max_v_requirement=40, filepaths='New')


['2019', '2020', '2021', '2022']
['jerry', 'lorenzo']
['laura', 'sally', 'teddy']
['ida', 'larry', 'sam']
['fiona']
52
Total Number of plots to be created: 0



In [2]:
###############
## 2/22/23 update
## testing new code with updated file structures! a little more complicated because of all the subfolders
###############
#import os
#os.chdir( "/Users/etmu9498/research/code/scripts-winter2023/fl-data-compositing")
#import fl_time_series_new_noaa_data
#fl_time_series_new_noaa_data.plot( tc='all', ylims=True, filepaths='New')
