## Download Data

Prerequisite packages (install before running): cdsapi, xarray, netcdf4, dask, scipy, bottleneck, numpy, pandas, zarr

In [188]:
import cdsapi

# Stations to download, keyed by the name that ends up in the output file
# names. Each value is the station's [lat, lon] pair; comment an entry out
# to skip that station.
COORDS = {
    # "Springfield": [37.235, -93.402],
    "Joplin": [37.156, -94.503],
    # "West Plains": [36.879, -91.905],
    "Vichy": [38.131, -91.764],
}

In [266]:
# Period to download: a four-digit year and a month number written without
# a leading zero (e.g. 1990 and 1 for January 1990).

selected_year: int = 2020
selected_month: int = 12

In [267]:
# Derive the strings the rest of the notebook needs from the chosen period:
#   convert_my_date  -> '<year>_<full month name>', used in the output file names
#   month_conversion -> two-digit month string passed to the download request

import datetime

# First day of the selected month, built straight from the numbers. This
# gives the same datetime as parsing 'M/YYYY' and avoids a temporary variable
# named `input`, which would shadow Python's builtin input().
my_date = datetime.datetime(int(selected_year), int(selected_month), 1)
convert_my_date = my_date.strftime('%Y_%B')

# Zero-pad to exactly two digits ('01' ... '12') for every month. The earlier
# f'0{selected_month}' form produced '012' for months 10-12 unless the code
# was edited by hand each time.
month_conversion = f'{int(selected_month):02d}'

In [268]:
#TODO: wrap the download in a function so that the dataset, the variables
#pulled, the pressure levels, and the output format can be changed easily

#TODO: consider a dictionary for the pressure levels specifically, if they
#should also appear in the file name

In [269]:
# Retrieve the data: one CDS request and one NetCDF file per station.
c = cdsapi.Client()

# Two-digit day strings '01' through '31'.
days_of_month = [f'{day:02d}' for day in range(1, 32)]

for station, lat_lon in COORDS.items():
    # 'area' receives the station's [lat, lon] pair twice (four values);
    # presumably this collapses the bounding box to the station itself --
    # confirm against the CDS documentation.
    station_point_coord = lat_lon * 2
    request = {
        'product_type': 'reanalysis',
        'variable': 'temperature',
        'pressure_level': ['850', '925'],
        'year': str(selected_year),
        'month': str(month_conversion),
        'day': days_of_month,
        'time': '12:00',
        'area': station_point_coord,
        'format': 'netcdf',
    }
    # Output file is named '<station>_<year>_<month name>.nc'.
    target_file = f'{station}_{convert_my_date}.nc'
    c.retrieve('reanalysis-era5-pressure-levels', request, target_file)

#You'll know it ran correctly if a pink log box appears underneath and the
#request status moves from "queued" to "completed".
#Once a "Download rate" line appears after the link showing the expected file
#name, the output file is available in the active directory.

2024-07-23 13:48:39,550 INFO Welcome to the CDS
2024-07-23 13:48:39,551 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-pressure-levels
2024-07-23 13:48:39,774 INFO Request is queued
2024-07-23 13:48:40,947 INFO Request is running
2024-07-23 13:48:48,565 INFO Request is completed
2024-07-23 13:48:48,565 INFO Downloading https://download-0011-clone.copernicus-climate.eu/cache-compute-0011/cache/data7/adaptor.mars.internal-1721760527.4578876-13850-17-2268e86b-d59a-437b-92c3-3ca86834fcc9.nc to Joplin_2020_December.nc (1.5K)
2024-07-23 13:48:49,251 INFO Download rate 2.2K/s                                                                      
2024-07-23 13:48:49,575 INFO Welcome to the CDS
2024-07-23 13:48:49,584 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-pressure-levels
2024-07-23 13:48:49,784 INFO Request is queued
2024-07-23 13:48:50,955 INFO Request is running
2024-07-23 13:48:58,575 INFO Request

##### Convert to csv

In [270]:
# Folder this notebook is being run from.
local_storage_directory: str = 'C:/Users/sgfsad/netcdf/'

# Folder the downloaded .nc files are saved to. By default this is the working
# folder itself; if the files live in a separate subfolder instead, delete the
# '#' below so that the subfolder name is appended.
netcdf_dir: str = local_storage_directory  # + 'netcdf/'

# Folder the converted CSV files are written to (the shared drive).
csv_dir: str = 'S:/Science_and_Research/ERA5_Reanalysis_1990-2020/'

In [271]:
#imports
import xarray as xr
import os

In [272]:
# Convert every station's downloaded NetCDF file to a CSV with the same base
# name. File names are built from the station name and the selected period,
# so nothing needs to be edited here.

for station in COORDS:
    # Input .nc path (netcdf_file_in) and output .csv path (csv_file_out).
    # os.path.join works whether or not the folder settings end with a slash.
    netcdf_file_name = f'{station}_{convert_my_date}.nc'
    netcdf_file_in = os.path.join(netcdf_dir, netcdf_file_name)
    csv_file_out = os.path.join(
        csv_dir, os.path.splitext(netcdf_file_name)[0] + '.csv'
    )
    # Open inside a `with` block so the file handle is released after each
    # station instead of staying open for the life of the kernel. The
    # DataFrame is built before the dataset is closed.
    with xr.open_dataset(netcdf_file_in) as ds:
        df = ds.to_dataframe()
    df.to_csv(csv_file_out)

# Your converted CSV file should be in the S drive folder! Yay!