# Script for Downloading ERA5 Global Precipitation Average Data 
<b>Writes to json file rainfall_avgs.json</b>

In [1]:
# Initialize notebook environment.
%matplotlib inline
import logging
import boto3
from botocore.exceptions import ClientError
import botocore
import datetime
import matplotlib.pyplot as plt
import os.path
import xarray as xr
import json
import numpy as np
import pandas as pd 
import requests

### S3 Access

In [2]:
era5_bucket = 'era5-pds'

# AWS access / secret keys required
# s3 = boto3.resource('s3')
# bucket = s3.Bucket(era5_bucket)

# No AWS keys required
client = boto3.client('s3', config=botocore.client.Config(signature_version=botocore.UNSIGNED))

In [3]:
def create_presigned_url(bucket_name, object_name, expiration=3600):
    """Generate a presigned URL to share an S3 object

    :param bucket_name: string
    :param object_name: string
    :param expiration: Time in seconds for the presigned URL to remain valid
    :return: Presigned URL as string. If error, returns None.
    """

    # Generate a presigned URL for the S3 object
    s3_client = boto3.client('s3', config=botocore.client.Config(signature_version=botocore.UNSIGNED))
    try:
        response = s3_client.generate_presigned_url('get_object',
                                                    Params={'Bucket': bucket_name,
                                                            'Key': object_name},
                                                    ExpiresIn=expiration)
    except ClientError as e:
        logging.error(e)
        return None

    # The response contains the presigned URL
    return response

In [4]:
# url = create_presigned_url(era5_bucket, '1979/01/main.nc')
# print(url);
# response = requests.get(url)
# print(response)
# ds = xr.open_dataset(url)
# print(ds)

### User Defined Functions for ERA5 Querying

In [5]:
# TODO: Direct file downloads to a separate folder to avoid clutter

In [6]:
# Uncomment ds_meta.info() to have main.nc file contents printed
def downloadERA5Data(prefix, file_type, year, month):
    metadata_file = file_type 
    metadata_key = prefix + file_type
    #print(metadata_file)
    #print(metadata_key)
    client.download_file(era5_bucket, metadata_key, metadata_file)
    ds_meta = xr.open_dataset(file_type, decode_times=False)
    #ds_meta.info() 
    
    # select date and variable of interest
    date = datetime.date(year,month,1)
    #print("Getting main.nc file for ", date)
    var = 'precipitation_amount_1hour_Accumulation'

    # file path patterns for remote S3 objects and corresponding local file
    s3_data_ptrn = '{year}/{month}/data/{var}.nc'
    data_file_ptrn = '{year}{month}_{var}.nc'

    year = date.strftime('%Y')
    month = date.strftime('%m')
    s3_data_key = s3_data_ptrn.format(year=year, month=month, var=var)
    data_file = data_file_ptrn.format(year=year, month=month, var=var)

    if not os.path.isfile('DataFiles/'+ data_file): # check if file already exists
        print("Downloading %s from S3..." % s3_data_key)
        client.download_file(era5_bucket, s3_data_key, data_file)
        return data_file
    else:
#         print("File already exists on machine")
        return data_file

### Looking at top level prefixes of S3 Data Bucket for ERA5 Data

In [7]:
paginator = client.get_paginator('list_objects')
result = paginator.paginate(Bucket=era5_bucket, Delimiter='/')
data_files = []

In [8]:
for prefix in result.search('CommonPrefixes'):
    keys = []
    year = prefix.get('Prefix').split('/')[0]
    #print(year)
    response = client.list_objects_v2(Bucket=era5_bucket, Prefix=year)
    response_meta = response.get('ResponseMetadata')
    
    if response_meta.get('HTTPStatusCode') == 200:
        contents = response.get('Contents')
        if contents == None:
            print("No objects are available for %s" % date.strftime('%B, %Y'))
        else:
            for obj in contents:
                keys.append(obj.get('Key'))
            # Getting the first month available for the current year 
            month = keys[0].split('/', 2)[1];
            for k in keys:
                path_split = k.split('/', 3)
                current_month = path_split[1]

                if(len(path_split) >= 4):
                    file_type = path_split[3] # index of file type/extension
                    if(file_type == 'precipitation_amount_1hour_Accumulation.nc'):
                        # There is precipitation accumulation data set for the current month ==> download this months dataset to query data
                        #print(k)
                        prefix = year + "/" + month + "/"
                        data_file = downloadERA5Data(prefix, 'main.nc', int(year), int(month.lstrip("0"))) # Having errors querying precipitation data directly
                        data_files.append(data_file);
                if(current_month != month):
                    month = current_month
                    #print(month)
#             break; # currently breaking query after 1 year 
    else:
        print("There was an error with your request.")

### Creating a list of locations to gather data from for varying El Nino affects 
<b> Access locations with locs_(lr, d, or fr) </b>

In [9]:
# locations w/ lower than average rainfall
locs_lr = [
    {'name': 'el_salvador', 'lon': 13.7942, 'lat': 88.8965},
    {'name': 'honduras', 'lon': 15.2000, 'lat': 86.2419},
    {'name': 'nicaragua', 'lon': 12.8654, 'lat': 85.2072},
    {'name': 'haiti', 'lon': 18.9712, 'lat': 72.2852},
    {'name': 'se_brazil', 'lon': 20.3332, 'lat': 46.2092},
]

# locations w/ drought risk level rainfall
locs_d = [
    {'name': 'ethiopia', 'lon': 9.1450, 'lat': 40.4897},
    {'name': 'kenya', 'lon': 0.0236, 'lat': 37.9062},
    {'name': 'somalia', 'lon': 5.1521, 'lat': 46.1996},
    {'name': 'malawi', 'lon': 13.2543, 'lat': 34.3015},
    
]

# locations w/ flood risk 
locs_fr = [
    {'name': 'argentina', 'lon': 38.4161, 'lat': 63.6167},
    {'name': 'guatemala', 'lon': 15.7835, 'lat': 90.2308},
    {'name': 'peru', 'lon': 9.1900, 'lat': 75.0152},
    {'name': 'botswana', 'lon': 22.3285, 'lat': 24.6849},
    {'name': 'zimbabwe', 'lon': 19.0154 , 'lat': 29.1549},
    {'name': 'mozambique', 'lon': 18.6657, 'lat': 35.5296},
    {'name': 'south_africa', 'lon': 30.5595, 'lat': 22.9375},
]


# convert westward longitudes to degrees east
print("Locations with lower than average detected rainfall")
for l in locs_lr:
    print(l)
    if l['lon'] < 0:
        l['lon'] = 360 + l['lon']
  
print('---------------------------------------------------')
print("Locations with drought level rainfall")
for l in locs_d:
    print(l)
    if l['lon'] < 0:
        l['lon'] = 360 + l['lon']

print('---------------------------------------------------')
print("Locations with higher than average detected rainfall")
for l in locs_fr:
    print(l)
    if l['lon'] < 0:
        l['lon'] = 360 + l['lon']

Locations with lower than average detected rainfall
{'name': 'el_salvador', 'lon': 13.7942, 'lat': 88.8965}
{'name': 'honduras', 'lon': 15.2, 'lat': 86.2419}
{'name': 'nicaragua', 'lon': 12.8654, 'lat': 85.2072}
{'name': 'haiti', 'lon': 18.9712, 'lat': 72.2852}
{'name': 'se_brazil', 'lon': 20.3332, 'lat': 46.2092}
---------------------------------------------------
Locations with drought level rainfall
{'name': 'ethiopia', 'lon': 9.145, 'lat': 40.4897}
{'name': 'kenya', 'lon': 0.0236, 'lat': 37.9062}
{'name': 'somalia', 'lon': 5.1521, 'lat': 46.1996}
{'name': 'malawi', 'lon': 13.2543, 'lat': 34.3015}
---------------------------------------------------
Locations with higher than average detected rainfall
{'name': 'argentina', 'lon': 38.4161, 'lat': 63.6167}
{'name': 'guatemala', 'lon': 15.7835, 'lat': 90.2308}
{'name': 'peru', 'lon': 9.19, 'lat': 75.0152}
{'name': 'botswana', 'lon': 22.3285, 'lat': 24.6849}
{'name': 'zimbabwe', 'lon': 19.0154, 'lat': 29.1549}
{'name': 'mozambique', 'lon

### Method for getting data on each location from a provided dataset

In [22]:
def getPrecipitationData(ds):
    var = 'precipitation_amount_1hour_Accumulation' # Var to be changed to name of corresponding country 
    ds_locs_fr = xr.Dataset()
    ds_locs_lr = xr.Dataset()
    ds_locs_d = xr.Dataset()
    
    # interate through the locations and create a dataset
    # containing the temperature values for each location
    for l in locs_fr:
        name = l['name']
        lon = l['lon']
        lat = l['lat']
        var_name = name

        ds2 = ds.sel(lon=lon, lat=lat, method='nearest')

        lon_attr = '%s_lon' % name
        lat_attr = '%s_lat' % name

        ds2.attrs[lon_attr] = ds2.lon.values.tolist()
        ds2.attrs[lat_attr] = ds2.lat.values.tolist()
        ds2 = ds2.rename({var : var_name}).drop(('lat', 'lon'))

        ds_locs_fr = xr.merge([ds_locs_fr, ds2])

    ds_locs_fr.data_vars

    for l in locs_d:
        name = l['name']
        lon = l['lon']
        lat = l['lat']
        var_name = name

        ds2 = ds.sel(lon=lon, lat=lat, method='nearest')

        lon_attr = '%s_lon' % name
        lat_attr = '%s_lat' % name

        ds2.attrs[lon_attr] = ds2.lon.values.tolist()
        ds2.attrs[lat_attr] = ds2.lat.values.tolist()
        ds2 = ds2.rename({var : var_name}).drop(('lat', 'lon'))

        ds_locs_d = xr.merge([ds_locs_d, ds2])

    ds_locs_d.data_vars

    for l in locs_lr:
        name = l['name']
        lon = l['lon']
        lat = l['lat']
        var_name = name

        ds2 = ds.sel(lon=lon, lat=lat, method='nearest')

        lon_attr = '%s_lon' % name
        lat_attr = '%s_lat' % name

        ds2.attrs[lon_attr] = ds2.lon.values.tolist()
        ds2.attrs[lat_attr] = ds2.lat.values.tolist()
        ds2 = ds2.rename({var : var_name}).drop(('lat', 'lon'))

        ds_locs_lr = xr.merge([ds_locs_lr, ds2])

    ds_locs_lr.data_vars

#     print(ds_locs_fr.data_vars)
#     print(ds_locs_lr.data_vars)
#     print(ds_locs_d.data_vars)
    return [ds_locs_fr, ds_locs_lr, ds_locs_d]

In [23]:
data_sets = []
for file in data_files:
    ds = xr.open_dataset('DataFiles/'+file)
    data_sets.append(ds)

## Storing monthly rainfall averages for at risk areas in 'rainfall_avgs' dictionary
<b> Structure outlined below (DO NOT RUN UNLESS AN UPDATE TO JSON IS NECESSARY) </b>

In [24]:
# rainfally_avgs = {
#     1979: {
#         01: {
#             higher_avg: {
#                 loc1: mean rainfall
#                 loc2: ''
#                 ...
#                 locn: '' 
#             }, 
#             lower_avg: {...},
#             drought: {...}
#         }
#         ..
#         12: {...}
#     }
#     ...
#     2018: {...}
# }


In [28]:
# rainfall_avgs = {}
# # data_entry = {}
# for ds in data_sets: # Loop through array indexes with data for each month
#     data_entry = {}
#     precip_ds = getPrecipitationData(ds)
#     i = 0
#     for m in precip_ds: # Loop through locs_fr/lr/d for current month
#         year = ''
#         month = ''
#         if(i == 0):
#             risk = 'higher_avg'
#         elif(i == 1):
#             risk = 'lower_avg'
#         elif(i == 2):
#             risk ='drought_risk'
#         else:
#             break
#         i+=1 
        
#         for val in m.coords: # Loop through coordinate values to get associated time 
#             data_entry[risk] = {}
#             #print(data_entry)
#             m_mean = m.mean()
#             date = m[val].values[0]
#             year = pd.to_datetime(date).year
#             month = pd.to_datetime(date).month

#             if not year in rainfall_avgs:
#                 rainfall_avgs[year] = {month: {}}
            
#             for loc in m_mean.data_vars:
#                 location = loc
#                 avg_rainfall = m_mean[loc].values * 24 * 1488
#                 data_entry[risk][location] = avg_rainfall
#                 rainfall_avgs[year][month] = data_entry
# #                 print(year, month)
# #                 print(location , avg_rainfall, 'm')
# #                 print(rainfall_avgs[year][month])
# #                 print("***********************************************************")
# #                 if(month > 1):
# #                      print(rainfall_avgs[year][month-1])

In [None]:
oni = pd.read_csv('https://query.data.world/s/72sjhekyakcsiehjtd7yjuhccedtae')
oni
print(json.dumps(rainfall_avgs))

In [29]:
with open('rainfall_avgs.json', 'w') as fp:
    json.dump(rainfall_avgs, fp)