In [1]:
# Rain precipitation data from TRMM/GPM is normalized into HDF5 files by IMERG.
# Example download:
#wget https://arthurhouhttps.pps.eosdis.nasa.gov/gpmallversions/V06/2012/10/21/imerg/3B-HHR.MS.MRG.3IMERG.20121021-S170000-E172959.1020.V06B.HDF5
#

from ipynb.fs.full.utils import *
from glob import glob
import os
import requests
from urllib.parse import urlparse
from bs4 import BeautifulSoup
import re
import csv
import h5py
import numpy as np
# NOTE(review): `diameter` is not referenced by any cell visible in this notebook;
# presumably it was meant as a window size for the precipitation cut-out -- confirm or remove.
diameter = 5



In [9]:
def date_to_nth_day(date, fmt='%Y%m%d'):
    '''
    Return the 1-based day of the year for a date string.

    date: date text laid out according to ``fmt`` (default 'YYYYMMDD').
    fmt:  ``strptime`` format used to parse ``date``.
    '''
    parsed = datetime.datetime.strptime(date, fmt)
    jan_first = datetime.datetime(parsed.year, 1, 1)
    # Whole days elapsed since 1 January, shifted so that 1 January is day 1.
    elapsed = parsed - jan_first
    return 1 + elapsed.days

def parse_html(html_file):
    '''
    Collect the link targets of the data files listed in a saved index page.

    html_file: path of the HTML file on disk.
    Returns a list with the ``href`` of every <a> tag whose text starts
    with '3B', in the order ``find_all`` yields them.
    '''
    def is_data_link(tag):
        # Keep only anchors whose visible text looks like a data file name.
        return tag.name == "a" and tag.text.startswith('3B')

    with open(html_file, 'r') as page:
        anchors = BeautifulSoup(page, "html.parser").find_all(is_data_link)
    return [anchor["href"] for anchor in anchors]
    

def download_filelist(folder, url):
    '''
    Download the directory listing at ``url`` and return the files it links to.

    The listing is saved as ``index.html`` in ``folder`` and then parsed with
    ``parse_html``.

    folder: target directory; it is concatenated as-is with the file name,
            so it must end with a path separator.
    url:    address of the remote directory listing.
    Returns the list produced by ``parse_html``, or None when the server
    does not answer with HTTP 200.
    '''
    print ('downloading to ', folder)

    # Context managers close the credential files; readline() keeps the line
    # terminator, so strip it or it becomes part of the username/password.
    with open("/home/fun/profile/imerg_username.txt", "r") as username_file:
        username = username_file.readline().rstrip('\r\n')
    with open("/home/fun/profile/imerg_password.txt", "r") as password_file:
        password = password_file.readline().rstrip('\r\n')

    filename = folder + 'index.html'

    r = requests.get(url, auth = (username, password))

    if r.status_code != 200:
        # Report the failure the same way download_file does instead of
        # returning None silently.
        print ('download error ', r.status_code)
        return None

    print ('writing to', filename)
    with open(filename, 'wb') as out:
        # The body is already fully buffered (no stream=True), so write it in
        # one call rather than iterating it one byte at a time.
        out.write(r.content)

    return parse_html(filename)
        
def download_file(folder, url, filename):
    '''
    Download ``url + filename`` and save it as ``folder + filename``.

    folder:   target directory; concatenated as-is with ``filename``, so it
              must end with a path separator.
    url:      remote directory URL; concatenated as-is with ``filename``.
    filename: name of the remote file, also used as the local file name.

    Returns None. A response other than HTTP 200 is only reported on stdout
    and no file is written.
    '''
    # Context managers close the credential files; readline() keeps the line
    # terminator, so strip it or it becomes part of the username/password.
    with open("/home/fun/profile/imerg_username.txt", "r") as username_file:
        username = username_file.readline().rstrip('\r\n')
    with open("/home/fun/profile/imerg_password.txt", "r") as password_file:
        password = password_file.readline().rstrip('\r\n')

    print ('downloading file ', url + filename)

    # stream=True avoids holding the whole file in memory, and the explicit
    # chunk size replaces iter_content()'s 1-byte default, which made the
    # write loop run once per byte. The `with` releases the connection.
    with requests.get(url + filename, auth = (username, password), stream = True) as r:
        if r.status_code == 200:
            print ('writing to', folder + filename)
            with open(folder + filename, 'wb') as out:
                for bits in r.iter_content(chunk_size = 1024 * 1024):
                    out.write(bits)
        else:
            print ('download error ', r.status_code)
    
     
def generate_imerg_url(datestr):
    '''
    Compose the IMERG data directory URL for a date given as 'YYYYMMDD'.

    The URL ends with '<year>/<day of year, zero-padded to three digits>/'.
    It is printed and then returned.
    '''
    # Previous data source, kept for reference:
    #url = 'https://arthurhouhttps.pps.eosdis.nasa.gov/gpmallversions/V06/' + datestr[0:4] + '/' + datestr[4:6] + '/' + datestr[6:8] + '/imerg/'

    year = datestr[0:4]
    day_of_year = str(date_to_nth_day(datestr)).zfill(3)
    base = 'https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.06/'
    url = base + year + '/' + day_of_year + '/'

    print (url)
    return url

    
def get_target_file(filelist, timestr):
    '''
    Return the first entry of ``filelist`` that contains 'S' + ``timestr``.

    The search key is printed. Returns None when no entry matches.
    '''
    key = 'S' + timestr
    print (key)
    matches = (name for name in filelist if key in name)
    return next(matches, None)

    
def download_imerg(folder):
    '''
    Download and post-process the IMERG file for every track point of a storm.

    ``folder`` must contain ``profile.json``. Its 'path' entry is iterated;
    each item is a dict from which the keys 'date', 'time', 'lg' and 'lt'
    are read.

    For each track point: build the day's URL, fetch the file listing, pick
    the file matching the time, download it and hand it to
    ``post_processing``. A track point is skipped when the listing cannot be
    fetched, no file matches, or the download did not produce a file.
    '''
    # Local import so the function does not depend on `json` leaking in
    # through the notebook's star import.
    import json

    # The helpers build paths as ``folder + name``; make sure the folder ends
    # with a separator so those concatenations stay inside the folder.
    folder = os.path.join(folder, '')

    with open(folder + 'profile.json') as json_file:
        d = json.load(json_file)

    for it in d['path']:
        print(it)

        url = generate_imerg_url(str(it['date']))

        filelist = download_filelist(folder, url)
        if filelist is None:
            continue

        filename = get_target_file(filelist, str(it['time']))  # without path
        if filename is None:
            continue

        download_file(folder, url, filename)

        # this is the downloaded file
        full_path_filename = folder + filename

        # download_file only prints on a failed request; without this check
        # post_processing would raise when it tries to open a missing file.
        if not os.path.exists(full_path_filename):
            print ('skipping post processing, file missing ', full_path_filename)
            continue

        post_processing(it, folder, full_path_filename, it['lg'], it['lt'])

def post_processing(param, folder, full_path_filename, lonstr, latstr, half_window=50): # lon and lat should be read from the path name or given externally
    '''
    Cut the precipitation window around a storm position out of a downloaded
    IMERG file, save it as .npy and delete the HDF5 file.

        1) pick the data only in the bounding box around (lonstr, latstr)
        2) save it to folder + 'imerg_precipitation_<date>_<time>.npy'
        3) remove the downloaded hdf5 file

    param:              track-point dict; 'date' and 'time' are used for the
                        output file name.
    folder:             output directory, concatenated as-is with the file
                        name (must end with a path separator).
    full_path_filename: path of the downloaded HDF5 file; it is deleted at
                        the end.
    lonstr, latstr:     position text such as '35.6W' / '23.9N'; the last
                        character is the hemisphere, 'W' and 'S' give
                        negative values.
    half_window:        number of grid cells taken on each side of the
                        position along both axes (default 50, i.e. a window
                        of up to 100 x 100 cells).
    '''
    lon_sign = -1.0 if 'W' in lonstr else 1.0
    lon = lon_sign * float(lonstr[:-1])
    lat_sign = -1.0 if 'S' in latstr else 1.0
    lat = lat_sign * float(latstr[:-1])

    # Close the HDF5 file before it is removed below; the sliced window is a
    # plain in-memory array and stays valid after the file is closed.
    with h5py.File(full_path_filename, 'r') as hdf_array:
        group = hdf_array['Grid']

        # Only the first coordinate of each axis is needed, so neither the
        # full axes nor the full precipitation array are loaded.
        first_lon = group['lon'][0] # x-axis
        first_lat = group['lat'][0] # y-axis

        # Grid index = offset from the first grid coordinate times 10
        # (assumes a 0.1-degree grid spacing -- TODO confirm).
        x = int((lon-first_lon)*10)
        y = int((lat-first_lat)*10)

        # Clamp the lower bounds: a negative slice start would not select the
        # cells next to the grid edge. Near an edge the window is therefore
        # smaller than 2*half_window cells.
        x_start = max(x - half_window, 0)
        y_start = max(y - half_window, 0)

        data = group['precipitationCal'][:, x_start:x+half_window, y_start:y+half_window] # (#1)

    datestr = str(param['date'])
    timestr = str(param['time'])
    # Wrapping in a list adds one leading axis to the saved array.
    final = np.array([data])

    pathname = folder + 'imerg_precipitation_' + datestr + '_' + timestr + '.npy'
    np.save(pathname, final) # write np array to .npy file named with datestr + timestr (#2)

    os.remove(full_path_filename) # remove the downloaded .HDF5 file (#3)
  

In [11]:
# Driver cell: run the IMERG download/post-processing for the selected storm folder(s).
#getDayPosition('/home/fun/data/AL182012/profile.json')

# The pattern contains no wildcard, so glob yields at most this single path.
# NOTE(review): recursive=True only affects patterns containing '**', so it has no effect here.
# The trailing '/' matters: the download helpers build paths as `folder + name`.
hurricanes_folders = glob("/home/fun/data/AL312005/", recursive = True)


# Each folder is expected to contain the profile.json that download_imerg reads.
for it in hurricanes_folders:
    download_imerg(it)



{'date': 20051230, 'time': '0000', 'category': 'TD', 'lt': '23.9N', 'lg': '35.6W', 'maxwind': '30', 'maxpres': '1009', '34ktne': '0', '34ktse': '0', '34ktsw': '0', '34ktnw': '0', '50ktne': '0', '50ktse': '0', '50ktsw': '0', '50ktnw': '0', '64ktne': '0', '64ktse': '0', '64ktsw': '0', '64ktnw': '0', 'maxwindradius': '0'}
https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.06/2005/364/
downloading to  /home/fun/data/AL312005/
writing to /home/fun/data/AL312005/index.html
S0000
downloading file  https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.06/2005/364/3B-HHR.MS.MRG.3IMERG.20051230-S000000-E002959.0000.V06B.HDF5
writing to /home/fun/data/AL312005/3B-HHR.MS.MRG.3IMERG.20051230-S000000-E002959.0000.V06B.HDF5
{'date': 20051230, 'time': '0600', 'category': 'TS', 'lt': '24.2N', 'lg': '36.1W', 'maxwind': '40', 'maxpres': '1005', '34ktne': '125', '34ktse': '125', '34ktsw': '125', '34ktnw': '0', '50ktne': '50', '50ktse': '0', '50ktsw': '0', '50ktnw': '0', '64ktne': '0', '

{'date': 20060101, 'time': '1800', 'category': 'TS', 'lt': '25.0N', 'lg': '38.6W', 'maxwind': '55', 'maxpres': '994', '34ktne': '130', '34ktse': '80', '34ktsw': '80', '34ktnw': '30', '50ktne': '90', '50ktse': '60', '50ktsw': '0', '50ktnw': '0', '64ktne': '60', '64ktse': '0', '64ktsw': '0', '64ktnw': '0', 'maxwindradius': '0'}
https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.06/2006/001/
downloading to  /home/fun/data/AL312005/
writing to /home/fun/data/AL312005/index.html
S1800
downloading file  https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.06/2006/001/3B-HHR.MS.MRG.3IMERG.20060101-S180000-E182959.1080.V06B.HDF5
writing to /home/fun/data/AL312005/3B-HHR.MS.MRG.3IMERG.20060101-S180000-E182959.1080.V06B.HDF5
{'date': 20060102, 'time': '0000', 'category': 'TS', 'lt': '24.6N', 'lg': '38.9W', 'maxwind': '55', 'maxpres': '994', '34ktne': '140', '34ktse': '60', '34ktsw': '60', '34ktnw': '25', '50ktne': '90', '50ktse': '60', '50ktsw': '0', '50ktnw': '0', '64ktne': 

{'date': 20060104, 'time': '1200', 'category': 'TS', 'lt': '21.9N', 'lg': '43.6W', 'maxwind': '45', 'maxpres': '1000', '34ktne': '125', '34ktse': '50', '34ktsw': '50', '34ktnw': '25', '50ktne': '125', '50ktse': '0', '50ktsw': '0', '50ktnw': '0', '64ktne': '0', '64ktse': '0', '64ktsw': '0', '64ktnw': '0', 'maxwindradius': '0'}
https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.06/2006/004/
downloading to  /home/fun/data/AL312005/
writing to /home/fun/data/AL312005/index.html
S1200
downloading file  https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.06/2006/004/3B-HHR.MS.MRG.3IMERG.20060104-S120000-E122959.0720.V06B.HDF5
writing to /home/fun/data/AL312005/3B-HHR.MS.MRG.3IMERG.20060104-S120000-E122959.0720.V06B.HDF5
{'date': 20060104, 'time': '1800', 'category': 'TS', 'lt': '21.7N', 'lg': '44.6W', 'maxwind': '40', 'maxpres': '1002', '34ktne': '125', '34ktse': '50', '34ktsw': '50', '34ktnw': '25', '50ktne': '60', '50ktse': '0', '50ktsw': '0', '50ktnw': '0', '64ktne': 

writing to /home/fun/data/AL312005/3B-HHR.MS.MRG.3IMERG.20060107-S060000-E062959.0360.V06B.HDF5
{'date': 20060107, 'time': '1200', 'category': 'LO', 'lt': '24.8N', 'lg': '54.2W', 'maxwind': '25', 'maxpres': '1014', '34ktne': '0', '34ktse': '0', '34ktsw': '0', '34ktnw': '0', '50ktne': '0', '50ktse': '0', '50ktsw': '0', '50ktnw': '0', '64ktne': '0', '64ktse': '0', '64ktsw': '0', '64ktnw': '0', 'maxwindradius': '0'}
https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.06/2006/007/
downloading to  /home/fun/data/AL312005/
writing to /home/fun/data/AL312005/index.html
S1200
downloading file  https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHH.06/2006/007/3B-HHR.MS.MRG.3IMERG.20060107-S120000-E122959.0720.V06B.HDF5
writing to /home/fun/data/AL312005/3B-HHR.MS.MRG.3IMERG.20060107-S120000-E122959.0720.V06B.HDF5
{'date': 20060107, 'time': '1800', 'category': 'LO', 'lt': '26.3N', 'lg': '55.7W', 'maxwind': '25', 'maxpres': '1016', '34ktne': '0', '34ktse': '0', '34ktsw': '0', '3

In [15]:
# Scratch cell: inspect the layout of one IMERG HDF5 file and try slicing a
# small window around a point.
# NOTE(review): the recorded output of this cell is an OSError -- the file does
# not exist at this path (post_processing deletes the HDF5 files it handles).
# NOTE(review): the file handle `f` is never closed in this cell.

#imerg_df = pd.read_hdf('/home/fun/data/imerg/3B-HHR.MS.MRG.3IMERG.20121021-S023000-E025959.0150.V06B.HDF5')  

import h5py

f = h5py.File('/home/fun/data/AL022004/3B-HHR.MS.MRG.3IMERG.20040803-S180000-E182959.1080.V06B.HDF5', 'r')

for key in f.keys():
    print(key) # name of a root-level object in the HDF5 file - can be a group or a dataset
    print(type(f[key])) # the object type: usually group or dataset
    
# Get the HDF5 group; the key needs to be a group name printed above.
group = f['Grid']

# Check which keys are inside that group.
for key in group.keys():
    print(key)
    
# These globals (`longitude`, `latitude`) are reused by the next scratch cell.
longitude = group['lon'][()] # len 3600, x-axis
latitude = group['lat'][()] # len 1800, y-axis
rain = group['precipitationCal'][()] # array of shape (1, 3600, 1800)

# Sample position to look up (degrees; negative longitude = west).
lon = -77.4
lat = 14.3

#print(latitude, clos_coords)
# Grid index = offset from the first grid coordinate times 10, truncated
# (assumes a 0.1-degree grid spacing -- TODO confirm).
i = int((lat-latitude[0])*10)
j = int((lon-longitude[0])*10)
print(latitude[i], longitude[j], i, j)

# Drop the leading length-1 axis so the array is indexed as [lon, lat].
rain = rain.reshape(3600, 1800)

# 10 x 10 cell window around the sample position (longitude index first).
sliced = rain[j-5:j+5, i-5:i+5]
sliced.shape

OSError: Unable to open file (unable to open file: name = '/home/fun/data/AL022004/3B-HHR.MS.MRG.3IMERG.20040803-S180000-E182959.1080.V06B.HDF5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [12]:
# Scratch cell: read only a 10 x 10 window straight from the HDF5 dataset
# instead of loading the whole array.
# NOTE(review): depends on `longitude` and `latitude` defined by the inspection
# cell; the recorded output is a NameError because they were not defined when
# this cell ran.
import numpy as np
filename = '/home/fun/data/AL022004/3B-HHR.MS.MRG.3IMERG.20040803-S180000-E182959.1080.V06B.HDF5'

# Sample position to look up (degrees; negative longitude = west).
lon = -77.4
lat = 14.3

#print(latitude, clos_coords)
# Grid index = offset from the first grid coordinate times 10, truncated
# (assumes a 0.1-degree grid spacing -- TODO confirm).
x = int((lon-longitude[0])*10)
y = int((lat-latitude[0])*10)

# NOTE(review): the file handle is never closed in this cell.
hdf_array = h5py.File(filename, 'r')
# Slicing the dataset reads just the requested cells: all of axis 0, then
# 10 cells along the longitude axis and 10 along the latitude axis.
data = hdf_array['Grid']['precipitationCal'][:, x-5:x+5, y-5:y+5]

data

NameError: name 'longitude' is not defined

In [85]:
# Scratch cell: derive a base name by cutting the last five characters
# (the '.HDF5' extension) off the file path.
file = '/home/fun/data/AL022004/3B-HHR.MS.MRG.3IMERG.20121021-S000000-E002959.0000.V06B.HDF5'
len(file)

# len('.HDF5') == 5, so this keeps everything before the extension.
name = file[:len(file)-5]
name

'/home/fun/data/AL022004/3B-HHR.MS.MRG.3IMERG.20121021-S000000-E002959.0000.V06B'

In [179]:
# Scratch cell: load one saved precipitation window (the file name follows the
# 'imerg_precipitation_<date>_<time>.npy' scheme used by post_processing) and display it.
# NOTE(review): this displays the whole array; consider showing `array.shape` instead.
array = np.load('/home/fun/data/AL022004/imerg_precipitation_20040803_1200.npy')
array

array([[[[ 0.06247815,  0.29016453,  1.0588278 ,  2.2504165 ,
           2.343059  ,  2.4157019 , 10.181723  , 12.900776  ,
          26.127796  , 30.346048  ],
         [ 0.37743443,  0.28462726,  0.28016454,  2.1891775 ,
           2.1970284 ,  9.379759  ,  8.414813  , 19.552551  ,
          24.830242  , 23.657272  ],
         [ 0.39421082,  0.9118869 ,  1.2785861 ,  2.6486425 ,
           2.723851  ,  5.715255  , 10.913759  , 25.128246  ,
          24.038334  , 25.0345    ],
         [ 1.0279177 ,  0.93866324,  1.2685862 ,  1.2039589 ,
           2.5193882 ,  4.1288385 ,  5.5526223 ,  6.9541235 ,
          11.752715  , 21.643759  ],
         [ 1.1708999 ,  1.1753626 ,  1.3354396 ,  1.3797377 ,
           1.9213829 ,  4.1697483 ,  5.4864163 ,  7.268257  ,
           9.392678  , 11.334448  ],
         [ 1.6964267 ,  1.4473368 ,  1.3636196 ,  1.3289921 ,
           1.7290797 ,  2.1481695 ,  2.4771826 ,  2.4582572 ,
           2.5453625 ,  6.917589  ],
         [ 3.4169512 ,  2.0160208 

NameError: name 'latitude' is not defined