In [16]:
import numpy as np
import pandas as pd
from netCDF4 import Dataset
import requests as req
import state_locations

In [2]:
# Downloads the data files using the URLs listed on the ISIMIP web page.

def get__isimip_data(path):
    """Download one file from the ISIMIP file server into the working directory.

    Parameters
    ----------
    path : str
        Location of the file relative to ``https://files.isimip.org/``,
        as listed on the ISIMIP web page.

    Returns
    -------
    str
        Name of the local file that was written (the last component of
        ``path``).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code. Nothing is written
        to disk in that case.
    """
    # Full URL = file-server root + the path found on the ISIMIP web page.
    url = 'https://files.isimip.org/' + path
    # The local file keeps the name of the remote file.
    file_name = path.split('/')[-1]
    # stream=True so the body is fetched chunk by chunk instead of all at once.
    with req.get(url, stream=True) as response:
        # Report the outcome to the user.
        if response.status_code == 200:
            print('Success!')
        elif response.status_code == 404:
            print('Not Found.')
        # Fail before touching the disk: otherwise the body of an error
        # response would be saved under the data file's name and only fail
        # later, when the file is opened as NetCDF.
        response.raise_for_status()
        # Write the response to the file in 8 KiB chunks.
        with open(file_name, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    return file_name

In [17]:
#### Loads a NetCDF file and extracts the variables of interest ####
# Note: despite the "MERRA" name, the function processes every time step in the file; the file used below is a multi-year daily `hurs` file. #
def organize_MERRAvar(MERRA_file_path):
    """Load a NetCDF file and lay its ``hurs`` variable out as a 2-D matrix.

    The file must provide the variables ``lon``, ``lat`` and ``hurs``;
    ``hurs`` is expected to be indexed as (time, lat, lon).

    Parameters
    ----------
    MERRA_file_path : str
        Path of the NetCDF file to read.

    Returns
    -------
    df_new : numpy.ndarray
        Float64 matrix of shape ``(nlats * nlons, 2 + ntime)``. Column 0 is
        the latitude and column 1 the longitude of each grid cell (latitude
        varies slowest); every following column holds the ``hurs`` values of
        one time step, in file order. Masked values are stored as NaN.
    time : numpy.ndarray
        1-based time-step counter, ``1 .. ntime``.
    lats, lons
        Latitude and longitude coordinates as read from the file.
    nlons : int
        Number of longitudes.
    """
    ### Extracts location variables and the variable of interest ###
    # The context manager closes the file even if reading fails; slicing with
    # [:] copies the values into memory, so they stay usable afterwards.
    with Dataset(MERRA_file_path) as data:
        lons = data.variables['lon'][:]
        lats = data.variables['lat'][:]
        hurs = data.variables['hurs'][:, :, :]

    ### Defines time variable (1-based counter, one entry per time step) ###
    time = np.arange(1, hurs.shape[0] + 1)
    print(time)

    # Bounding box of Indiana, returned in the order W, E, S, N.
    # TODO: not used yet -- the state subsetting is still to be implemented.
    W, E, S, N = state_locations.get_state_loc('IN')

    nlons = lons.shape[0]
    nlats = lats.shape[0]
    # Number of grid cells in a single time step.
    blockt0 = nlons * nlats

    # Allocate the result once. Growing it with np.column_stack inside the
    # loop recopies the whole matrix on every step, and routing the data
    # through a MultiIndex DataFrame needs far more memory than the result.
    df_new = np.empty((blockt0, 2 + time.shape[0]), dtype=np.float64)

    # Same row order as a (lat, lon) product: each latitude is repeated for
    # every longitude, and the longitudes cycle within each latitude.
    df_new[:, 0] = np.repeat(np.asarray(lats, dtype=np.float64), nlons)
    df_new[:, 1] = np.tile(np.asarray(lons, dtype=np.float64), nlats)

    # One column per time step; masked (missing) cells become NaN.
    for step in range(time.shape[0]):
        grid = np.ma.filled(hurs[step].astype(np.float64), np.nan)
        df_new[:, 2 + step] = grid.ravel()

    return df_new, time, lats, lons, nlons

In [4]:

# Single-file variant, handy for a quick test run:
# paths = [ 'ISIMIP2b/SecondaryInputData/GCM_atmosphere/biascorrected/global/rcp45/HadGEM2-ES/hurs_day_HadGEM2-ES_rcp45_r1i1p1_EWEMBI_20060101-20101231.nc4']


# Complete list of files to fetch (paths relative to the ISIMIP file server)
paths = [
    'ISIMIP2b/SecondaryInputData/GCM_atmosphere/biascorrected/global/rcp45/HadGEM2-ES/hurs_day_HadGEM2-ES_rcp45_r1i1p1_EWEMBI_20060101-20101231.nc4',
    'ISIMIP2b/SecondaryInputData/GCM_atmosphere/biascorrected/global/rcp45/HadGEM2-ES/hurs_day_HadGEM2-ES_rcp45_r1i1p1_EWEMBI_20110101-20201231.nc4',
    'ISIMIP2b/SecondaryInputData/GCM_atmosphere/biascorrected/global/rcp45/HadGEM2-ES/hurs_day_HadGEM2-ES_rcp45_r1i1p1_EWEMBI_20210101-20301231.nc4',
    'ISIMIP2b/SecondaryInputData/GCM_atmosphere/biascorrected/global/rcp45/HadGEM2-ES/hurs_day_HadGEM2-ES_rcp45_r1i1p1_EWEMBI_20310101-20401231.nc4',
    'ISIMIP2b/SecondaryInputData/GCM_atmosphere/biascorrected/global/rcp45/HadGEM2-ES/hurs_day_HadGEM2-ES_rcp45_r1i1p1_EWEMBI_20410101-20501231.nc4',
]


# Download every nc4 file into the working directory, one after another
for path in paths:
    get__isimip_data(path)

    
    
    



Success!
Success!
Success!
Success!
Success!


In [18]:
# First downloaded file: daily hurs, 2006-01-01 to 2010-12-31.
path = "hurs_day_HadGEM2-ES_rcp45_r1i1p1_EWEMBI_20060101-20101231.nc4"
# `time` and the other results exist only inside the function, so they must
# be taken from its return value; otherwise print(time) raises NameError on
# a fresh kernel.
df_new, time, lats, lons, nlons = organize_MERRAvar(path)
print(time)

[   1    2    3 ... 1824 1825 1826]


KeyboardInterrupt: 