In [1]:
import numpy as np
import pandas as pd
from netCDF4 import Dataset

In [4]:
#### Loads a MERRA netCDF file and its key variables (lon, lat, hurs) ####
# Note: written for daily MERRA files; every time step found in the file is processed. #
def _filled_float64(values):
    """Return `values` as a float64 ndarray with masked entries set to NaN."""
    return np.ma.filled(np.ma.asarray(values).astype(np.float64), np.nan)


def organize_MERRAvar(MERRA_file_path, var_name='hurs', verbose=True):
    """Load one gridded variable from a netCDF file into a wide 2-D array.

    The file must expose 1-D ``lon`` and ``lat`` variables and a 3-D data
    variable whose axes are ordered (time, lat, lon).

    Parameters
    ----------
    MERRA_file_path : str or path-like
        Location of the netCDF file to read.
    var_name : str, optional
        Name of the 3-D variable to extract (default ``'hurs'``).
    verbose : bool, optional
        When True (default), print the dataset's variable metadata.

    Returns
    -------
    df_new : numpy.ndarray, float64, shape (nlats * nlons, 2 + ntime)
        One row per grid cell, with longitude varying fastest. Column 0 is
        latitude, column 1 is longitude, and column ``2 + k`` holds the
        variable at time step ``k``. Masked values become NaN.
    time : numpy.ndarray, float64, shape (ntime,)
        1-based time-step counter (1.0, 2.0, ...). This is NOT the file's
        own ``time`` variable.
    lats, lons : array-like
        Latitude and longitude coordinates exactly as read from the file.
    nlons : int
        Number of longitude points.

    Raises
    ------
    ValueError
        If the data variable's trailing axes do not match (lat, lon).
    """
    # The context manager closes the file handle even if a read fails.
    with Dataset(MERRA_file_path) as data:
        ### Extracts location variables and the variable of interest ###
        lons = data.variables['lon'][:]
        lats = data.variables['lat'][:]
        field = data.variables[var_name][:, :, :]

        if verbose:
            ### print the variables saved on the dataset
            # Must happen while the file is still open.
            print(data.variables)

    ntime = field.shape[0]
    nlats = lats.shape[0]
    nlons = lons.shape[0]
    if field.shape[1:] != (nlats, nlons):
        raise ValueError(
            "variable %r has shape %r; expected (time, %d, %d) to match "
            "the lat/lon coordinates" % (var_name, field.shape, nlats, nlons)
        )

    ### Defines time variable (1-based step counter, stored as floats) ###
    time = np.arange(1, ntime + 1, dtype=np.float64)

    ### Consolidates location and per-time-step values into one array ###
    # Row order is lat-major with lon varying fastest, i.e. the C-order
    # flattening of a single (lat, lon) slice.
    lat_col = np.repeat(_filled_float64(lats), nlons)
    lon_col = np.tile(_filled_float64(lons), nlats)
    # Each time slice becomes one column; a single reshape + transpose avoids
    # re-copying the growing result once per time step.
    per_time = _filled_float64(field).reshape(ntime, nlats * nlons).T
    df_new = np.column_stack((lat_col, lon_col, per_time))

    return df_new, time, lats, lons, nlons

In [None]:
# Run the loader on the local hurs file; the returned tuple is this cell's output.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
hurs_file_path = "/Users/inunezfe/Documents/hurs_day_HadGEM2-ES_rcp45_r1i1p1_EWEMBI_20060101-20101231.nc4"
organize_MERRAvar(hurs_file_path)

{'lon': <class 'netCDF4._netCDF4.Variable'>
float32 lon(lon)
    axis: X
    standard_name: longitude
    long_name: longitude
    units: degrees_east
unlimited dimensions: 
current shape = (720,)
filling on, default _FillValue of 9.969209968386869e+36 used, 'lat': <class 'netCDF4._netCDF4.Variable'>
float32 lat(lat)
    axis: Y
    standard_name: latitude
    long_name: latitude
    units: degrees_north
unlimited dimensions: 
current shape = (360,)
filling on, default _FillValue of 9.969209968386869e+36 used, 'height': <class 'netCDF4._netCDF4.Variable'>
float64 height()
    standard_name: height
    long_name: height
    units: m
    positive: up
    axis: Z
unlimited dimensions: 
current shape = ()
filling on, default _FillValue of 9.969209968386869e+36 used, 'time': <class 'netCDF4._netCDF4.Variable'>
float64 time(time)
    standard_name: time
    units: days since 2006-1-1 00:00:00
    calendar: proleptic_gregorian
    axis: T
    long_name: time
unlimited dimensions: time
current