In [None]:
import pandas as pd
import numpy as np
import xarray as xr
import glob 
import h5py

In [None]:
# Root directory; create_ozh() globs '<cpath><variable>/*.he5' beneath it.
cpath = '/home/pankaj/mls/'
# Directory whose '*.he5' files drive the processing loop in the last cell.
fpath = '/home/pankaj/mls/ozo/'
# Output directory; create_ozh() writes '<opath><date>.nc' here.
opath = '/home/pankaj/mls/ozh/'

In [None]:
def read_lat_lon(dfile):
    """Read the geolocation arrays of one HDF-EOS5 swath file.

    The swath name is derived from the file name: it is the third
    '-'-separated token of ``dfile``, cut at its first '_'.

    Parameters
    ----------
    dfile : str
        Path of the ``.he5`` file. The whole string is split on '-', so a
        hyphen in the directory part shifts the token that is picked.

    Returns
    -------
    tuple
        ``(lat, lon, lev, lsa, lst, oga, sza, tim, chn)``: the datasets
        Latitude, Longitude, Pressure, LineOfSightAngle, LocalSolarTime,
        OrbitGeodeticAngle, SolarZenithAngle, Time and ChunkNumber from the
        swath's 'Geolocation Fields' group, in that order.
    """
    field = dfile.split('-')[2].split('_')[0]
    names = (
        'Latitude', 'Longitude', 'Pressure', 'LineOfSightAngle',
        'LocalSolarTime', 'OrbitGeodeticAngle', 'SolarZenithAngle',
        'Time', 'ChunkNumber',
    )
    # Nothing is written, so open read-only ('r+' fails on read-only files)
    # and let the context manager close the handle.
    with h5py.File(dfile, 'r') as h5file:
        geo = h5file['HDFEOS']['SWATHS'][field]['Geolocation Fields']
        # dataset[()] replaces Dataset.value, which h5py 3.0 removed.
        # The tuple is built eagerly, i.e. before the file is closed.
        return tuple(geo[name][()] for name in names)

def read_data(dfile):
    """Read the data fields of one HDF-EOS5 swath file.

    The swath name is derived from the file name: it is the third
    '-'-separated token of ``dfile``, cut at its first '_'.

    Parameters
    ----------
    dfile : str
        Path of the ``.he5`` file. The whole string is split on '-', so a
        hyphen in the directory part shifts the token that is picked.

    Returns
    -------
    tuple
        ``(field, con, prc, mol, qua, sta, nlev, ntim)`` where ``field`` is
        the swath name, ``con``/``prc``/``mol``/``qua``/``sta`` are the
        'Data Fields' datasets Convergence, L2gpPrecision, L2gpValue,
        Quality and Status, and ``nlev``/``ntim`` are the lengths of the
        swath's 'nLevels' and 'nTimes' datasets.
    """
    field = dfile.split('-')[2].split('_')[0]
    # Nothing is written, so open read-only ('r+' fails on read-only files)
    # and let the context manager close the handle.
    with h5py.File(dfile, 'r') as h5file:
        swath = h5file['HDFEOS']['SWATHS'][field]
        fields = swath['Data Fields']
        # dataset[()] replaces Dataset.value, which h5py 3.0 removed.
        con = fields['Convergence'][()]
        prc = fields['L2gpPrecision'][()]
        mol = fields['L2gpValue'][()]
        qua = fields['Quality'][()]
        sta = fields['Status'][()]
        nlev = len(swath['nLevels'][()])
        ntim = len(swath['nTimes'][()])
    return field, con, prc, mol, qua, sta, nlev, ntim

def correction(name, pre):
    """Read one product file and NaN-mask the samples that fail screening.

    The value and precision arrays are indexed ``[i, j]`` where ``i`` runs
    along the Convergence/Quality/Status arrays and ``j`` along ``pre``.
    Only samples whose row and column both pass the field-specific
    thresholds below are copied over; everything else is NaN.

    Parameters
    ----------
    name : str
        Path of the file, passed to ``read_data``.
    pre : numpy.ndarray
        1-D pressure levels matching the second axis of the data arrays.

    Returns
    -------
    tuple
        ``(field, con, mp, mg, qua, sta, nlev, ntim)``; ``mg`` is the masked
        value array (multiplied by 1e6 for 'H2O' and 'O3') and ``mp`` the
        masked precision array. For any other field name than the ones
        handled below both stay entirely NaN.
    """
    field, con, prc, mol, qua, sta, nlev, ntim = read_data(name)

    # Start from all-NaN arrays; only screened samples are filled in.
    mg = np.nan * np.zeros_like(mol)
    mp = np.nan * np.zeros_like(prc)

    # Pressure bands that are blanked again after the copy for some fields.
    ll = (pre > 314) & (pre < 318)
    eo = pre < 0.018

    rows = cols = None
    scale = 1.0
    blank = []
    if field in ['GPH', 'Temperature']:
        # NOTE(review): as in the original chain, this keeps only rows with
        # qua >= 0.9 and only levels with pre <= 100 -- confirm that
        # dropping every level above 100 is intended.
        rows = (con < 1.03) & (qua > 0.2) & (sta % 2 == 0) & ~(qua < 0.9)
        cols = (pre > 0.001) & (pre < 261) & ~(pre > 100)
        blank = [ll]
    elif field == 'H2O':
        rows = (con < 2.0) & (qua > 1.45) & (sta % 2 == 0) & (sta != 16) & (sta != 32)
        cols = (pre > 0.002) & (pre < 317)
        scale = 1.0e6
    elif field == 'O3':
        rows = (con < 1.03) & (qua > 1.0) & (sta % 2 == 0)
        cols = (pre > 0.02) & (pre < 261)
        scale = 1.0e6
        blank = [ll, eo]

    if rows is not None:
        # Chained boolean indexing (a[rows, :][:, cols] = ...) assigns into
        # a temporary copy and leaves ``a`` untouched; np.ix_ builds one
        # index that addresses the rows x cols sub-grid in place.
        keep = np.ix_(rows, cols)
        mg[keep] = mol[keep] * scale
        # NOTE(review): precision is copied unscaled even where the value
        # is multiplied by 1e6; only its sign is used below.
        mp[keep] = prc[keep]
        for levels in blank:
            mg[:, levels] = np.nan
            mp[:, levels] = np.nan

    # Negative precision marks the value as unusable.
    mg[mp < 0] = np.nan
    return field, con, mp, mg, qua, sta, nlev, ntim

def create_ozh(ozfiles):
    """Merge one day's four screened products into a NetCDF file.

    The day tag is the last '_'-separated token before the first '.' of
    ``ozfiles`` and has the form '<year>d<day-of-year>'. For each of the
    directories 'ozo', 't', 'water' and 'gph' under ``cpath`` the first
    (sorted) '*.he5' file whose path contains that tag is screened with
    ``correction`` and stacked along a 'Variables' axis.

    Parameters
    ----------
    ozfiles : str
        Path of a single ``.he5`` file (despite the plural name); it
        supplies the day tag and the geolocation arrays.

    Returns
    -------
    xarray.Dataset
        Dataset with one variable 'ozh' of dims
        ('Latitude', 'Pressure', 'Variables') plus a 'Longitude' coordinate.
        The same dataset is written to ``opath + '<YYYY-MM-DD>.nc'``.

    Raises
    ------
    IOError
        If one of the four directories has no file for the day tag.
    """
    cols = ['ozo', 't', 'water', 'gph']
    day = str(ozfiles.split('.')[0].split('_')[-1])
    parts = day.split('d')
    # 1 January of the year plus (day-of-year - 1) days.
    date = pd.to_datetime(int(parts[0]), format='%Y') + pd.to_timedelta(int(parts[1]) - 1, unit='d')
    date = str(date.date())

    # Only latitude, longitude and pressure are needed from the geolocation.
    geo = read_lat_lon(ozfiles)
    lat, lon, pre = geo[0], geo[1], geo[2]

    data = np.zeros((lat.shape[0], pre.shape[0], len(cols)))
    for i, var in enumerate(cols):
        files = sorted(glob.glob(cpath + var + '/*.he5'))
        matches = [path for path in files if day in path]
        if not matches:
            # Previously an anonymous IndexError from ``[...][0]``.
            raise IOError('no ' + var + ' file for ' + day + ' in ' + cpath + var)
        # Element 3 of correction()'s result is the screened value array.
        data[:, :, i] = correction(matches[0], pre)[3]

    ds = xr.DataArray(data, coords = [lat, pre, cols], dims = ['Latitude', 'Pressure', 'Variables'])
    # The attribute key is misspelt; kept as is so existing readers of the
    # output files keep working.
    ds.attrs['discription'] = 'Ozone, Temperature, Water, Geopotential Height for ' + date
    dss = xr.Dataset({'ozh': ds})
    # NOTE(review): a bare 1-D array assigned here presumably becomes its own
    # 'Longitude' dimension instead of being attached to 'Latitude' -- confirm
    # this layout is intended.
    dss.coords['Longitude'] = lon
    ofile = opath + date + '.nc'
    dss.to_netcdf(ofile)
    return dss

In [1]:
%%time
# Requires the imports, path and function cells above to have been run first.
ozfiles = glob.glob(fpath+'/*.he5')
# Only the first file (in sorted order) is processed.
ozfiles = sorted(ozfiles)[:1]
for count, filename in enumerate(ozfiles, 1):
    print(count)
    try:
        ds = create_ozh(filename)
        print(ds)
    except Exception as exc:
        # Best effort: move on to the next file, but report why this one
        # failed. Unlike a bare ``except:``, KeyboardInterrupt still stops
        # the loop.
        print('{}: {!r}'.format(filename, exc))
# Parenthesised form prints the same text on Python 2 and Python 3.
print('done')

NameError: name 'glob' is not defined