In [1]:
#import libraries
import numpy.ma as MA
import datetime as dt
from datetime import datetime, timedelta
import xarray as xr
import numpy as np
import pandas as pd
import os
from netCDF4 import Dataset  # http://code.google.com/p/netcdf4-python/

#subroutine to check for bad values
def checkValue(value):
    """Convert one text field of a buoy file into a number or a masked value.

    Returns ``MA.masked`` when ``value`` is exactly one of the missing-data
    markers "999.0" or "99.0"; any other input is returned as ``float(value)``.
    """
    missing_markers = ("999.0", "99.0")
    if value in missing_markers:
        return MA.masked
    return float(value)

#subroutine to read .txt buoy data files
def readData(fname):
    """Read a whitespace-delimited buoy .txt file into per-column masked arrays.

    The first line of the file supplies the column names, the second line
    (units) is skipped, and every following non-blank line is one record.
    The column names are printed as a side effect.

    Parameters
    ----------
    fname : str
        Path of the text file to read.

    Returns
    -------
    dict
        Maps each column name to a float32 masked array (fill value 999.999)
        holding one element per record, converted with ``checkValue``.  An
        extra ``'dtime'`` entry holds a ``datetime64[s]`` masked array built
        from the ``#YY``, ``MM``, ``DD``, ``hh`` and ``mm`` columns.

    Raises
    ------
    KeyError
        If one of the five date/time columns is absent from the header.
    IndexError
        If a record has fewer fields than the header has columns.
    """
    # The context manager closes the file even if parsing fails part-way.
    with open(fname) as f:
        col_names = f.readline().split()
        print(col_names)
        f.readline()  # units row: present in the file but not used
        # Blank lines (e.g. a trailing newline) carry no record; skipping them
        # avoids an IndexError when the fields are indexed below.
        data_block = [line for line in f if line.strip()]

    n_rows = len(data_block)
    data = {}
    for col_name in col_names:
        data[col_name] = MA.zeros(n_rows, 'f', fill_value = 999.999)

    # Loop through each value: store it in its column
    for (line_count, line) in enumerate(data_block):
        items = line.split()
        for (col_count, col_name) in enumerate(col_names):
            data[col_name][line_count] = checkValue(items[col_count])

    # Looked up before 'dtime' is added so a missing column fails up front.
    date_cols = [data[key] for key in ("#YY", "MM", "DD", "hh", "mm")]
    data['dtime'] = MA.zeros(n_rows, 'datetime64[s]')
    for i in range(n_rows):
        # The columns are float32, but datetime() only accepts integers.
        # NOTE(review): a masked date/time field cannot be converted by int();
        # confirm such rows cannot occur in the input.
        data["dtime"][i] = dt.datetime(*(int(col[i]) for col in date_cols))

    return data

#subroutine to read the high resolution CMAN data files
def readData_highres(fname):
    """Read the hourly variables of a CMAN netCDF file into an xarray Dataset.

    Opens ``fname`` with netCDF4, copies selected sensor variables (and their
    ``_qc`` companions) from the ``payload_1`` and ``payload_2`` groups into a
    pandas DataFrame indexed by the root ``time`` variable cast to
    ``datetime64[s]``, and converts that frame to an ``xarray.Dataset``.

    Parameters
    ----------
    fname : str
        Path of the netCDF file to read.

    Returns
    -------
    xarray.Dataset
        Variables ``sst1``, ``wnd1``/``wdir1``, ``wnd2``/``wdir2``, ``air1``,
        ``air_pres1``/``air_pres_sea_level1``,
        ``air_pres2``/``air_pres_sea_level2``, ``lat``, ``lon`` (payload 1)
        and ``sst2``, ``wnd3``/``wdir3``, ``wnd4``/``wdir4``, ``air2``
        (payload 2), each with a ``...qc`` companion, on dimension ``time``.
        The ``install_date`` and ``height_of_instrument`` attributes of each
        payload-1 sensor group are attached to ``sst1``, ``wnd1``, ``wnd2``,
        ``air1``, ``air_pres1`` and ``air_pres2`` respectively.
    """
    print('reading:',fname)
    # One fresh attribute dict per variable.  Attributes set on a DataFrame
    # column are not carried over by xr.Dataset(df), so they are collected
    # here and attached to the Dataset once it exists.
    var_attrs = {}
    ds_buoy = Dataset(fname)
    try:
        # hourly time axis
        time_index = np.array(ds_buoy.variables['time'][:]).astype('datetime64[s]')

        # payload 1
        payload = ds_buoy.groups['payload_1']

        sensor = payload.groups['ocean_temperature_sensor_1']
        sst = sensor.variables['sea_surface_temperature'][:]
        df_buoy = pd.DataFrame({'sst1' : sst,'time' : time_index}).set_index(['time'])
        _highres_copy(df_buoy, sensor, [('sst1qc', 'sea_surface_temperature_qc')])
        var_attrs['sst1'] = _highres_instrument_attrs(sensor)

        for n in ('1', '2'):
            sensor = payload.groups['anemometer_' + n]
            _highres_copy(df_buoy, sensor, _highres_wind_columns(n))
            var_attrs['wnd' + n] = _highres_instrument_attrs(sensor)

        sensor = payload.groups['air_temperature_sensor_1']
        _highres_copy(df_buoy, sensor, [
            ('air1', 'air_temperature'),
            ('air1qc', 'air_temperature_qc'),
        ])
        var_attrs['air1'] = _highres_instrument_attrs(sensor)

        for n in ('1', '2'):
            sensor = payload.groups['barometer_' + n]
            _highres_copy(df_buoy, sensor, [
                ('air_pres' + n, 'air_pressure'),
                ('air_pres' + n + 'qc', 'air_pressure_qc'),
                ('air_pres_sea_level' + n, 'air_pressure_at_sea_level'),
                ('air_pres_sea_level' + n + 'qc', 'air_pressure_at_sea_level_qc'),
            ])
            var_attrs['air_pres' + n] = _highres_instrument_attrs(sensor)

        _highres_copy(df_buoy, payload.groups['gps_1'], [
            ('lat', 'latitude'),
            ('latqc', 'latitude_qc'),
            ('lon', 'longitude'),
            ('lonqc', 'longitude_qc'),
        ])

        # payload 2
        # NOTE(review): instrument attributes were never read for payload_2
        # (those reads were disabled); confirm whether its groups carry them.
        payload = ds_buoy.groups['payload_2']

        _highres_copy(df_buoy, payload.groups['ocean_temperature_sensor_1'], [
            ('sst2', 'sea_surface_temperature'),
            ('sst2qc', 'sea_surface_temperature_qc'),
        ])
        # payload-2 anemometers 1 and 2 become wind columns 3 and 4
        _highres_copy(df_buoy, payload.groups['anemometer_1'], _highres_wind_columns('3'))
        _highres_copy(df_buoy, payload.groups['anemometer_2'], _highres_wind_columns('4'))
        _highres_copy(df_buoy, payload.groups['air_temperature_sensor_1'], [
            ('air2', 'air_temperature'),
            ('air2qc', 'air_temperature_qc'),
        ])
    finally:
        # Release the file handle even when a group or variable is missing.
        ds_buoy.close()

    xr_buoy = xr.Dataset(df_buoy)
    for name, attrs in var_attrs.items():
        xr_buoy[name].attrs.update(attrs)
    return xr_buoy


def _highres_copy(df, group, columns):
    """Store ``group.variables[variable][:]`` as ``df[column]`` for each (column, variable) pair."""
    for column, variable in columns:
        df[column] = group.variables[variable][:]


def _highres_instrument_attrs(group):
    """Return a new dict with the group's install_date and height_of_instrument attributes."""
    return {
        'install_date': group.install_date,
        'height_of_instrument': group.height_of_instrument,
    }


def _highres_wind_columns(n):
    """Return the (column, variable) pairs for the wind columns numbered ``n``."""
    return [
        ('wnd' + n, 'wind_speed'),
        ('wnd' + n + 'qc', 'wind_speed_qc'),
        ('wdir' + n, 'wind_direction'),
        ('wdir' + n + 'qc', 'wind_direction_qc'),
    ]

#subroutine to read the 10 min CMAN data
def readData_highres10(fname):
    """Read the 10-minute wind variables of a CMAN netCDF file into an xarray Dataset.

    The continuous wind speed/direction variables (and their ``_qc``
    companions) of both anemometers in ``payload_1`` and ``payload_2`` are
    indexed by the root ``time10`` variable.  The ``gps_1`` position of
    ``payload_1`` is indexed by the root ``time`` variable and is then
    interpolated onto the ``time10`` axis.  Both time variables are cast to
    ``datetime64[s]``.

    Parameters
    ----------
    fname : str
        Path of the netCDF file to read.

    Returns
    -------
    xarray.Dataset
        Variables ``wnd1``..``wnd4`` and ``wdir1``..``wdir4`` with their
        ``...qc`` companions, plus the interpolated ``lat``, ``latqc``,
        ``lon`` and ``lonqc``, all on dimension ``time``.
    """
    ds_buoy = Dataset(fname)
    try:
        # hourly time axis (used for the GPS position)
        time_index = np.array(ds_buoy.variables['time'][:]).astype('datetime64[s]')
        # 10min time axis (used for the continuous winds)
        time10_index = np.array(ds_buoy.variables['time10'][:]).astype('datetime64[s]')

        # payload 1
        payload = ds_buoy.groups['payload_1']

        sensor = payload.groups['anemometer_1']
        wind = sensor.variables['continuous_wind_speed'][:]
        df_buoy10 = pd.DataFrame({'wnd1' : wind,'time' : time10_index}).set_index(['time'])
        # 'wnd1' already exists as the frame's first column, so skip that pair
        _highres10_copy(df_buoy10, sensor, _highres10_wind_columns('1')[1:])
        _highres10_copy(df_buoy10, payload.groups['anemometer_2'], _highres10_wind_columns('2'))

        # get gps data at lower resolution and map onto highresolution data
        sensor = payload.groups['gps_1']
        lat = sensor.variables['latitude'][:]
        df_buoy = pd.DataFrame({'lat' : lat,'time' : time_index}).set_index(['time'])
        _highres10_copy(df_buoy, sensor, [
            ('latqc', 'latitude_qc'),
            ('lon', 'longitude'),
            ('lonqc', 'longitude_qc'),
        ])

        # payload 2: its anemometers 1 and 2 become wind columns 3 and 4
        payload = ds_buoy.groups['payload_2']
        _highres10_copy(df_buoy10, payload.groups['anemometer_1'], _highres10_wind_columns('3'))
        _highres10_copy(df_buoy10, payload.groups['anemometer_2'], _highres10_wind_columns('4'))
    finally:
        # Release the file handle even when a group or variable is missing.
        ds_buoy.close()

    xr_buoy = xr.Dataset(df_buoy)
    xr_buoy10 = xr.Dataset(df_buoy10)

    time10 = xr_buoy10.time
    # NOTE(review): the QC flags are interpolated with interp()'s default
    # method just like the positions; confirm that is acceptable for flags.
    for name in ('lat', 'latqc', 'lon', 'lonqc'):
        xr_buoy10[name] = xr_buoy[name].interp(time=time10)

    return xr_buoy10


def _highres10_copy(df, group, columns):
    """Store ``group.variables[variable][:]`` as ``df[column]`` for each (column, variable) pair."""
    for column, variable in columns:
        df[column] = group.variables[variable][:]


def _highres10_wind_columns(n):
    """Return the (column, variable) pairs for the continuous wind columns numbered ``n``."""
    return [
        ('wnd' + n, 'continuous_wind_speed'),
        ('wnd' + n + 'qc', 'continuous_wind_speed_qc'),
        ('wdir' + n, 'continuous_wind_direction'),
        ('wdir' + n + 'qc', 'continuous_wind_direction_qc'),
    ]

In [3]:
# Collect every NDBC station 45005 "v00" netCDF file found under dir_in.
# NOTE(review): os.walk() only traverses local directories and ignores
# listing errors by default, so with this URL it yields nothing and `files`
# stays empty (this cell printed []).  Point dir_in at a local copy of the
# archive to get any matches.
dir_in = 'https://www.ncei.noaa.gov/thredds-ocean/catalog/ndbc/cmanwx/'
files = []
# The walk tuple is unpacked into `names`, not `files`: reusing `files` would
# overwrite the result list and append to the list being iterated.
for root, dirs, names in os.walk(dir_in, topdown=False):
    for name in names:
        if name.startswith("NDBC_45005") and name.endswith("v00.nc"):
            #in this data time = hourly  time10 = 10 min data
            fname_in=os.path.join(root, name)
            files.append(fname_in)
print(files)

[]


In [None]:
            # NOTE(review): this cell is indented as if it were the body of the
            # `if name.startswith(...)` test in the file-discovery loop.  Run on
            # its own, the leading indentation is a syntax error, and fname_in
            # would refer only to the last file matched by that loop.
            # Convert the hourly variables and write them next to the input file.
            xr_buoy=readData_highres(fname_in)
            #print(xr_buoy)
            # [:-3] drops the last three characters (the '.nc' of the selected
            # files) before the new suffix is appended.
            fname_out=fname_in[:-3]+'hrly_xrformat.nc'
            xr_buoy.to_netcdf(fname_out)
            # Same conversion for the 10-minute variables; fname_out is reused.
            xr_buoy10=readData_highres10(fname_in)
            fname_out=fname_in[:-3]+'10min_xrformat.nc'
            xr_buoy10.to_netcdf(fname_out)
