In [None]:
import glob
import datetime as dt
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# import xarray as xr

%matplotlib inline

: 

TOP

In [None]:
# Collect the ADCP ASCII exports in the working directory.
# glob returns files in arbitrary (filesystem) order; sorting makes the
# processing order -- and therefore which file's results remain in memory
# for the cells further down -- reproducible between runs.
ascii_files = sorted(glob.glob('*ASC*'))
ascii_files

: 

In [None]:
# Labels for the per-ensemble header values; ascii2pd assigns this list as the
# column names of the header DataFrame, so the order here must match the order
# in which parse_header collects the values.
columns = [
    # time stamp, ensemble counters and instrument attitude/temperature
    'datetime', 'ensemble_number', 'number_of_ensembles',
    'pitch', 'roll', 'corrected_heading', 'adcp_temp',
    # bottom-track velocities, depth sounder / GGA values and per-beam depths
    'v_bt_x', 'v_bt_y', 'v_bt_z', 'v_bt_err',
    'depth_snd', 'gga_alt', 'gga_dalt', 'gga_hdop',
    'depth_beam1', 'depth_beam2', 'depth_beam3', 'depth_beam4',
    # elapsed distance and time totals
    'total_elapsed_dist', 'total_elapsed_time',
    'total_dist_n', 'total_dist_e', 'total_dist_mg',
    # position
    'lat', 'lon', 'invalid', 'fixed_value_not_used',
    # discharge values and shore/edge estimates
    'Q_middle', 'Q_top', 'Q_bot',
    'start_shore_dist_est', 'start_dist', 'end_shore_dist_est', 'end_dist',
    'start_depth', 'end_depth',
    # bin count, units and scale factors
    'nbins', 'unit', 'vel_ref', 'intensity_units',
    'intensity_scale_fac', 'sound_abs_fac',
]
columns

: 

In [None]:
# Column labels for the per-bin rows of each ensemble (one row per depth bin);
# used by ascii2pd when building the per-ensemble DataFrames.
df_cols = [
    'depth', 'vmag', 'vdir',
    'vx', 'vy', 'vz', 'verr',
    'bs1', 'bs2', 'bs3', 'bs4',
    'pctg', 'Q',
]

: 

Definitions

In [None]:
def parse_header(f):
    """Read the six header rows of one ensemble from an iterator of text lines.

    Parameters
    ----------
    f : iterator of str
        Open text file (or any iterator of lines) positioned at the first
        header row of an ensemble. Blank lines before the header are skipped.

    Returns
    -------
    ensemble_number : int
        Ensemble number taken from the first header row.
    data : list
        Header values in reading order: the time stamp, the two ensemble
        counters, the remaining first-row values as floats, the values of
        rows 2-5 as floats, then fields 1-3 of row 6 as strings and the
        rest of row 6 as floats. The bin count itself is not appended.
    nbins : int
        First field of row 6; the number of data rows that follow.
    timestamp : datetime.datetime
        Time stamp built from the first seven fields of row 1.

    Raises
    ------
    StopIteration
        When the iterator is exhausted; ascii2pd relies on this to detect
        the end of the file.
    """
    # Skip blank lines (e.g. a trailing newline at the end of the file) so they
    # end in StopIteration instead of an IndexError on an empty row.
    row1 = next(f).split()
    while not row1:
        row1 = next(f).split()

    # Two-digit years are taken to be 20xx. A numeric test is used because a
    # substring test ('20' in year) leaves the year "20" as year 20.
    year = int(row1[0])
    if year < 100:
        year += 2000
    month, day, hour, minute, second, hundredths = map(int, row1[1:7])
    # The seventh field is in hundredths of a second -> microseconds.
    # datetime.datetime is used directly; the pd.datetime alias was deprecated
    # in pandas 1.0 and has since been removed.
    timestamp = dt.datetime(year, month, day, hour, minute, second,
                            hundredths * 10000)
    ensemble_number, ne = map(int, row1[7:9])

    data = [timestamp, ensemble_number, ne] + [float(v) for v in row1[9:]]
    # rows 2-5 are purely numeric
    for _ in range(4):
        data += [float(v) for v in next(f).split()]
    # row 6: bin count, three text fields, then numeric fields
    row6 = next(f).split()
    nbins = int(row6[0])
    data += row6[1:4]
    data += [float(v) for v in row6[4:]]
    return ensemble_number, data, nbins, timestamp

def ascii2pd(ascii_file, make_geometries=False):
    """Parse one ADCP ASCII export into a header table and a velocity panel.

    Parameters
    ----------
    ascii_file : str
        Path of the ASCII file to read.
    make_geometries : bool, default False
        If True, add a point geometry per ensemble and projected X/Y
        coordinates; otherwise X/Y are copies of lon/lat.

    Returns
    -------
    df : pandas.DataFrame
        One row per ensemble with the header values labelled by ``columns``,
        indexed by the ensemble time stamp.
    pn : pandas.Panel
        Per-ensemble bin data (columns ``df_cols``) keyed by ensemble time stamp.

    Raises
    ------
    ValueError
        If the first non-blank line does not consist of exactly seven integers.
    StopIteration
        If the file contains only blank lines.
    AttributeError
        On pandas >= 0.25, where ``pd.Panel`` no longer exists.
    """
    data = {}
    dfs = {}

    # The context manager closes the file even when parsing raises.
    with open(ascii_file) as f:
        # skip leading blank lines; the first non-blank line holds the file-level settings
        line = next(f).split()
        while not line:
            line = next(f).split()

        # The seven file-level integers are not used further, but the line is
        # still validated so a malformed file fails here rather than mid-parse.
        file_info = [int(v) for v in line]
        if len(file_info) != 7:
            raise ValueError(
                'expected 7 integers on the first non-blank line, got {}'.format(len(file_info)))

        try:
            while True:
                n, d, nbins, timestamp = parse_header(f)
                data[n] = d
                # nbins rows of bin data follow each header
                rows = [[float(v) for v in next(f).split()] for _ in range(nbins)]
                dfs[timestamp] = pd.DataFrame(rows, columns=df_cols)
        except StopIteration:
            # the file iterator is exhausted: normal end of file
            pass

    df = pd.DataFrame.from_dict(data, orient='index')
    # NOTE(review): `columns` contains 'nbins' but parse_header does not append
    # the bin count to its row; confirm the label count matches the row length
    # for your files, otherwise this assignment raises a length-mismatch error.
    df.columns = columns
    df.index = pd.to_datetime(df.datetime)

    if make_geometries:
        # NOTE(review): Point and projectdf are not imported or defined in the
        # visible cells; this branch raises NameError unless they are provided
        # elsewhere (presumably shapely's Point and a projection helper).
        df['geometry'] = [Point(r.lon, r.lat) for i, r in df.iterrows()]
        df['geometry'] = projectdf(df, '+init=epsg:4269', '+init=epsg:26715')
        df['X'] = [p.x for p in df.geometry]
        df['Y'] = [p.y for p in df.geometry]
    else:
        df['X'] = df.lon
        df['Y'] = df.lat

    # Make a data panel of the velocity data.
    # pd.Panel was deprecated in pandas 0.20 and removed in 0.25, so this line
    # (and the Panel-based cells below) needs an older pandas to run.
    pn = pd.Panel(dfs)

    return df, pn

def stack(df, pn, vmin, vmax, freq, make_geometries=False):
    """Resample to ``freq`` and return bin-averaged velocities per time step.

    Parameters
    ----------
    df : pandas.DataFrame
        Header table from ascii2pd (datetime-indexed; uses columns X, Y, datetime).
    pn : pandas.Panel
        Bin-data panel from ascii2pd.
    vmin, vmax : number
        Valid range for 'vx'. Wherever 'vx' falls outside it, both 'vx' and
        'vy' are excluded from the averages.
    freq : str
        Resampling frequency passed to ``resample`` (e.g. '10s').
    make_geometries : bool, default False
        If True, add a point geometry built from the resampled X/Y.

    Returns
    -------
    pandas.DataFrame
        Resampled header values plus 'vx' and 'vy' averaged over the bins,
        with rows containing any NaN dropped.
    """
    pnr = pn.dropna(axis='columns', how='all').resample(freq, axis=0).mean()

    vx = pnr.loc[:, :, 'vx']
    vy = pnr.loc[:, :, 'vy']
    out_of_range = (vx.values < vmin) | (vx.values > vmax)
    # Mask explicitly into new frames. Calling mask(inplace=True) on a temporary
    # selection only changes the panel if that selection happens to be a view.
    vx_valid = vx.mask(out_of_range)
    vy_valid = vy.mask(out_of_range)

    dfr = df[['X', 'Y', 'datetime']].resample(freq).mean()
    if make_geometries:
        # NOTE(review): Point is not imported in the visible cells (presumably
        # shapely.geometry.Point); it must be in scope for this branch.
        dfr['geometry'] = [Point(r.X, r.Y) for i, r in dfr.iterrows()]
    # column-wise mean: one value per time step, averaged over the rows (bins)
    dfr['vx'] = vx_valid.mean()
    dfr['vy'] = vy_valid.mean()
    return dfr.dropna(axis=0)

: 

### Read in each file and output the results to csv files

In [None]:
# Parse every non-empty ASCII file and write two CSVs per file into `outpath`.
outpath = 'output'
os.makedirs(outpath, exist_ok=True)

for ascii_file in ascii_files:
    if os.path.getsize(ascii_file) == 0:
        continue
    print(ascii_file)
    # ascii2pd builds a pd.Panel, which only exists in pandas < 0.25
    df, pn = ascii2pd(ascii_file)
    # resampled summary; not written out, kept for interactive inspection
    dfr = stack(df, pn, vmin=-1000, vmax=1000, freq='10s')

    # flatten the panel to a dataframe
    dfall = pn.swapaxes(0, 2).to_frame()

    # Write the CSVs into the output directory. Writing them next to the inputs
    # left outpath unused and produced file names that still match '*ASC*', so
    # a re-run of the notebook would try to parse its own output.
    out_base = os.path.join(outpath, os.path.basename(ascii_file))
    dfall.to_csv('{}_alldata.csv'.format(out_base))
    df.to_csv('{}_header_info.csv'.format(out_base), index=False)

: 

### Dataframe of header information

In [None]:
# Preview the header table; `df` is whatever the last iteration of the file loop left behind.
df.head()

: 

In [None]:
# Column labels of the header table (the `columns` list plus the X/Y columns added in ascii2pd).
df.columns

: 

### Panel of velocity and backscatter data

In [None]:
# Inspect the axes of the panel returned by ascii2pd for the last file processed.
pn.axes

: 

In [None]:
# Image of the 'vy' values from the raw panel, colour-limited to +/-10.
fig, ax = plt.subplots(figsize=(20, 5))
vy_image = ax.imshow(pn.ix[:, :10., 'vy'], vmin=-10, vmax=10, interpolation='None')
ax.set_aspect(500)
fig.colorbar(vy_image, ax=ax)

: 

### process single ADCP panel

In [None]:
# Same steps as stack(), run at cell level so the intermediate panels
# (pnr, pnrs) stay available to the plotting cells below.
vmin, vmax = -1000, 1000 # valid range of velocities
freq = '10s' # resampling frequency

# drop all-NaN entries along axis 1, then average the panel over `freq` windows along axis 0
pnr = pn.dropna(axis=1, how='all').resample(freq, axis=0).mean().copy()

# True wherever 'vx' lies outside [vmin, vmax]; the same mask is applied to 'vy'
inds = (pnr.ix[:, :, 'vx'].values < vmin) | (pnr.ix[:, :, 'vx'].values > vmax)
pnrs = pnr.copy()
# NOTE(review): these calls mutate the object returned by .ix in place; pnrs is
# only changed if that selection is a view of the panel's data -- confirm.
pnrs.ix[:, :, 'vx'].mask(inds, inplace=True)
pnrs.ix[:, :, 'vy'].mask(inds, inplace=True)

: 

In [None]:
# Average of the four beam backscatter columns, taken from the raw panel `pn`.
# A preceding assignment computed the same thing from the resampled panel `pnr`
# but was overwritten on the next line, so it has been dropped; substitute
# `pnr` for `pn` here to look at the resampled data instead.
bs = pn.ix[:, :10, ['bs1', 'bs2', 'bs3', 'bs4']].mean(axis=2)
bs.head()

: 

In [None]:
# Image of the beam-averaged backscatter `bs`, colour-limited to 70-100.
fig, ax = plt.subplots(figsize=(20, 5))
bs_image = ax.imshow(bs, vmin=70, vmax=100)
ax.set_aspect(200)
fig.colorbar(bs_image, ax=ax)

: 

### Plot velocity component across panel

In [None]:
# Image of 'vy' from the resampled panel `pnr` (before range masking).
fig, ax = plt.subplots(figsize=(20, 5))
vy_image = ax.imshow(pnr.ix[:, :10, 'vy'], vmin=-10, vmax=10)
ax.set_aspect(40)
fig.colorbar(vy_image, ax=ax)

: 

In [None]:
# Image of 'vy' from `pnrs`, the copy of the resampled panel that the masking step was applied to.
fig, ax = plt.subplots(figsize=(20, 5))
vy_image = ax.imshow(pnrs.ix[:, :10, 'vy'], vmin=-10, vmax=10)
ax.set_aspect(40)
fig.colorbar(vy_image, ax=ax)

: 

### Slice panel to get dataframe of single measurement

In [None]:
# Select the 'vy' values for the panel items up to the label '2016-08-24 19'.
# The date is hard-coded -- presumably it matches the survey being examined; adjust for other files.
pn.ix[:'2016-08-24 19', :, 'vy']

: 

### Flatten the panel to a DataFrame

In [None]:
# Swap the first and last panel axes so the measured quantities become the
# columns, then flatten the panel into a DataFrame with a two-level row index.
dfall = pn.swapaxes(0, 2).to_frame()
# Name the index levels through the MultiIndex itself; assigning .name on the
# objects in index.levels is not a supported way to rename levels.
dfall.index.names = ['depth_bin', 'datetime']
dfall.head()

: 

### write dataframe to csv and then read it back in

In [None]:
# Write the flattened panel, including its row index, to the working directory.
dfall.to_csv('alldata.csv')

: 

In [None]:
# Read the CSV back. No index_col is given, so the former index levels come
# back as ordinary columns and the frame gets a fresh integer index.
dfall2 = pd.read_csv('alldata.csv')

: 

In [None]:
# Preview the round-tripped data to compare against dfall above.
dfall2.head()

: 

### write csv of header information

In [None]:
# Write the header table without its index: ascii2pd builds the index from the
# 'datetime' column, so writing it would duplicate that column.
df.to_csv('header_info.csv', index=False)

: 

In [None]:
# Display the full header table for the last file processed (use df.head() for a shorter preview).
df

: 

: 