# Demo 03 - Process All Files in One Go

This notebook consolidates the processing routines into a single function, `proc_file`, so that all raw data files can be processed in one batch run.

In [None]:
# %matplotlib agg

import os
import time
import glob
import numpy as np
import scipy as sp
import scipy.signal
import matplotlib
import matplotlib.patheffects
import matplotlib.pyplot as plt
import random

from tqdm.notebook import tqdm, trange

import toshi
import cspec

# Plot theme (swap the two lines below to switch to the dark theme)
# plt.style.use('./darkmode.style')
plt.style.use('./lightmode.style')

# Colormaps built from the RGB columns (first three) of the cspec tables;
# the velocity map is quantized to 64 levels.
zmap = matplotlib.colors.LinearSegmentedColormap.from_list(
    'colors', cspec.colormap.zmap()[:, :3])
vmap = matplotlib.colors.LinearSegmentedColormap.from_list(
    'colors', cspec.colormap.vmap()[:, :3], N=64)

# Dark outline drawn behind text so that labels stay readable on top of data
path_effects = [
    matplotlib.patheffects.Stroke(foreground=(0, 0, 0, 0.8), linewidth=1.6),
    matplotlib.patheffects.Normal(),
]

In [None]:
# Transmit waveform: scaled to unit energy, then taken to the frequency domain
# (zero-padded to pc_nfft points) for use in pulse compression
pc_nfft = 1024
waveform = toshi.tx_waveform()
wf = sp.fft.fft(waveform / np.sqrt(np.sum(np.abs(waveform) ** 2)), n=pc_nfft)

# Wind-turbine table, kept as rows of (field 2, field 1) of each CSV entry.
# NOTE(review): downstream cells treat column 0 as an angle in degrees and column 1 as a range - confirm
wtc_loc = toshi.wtc_loc_from_csv()
turbines = np.array([(pt[2], pt[1]) for pt in wtc_loc], dtype=np.single)

# Raw data tree: every .dat file below data_home, in sorted order
data_home = os.path.expanduser('~/Downloads/toshiba/AKITA_IQ_TO_OU/IQdata')
files = sorted(glob.glob('{}/**/*.dat'.format(data_home), recursive=True))

In [None]:
def proc_file(filename,  nfft=256, use_window=True, use_pulse_pair=False, use_dc_removal=False,
              snr_thres=3, g=20, m=3, n=4, verbose=0):
    """
    Process one raw I/Q file into moment fields, before and after wind-turbine clutter filtering.

    The file is read with `toshi.read`, one sweep is selected (ray 2 up to the ray whose azimuth
    comes closest to that of ray 2 again), the long pulse is compressed with the module-level
    waveform spectrum `wf`, and signal power / lag-1 phase are estimated per cell. Cells tagged by
    `cspec.wtc_tags` (using the module-level `turbines` table) are then re-estimated through a
    spectral filter built from nearby clean cells, and the change in power is summarized.

    Parameters
    ----------
    filename : str
        Path of the raw data file. The first 15 characters of its basename must be a
        `%Y%m%d_%H%M%S` time stamp.
    nfft : int
        FFT length along the pulse (slow-time) axis.
    use_window : bool
        Apply a Blackman-Harris window along the pulse axis (normalized to non-windowed gain).
    use_pulse_pair : bool
        Use pulse-pair estimates instead of the periodogram-based estimates.
    use_dc_removal : bool
        Subtract the mean over pulses from every gate before estimation.
    snr_thres : float
        Cells with SNR (dB) below this value are not eligible as clean cells.
    g : int
        Maximum number of clean cells used to build the filter of a cluster.
    m : int
        Number of taps of the circular running average applied to the filter.
    n : int
        Forwarded to `cspec.wtc_tags` and to the first `cspec.dilate` call.
    verbose : int
        Non-zero prints progress information.

    Returns
    -------
    dict or None
        None when fewer than 100 rays are selected (file is reported as "not a PPI").
        Otherwise a dict with the keys: filename, filesize, az, ray_pulses, cpi_headers,
        ngate_short_hi, ngate_short_lo, ngate, scan_time, scan_el, fs, dr, r, rr, aa, xx, yy,
        cpuls (per-ray pulse matrices before windowing), snr, s, z, v (unfiltered),
        use (number of replaced cells), zc, vc (filtered), delta, delta_std, delta_mean
        (power change in dB over tagged cells whose power went down).
    """
    out = {}
    out['filename'] = filename
    out['filesize'] = os.path.getsize(filename)
    if verbose:
        print('{} - {:,.0f} B'.format(filename, out['filesize']))

    tic = time.time()
    all_ray_pulses, all_cpi_headers = toshi.read(filename)
    toc = time.time()
    if verbose:
        print('{} read in {:.2f} s'.format(os.path.basename(filename), toc - tic))

    # Fewer than 4 rays: az[3:] below would be empty and argmin would raise
    if len(all_ray_pulses) < 4:
        print('{} not a PPI'.format(os.path.basename(filename)))
        return None

    # Go through the pulses for azimuth
    az = np.zeros(len(all_ray_pulses), dtype=np.single)
    for k, pulses in enumerate(all_ray_pulses):
        az[k] = pulses[0].azimuth

    # Choose ray 2 to whatever that completes the 360-deg coverage
    k = np.argmin(np.abs(az[3:] - az[2])) + 1
    az = az[2:k+2]
    ray_pulses = all_ray_pulses[2:k+2]
    cpi_headers = all_cpi_headers[2:k+2]
    out['az'] = az
    out['ray_pulses'] = ray_pulses
    out['cpi_headers'] = cpi_headers
    if len(az) < 100:
        print('{} not a PPI'.format(os.path.basename(filename)))
        return None

    # Dimensions
    naz = len(ray_pulses)
    ngate_short_hi = ray_pulses[0][0].cpi_header.num_range_short_hi
    ngate_short_lo = ray_pulses[0][0].cpi_header.num_range_short_lo
    ngate_long = ray_pulses[0][0].cpi_header.num_range_long_hi
    ngate = ngate_long
    out['ngate_short_hi'] = ngate_short_hi
    out['ngate_short_lo'] = ngate_short_lo
    out['ngate'] = ngate_long

    # Elevation assumed to be flat from the very first pulse
    scan_time = time.strptime(os.path.basename(filename)[:15], '%Y%m%d_%H%M%S')
    scan_el = ray_pulses[0][0].elevation
    out['scan_time'] = scan_time
    out['scan_el'] = scan_el

    # Sampling code from the CPI header
    fs = 1.0e6 * (1 << cpi_headers[0].fs_code)
    dr = 3.0e8 / fs / 2
    r = 1.0e-3 * (np.arange(0, ngate, dtype=np.single) * dr + 0.5 * dr)
    out['fs'] = fs
    out['dr'] = dr
    out['r'] = r

    # Edge of range cells; the azimuth width is the mean of the 30th-60th percentile spacings
    wid_a = np.mean(sorted(np.diff(az))[int(0.3 * len(az)):int(0.6 * len(az))])
    end_a = az[-1] + wid_a
    if end_a >= 360.0:
        end_a -= 360.0
    ae = np.append(az, end_a)
    re = 1.0e-3 * np.arange(0, ngate + 1, dtype=np.single) * dr

    # Radar cell locations
    ce = np.cos(np.deg2rad(scan_el))
    rr, aa = np.meshgrid(re, np.deg2rad(ae))
    out['rr'] = rr
    out['aa'] = aa
    out['xx'] = rr * ce * np.sin(aa)
    out['yy'] = rr * ce * np.cos(aa)

    # Noise estimate, try azimuth 0, around 20-25 km
    # ia = np.argmin(np.abs(a[3:] - 0.0)) + 1
    # ir, er = np.argmin(np.abs(r - 20.0)), np.argmin(np.abs(r - 25.0))
    # samples = np.zeros((len(ray_pulses[ia]), er-ir), dtype=np.csingle)

    # Gather the samples. Ignore phase code since we are only interested in amplitude
    # for k, pulse in enumerate(ray_pulses[ia]):
    #     samples[k, :] = pulse.h_long_hi[ir:er]
    # noise = np.mean(np.abs(samples)) ** 2

    # Hard code noise estimate to be about 24 (eye ball)
    noise = 24
    if verbose:
        print('fs = {:,.0f} Hz'.format(fs))
        print('dr = {:.1f} m'.format(dr))
        print('k = {} -> 2 ... {} ({})'.format(k, k+2, naz))
        print('noise = {:.4f} (16-bit ADU)'.format(noise))

    s = np.zeros((naz, ngate), dtype=np.single)
    v = np.zeros((naz, ngate), dtype=np.single)

    # Go through the pulses
    cpuls = []
    for k, (pulses, cpi_header) in enumerate(zip(ray_pulses, cpi_headers)):
        npulse = len(pulses)

        # Decode the long pulse, then compress using wf
        p = np.zeros((npulse, ngate_long), dtype=np.csingle)
        for j, pulse in enumerate(pulses):
            p[j, :] = pulse.h_long_hi * np.exp(-1j * pulse.phase_h_long)       # Phase decoding
        pf = sp.fft.fft(p, n=pc_nfft, axis=1)                                  # Pulse compression in Fourier domain
        pc = sp.fft.ifft(pf * wf, n=pc_nfft, axis=1)                           # Return to time domain

        # Gather the short pulses and the compressed long pulses
        p = np.zeros((npulse, ngate_long), dtype=np.csingle)
        for j, pulse in enumerate(pulses):
            c = np.exp(-1j * pulse.phase_h_short)                              # Phase code
            p[j, :ngate_long] = pc[j, :ngate_long]                             # Long only
            p[j, :ngate_short_hi] = pulse.h_short_hi * c                       # Short hi
            p[j, :ngate_short_lo] = pulse.h_short_lo * c                       # Short lo

        # DC removal if specified (hardware should have done this already)
        if use_dc_removal:
            p = p - np.mean(p, axis=0)                                         # Remove DC if desired

        # Keep a real copy of the compressed pulse. Appending `p` itself would alias the array that
        # is windowed in place just below, and the filter stage would then window the data twice.
        cpuls.append(p.copy())

        # Data windowing
        if use_window:
            w = scipy.signal.get_window('blackmanharris', p.shape[0])
        else:
            w = np.ones((p.shape[0],))
        w /= np.sqrt(np.sum(w ** 2)) / np.sqrt(p.shape[0])                     # Normalize to non-windowed gain
        p *= w[:, np.newaxis]                                                  # Windowing (broadcast over gates)

        # Pulse pair or spectral processing
        if use_pulse_pair:
            pp = p[1:, :] * np.conj(p[:-1, :])                                 # x(n) * x'(n-1)
            s[k, :] = np.mean(np.abs(p) ** 2, axis=0)                          # s(n) = E[x(n) * x'(n)]
            v[k, :] = np.angle(np.sum(pp, axis=0))                             # r(1) = E[x(n) * x'(n-1)]
        else:
            spec = np.fft.fft(p, nfft, axis=0)                                 # FFT -> Periodogram
            s[k, :] = np.mean(np.abs(spec) ** 2, axis=0) / p.shape[0]          # Signal power
            v[k, :] = np.angle(np.fft.ifft(spec * np.conj(spec), axis=0)[1])   # IFT -> ACF[1]

    # Gather the compressed pulse for output
    out['cpuls'] = cpuls

    # Signal
    s -= noise
    s[s <= 0] = 1.0e-6                                                         # Avoid log(0)
    snr = 10 * np.log10(s / noise)                                             # Signal-to-noise ratio in dB
    z = 10 * np.log10(s * (r + 0.5e-3 * dr) ** 2) - 40                         # Estimated ZCal = -40
    z[:, :ngate_short_hi] += 15                                                # ~15-dB on the short waveform, perhaps?

    # Proper velocity unfolding is not of concern here so just replace odd rays with even rays.
    # Only the first naz // 2 even rays have an odd partner; slicing explicitly keeps both sides
    # the same length when the ray count is odd.
    npair = naz // 2
    v[1::2, :] = v[0:2 * npair:2, :]

    # Thresholding at SNR ~ 0 dB
    o = snr < -1
    z[o] = np.nan
    v[o] = np.nan

    out['snr'] = snr
    out['s'] = s
    out['z'] = z
    out['v'] = v

    # Wind turbine cluster tags
    tags = cspec.wtc_tags(turbines, az, r, n=n)

    sc = s.copy()
    vc = v.copy()
    last_tag = np.max(tags.flatten())

    # Fixing random state for reproducibility
    random.seed(20210301)

    use = 0
    for itag in range(1, last_tag+1):
        emask = tags == itag
        fmask = cspec.dilate(emask, n=n)
        gmask = cspec.dilate(fmask, n=2) ^ fmask                               # Ring just outside the dilated cluster
        gmask[snr < snr_thres] = False
        dirty_cells = cspec.mask2cells(fmask)
        clean_cells = cspec.mask2cells(gmask)

        # Gather all clean cells, shuffle the order, keep at most g of them
        ii = np.arange(len(clean_cells))
        random.shuffle(ii)
        x_clean_cells = [clean_cells[ic] for ic in ii[:g]]

        # Without clean cells the composite spectrum is all zeros and its normalization is 0 / 0.
        # The resulting NaN filter never replaces a cell, so skip the cluster outright.
        if len(x_clean_cells) == 0:
            if verbose:
                print('cluster #{} has no clean cells, skipped'.format(itag))
            continue

        # Composite spectrum of the selected clean cells
        spec = np.zeros(nfft, dtype=np.single)
        for (ia, ir) in x_clean_cells:
            p = cpuls[ia][:, ir]
            w = scipy.signal.get_window('blackmanharris', len(p))
            spec += np.abs(np.fft.fft(p * w, nfft))

        # Add in a DC filter / GMAP-like zeroing
        spec /= np.sqrt(np.sum(spec ** 2))                                            # Normalize by noise gain
        c = int(nfft * 0.02)
        spec[:c+1] = 1.0e-1
        spec[nfft - c:] = 1.0e-1                                                      # spec[-c:] would cover everything when c == 0

        # m-tap circular averaging
        filt = spec.copy()
        for tap in range(1, m):
            filt += np.roll(spec, tap)
        filt = np.roll(filt, -int(m/2))                                               # Compensate for running-average lag
        filt /= m                                                                     # Normalize by tap length
        if verbose:
            print('cluster #{} filter gain = {:.2f} dB'.format(
                itag, 10.0 * np.log10(np.sum(filt ** 2))))

        # Go through the dirty cells
        for (ia, ir) in dirty_cells:
            p = cpuls[ia][:, ir]
            if use_window:
                w = scipy.signal.get_window('blackmanharris', len(p))
            else:
                w = np.ones((len(p),))
            w /= np.sqrt(np.sum(w ** 2)) / np.sqrt(len(p))                            # Normalize to non-windowed gain
            spec = np.fft.fft(p * w, nfft)                                            # FFT -> Periodogram
            spec *= filt                                                              # Filtering in Fourier domain
            tmp = np.mean(np.abs(spec) ** 2) / len(p)                                 # Signal power
            tmp -= noise
            # Replace the cell if the filtered spectrum has lower power
            if s[ia, ir] > tmp:
                sc[ia, ir] = tmp
                vc[ia, ir] = np.angle(np.fft.ifft(spec * np.conj(spec), axis=0)[1])   # IFT -> ACF[1]
                use += 1

    if verbose:
        print('use = {}'.format(use))
    out['use'] = use

    # Signal
    sc[sc <= 0] = 1.0e-6                                                              # Avoid log(0)
    snr = 10 * np.log10(sc / noise)                                                   # Signal-to-noise ratio in dB
    zc = 10 * np.log10(sc * (r + 0.5e-3 * dr) ** 2) - 40                              # Estimated ZCal = -40
    zc[:, :ngate_short_hi] += 15                                                      # ~15-dB on the short waveform, perhaps?

    # Proper velocity unfolding is not of concern here so just replace odd rays with even rays
    vc[1::2, :] = vc[0:2 * npair:2, :]

    # Thresholding
    o = snr < -1
    zc[o] = np.nan
    vc[o] = np.nan
    out['zc'] = zc
    out['vc'] = vc

    # Evaluate: power change (dB) over tagged cells that had a valid z, keeping only reductions
    emask = np.logical_and(tags > 0, np.isfinite(z))
    c2s = 10 * np.log10(sc[emask]) - 10 * np.log10(s[emask])
    o = c2s < 0
    c = np.sum(o)
    c2s = c2s[o]
    out['delta'] = c2s
    out['delta_std'] = np.std(c2s)
    out['delta_mean'] = np.mean(c2s)
    if verbose:
        print('Suppression = {:.2f} dB +/- {:.2f} dB ({} cells)'.format(out['delta_mean'], out['delta_std'], c))

    return out

In [None]:
def show_chart_duo(xx, yy, im1, im2, sym1, sym2, title, label,
                   cmap=zmap, show_turbine=False,
                   xlim=(-18, 18), ylim=(-18, 18), clim=(0.75, 6.75),
                   grid_color=(0.5, 0.5, 0.5)):
    """
    Draw two fields side by side on the same grid with one shared horizontal colorbar.

    xx, yy are the cell-edge coordinates handed to `pcolormesh`; im1 / im2 are the left / right
    fields and sym1 / sym2 the large labels printed in the lower-left corner of each panel.
    `title` goes on top of the figure and `label` on top of the colorbar. Range rings come from
    the module-level `ring_x`, `ring_y`, `ring_r`; turbine markers (optional) from `x_turb`,
    `y_turb`. Returns the matplotlib figure.
    """
    dpi = 144
    panel_w, panel_h = 500, 500
    fig_w = (2 * panel_w + 150) / dpi
    fig_h = (panel_h + 200) / dpi
    fig = plt.figure(figsize=(fig_w, fig_h), dpi=dpi)

    def to_rect(left, bottom, width, height):
        # Pixel geometry -> figure-fraction rectangle
        return np.array([left / fig_w, bottom / fig_h, width / fig_w, height / fig_h]) / dpi

    rects = [
        to_rect(75, 150, panel_w, panel_h),
        to_rect(125 + panel_w, 150, panel_w, panel_h),
    ]

    # Lower-left anchor of the panel symbol, 5% into the axes
    sym_x = 0.95 * xlim[0] + 0.05 * xlim[1]
    sym_y = 0.95 * ylim[0] + 0.05 * ylim[1]

    meshes = []
    for index, (rect, field, sym) in enumerate(zip(rects, (im1, im2), (sym1, sym2))):
        ax = fig.add_axes(rect)
        mesh = ax.pcolormesh(xx, yy, field, vmin=clim[0], vmax=clim[1], cmap=cmap, shading='flat')
        ax.plot(ring_x, ring_y, color=grid_color, linewidth=0.75)
        # Label every ring except the outermost one
        for i, radius in enumerate(ring_r[:-1]):
            ax.text(ring_x[9, i], ring_y[9, i], '{} km'.format(radius),
                    fontsize=9, fontweight='bold', color='white', path_effects=path_effects,
                    ha='center', va='center', rotation=-17)
        # Only the first panel keeps its y tick labels
        if index > 0:
            ax.set_yticklabels([])
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        ax.text(sym_x, sym_y, sym, fontsize=24, color='white', path_effects=path_effects)
        if show_turbine:
            ax.plot(x_turb, y_turb, 'xk', markersize=4)
        meshes.append(mesh)

    # Colorbar, driven by the first panel
    cax = fig.add_axes(to_rect(325, 45, 550, 20))
    fig.colorbar(meshes[0], cax=cax, orientation='horizontal')
    cax.set_title(label, fontsize=10)

    # Main title, centered on the gap between the two panels
    title_x = rects[1][0] - 25 / fig_w / dpi
    fig.text(title_x, 0.96, title, ha='center', fontsize=13, weight='bold')
    return fig

def show_results(sweep):
    """
    Plot the unfiltered / filtered pairs of a processed sweep.

    `sweep` is the dict returned by `proc_file`. Returns (figz, figv): the Z vs Zc figure and
    the V vs Vc figure, both titled with the scan time and the mean suppression.
    """
    stamp = time.strftime('%Y/%m/%d %H:%M:%S UTC', sweep['scan_time'])
    title = '{} - ({:.2f} dB)'.format(stamp, sweep['delta_mean'])
    grid_color = (0, 0, 0, 0.6)
    xx, yy = sweep['xx'], sweep['yy']

    figz = show_chart_duo(
        xx, yy, sweep['z'], sweep['zc'], 'Z', 'Zc', title, 'Reflectivity (dBZ)',
        cmap=zmap, clim=(-32, 96), grid_color=grid_color)
    figv = show_chart_duo(
        xx, yy, sweep['v'], sweep['vc'], 'V', 'Vc', title, 'Velocity (m/s)',
        cmap=vmap, clim=(-5, 5), grid_color=grid_color)
    return figz, figv

In [None]:
# Range rings: one column per ring radius (5, 10, 15, 20), one row per 2-deg azimuth step.
# Azimuth is measured clockwise from the +y axis, hence x = r sin(a), y = r cos(a).
ring_r = np.arange(5, 21, 5)
ring_a = np.deg2rad(np.arange(0, 361, 2))
ring_x = np.sin(ring_a)[:, np.newaxis] * ring_r[np.newaxis, :]
ring_y = np.cos(ring_a)[:, np.newaxis] * ring_r[np.newaxis, :]

# Radial spokes every 45 deg from radius 5 to 50; the NaN breaks the line between spokes
cross_r = np.array([5, 50, np.nan])
cross_a = np.deg2rad(np.arange(0, 360, 45))
cross_x = (np.cos(cross_a)[:, np.newaxis] * cross_r[np.newaxis, :]).ravel()
cross_y = (np.sin(cross_a)[:, np.newaxis] * cross_r[np.newaxis, :]).ravel()

# Turbine locations: column 0 of `turbines` is taken as an angle in degrees and column 1 as
# the range, converted with the same x = r sin(a), y = r cos(a) convention as the rings
r_turb = turbines[:, 1]
a_turb = np.deg2rad(turbines[:, 0])
x_turb, y_turb = r_turb * np.sin(a_turb), r_turb * np.cos(a_turb)

In [None]:
# Processing parameters shared by the single-file run here and the batch run further down
snr_thres = 3    # SNR threshold for clean cell selection
g = 30           # Maximum gates for clean cell selection
m = 3            # Tap count on spectral smoothing
n = 4            # Dilation recursion count

# Fail with a clear message rather than an IndexError on files[0]
if len(files) == 0:
    raise FileNotFoundError('No .dat files found under {}'.format(data_home))

filename = files[0]
sweep = proc_file(filename, snr_thres=snr_thres, g=g, m=m, n=n, verbose=1)

# proc_file returns None when the file does not hold a full PPI; show_results cannot plot that
if sweep is None:
    raise RuntimeError('{} could not be processed as a PPI'.format(os.path.basename(filename)))
figz, figv = show_results(sweep)

In [None]:
# sweep = proc_file(files[0], snr_thres=snr_thres, g=g, m=m, n=n, use_pulse_pair=False, verbose=1)
# figz, figv = show_results(sweep)

In [None]:
# Per-cell suppression of the sweep processed above, with its mean as a dashed line
cell_deltas = sweep['delta']
cell_mean = sweep['delta_mean']

fig, ax = plt.subplots()
ax.plot(cell_deltas, '.')
ax.plot([0, len(cell_deltas)], [cell_mean, cell_mean], '--')
ax.text(0.98 * len(cell_deltas), -97, os.path.basename(filename), ha='right')
ax.set_ylim((-100, 0))
ax.set_xlabel('Cell Id')
ax.set_ylabel('Suppression (dB)')
ax.set_title('Clutter Suppression - mean = {:.2f} dB'.format(cell_mean))

In [None]:
# Process every file: save the Z and V comparison figures and collect the per-cell suppression
# values of each sweep.
dest = os.path.expanduser('~/Downloads/toshiba/figs')
os.makedirs(dest, exist_ok=True)                     # savefig does not create missing folders

raw_deltas = []
kept_files = []
for file in tqdm(files):
    sweep = proc_file(file, snr_thres=snr_thres, g=g, m=m, n=n, verbose=0)
    if sweep is None:
        # Not a PPI: nothing to plot or to collect
        continue
    figz, figv = show_results(sweep)
    tstr = time.strftime('%Y%m%d-%H%M%S', sweep['scan_time'])
    figz.savefig('{}/{}-Z.png'.format(dest, tstr), dpi=144)
    figv.savefig('{}/{}-V.png'.format(dest, tstr), dpi=144)
    plt.close('all')
    kept_files.append(file)
    raw_deltas.append(sweep['delta'])

# Keep `files` in step with `raw_deltas`: the results table pairs the two lists by position,
# so files that were skipped above must not stay in the list.
files = kept_files

In [None]:
# Average of the per-file mean suppression (dB). Each entry of raw_deltas is the array of
# per-cell values of one file, so the per-file mean is np.mean(x); x[1] would only pick the
# second cell of each file. Files without any suppressed cell are left out.
file_means = [np.mean(x) for x in raw_deltas if len(x) > 0]
print(np.mean(file_means))

In [None]:
# One row per processed file: number of suppressed cells, mean and standard deviation (dB).
# Rows are paired by position, so a length mismatch would attach values to the wrong files.
if len(files) != len(raw_deltas):
    raise ValueError('files ({}) and raw_deltas ({}) are out of step; cannot pair them by position'.format(
        len(files), len(raw_deltas)))

os.makedirs('blob', exist_ok=True)
with open('blob/results.csv', 'w') as fid:
    fid.write('Filename, Use, Mean, Std\n')
    # `path` rather than `filename`, so the single-file name used by the plots is not overwritten
    for path, delta in zip(files, raw_deltas):
        fid.write('{}, {}, {}, {}\n'.format(os.path.basename(path), len(delta), np.mean(delta), np.std(delta)))

In [None]:
# Pool the per-file suppression arrays into one flat array covering all processed files
deltas = np.hstack(raw_deltas)

In [None]:
def _plot_delta_hist(values, bins, text_dx, line_top=2700, text_y=1000):
    """
    Histogram of suppression values (dB) with the mean marked by a dashed vertical line.

    values   : 1-D array of suppression values
    bins     : bin edges passed to `hist`
    text_dx  : how far left of the mean the 'Avg' annotation is placed
    line_top : upper end of the mean line, in counts
    text_y   : baseline of the annotation, in counts

    Returns (fig, mean of values).
    """
    avg = np.mean(values)
    fig, ax = plt.subplots()
    # Histogram counts are discarded on purpose: binding them to `n` would overwrite the
    # notebook-level dilation count `n` that proc_file is called with.
    _, _, bars = ax.hist(values, bins=bins, alpha=0.6)
    for bar in bars:
        bar.set_facecolor('grey')
        bar.set_edgecolor('white')
    ax.plot([avg, avg], [0, line_top], '--')
    ax.text(avg - text_dx, text_y, 'Avg = {:.2f} dB'.format(avg), ha='center', va='baseline', rotation=90)
    ax.set_xlabel('Suppression (dB)')
    ax.set_ylabel('Count')
    ax.set_title('Distribution of Suppression Values')
    return fig, avg


# All suppression values
fig, mean_delta = _plot_delta_hist(deltas, np.arange(-95, -5, 1), text_dx=2)

# Same view restricted to values above -50 dB
o = deltas > -50
fig, mean_delta = _plot_delta_hist(deltas[o], np.arange(-40, -5, 1), text_dx=1)

In [None]:
# Persist the pooled suppression values. The array is passed positionally, so np.savez
# stores it under the default key 'arr_0'. The 'blob' folder must already exist.
np.savez('blob/deltas.npz', deltas)