# Aquistore Data Processing and Figures for 2025 Annual SEP Meeting: Aquistore DAS First Look Report

<br>

Author: Thomas Cullison, Stanford University

<br>

## Environment Setup

### Auto Import

In [1]:
%load_ext autoreload
%autoreload 2

### Verify Python EXE Path

In [None]:
from sys import executable as mypyexe
print(mypyexe)

In [None]:
!which {mypyexe}

### Common Imports

In [4]:
# import pynmea2
import folium
import utm
import re

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.patches as patches

from copy import deepcopy
from time import time
from functools import partial

from IPython.display import display, HTML

from matplotlib.ticker import FixedLocator
from matplotlib.ticker import FormatStrFormatter

from scipy.ndimage import median_filter
from scipy.signal import detrend as sci_detrend

from pathlib import Path as plib_path
from os.path import join as os_p_join

from copy import deepcopy as dcopy

### Import SeisBear functions

In [5]:
from seisbear.core import _write_seisbear, _read_segyio, _read_seisbear, _merge_file_headers
from seisbear.mapping import get_html_folium_map,  _make_marker_dict, _make_folium_group_from_dict, _make_folium_base_map, _get_latlot_means, _add_folium_groups_to_map

### Import Aquistore functions

In [6]:
from aquistore.core import _read_sp1, _load_aquistore_das_seisbear

## Read SP1 file and save to DataFrame

<br>
This file has coordinate locations for all boreholes

In [7]:
sp1_path = "/shared/data/aquistore/Aquistore_4D_Fibre_Locations_NRCAN_8313.Sp1"

In [None]:
sp1_df = _read_sp1(sp1_path)
display(sp1_df)

### Show attrs for SP1 DataFrame

In [None]:
display(sp1_df.attrs['header'])
display(sp1_df.attrs['easting-northing units'])
display(sp1_df.attrs['zone'])

## Read DAS Data Files

### Set Path to Data Files

In [10]:
data_path = '/shared/data/aquistore'

### Read DAS Data from seisbear Files

In [11]:
hset_df, dlst = _load_aquistore_das_seisbear(data_path)

### Show File and Channel (Trace) Headers

In [None]:
hset_df

In [None]:
hset_df.attrs['EBCDIC_Headers'][0]

### Get All Source Coordinates

In [None]:
src_latlon_df = hset_df.drop_duplicates(subset=['SourceLat', 'SourceLon'])[['SourceLat', 'SourceLon', 'SourceX', 'SourceY']]
src_latlon_df

### Get Channel (receiver) Coordinates

In [None]:
rec_latlon_df = hset_df.drop_duplicates(subset=['GroupLat', 'GroupLon'])[['GroupLat', 'GroupLon', 'GroupX', 'GroupY']]
rec_latlon_df

### Get Borehole Coordinates Where There is DAS Fiber

In [16]:
idas_overlap_beg = 38 #TAC: starting index where DAS and borehole coordinates overlap (empirically determined)
# Keep only the SP1 rows from that index onward.
boredas_df = sp1_df[idas_overlap_beg:]

#### SEGY Water-Depth Headers Provide the "is borehole" Groupings

In [None]:
# Borehole channels are identified from the headers of the first file only.
borehdr_df = hset_df[hset_df['FileIndex']==0]
borehdr_df = borehdr_df[borehdr_df["GroupWaterDepth"] != 0] # TAC: ignore channels with WaterDepth = 0
# One row per unique receiver location. The explicit .copy() makes this an
# independent frame, so adding a column below does not write to a slice of
# borehdr_df (which would raise SettingWithCopyWarning).
bhdr_latlon_df = borehdr_df.drop_duplicates(subset=['GroupLat', 'GroupLon'])[['GroupLat', 'GroupLon', 'GroupX', 'GroupY']].copy()
# Sequential id for each unique location, in header order.
bhdr_latlon_df['BoreIndex'] = range(len(bhdr_latlon_df))
bhdr_latlon_df.iloc[:4]

## Show Overview Field Map

In [18]:
# Map display width and height (height is derived from the width).
m_width = 1200
m_height = (1/1.66666)*m_width

# Build one marker dict and one folium group per layer.
bore_d = _make_marker_dict(
    bhdr_latlon_df,
    kw_lat='GroupLat',
    kw_lon='GroupLon',
    color='black',
    mname='BORE',
    symbol="▽",
    sympt='12',
)
bore_g = _make_folium_group_from_dict(bore_d, gname='DAS Boreholes')

abore_d = _make_marker_dict(
    sp1_df,
    kw_lat='Latitude',
    kw_lon='Longitude',
    color='black',
    mname='BORE',
    symbol="▼",
    sympt='12',
)
abore_g = _make_folium_group_from_dict(abore_d, gname='All Boreholes')

src_d = _make_marker_dict(
    src_latlon_df,
    kw_lat='SourceLat',
    kw_lon='SourceLon',
    color='red',
    mname='SRC',
    symbol='★',
)
src_g = _make_folium_group_from_dict(src_d, gname='All Sources')

rec_d = _make_marker_dict(
    rec_latlon_df,
    kw_lat='GroupLat',
    kw_lon='GroupLon',
    color='blue',
    mname='REC',
    symbol='▽',
    sympt='12',
)
rec_g = _make_folium_group_from_dict(rec_d, gname='All Channels')

# Groups in the order they are handed to the map.
glst = [src_g, bore_g, abore_g, rec_g]

# Centre the base map on the mean receiver latitude/longitude.
clat, clon = _get_latlot_means(rec_latlon_df, kw_lat='GroupLat', kw_lon='GroupLon')
base_map = _make_folium_base_map(clat, clon, zoom_start=14)
my_map = _add_folium_groups_to_map(glst, base_map)

### Convert Folium Map to HTML and Display

In [None]:
m_html = get_html_folium_map(my_map,width=m_width,height=m_height)
display(m_html)

### Save Map to HTML File

In [None]:
 map_fname = data_path + '/field_overview_map.html'
print(map_fname)

In [21]:
# my_map.save(map_fname)

## Get Borehole Channel Indices

In [22]:
file_df = hset_df[hset_df['FileIndex'] == 0] #TAC: We just need one file

In [None]:
file_df[file_df["GroupWaterDepth"] != 0]

In [None]:
# A channel is flagged as borehole when its water-depth header is non-zero.
is_bore = (file_df["GroupWaterDepth"] != 0).to_numpy()

# Views of the flags shifted by one channel in each direction.
isbl = is_bore[:-1]
isbr = is_bore[1:]

# "+1" variant: a channel is also flagged when either adjacent channel is a
# borehole channel.
is_bore_p1 = is_bore.copy()
is_bore_p1[1:] |= isbl
is_bore_p1[:-1] |= isbr

# Surface flags are the complements of the two borehole flag arrays.
is_surf = np.logical_not(is_bore)
is_surf_p1 = np.logical_not(is_bore_p1)

# NOTE(review): these are DataFrame index labels of file_df; later cells use
# them as positional row indices into the data arrays -- confirm that the
# labels for FileIndex 0 run 0..N-1.
chan_labels = file_df.index.to_numpy()
ibore_mask = chan_labels[is_bore]
isurf_mask = chan_labels[is_surf]
ibore_mask_p1 = chan_labels[is_bore_p1]
isurf_mask_p1 = chan_labels[is_surf_p1]

print(len(ibore_mask))
print(len(isurf_mask))
# Every channel must be in exactly one of the two sets.
assert len(isurf_mask)+len(ibore_mask) == len(file_df)

## Preprocess DAS Data

In [25]:
from scipy import signal

def butter_bandpass(data,fs=None,b0=None,bN=None,axis=-1,order=5,**kwargs):
    """Apply a zero-phase Butterworth filter to ``data``.

    The filter type is chosen from which corner frequencies are given:
    both ``b0`` and ``bN`` -> band-pass, only ``bN`` -> low-pass,
    only ``b0`` -> high-pass.

    Parameters
    ----------
    data : array_like
        Input samples; filtering runs along ``axis``.
    fs : float, optional
        Sampling frequency, forwarded to ``scipy.signal.butter``. When
        ``None``, scipy interprets the corner frequencies as normalized.
    b0 : float, optional
        Lower corner frequency.
    bN : float, optional
        Upper corner frequency.
    axis : int, optional
        Axis of ``data`` to filter along (default: last axis).
    order : int, optional
        Butterworth filter order passed to ``scipy.signal.butter``.
    **kwargs
        Accepted for call-site compatibility; currently ignored.

    Returns
    -------
    numpy.ndarray
        Filtered data with the same shape as ``data``.

    Raises
    ------
    ValueError
        If both ``b0`` and ``bN`` are ``None``.
    """
    if b0 is None and bN is None:
        raise ValueError('b0 and bN are both None')

    if b0 is None:
        bmode, bands = 'lowpass', bN
    elif bN is None:
        bmode, bands = 'highpass', b0
    else:
        bmode, bands = 'bandpass', (b0, bN)

    # Second-order sections are used for numerical stability; sosfiltfilt runs
    # the filter forward and backward, so the result has zero phase shift.
    sos = signal.butter(order, bands, btype=bmode, fs=fs, output='sos')
    return signal.sosfiltfilt(sos, data, axis=axis)


In [26]:
# Preprocess every file: demean -> linear detrend -> band-pass -> per-channel
# RMS normalization.
proc_lst = []
for data in dlst:
    dmed_data = sci_detrend(data,type='constant',axis=-1)
    dlin_data = sci_detrend(dmed_data,type='linear',axis=-1)
    # Filter the detrended traces rather than the raw input, so that the two
    # detrend steps above actually take effect.
    # NOTE(review): fs=1000 is hard-coded -- confirm it matches the files'
    # sample interval.
    butter_data = butter_bandpass(dlin_data,fs=1000,b0=0.5,bN=50)
    rms_norm = np.sqrt(np.mean(butter_data**2, axis=1, keepdims=True))
    # Channels with zero RMS are divided by 1 instead of 0, so they stay
    # all-zero rather than turning into NaN/inf.
    safe_rms = np.where(rms_norm > 0, rms_norm, 1.0)
    norm_data = butter_data/safe_rms
    proc_lst.append(norm_data)

### Get Data for Two Shots

In [None]:
fid = 0
file_proc = proc_lst[fid]
print(file_proc.shape)

fid2 = 201
file_proc2 = proc_lst[fid2]
print(file_proc2.shape)

### Get Source Coordinates for Each File (shot)

In [None]:
src_df = hset_df[hset_df['FileIndex']==fid]
src_df = src_df[['SourceLat','SourceLon']].drop_duplicates()
src_df

In [None]:
src2_df = hset_df[hset_df['FileIndex']==fid2]
src2_df = src2_df[['SourceLat','SourceLon']].drop_duplicates()
src2_df

### Get Metadata for Data Plotting

In [30]:
# Sample count and sample interval are read from the attrs entries for file `fid`.
nt = hset_df.attrs['SampleCount'][fid]
dt = hset_df.attrs['TimeDelta'][fid]
# Time axis starting at zero: sample i sits at i*dt.
# NOTE(review): the plots label this axis in seconds and reuse it for the
# second shot as well -- confirm TimeDelta is in seconds and that all files
# share the same nt and dt.
times = dt*np.arange(nt)

## Plot Shot for First File

### Show Field Map

In [31]:
m_width = 1200
m_height = (1/1.66666)*m_width

glst = []
bore_d = _make_marker_dict(bhdr_latlon_df,kw_lat='GroupLat',kw_lon='GroupLon',color='black',mname='BORE',symbol="▽",sympt='12')
bore_g = _make_folium_group_from_dict(bore_d,gname='DAS Boreholes')
src_d = _make_marker_dict(src_df,kw_lat='SourceLat',kw_lon='SourceLon',color='red',mname='SRC',symbol='★')
src_g = _make_folium_group_from_dict(src_d,gname='Source')
rec_d = _make_marker_dict(rec_latlon_df,kw_lat='GroupLat',kw_lon='GroupLon',color='blue',mname='REC',symbol='▽',sympt='12')
rec_g = _make_folium_group_from_dict(rec_d,gname='All Channels')

glst.append(src_g)
glst.append(bore_g)
glst.append(rec_g)

clat,clon = _get_latlot_means(rec_latlon_df,kw_lat='GroupLat',kw_lon='GroupLon')

base_map = _make_folium_base_map(clat,clon,zoom_start=14)

my_map = _add_folium_groups_to_map(glst,base_map)

In [None]:
m_html = get_html_folium_map(my_map,width=m_width,height=m_height)
display(m_html)

In [None]:
src_map_fname = data_path + '/src_map.html'
print(src_map_fname)

In [34]:
# my_map.save(src_map_fname)

### Plot All Channels

In [35]:
#TAC: uncomment after all initial plots (Cloud Workstation Cluster Problem Sometimes)
%matplotlib widget

### Zoom Box

In [36]:
# Zoom window limits: x is the channel axis, y is the time axis.
x_zlim = (540,740)
y_zlim = (2.3,3.3)

# The rectangle is anchored at the lower limit of each axis and spans the
# full extent of the window.
z_anch = (x_zlim[0], y_zlim[0])
zoom_width = x_zlim[1] - x_zlim[0]    # channels
zoom_height = y_zlim[1] - y_zlim[0]   # time range

zoom_rect = patches.Rectangle(
    z_anch,
    zoom_width,
    zoom_height,
    linewidth=4,
    edgecolor='red',
    facecolor='none',
)

In [37]:
AC, TA = np.meshgrid(np.arange(file_proc.shape[0]), times)

In [None]:
# Colour scale is clipped at (1 - pclip) of the peak absolute amplitude,
# i.e. 20% of the maximum for pclip = 0.8.
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc).max()
# Transposed so that it lines up with the AC/TA grids (time along rows).
plt_data = file_proc.T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(AC, TA, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

# A fresh copy is added so the same rectangle template can be reused on
# other figures.
ax.add_patch(dcopy(zoom_rect))

# Ten evenly spaced ticks across the current x-range, shown with two decimals.
xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

# Time increases downward.
ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid} (All Channels)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot All Channels with Borehole Channel Markers Overlain (indicated in channel headers)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc).max()
plt_data = file_proc.T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(AC, TA, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

ax.add_patch(dcopy(zoom_rect))

for ic in ibore_mask:
    ax.axvspan(ic-0.5, ic+0.5, color='yellow', alpha=0.3)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid} (Borehole Overlay)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot All Channels with Borehole Channel Markers Overlain (header locations plus nearest channel neighbors)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc).max()
plt_data = file_proc.T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(AC, TA, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

ax.add_patch(dcopy(zoom_rect))

for ic in ibore_mask_p1:
    ax.axvspan(ic-0.5, ic+0.5, color='yellow', alpha=0.3)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid} (Borehole +1 Overlay)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot Borehole Channels Only (as indicated by channel headers)

In [41]:
BC1, TB1 = np.meshgrid(ibore_mask, times)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc).max()
plt_data = file_proc[ibore_mask,:].T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(BC1, TB1, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

ax.add_patch(dcopy(zoom_rect))

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid} (Boreholes Only)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot Borehole Channels Plus Nearest Neighbor Channels

In [43]:
BC2, TB2 = np.meshgrid(ibore_mask_p1, times)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc).max()
plt_data = file_proc[ibore_mask_p1,:].T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(BC2, TB2, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

ax.add_patch(dcopy(zoom_rect))

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid} (Boreholes +1 Only)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Zoom Plot All vs Borehole (vertical-loop) Channels

In [45]:
plt_data1 = file_proc.T
plt_data2 = file_proc[ibore_mask,:].T
plt_data3 = file_proc[ibore_mask_p1,:].T

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc).max()
xlim_zoom = x_zlim
ylim_zoom = y_zlim
tick_positions_x = np.linspace(*xlim_zoom, 10)
tick_positions_y = np.linspace(*ylim_zoom, 10)[::-1]  # Reversed for inverted axis


fig, axes = plt.subplots(nrows=3, figsize=(15, 15), sharex=True, sharey=True)

axes[0].pcolormesh(AC, TA, plt_data1, cmap='gray',vmin=-vmax,vmax=vmax)
axes[0].set_xlim(*x_zlim)
axes[0].set_ylim(*y_zlim[::-1])  # y-axis inverted
axes[0].set_xticks(tick_positions_x)
axes[0].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
axes[0].set_yticks(tick_positions_y)
axes[0].yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
axes[0].set_title(f'a) Zoom: All Channels')
axes[0].set_ylabel('time (s)')
axes[0].set_xlabel('channel')

axes[1].pcolormesh(BC1, TB1, plt_data2, cmap='gray',vmin=-vmax,vmax=vmax)
axes[1].set_xlim(*x_zlim)
axes[1].set_ylim(*y_zlim[::-1])  # y-axis inverted
axes[1].set_xticks(tick_positions_x)
axes[1].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
axes[1].set_yticks(tick_positions_y)
axes[1].yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
axes[1].set_title(f'b) Zoom: Boreholes Only')
axes[1].set_ylabel('time (s)')
axes[1].set_xlabel('channel')

axes[2].pcolormesh(BC2, TB2, plt_data3, cmap='gray',vmin=-vmax,vmax=vmax)
axes[2].set_xlim(*x_zlim)
axes[2].set_ylim(*y_zlim[::-1])  # y-axis inverted
axes[2].set_xticks(tick_positions_x)
axes[2].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
axes[2].set_yticks(tick_positions_y)
axes[2].yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
axes[2].set_title(f'c) Zoom: Boreholes + 1 Only')
axes[2].set_ylabel('time (s)')
axes[2].set_xlabel('channel')

fig.suptitle("Zoomed View: All Channels vs Borehole Channels", fontsize=16, y=1.0)

plt.tight_layout()
plt.show()

### Plot Surface Channels Only (as indicated by channel headers)

In [47]:
SC1, TS1 = np.meshgrid(isurf_mask, times)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc).max()
plt_data = file_proc[isurf_mask,:].T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(SC1, TS1, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

ax.add_patch(dcopy(zoom_rect))

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid} (Surface Only)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot Surface Channels without Borehole Channels and without Borehole Nearest Neighbors

In [49]:
SC2, TS2 = np.meshgrid(isurf_mask_p1, times)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc).max()
plt_data = file_proc[isurf_mask_p1,:].T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(SC2, TS2, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

ax.add_patch(dcopy(zoom_rect))

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid} (Surface Only Minus BH Nearest Neighbors)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Zoom Plot All vs Surface Channels

In [51]:
plt_data1 = file_proc.T
plt_data2 = file_proc[isurf_mask,:].T
plt_data3 = file_proc[isurf_mask_p1,:].T

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc).max()
xlim_zoom = x_zlim
ylim_zoom = y_zlim
tick_positions_x = np.linspace(*xlim_zoom, 10)
tick_positions_y = np.linspace(*ylim_zoom, 10)[::-1]  # Reversed for inverted axis


fig, axes = plt.subplots(nrows=3, figsize=(15, 15), sharex=True, sharey=True)

axes[0].pcolormesh(AC, TA, plt_data1, cmap='gray',vmin=-vmax,vmax=vmax)
axes[0].set_xlim(*x_zlim)
axes[0].set_ylim(*y_zlim[::-1])  # y-axis inverted
axes[0].set_xticks(tick_positions_x)
axes[0].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
axes[0].set_yticks(tick_positions_y)
axes[0].yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
axes[0].set_title(f'a) Zoom: All Channels')
axes[0].set_ylabel('time (s)')
axes[0].set_xlabel('channel')

axes[1].pcolormesh(SC1, TS1, plt_data2, cmap='gray',vmin=-vmax,vmax=vmax)
axes[1].set_xlim(*x_zlim)
axes[1].set_ylim(*y_zlim[::-1])  # y-axis inverted
axes[1].set_xticks(tick_positions_x)
axes[1].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
axes[1].set_yticks(tick_positions_y)
axes[1].yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
axes[1].set_title(f'b) Zoom: Surface Only')
axes[1].set_ylabel('time (s)')
axes[1].set_xlabel('channel')

axes[2].pcolormesh(SC2, TS2, plt_data3, cmap='gray',vmin=-vmax,vmax=vmax)
axes[2].set_xlim(*x_zlim)
axes[2].set_ylim(*y_zlim[::-1])  # y-axis inverted
axes[2].set_xticks(tick_positions_x)
axes[2].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
axes[2].set_yticks(tick_positions_y)
axes[2].yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
axes[2].set_title(f'c) Zoom: Surface Only Minus BH + 1')
axes[2].set_ylabel('time (s)')
axes[2].set_xlabel('channel')

fig.suptitle("Zoomed View: All Channels vs Surface-Only Channels", fontsize=16, y=1.0)

plt.tight_layout()
plt.show()


## Plot Shot for Second File

### Show Field Map

In [53]:
m_width = 1200
m_height = (1/1.66666)*m_width

glst = []
bore_d = _make_marker_dict(bhdr_latlon_df,kw_lat='GroupLat',kw_lon='GroupLon',color='black',mname='BORE',symbol="▽",sympt='12')
bore_g = _make_folium_group_from_dict(bore_d,gname='DAS Boreholes')
src_d = _make_marker_dict(src2_df,kw_lat='SourceLat',kw_lon='SourceLon',color='red',mname='SRC',symbol='★')
src_g = _make_folium_group_from_dict(src_d,gname='Source')
rec_d = _make_marker_dict(rec_latlon_df,kw_lat='GroupLat',kw_lon='GroupLon',color='blue',mname='REC',symbol='▽',sympt='12')
rec_g = _make_folium_group_from_dict(rec_d,gname='All Channels')

glst.append(src_g)
glst.append(bore_g)
glst.append(rec_g)

clat,clon = _get_latlot_means(rec_latlon_df,kw_lat='GroupLat',kw_lon='GroupLon')

base_map = _make_folium_base_map(clat,clon,zoom_start=14)

my_map = _add_folium_groups_to_map(glst,base_map)

In [None]:
m_html = get_html_folium_map(my_map,width=m_width,height=m_height)
display(m_html)

In [None]:
src_map_fname = data_path + '/src2_map.html'
print(src_map_fname)

In [56]:
# my_map.save(src_map_fname)

### Plot All Channels

In [57]:
AC, TT = np.meshgrid(np.arange(file_proc2.shape[0]), times)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc2).max()
plt_data = file_proc2.T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(AC, TT, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid2} (All Channels)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot All Channels with Borehole Channel Markers Overlain (indicated in channel headers)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc2).max()
plt_data = file_proc2.T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(AC, TT, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

for ic in ibore_mask:
    ax.axvspan(ic-0.5, ic+0.5, color='yellow', alpha=0.3)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid2} (Borehole Overlay)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot All Channels with Borehole Channel Markers Overlain (header locations plus nearest channel neighbors)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc2).max()
plt_data = file_proc2.T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(AC, TT, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

for ic in ibore_mask_p1:
    ax.axvspan(ic-0.5, ic+0.5, color='yellow', alpha=0.3)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid2} (Borehole +1 Overlay)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot Borehole Channels Only (as indicated by channel headers)

In [61]:
BC, TT = np.meshgrid(ibore_mask, times)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc2).max()
plt_data = file_proc2[ibore_mask,:].T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(BC, TT, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid2} (Boreholes Only)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot Borehole Channels Plus Nearest Neighbor Channels

In [63]:
BC, TT = np.meshgrid(ibore_mask_p1, times)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc2).max()
plt_data = file_proc2[ibore_mask_p1,:].T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(BC, TT, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid2} (Boreholes +1 Only)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot Surface Channels Only (as indicated by channel headers)

In [65]:
SC, TT = np.meshgrid(isurf_mask, times)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc2).max()
plt_data = file_proc2[isurf_mask,:].T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(SC, TT, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid2} (Surface Only)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot Surface Channels without Borehole Channels and without Borehole Nearest Neighbors

In [67]:
SC, TT = np.meshgrid(isurf_mask_p1, times)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc2).max()
plt_data = file_proc2[isurf_mask_p1,:].T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(SC, TT, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid2} (Surface Only Minus BH Nearest Neighbors)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

## Remove Surface Slack Channels

#### Empirically Searched for Channels

In [None]:
#TAC: empirically found
ifrom = 46
ito = 47

# Select the rows whose BoreIndex lies in the closed range [ifrom, ito].
in_slack_range = bhdr_latlon_df['BoreIndex'].between(ifrom, ito)
slack_df = bhdr_latlon_df[in_slack_range]
display(slack_df.iloc[:4])

In [70]:
# Ascending easting / northing values of the selected slack rows.
sortx = np.sort(slack_df['GroupX'].to_numpy())
sorty = np.sort(slack_df['GroupY'].to_numpy())

# Padding: half the spacing between the two smallest x values, and a fixed
# value of 1 in y.
gdx = 0.5*abs(sortx[1] - sortx[0])
gdy = 1

# The x bounds are pushed outward by gdx, while the y bounds are pulled
# inward by gdy.
min_gx, max_gx = sortx[0] - gdx, sortx[1] + gdx
min_gy, max_gy = sorty[0] + gdy, sorty[1] - gdy

#### Add Column to Channel (receiver) DataFrame for "Is_Slack" Mask

In [None]:
# A channel is tagged as slack when its coordinates fall inside the
# [min_gx, max_gx] x [min_gy, max_gy] box (bounds inclusive).
in_x_range = hset_df['GroupX'].between(min_gx, max_gx)
in_y_range = hset_df['GroupY'].between(min_gy, max_gy)
slack_mask = in_x_range & in_y_range

# Tag a copy, so hset_df itself does not gain the column.
tagslack_df = hset_df.copy()
tagslack_df['Is_Slack'] = slack_mask
tagslack_df

In [72]:
noslack_df = tagslack_df[tagslack_df['FileIndex'] == 0] #TAC: We just need one file

#### Slice/Remove Channels Part of the Surface Slack (non-entrenched) Cable Portion

In [None]:
rec_noslack_df = noslack_df.drop_duplicates(subset=['GroupLat', 'GroupLon'])[['SourceX','SourceY','OffsetX','OffsetY','GroupLat', 'GroupLon', 'GroupX', 'GroupY','Is_Slack']]
rec_noslack_df = rec_noslack_df[~rec_noslack_df['Is_Slack']]
rec_noslack_df

### Show Field Overview Map Without the Slack Channels

In [74]:
m_width = 1200
m_height = (1/1.66666)*m_width

glst = []
bore_d = _make_marker_dict(bhdr_latlon_df,kw_lat='GroupLat',kw_lon='GroupLon',color='black',mname='BORE',symbol="▽",sympt='12')
bore_g = _make_folium_group_from_dict(bore_d,gname='DAS Boreholes')
abore_d = _make_marker_dict(sp1_df,kw_lat='Latitude',kw_lon='Longitude',color='black',mname='BORE',symbol="▼",sympt='12')
abore_g = _make_folium_group_from_dict(abore_d,gname='All Boreholes')
src_d = _make_marker_dict(src_latlon_df,kw_lat='SourceLat',kw_lon='SourceLon',color='red',mname='SRC',symbol='★')
src_g = _make_folium_group_from_dict(src_d,gname='All Sources')
rec_d = _make_marker_dict(rec_noslack_df,kw_lat='GroupLat',kw_lon='GroupLon',color='blue',mname='REC',symbol='▽',sympt='12')
rec_g = _make_folium_group_from_dict(rec_d,gname='Channels')

glst.append(src_g)
glst.append(bore_g)
glst.append(abore_g)
glst.append(rec_g)

clat,clon = _get_latlot_means(rec_latlon_df,kw_lat='GroupLat',kw_lon='GroupLon')

base_map = _make_folium_base_map(clat,clon,zoom_start=14)

my_map = _add_folium_groups_to_map(glst,base_map)

In [None]:
m_html = get_html_folium_map(my_map,width=m_width,height=m_height)
display(m_html)

In [None]:
noslack_map_fname = data_path + '/noslack_overview_field_map.html'
print(noslack_map_fname)

In [77]:
# my_map.save(noslack_map_fname)

## Plot Surface Channels With Slack Channels Set to Zero and Dropped

In [None]:
is_slack = noslack_df['Is_Slack'].to_numpy()
is_not_slack = ~is_slack
is_not_slack

#### Zero-out Slack Channels

In [79]:
nos_file_proc = file_proc.copy()
nos_file_proc[is_slack,:] = 0.

#### To "Drop" Slack Channels

In [None]:
isurf_nos_mask = file_df.index.to_numpy()[is_surf & is_not_slack]
isurf_nos_mask

### Plot Zero-out Slack Surface Channels

In [81]:
SC, TT = np.meshgrid(isurf_mask, times)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc).max()
plt_data = nos_file_proc[isurf_mask,:].T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(SC, TT, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid} (Surface Only - Zero-Out Slack)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

### Plot Dropped Slack Surface Channels

In [83]:
SC, TT = np.meshgrid(isurf_nos_mask, times) #TAC: Drop slack instead

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(file_proc).max()
plt_data = file_proc[isurf_nos_mask,:].T #TAC: Drop slack instead

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(SC, TT, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.invert_yaxis()

ax.set_title(f'Preprocessed File-{fid} (Surface Only - Dropped Slack)')
ax.set_ylabel('time (s)')
ax.set_xlabel('channel')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

## Plot Channel (Receiver) Gathers: Surface Only (no slack), and Boreholes Only

#### Sort By Offset

In [None]:
# Signed offsets: the offset magnitude carrying the sign of the x component,
# of the y component, or of their product. np.sign returns 0 for a zero
# component, so those rows get a signed offset of 0.
sign_x = np.sign(hset_df['OffsetX'])
sign_y = np.sign(hset_df['OffsetY'])
offset_mag = hset_df['OffsetMag']

hset_df['SignSortX'] = sign_x * offset_mag
hset_df['SignSortY'] = sign_y * offset_mag
hset_df['SignSort'] = sign_x * sign_y * offset_mag

# Group by receiver gather, with the signed offset descending inside each gather.
sorted_df = hset_df.sort_values(
    by=['RecGatherID', 'SignSort'],
    ascending=[True, False],
)

sorted_df[['FileIndex','DataIndex','GroupX','GroupY','OffsetX','OffsetY','OffsetMag','RecGatherID','SignSort']]

#### Construct Rec Gather Header

In [None]:
rec_id = 0
rec_df = sorted_df[sorted_df['RecGatherID'] == rec_id]
print(len(rec_df))
rec_df[['FileIndex','DataIndex','GroupX','GroupY','OffsetX','OffsetY','OffsetMag','RecGatherID','SrcGatherID']]

In [None]:
# rec_df[['GroupX','GroupY']].drop_duplicates()
rec_df.drop_duplicates(subset=['GroupLat', 'GroupLon'])[['GroupLat', 'GroupLon', 'GroupX', 'GroupY']]

### Show Field Map For Receiver (channel)

In [88]:
m_width = 1200
m_height = (1/1.66666)*m_width

glst = []
bore_d = _make_marker_dict(bhdr_latlon_df,kw_lat='GroupLat',kw_lon='GroupLon',color='black',mname='BORE',symbol="▽",sympt='12')
bore_g = _make_folium_group_from_dict(bore_d,gname='DAS Boreholes')
src_d = _make_marker_dict(src_latlon_df,kw_lat='SourceLat',kw_lon='SourceLon',color='red',mname='SRC',symbol='★')
src_g = _make_folium_group_from_dict(src_d,gname='All Sources')
rec_d = _make_marker_dict(rec_df,kw_lat='GroupLat',kw_lon='GroupLon',color='blue',mname='REC',symbol="▼",sympt='12')
rec_g = _make_folium_group_from_dict(rec_d,gname='Channel')

glst.append(rec_g)
glst.append(src_g)
glst.append(bore_g)

clat,clon = _get_latlot_means(rec_latlon_df,kw_lat='GroupLat',kw_lon='GroupLon')

base_map = _make_folium_base_map(clat,clon,zoom_start=14)

my_map = _add_folium_groups_to_map(glst,base_map)

In [None]:
m_html = get_html_folium_map(my_map,width=m_width,height=m_height)
display(m_html)

In [None]:
rgath_map_fname = data_path + '/rec-gather_field_map.html'
print(rgath_map_fname)

In [91]:
# my_map.save(rgath_map_fname)

#### Get Offsets for Plotting

In [None]:
# (file index, trace index) for every row of the gather, in sorted order.
index_pairs = list(zip(rec_df['FileIndex'], rec_df['DataIndex']))

# Pull each trace out of its preprocessed file. The loop names are chosen so
# they do not read like the notebook-level `fid`.
gather_traces = [proc_lst[i_file][i_trace] for i_file, i_trace in index_pairs]
rec_data = np.array(gather_traces)

# Signed offsets in the same row order as the traces.
offsets = rec_df['SignSort'].to_numpy()
offsets

### Plot Receiver (channel) Gather

In [93]:
HH, TT = np.meshgrid(offsets, times)

In [None]:
pclip = 0.8
vmax = (1.-pclip)*np.abs(rec_data).max()
plt_data = rec_data.T

fig, ax = plt.subplots(figsize=(15,9))
im = ax.pcolormesh(HH, TT, plt_data, cmap='gray',vmin=-vmax,vmax=vmax)

xlim = ax.get_xlim()
tick_positions = np.linspace(xlim[0], xlim[1], 10)
ax.set_xticks(tick_positions)

ax.invert_yaxis()

ax.set_title(f'Preprocessed REC-Gather-{rec_id} (All Sources)')
ax.set_ylabel('time (s)')
ax.set_xlabel('Offset (m)')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()

## Create Midpoint-Bin Map

### Create the Midpoint-Bin Headers

In [95]:
mid_noslack_df = tagslack_df[~tagslack_df['Is_Slack']].copy()

In [96]:
# Bin size, in the same units as the SourceX/SourceY coordinates.
mbin_size = 50

# Midpoint taken as the source position minus half the offset vector.
# NOTE(review): this presumes the offset is defined as source minus receiver
# -- confirm against the header definition.
mid_x = mid_noslack_df['SourceX'] - 0.5*mid_noslack_df['OffsetX']
mid_y = mid_noslack_df['SourceY'] - 0.5*mid_noslack_df['OffsetY']

# Snap each midpoint to the nearest multiple of mbin_size.
mid_noslack_df['MidpointBinX'] = mbin_size*np.round(mid_x/mbin_size)
mid_noslack_df['MidpointBinY'] = mbin_size*np.round(mid_y/mbin_size)

### Calculate Lat-Lon Bins from Easting-Northing Bins

In [97]:
# UTM zone used for the inverse projection.
# NOTE(review): 'N' presumably serves only to select the northern hemisphere
# here -- confirm against the utm package's handling of zone letters.
zone_number = 13  # looked up
zone_letter = 'N'

# (easting, northing) bin coordinates for every header row, in row order.
mid_eastnorth = list(zip(mid_noslack_df['MidpointBinX'], mid_noslack_df['MidpointBinY']))

# Convert row by row, then split the (lat, lon) pairs into two parallel tuples.
mid_latlon_pairs = [
    utm.to_latlon(east, north, zone_number, zone_letter)
    for east, north in mid_eastnorth
]
mid_lats,mid_lons = zip(*mid_latlon_pairs)

### Add Midpoint Lat-Lon Bins to Header

In [98]:
mid_noslack_df['MidpointBinLat'] = mid_lats
mid_noslack_df['MidpointBinLon'] = mid_lons

### Slice/Filter for Lat-Lon Bin Coordinate Headers

In [None]:
mid_latlon_df = mid_noslack_df[['MidpointBinLat', 'MidpointBinLon']].drop_duplicates()
display(mid_latlon_df)

### Show Field Map with Midpoint Lat-Lon Bins

In [100]:
m_width = 1200
m_height = (1/1.66666)*m_width

glst = []
src_d = _make_marker_dict(src_latlon_df,kw_lat='SourceLat',kw_lon='SourceLon',color='red',mname='SRC',symbol='★')
src_g = _make_folium_group_from_dict(src_d,gname='All Sources')
rec_d = _make_marker_dict(rec_noslack_df,kw_lat='GroupLat',kw_lon='GroupLon',color='blue',mname='REC',symbol='▽',sympt='12')
rec_g = _make_folium_group_from_dict(rec_d,gname='Channels')
mid_d = _make_marker_dict(mid_latlon_df,kw_lat='MidpointBinLat',kw_lon='MidpointBinLon',color='yellow',mname='CMP',symbol='X',sympt='12')
mid_g = _make_folium_group_from_dict(mid_d,gname='Midpoint-Bins')

glst.append(rec_g)
glst.append(src_g)
glst.append(mid_g)

clat,clon = _get_latlot_means(rec_latlon_df,kw_lat='GroupLat',kw_lon='GroupLon')

base_map = _make_folium_base_map(clat,clon,zoom_start=14)

my_map = _add_folium_groups_to_map(glst,base_map)

In [None]:
m_html = get_html_folium_map(my_map,width=m_width,height=m_height)
display(m_html)

In [None]:
mid_map_fname = data_path + '/midpt-bin_field_map.html'
print(mid_map_fname)

In [103]:
# my_map.save(mid_map_fname)