In [None]:
import datashader as ds
import holoviews as hv
import matplotlib
import matplotlib.cm as mpl_cm
import matplotlib.colors as mpl_colors
import pandas as pd
from holoviews.operation.datashader import datashade

hv.extension('bokeh')

# Load the file index, reading only the columns used below. Every column is
# kept as text (dtype=str); start times are parsed explicitly afterwards.
df = pd.read_csv(
    '/home/chopp/index_all_nanometrics.csv',
    usecols=['station', 'channel', 'starttime', 'archive_type'],
    dtype=str,
)

# 'YYYYMMDD.HHMMSS' -> datetime; values that do not match the format become NaT.
df['dt'] = pd.to_datetime(df['starttime'], format='%Y%m%d.%H%M%S', errors='coerce')
# Combined 'STATION_CHANNEL' label, later used as the y-axis category.
df['sta_chan'] = df['station'] + '_' + df['channel']
# Rows whose start time could not be parsed are discarded.
df = df.dropna(subset=['dt'])

# Keep only the N most frequent station+channel labels so the plot stays readable.
N = 200
top_sta_chan = df['sta_chan'].value_counts().head(N).index.tolist()
in_top = df['sta_chan'].isin(top_sta_chan)
df = df[in_top].copy()

# Helper: break a 'STATION_CHANNEL' label back into its two parts.
def split_sta_chan(s):
    """Split *s* at its first underscore into ``(station, channel)``.

    Everything after the first underscore belongs to the channel. If *s*
    contains no underscore, the whole string is returned as the station and
    the channel is the empty string.
    """
    sta, _, chan = s.partition('_')
    return sta, chan

# Order the labels for the y axis: by the station's upper-cased first
# character, then the upper-cased station name, then the channel.
def _level_sort_key(label):
    """Return the sort key ``(FIRST_CHAR, STATION, channel)`` for one label."""
    sta, chan = split_sta_chan(label)
    return sta[0].upper(), sta.upper(), chan


levels = sorted(top_sta_chan, key=_level_sort_key)

# An ordered categorical ties every label to its position in `levels`;
# labels outside `levels` become NaN and are removed.
df['sta_chan'] = pd.Categorical(df['sta_chan'], categories=levels, ordered=True)
has_level = df['sta_chan'].notna()
df = df[has_level].copy()

# Integer row position on the y axis for each label, looked up by label text.
y_map = dict(zip(levels, range(len(levels))))
df['y'] = df['sta_chan'].astype(str).map(y_map)

# Upper-cased first character of the station name; used below as the
# category that groups and colours the bars.
df['first_char'] = (
    df['station']
    .astype(str)
    .str.get(0)
    .str.upper()
)

# --- Derive an end time (dt_end) for every file so it can be drawn as a bar ---
# Sources, in order of preference: an 'endtime' column, an existing 'dt_end'
# column, a 'duration' column (seconds), and finally a nominal length chosen
# from 'archive_type'.
columns = df.columns
if 'endtime' in columns:
    df['dt_end'] = pd.to_datetime(df['endtime'], format='%Y%m%d.%H%M%S', errors='coerce')
elif 'dt_end' in columns:
    df['dt_end'] = pd.to_datetime(df['dt_end'], errors='coerce')
elif 'duration' in columns:
    df['dt_end'] = df['dt'] + pd.to_timedelta(df['duration'].astype(float), unit='s')
else:
    # No explicit end information: assign a nominal length per file.
    if 'archive_type' in columns:
        # continuous -> 1800 s, triggered -> 14 s, anything else -> 1 s
        nominal_sec = {'continuous': 1800.0, 'triggered': 14.0}
        kind = df['archive_type'].astype(str).str.lower().fillna('')
        length_sec = kind.map(nominal_sec).fillna(1.0)
    else:
        # fallback: single-second bars
        length_sec = 1.0
    df['length_sec'] = length_sec
    df['dt_end'] = df['dt'] + pd.to_timedelta(df['length_sec'], unit='s')

# A bar needs both ends; rows missing either timestamp are removed.
df = df.dropna(subset=['dt', 'dt_end']).copy()

# Give each bar a vertical thickness centred on its row index `y`.
# bar_halfheight is the half-thickness: 0.5 would fill the whole row.
bar_halfheight = 0.4
for column, offset in (('y0', -bar_halfheight), ('y1', bar_halfheight)):
    df[column] = df['y'] + offset

# Colour key: one hex colour per distinct first_char value.
unique_chars = sorted(df['first_char'].unique())
# `matplotlib.cm.get_cmap` is deprecated; the colormap registry plus
# `.resampled(n)` is its documented replacement and yields the same colormap.
# max(1, ...) keeps the lookup table non-empty when there are no rows.
cmap = matplotlib.colormaps['tab20'].resampled(max(1, len(unique_chars)))
# cmap(i) with an integer index returns the i-th entry of the resampled table.
color_key = {ch: mpl_colors.to_hex(cmap(i)) for i, ch in enumerate(unique_chars)}

# One rectangle per file, spanning (dt, y0) to (dt_end, y1); 'first_char' is
# carried along as a value dimension for categorical colouring.
rects = hv.Rectangles(
    df,
    kdims=['dt', 'y0', 'dt_end', 'y1'],
    vdims=['first_char'],
)

# Canvas size in pixels: fixed width, height of 8 px per label with a floor of 900.
width = 2200
height = max(900, 8 * len(levels))

# Datashade the rectangles; count_cat aggregates per first_char and
# color_key supplies the colour for each category.
shaded = datashade(
    rects,
    width=width,
    height=height,
    color_key=color_key,
    aggregator=ds.count_cat('first_char'),
)

# One tick per row: (row index, 'STATION_CHANNEL' label).
yticks = list(enumerate(levels))

# Display at the same pixel size as the datashade canvas by reusing the
# `width` / `height` values computed for it, so the two cannot drift apart.
shaded = shaded.opts(
    width=width,
    height=height,
    min_width=1200,
    min_height=800,
    title="Seismic Availability (bars = file duration)",
    xlabel="Time",
    ylabel="Station_Channel",
    yticks=yticks,
    tools=['pan', 'wheel_zoom', 'box_zoom', 'save', 'xwheel_zoom'],
    toolbar='above'
)

# If VSCode output still constrains size, save to a standalone HTML and open in a browser:
hv.save(shaded, 'seismic_availability.html', fmt='html')

shaded

         archive_type station channel        starttime                  dt  \
0          continuous   BPB05     BHE  20150314.021340 2015-03-14 02:13:40   
1          continuous   BPB05     BHN  20150314.021340 2015-03-14 02:13:40   
2          continuous   BPB05     BHZ  20150314.021340 2015-03-14 02:13:40   
3          continuous   BPB08     BHE  20150314.021340 2015-03-14 02:13:40   
4          continuous   BPB08     BHN  20150314.021340 2015-03-14 02:13:40   
...               ...     ...     ...              ...                 ...   
19857583    triggered   BRP04     BHN  20190517.185458 2019-05-17 18:54:58   
19857584    triggered   BRP04     BHZ  20190517.185458 2019-05-17 18:54:58   
19857585    triggered   BRP09     BHE  20190517.185458 2019-05-17 18:54:58   
19857586    triggered   BRP09     BHN  20190517.185458 2019-05-17 18:54:58   
19857587    triggered   BRP09     BHZ  20190517.185458 2019-05-17 18:54:58   

           sta_chan  
0         BPB05_BHE  
1         BPB05_BHN

  cmap = mpl_cm.get_cmap('tab20', max(1, len(unique_chars)))


BokehModel(combine_events=True, render_bundle={'docs_json': {'b83a4cad-ddad-4213-bb57-25daec462392': {'version…