In [1]:
from __future__ import annotations

from pathlib import Path

import pandas as pd
from functions.tools import get_list_files as glf


In [2]:
# Analysis window as ISO dates; pd.date_range includes both end points.
date_start = "2024-03-01"
date_end = "2025-03-02"

# One timestamp per calendar day of the window.
init_dates = pd.date_range(start=date_start, end=date_end, freq="1D")

In [3]:
# -------------------------------------------
# Build the file lists
# -------------------------------------------

# Glob patterns, one per family of *.hpl files.
file_vad_mask = "VAD*.hpl"
file_ver_mask = "Stare*.hpl"
file_user_mask = "User*.hpl"
file_wind_mask = "Wind*.hpl"

# Directory template containing %Y / %m / %d date codes.
# NOTE(review): presumably expanded once per entry of init_dates by glf — confirm.
brphys_mask = (
    "/storage/research/actual01/disk1/urban/obs/LiDAR/Bristol/BRPHYS/%Y/%Y%m/%Y%m%d"
)
dir_mask = brphys_mask

# NOTE(review): list_vad_files is built from file_wind_mask, while file_vad_mask
# and file_user_mask are not used in this cell — confirm this is intended.
list_vad_files = glf(init_dates, dir_mask, file_wind_mask)
list_ver_files = glf(init_dates, dir_mask, file_ver_mask)

In [4]:
def count_headers(list_files: list, type_file: str) -> tuple[dict, dict]:
    """Count and identify all different headers of *.hpl files.

    Each file is streamed line by line and reading stops at the first line that
    starts with '****' (the header end marker), so the data section of a large
    file is never loaded. A file without that marker is read to the end and all
    of its lines are treated as header.

    Lines starting with "Filename:" or "Start time:" are dropped before
    comparison because they differ in every file. Two headers count as the same
    type when their remaining lines are equal regardless of order.

    A short summary is printed: the number of header types and, when there is
    more than one, the first and last file of each type.

    Args:
        list_files: Paths (anything accepted by ``pathlib.Path``) of the files
            to inspect. Any iterable works; it is traversed exactly once.
        type_file: Label used only as a prefix in the printed summary.

    Returns:
        header_types: dict mapping each unique header (a sorted tuple of its
            cleaned lines) to the cleaned lines, in file order, of the first
            file in which it was seen.
        header_files: dict mapping each unique header to the list of files
            that have it, in input order.

    """
    header_types: dict = {}
    header_files: dict = {}

    for file_path in list_files:
        header_lines = []
        with Path(file_path).open("r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                header_lines.append(line)
                if line.lstrip().startswith("****"):
                    break

        # filtering "Filename:", "Start time:", they generate uniqueness
        cleaned = [
            w for w in header_lines if not w.startswith(("Filename:", "Start time:"))
        ]

        # Sorting makes the key independent of the order of the header lines.
        header_key = tuple(sorted(cleaned))
        if header_key not in header_types:
            header_types[header_key] = cleaned
            header_files[header_key] = []
        header_files[header_key].append(file_path)

    print(f"[{type_file}] Number of unique types of headers: {len(header_types)}")  # noqa: T201

    if len(header_types) > 1:
        print(f"\n[{type_file}] Files per header type:")  # noqa: T201
        # Both dicts receive their keys together, so enumerating header_files
        # numbers the types in first-seen order.
        for type_number, files in enumerate(header_files.values(), start=1):
            print(f"- Header type {type_number}:")  # noqa: T201
            if len(files) > 1:
                print(f"  from {files[0]} to {files[-1]}")  # noqa: T201
            else:
                print(f"  {files[0]}")  # noqa: T201

    return header_types, header_files


# Summarise the header variants of each file list. Each result is the
# (header_types, header_files) tuple returned by count_headers.
# NOTE(review): list_vad_files is built with the "Wind*.hpl" mask earlier in
# this notebook, so the "VAD" label may not match the files — confirm.
header_vad = count_headers(list_files=list_vad_files, type_file="VAD")
header_ver = count_headers(list_files=list_ver_files, type_file="Stare")

KeyboardInterrupt: 

In [5]:
# Show the header of the first file in list_ver_files.
src = list_ver_files[0]

# Stream the file and stop at the '****' end-of-header marker, so the data
# section is never read. The whole file is no longer loaded, and this cell
# therefore does not define `lines`.
header_end_idx = []
with src.open("r", encoding="utf-8", errors="ignore") as f:
    for line in f:
        header_end_idx.append(line)
        if line.lstrip().startswith("****"):
            break

header_end_idx

['Filename:\tStare_18_20240319_11.hpl\n',
 'System ID:\t18\n',
 'Number of gates:\t3194\n',
 'Range gate length (m):\t18.0\n',
 'Gate length (pts):\t6\n',
 'Pulses/ray:\t30000\n',
 'No. of rays in file:\t1\n',
 'Scan type:\tStare\n',
 'Focus range:\t65535\n',
 'Start time:\t20240319 11:07:10.53\n',
 'Resolution (m/s):\t0.0382\n',
 'Altitude of measurement (center of gate) = (range gate + 0.5) * Gate length\n',
 'Data line 1: Decimal time (hours)  Azimuth (degrees)  Elevation (degrees) Pitch (degrees) Roll (degrees)\n',
 'f9.6,1x,f6.2,1x,f6.2\n',
 'Data line 2: Range Gate  Doppler (m/s)  Intensity (SNR + 1)  Beta (m-1 sr-1)\n',
 'i3,1x,f6.4,1x,f8.6,1x,e12.6 - repeat for no. gates\n',
 '****\n']

In [None]:
# Show the header of the tenth-from-last file in list_vad_files.
src = list_vad_files[-10]

# Stream the file and stop at the '****' end-of-header marker, so the data
# section is never read. The whole file is no longer loaded, and this cell
# therefore does not define `lines`.
header_end_idx = []
with src.open("r", encoding="utf-8", errors="ignore") as f:
    for line in f:
        header_end_idx.append(line)
        if line.lstrip().startswith("****"):
            break

header_end_idx

['Filename:\tWind_Profile_18_20250309_222318.hpl\n',
 'System ID:\t18\n',
 'Number of gates:\t200\n',
 'Range gate length (m):\t18\n',
 'Gate length (pts):\t6\n',
 'Pulses/ray:\t90000\n',
 'No. of rays in file:\t6\n',
 'Scan type:\tWind profile\n',
 'Focus range:\t65535\n',
 'Start time:\t20250309 22:23:28.62\n',
 'Resolution (m/s):\t0.0382\n',
 'Altitude of measurement (center of gate) = (range gate + 0.5) * Gate length\n',
 'Data line 1: Decimal time (hours)  Azimuth (degrees)  Elevation (degrees) Pitch (degrees) Roll (degrees)\n',
 'f9.6,1x,f6.2,1x,f6.2\n',
 'Data line 2: Range Gate  Doppler (m/s)  Intensity (SNR + 1)  Beta (m-1 sr-1)\n',
 'i3,1x,f6.4,1x,f8.6,1x,e12.6 - repeat for no. gates\n',
 '****\n']