<h1> IGRA2 Radiosonde Data Processing </h1>
<br>

This notebook implements an optimized pipeline for processing radiosonde data from the Integrated Global Radiosonde Archive Version 2 (IGRA2).
<br>

**Problem Statement**

IGRA2 raw data files present several computational challenges:
- Large file sizes
- Complex concatenated format with variable-length records
- Irregular pressure levels across soundings

**Solution Overview**

The pipeline addresses these challenges through:
- Automated data acquisition: Programmatic download and extraction of station files
- Parallel processing: Multi-core disaggregation and formatting
- Pressure level standardization: Semi-logarithmic interpolation to regular 5 hPa intervals (1010-100 hPa)
<br><br>

In [1]:
# Modify the parameters in this cell according to your requirements

# ID of selected station
ID = 'COM00080222'

# Period of climatology
clim_start_year = 1991
clim_end_year = 2020
clim_hour = 12  # UTC hour of sonde release

# Path of the repository folder
path_repo = '/home/david/radiosonde_climatology_analysis/'

<br><br><br>
<br><br><br>

# 0. Preamble (functions and libs)

In [2]:
############
### folders' paths

docs_folder = f'{path_repo}/docs/'
station_folder = f'{path_repo}/data/{ID}/'
raw_folder = f'{station_folder}/raw/'
separated_folder = f'{station_folder}/separated/'
interpolated_folder = f'{station_folder}/interpolated/'

In [3]:
## functions and libs

############
### libs
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import requests
from requests.exceptions import HTTPError
import os
import zipfile
from concurrent.futures import ProcessPoolExecutor, as_completed
from functools import partial
from metpy import calc as mpcalc
from metpy.units import units


############
### basic functions to print and habdle txt files

def print_(string):
    string = f"{datetime.now().strftime('[%Y-%m-%d %H:%M:%S]')}: {string}"
    print(string)

def download_file(url, download_folder):
    file_name = url.split("/")[-1]
    file_path = os.path.join(download_folder, file_name)
    print_(f"Start to download: {file_name}")
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(file_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    print_(f"File downloaded: {file_path}")

def unzip_and_remove(zip_file_path, extract_to_folder):
    print_(f"Start to unzip: {zip_file_path}")
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to_folder)
    os.remove(zip_file_path)    
    print_(f"Unzipped and removed: {zip_file_path}")

def get_total_lines(filename):
    """Efficiently count total lines in file"""
    with open(filename, 'rb') as f:
        lines = 0
        buf_size = 1024 * 1024
        read_f = f.raw.read        
        buf = read_f(buf_size)
        while buf:
            lines += buf.count(b'\n')
            buf = read_f(buf_size)        
        return lines

def str_timediff(time_diff):
    total_seconds = time_diff.total_seconds()
    # Calculate hours, minutes, and seconds
    hours = int(total_seconds // 3600)
    minutes = int((total_seconds % 3600) // 60)
    seconds = int(total_seconds % 60)
    # Print the result
    return( f"Tiempo transcurrido: {hours} horas, {minutes} minutos, {seconds} segundos.")
    
def get_lines_starting_with_(file_path, prefix):
    line_numbers = []

    # Open the file in read mode
    with open(file_path, 'r') as file:
        # Iterate through each line
        for line_number, line in enumerate(file, start=1):
            # Check if the line starts with '#'
            if line.startswith(prefix):
                line_numbers.append(line_number)
    return line_numbers

def get_line_from_file(file_path, line_number):
    # Open the file in read mode
    with open(file_path, 'r') as file:
        # Iterate through each line
        for current_line_number, line in enumerate(file, start=1):
            # Check if the current line number matches the desired line number
            if current_line_number == line_number:
                return line.rstrip()  # Return the line without trailing whitespaces

    # If the desired line number is not found, return an empty string
    return ''

############
### Creating the station's folders
os.makedirs(station_folder, exist_ok = True)
os.makedirs(separated_folder, exist_ok = True)
os.makedirs(interpolated_folder, exist_ok = True)

############
### Format info of the data in the rawfile, based on IGRA2 documentation
df_HeaderRecordFormat = pd.read_csv(f'{docs_folder}/format_header_record.csv')
colspecs_header = []
header_vars = []
for idx in df_HeaderRecordFormat.index:
    header_vars += [df_HeaderRecordFormat.loc[idx, 'variable']] 
    idx_ini_i = df_HeaderRecordFormat.loc[idx, 'start_icol']
    idx_end_i = df_HeaderRecordFormat.loc[idx, 'end_icol']
    colspecs_header += [(idx_ini_i, idx_end_i)]

df_DataRecordFormat = pd.read_csv(f'{docs_folder}/format_data_record.csv')
colspecs_data = []
for idx in df_DataRecordFormat.index:
    idx_ini_i = df_DataRecordFormat.loc[idx, 'start_icol']
    idx_end_i = df_DataRecordFormat.loc[idx, 'end_icol']
    colspecs_data += [(idx_ini_i, idx_end_i)]

############
# functions for extract anf format each sonde data
def read_file_once_get_line_positions(rawfile):
    """Read file once to get byte positions of each line start"""
    line_positions = [0]  # First line starts at position 0
    
    with open(rawfile, 'rb') as f:
        while True:
            line = f.readline()
            if not line:
                break
            line_positions.append(f.tell())
    
    return line_positions[:-1]  # Remove the last position (EOF)

# to get the metadata of each sonde
def extract_headers_optimized(rawfile, lines_start_sondes, colspecs_header, header_vars):
    """Extract all headers in a single file pass"""
    line_positions = read_file_once_get_line_positions(rawfile)
    df_sondes_list = []
    
    with open(rawfile, 'r') as file:
        for i_sounding, start_line in enumerate(lines_start_sondes):
            # Seek directly to byte position instead of reading line by line
            file.seek(line_positions[start_line - 1])
            
            # Read just the header line
            header_line = file.readline()
            
            # Parse the header using StringIO for pandas
            from io import StringIO
            df_headeri = pd.read_fwf(
                StringIO(header_line),
                colspecs=colspecs_header,
                names=header_vars,
                nrows=1
            )
            df_headeri.index = [i_sounding]
            df_sondes_list.append(df_headeri)
    
    return pd.concat(df_sondes_list)

### function to extract and format each sondes
def process_single_sonde(args):
    """Process and format a single sonde - designed for parallel execution"""
    (rawfile, i_sounding, start_line, end_line, colspecs_data, 
     data_vars, separated_folder, ID, sonde_date) = args
    
    # Read the specific section of the file
    with open(rawfile, 'r') as file:
        df_sondei = pd.read_fwf(
            file,
            colspecs=colspecs_data,
            names=data_vars,
            skiprows=start_line - 1,
            nrows=end_line - start_line,
            dtype=str
        )
    
    # Convert string columns to numeric where needed
    numeric_cols = ['PRESS', 'TEMP', 'DPDP', 'RH', 'WSPD', 'WDIR']
    for col in numeric_cols:
        if col in df_sondei.columns:
            df_sondei[col] = pd.to_numeric(df_sondei[col], errors='coerce')
    
    # Sonde data formatting based on the IGRA2 documentation
    df_sondei.replace(-9999, np.nan, inplace=True)
    df_sondei.replace(-8888, np.nan, inplace=True)
    
    # Apply scaling factors
    if 'PRESS' in df_sondei.columns:
        df_sondei['PRESS'] /= 100
    if 'TEMP' in df_sondei.columns:
        df_sondei['TEMP'] /= 10
    if 'DPDP' in df_sondei.columns:
        df_sondei['DPDP'] /= 10
    if 'RH' in df_sondei.columns:
        df_sondei['RH'] /= 10
    if 'WSPD' in df_sondei.columns:
        df_sondei['WSPD'] /= 10
    
    # Calculate dew point temperature
    if 'TEMP' in df_sondei.columns and 'DPDP' in df_sondei.columns:
        df_sondei['DEW'] = df_sondei['TEMP'] - df_sondei['DPDP']
    
    # Calculate missing atmospheric variables using global functions
    if 'RH' in df_sondei.columns and 'DEW' in df_sondei.columns and 'TEMP' in df_sondei.columns:
        if df_sondei['RH'].count() == 0 and df_sondei['DEW'].count() != 0:
            df_sondei['RH'] = calculate_relative_humidity(df_sondei['TEMP'].to_numpy(), df_sondei['DEW'].to_numpy())
        if df_sondei['RH'].count() != 0 and df_sondei['DEW'].count() == 0:
            df_sondei['DEW'] = calculate_dew_point(df_sondei['TEMP'].to_numpy(), df_sondei['RH'].to_numpy())
    
    # Calculate wind components
    if 'WSPD' in df_sondei.columns and 'WDIR' in df_sondei.columns:
        df_sondei['U'] = -df_sondei['WSPD'] * np.sin(df_sondei['WDIR'] * np.pi / 180)
        df_sondei['V'] = -df_sondei['WSPD'] * np.cos(df_sondei['WDIR'] * np.pi / 180)
    
    # Save formatted data to CSV
    output_file = f'{separated_folder}/{ID}_{sonde_date.strftime("%Y%m%d%H%M")}.csv'
    df_sondei.to_csv(output_file, index=False)
    
    return output_file

### function to disaggregate and format all sondes in a parallel way
def process_sondes_optimized(rawfile, lines_start_sondes, colspecs_header, header_vars,
                           colspecs_data, df_DataRecordFormat, df_HeaderRecordFormat,
                           separated_folder, ID, max_workers=4):
    """Main optimized processing function"""
    
    # Step 1: Extract all headers efficiently
    print_("Extracting headers...")
    df_sondes = extract_headers_optimized(rawfile, lines_start_sondes, colspecs_header, header_vars)
    df_sondes = df_sondes[df_HeaderRecordFormat['variable'].to_list()]
    
    # Step 2: Prepare arguments for parallel processing
    print_("Preparing sonde processing tasks...")
    processing_args = []
    
    for i_sounding in range(len(lines_start_sondes)):
        # Extract date info
        date_i = datetime(
            int(df_sondes.loc[i_sounding, 'YEAR']),
            int(df_sondes.loc[i_sounding, 'MONTH']),
            int(df_sondes.loc[i_sounding, 'DAY']),
            int(df_sondes.loc[i_sounding, 'HOUR'])
        )

        # Exclude data outside the defined climatology period
        if (date_i.year < clim_start_year) or (date_i.year > clim_end_year) or (date_i.hour != clim_hour): continue
        
        # Calculate line ranges
        start_line = lines_start_sondes[i_sounding] + 1
        if i_sounding != len(lines_start_sondes) - 1:
            end_line = lines_start_sondes[i_sounding + 1]
        else:
            end_line = get_total_lines(rawfile)
        
        args = (rawfile, i_sounding, start_line, end_line, colspecs_data,
                df_DataRecordFormat['variable'].to_list(), separated_folder, ID, date_i)
        processing_args.append(args)
    
    # Step 3: Process sondes in parallel
    print_(f"Processing {len(processing_args)} sondes with {max_workers} workers...")
    
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Submit all tasks
        future_to_sonde = {
            executor.submit(process_single_sonde, args): i 
            for i, args in enumerate(processing_args)
        }
        
        # Process completed tasks
        completed = 0
        total_sondes = len(processing_args)
        last_printed_percentage = 0
        
        for future in as_completed(future_to_sonde):
            try:
                output_file = future.result()
                completed += 1
                
                # Calculate percentage and print only at 10% intervals
                current_percentage = (completed * 100) // total_sondes
                if current_percentage >= last_printed_percentage + 10 and current_percentage <= 100:
                    print_(f"Progress: {current_percentage}% ({completed}/{total_sondes} sondes)")
                    last_printed_percentage = current_percentage
                    
            except Exception as exc:
                sonde_idx = future_to_sonde[future]
                print_(f'Sonde {sonde_idx} generated an exception: {exc}')
    
    print_("Processing complete!")
    return df_sondes

### function for calculate some meteorological variables
def calculate_relative_humidity(temp, dew_point):
    temp = temp * units.degC
    dew_point = dew_point * units.degC
    rh = mpcalc.relative_humidity_from_dewpoint(temp, dew_point)
    return rh.magnitude * 100

def calculate_dew_point(temp, rh_percent):
    temp = temp * units.degC
    rh = (rh_percent / 100) * units.dimensionless
    dew_point = mpcalc.dewpoint_from_relative_humidity(temp, rh)
    return dew_point.magnitude

<br><br><br>
<br><br><br>

# 1. Read metadata info of selected station

In [4]:
### Open metadata file

df_DataFormat = pd.read_csv(f'{docs_folder}/format_stations_metadata.csv')
colspecs = []
selected_variables = df_DataFormat['col_name'].to_list()
for i_var in df_DataFormat.index:
    var_i = df_DataFormat.loc[i_var, 'col_name']
    if not(var_i in selected_variables): continue
    idx_ini_i = df_DataFormat.loc[i_var, 'start_icol']
    idx_end_i = df_DataFormat.loc[i_var, 'end_icol']
    colspecs += [(idx_ini_i, idx_end_i)]

metadatafile = f'{docs_folder}/igra2-station-list.txt'
with open(metadatafile) as file:
    df_stations = pd.read_fwf(metadatafile, colspecs=colspecs, header=None, names=selected_variables)

index_station = df_stations.loc[df_stations['id']==ID].index[0]

df_stations.loc[index_station]

id                COM00080222
lat                       4.7
lon                    -74.15
height                 2547.0
name          BOGOTA/ELDORADO
start_year               1960
end_year                 2025
n_sondes                26176
Name: 692, dtype: object

<br><br><br>
<br><br><br>

# 2. Download Raw Sonde Data from IGRA2 server

This section handles the automated retrieval of IGRA2 sonde data files. It consists of two main steps:

1. **Download**: Constructs the download URL based on the station ID and retrieves the corresponding `.zip` file containing the raw sonde data.

2. **Extraction**: Once downloaded, the `.zip` file is extracted to retrieve the raw text data.


In [5]:
### Download sounding data

url_base = 'https://www.ncei.noaa.gov/data/integrated-global-radiosonde-archive/'

print_('Start to download the data file from IGRA2 server')
url_data = f'{url_base}/access/data-por/{ID}-data.txt.zip'
os.makedirs(raw_folder, exist_ok = True)

try:
    download_file(url_data, raw_folder)
    print_('Download completed successfully')
    
except HTTPError as e:
    if e.response.status_code == 503:
        print_("IGRA2 server is offline")
        print_(f"ERROR: {str(e)}")
        print_(f"Server link: {url_base}")
        print_("Try again later...")
    else:
        raise  # Re-raise other HTTP errors
print('\n'*2)

[2025-07-28 11:04:15]: Start to download the data file from IGRA2 server
[2025-07-28 11:04:15]: Start to download: COM00080222-data.txt.zip
[2025-07-28 11:04:18]: File downloaded: /home/david/radiosonde_climatology_analysis//data/COM00080222//raw/COM00080222-data.txt.zip
[2025-07-28 11:04:18]: Download completed successfully





In [6]:
print_('Start to unzip data file')
try:
    unzip_and_remove(f'{raw_folder}/{ID}-data.txt.zip', raw_folder)
except:
    print_(f'Error in file: {raw_folder}/{ID}-data.txt.zip')

print_('Unzip completed successfully')

[2025-07-28 11:04:18]: Start to unzip data file
[2025-07-28 11:04:18]: Start to unzip: /home/david/radiosonde_climatology_analysis//data/COM00080222//raw//COM00080222-data.txt.zip
[2025-07-28 11:04:18]: Unzipped and removed: /home/david/radiosonde_climatology_analysis//data/COM00080222//raw//COM00080222-data.txt.zip
[2025-07-28 11:04:18]: Unzip completed successfully


<br><br><br>
<br><br><br>

# 3. Disaggregation and Formatting of Sonde Data

This section implements an optimized method to split and format individual IGRA2 sonde records from a large raw file. By combining disaggregation and formatting into a single parallel pipeline, the implementation eliminates performance bottlenecks found in traditional sequential workflows.

**Key Optimizations:**
- Parallel Processing: Uses multiple CPU cores to handle each sonde independently
- Single-Pass Logic: Extracts and formats data in a unified step

**Process Flow**:
1. Header Extraction: Reads all sonde headers in one pass
2. Parallel Execution: Assigns each sonde to a worker for processing
3. In-Line Formatting: Applies IGRA2 scaling, units, and calculations as it extracts

**Functions:**
- `extract_headers_optimized()`: Fast header scan using file positioning
- `process_single_sonde()`: Handles disaggregation and formatting for one sonde
- `process_sondes_optimized()`: Orchestrates full parallel pipeline

In [7]:

station = df_stations.loc[index_station, 'name']
rawfile = f'{raw_folder}/{ID}-data.txt'

print_(f'Starting to disaggregate the rawfile of {station} ({ID})')
print_('It could take several minutes...')

lines_start_sondes = get_lines_starting_with_(rawfile,'#')

df_sondes = process_sondes_optimized(
    rawfile = rawfile,
    lines_start_sondes = lines_start_sondes,
    colspecs_header = colspecs_header,
    header_vars = header_vars,
    colspecs_data = colspecs_data,
    df_DataRecordFormat = df_DataRecordFormat,
    df_HeaderRecordFormat = df_HeaderRecordFormat,
    separated_folder = separated_folder,
    ID = ID,
    max_workers = os.cpu_count() - 1  # Use the cores you want
)
df_sondes.to_csv(f'{separated_folder}/{ID}__availability.csv',index=False)

print_(f'Completed disaggregation of the rawfile of {station} ({ID})')

[2025-07-28 11:04:18]: Starting to disaggregate the rawfile of BOGOTA/ELDORADO (COM00080222)
[2025-07-28 11:04:18]: It could take several minutes...
[2025-07-28 11:04:18]: Extracting headers...
[2025-07-28 11:04:36]: Preparing sonde processing tasks...
[2025-07-28 11:04:36]: Processing 8831 sondes with 11 workers...
[2025-07-28 11:06:12]: Progress: 10% (884/8831 sondes)
[2025-07-28 11:08:11]: Progress: 20% (1767/8831 sondes)
[2025-07-28 11:10:15]: Progress: 30% (2650/8831 sondes)
[2025-07-28 11:12:28]: Progress: 40% (3533/8831 sondes)
[2025-07-28 11:14:56]: Progress: 50% (4416/8831 sondes)
[2025-07-28 11:17:49]: Progress: 60% (5299/8831 sondes)
[2025-07-28 11:20:57]: Progress: 70% (6182/8831 sondes)
[2025-07-28 11:24:23]: Progress: 80% (7065/8831 sondes)
[2025-07-28 11:28:13]: Progress: 90% (7948/8831 sondes)
[2025-07-28 11:32:29]: Progress: 100% (8831/8831 sondes)
[2025-07-28 11:32:29]: Processing complete!
[2025-07-28 11:32:29]: Completed disaggregation of the rawfile of BOGOTA/ELDOR

<br><br><br>
<br><br><br>

# 4. Pressure Level Interpolation
This section interpolates the processed sonde data onto standardized pressure levels to enable consistent analysis and comparison across different soundings. The interpolation uses a semi-logarithmic approach (linear interpolation in log-pressure space) which is the meteorological standard for atmospheric data. Each sounding is interpolated to regular 5 hPa intervals from 1010 hPa (surface) to 100 hPa (upper troposphere).

In [8]:
df_sondes = pd.read_csv(f'{separated_folder}/{ID}__availability.csv')
df_sondes = df_sondes.loc[(df_sondes['YEAR']>=clim_start_year)&(df_sondes['YEAR']<=clim_end_year)]
df_sondes

Unnamed: 0,HEADREC,ID,YEAR,MONTH,DAY,HOUR,RELTIME,NUMLEV,P_SRC,NP_SRC,LAT,LON
15185,#,COM00080222,1991,1,1,0,9999,38,usaf-ds3,,47000,-741500
15186,#,COM00080222,1991,1,1,12,9999,40,ncdc6322,,47000,-741500
15187,#,COM00080222,1991,1,2,0,9999,25,ncdc6322,,47000,-741500
15188,#,COM00080222,1991,1,2,12,9999,34,ncdc6322,,47000,-741500
15189,#,COM00080222,1991,1,3,0,9999,4,ncdc6322,,47000,-741500
...,...,...,...,...,...,...,...,...,...,...,...,...
24441,#,COM00080222,2020,12,29,12,1134,156,ncdc-gts,ncdc-gts,47000,-741500
24442,#,COM00080222,2020,12,29,18,1701,149,ncdc-gts,ncdc-gts,47000,-741500
24443,#,COM00080222,2020,12,30,12,1131,126,ncdc-gts,ncdc-gts,47000,-741500
24444,#,COM00080222,2020,12,30,18,1736,133,ncdc-gts,ncdc-gts,47000,-741500


In [9]:
# Create interpolation template with standard pressure levels
df_interpolated = pd.DataFrame(columns=['PRESS', 'GPH', 'TEMP', 'RH', 'DEW', 'U', 'V', 'WSPD', 'WDIR'])
press_inf = 1010  # Upper atmosphere limit (hPa)
press_sup = 100   # Surface pressure limit (hPa)
d_press = 5       # Pressure interval (hPa)
df_interpolated['PRESS'] = list(range(press_inf, press_sup - d_press, -d_press))

print_(f'Starting interpolation of {len(df_sondes)} soundings to standard pressure levels...')

# Progress tracking
total_soundings = len(df_sondes.index)
completed = 0
last_printed_percentage = 0

# Process each sounding
for i_sounding in df_sondes.index:
    
    # Copy blank dataframe for this sounding
    df_interpolated_i = df_interpolated.copy()
    
    # Extract date and create filename
    date_i = datetime(
        int(df_sondes.loc[i_sounding, 'YEAR']),
        int(df_sondes.loc[i_sounding, 'MONTH']),
        int(df_sondes.loc[i_sounding, 'DAY']),
        int(df_sondes.loc[i_sounding, 'HOUR'])
    )
    
    filename_i = f'{ID}_{date_i.strftime("%Y%m%d%H%M")}.csv'
    df_sounding_i = pd.read_csv(f'{separated_folder}/{filename_i}')
    
    df_sounding_i.replace(-9999, np.nan, inplace=True)

    if df_sounding_i['PRESS'].isna().all():
        df_sondes = df_sondes.drop(i_sounding)
        completed += 1
        continue
    
    # Find pressure range for interpolation
    max_pressure_i = df_sounding_i['PRESS'].max()
    min_pressure_i = df_sounding_i['PRESS'].min()
    
    maxp_idx = (df_interpolated_i['PRESS'] - max_pressure_i).abs().idxmin()
    minp_idx = (df_interpolated_i['PRESS'] - min_pressure_i).abs().idxmin()
    
    press_interpolated_i = df_interpolated_i['PRESS'].loc[maxp_idx:minp_idx].to_numpy()
    
    # Interpolate each variable
    for var_i in ['GPH', 'TEMP', 'RH', 'DEW', 'U', 'V']:
        press_i = df_sounding_i['PRESS'].to_numpy()
        var_array_i = df_sounding_i[var_i].to_numpy()
        
        # Remove NaN values
        nan_indices = np.isnan(var_array_i)
        press_i = press_i[~nan_indices]
        var_array_i = var_array_i[~nan_indices]
        
        if len(var_array_i) < 5: continue
        
        # Semi-logarithmic interpolation
        var_interpolated_i = np.interp(
            np.log(press_interpolated_i), 
            np.log(press_i[::-1]), 
            var_array_i[::-1]
        )
        
        df_interpolated_i.loc[maxp_idx:minp_idx, var_i] = var_interpolated_i
    
    # Calculate wind speed and direction from U and V components
    U = df_interpolated_i['U'].astype(float).to_numpy()
    V = df_interpolated_i['V'].astype(float).to_numpy()
    df_interpolated_i['WSPD'] = np.sqrt(U**2 + V**2) # Wind speed: WSPD = sqrt(U² + V²)
    wind_dir_rad = np.arctan2(U, V)
    df_interpolated_i['WDIR'] = (np.degrees(wind_dir_rad) + 360) % 360 # Wind direction: meteorological convention (0-360°)
    # Handle NaN cases
    nan_mask = np.isnan(U) | np.isnan(V)
    df_interpolated_i.loc[nan_mask, 'WSPD'] = np.nan
    df_interpolated_i.loc[nan_mask, 'WDIR'] = np.nan
    
    # Export interpolated sounding
    df_interpolated_i.to_csv(f'{interpolated_folder}/{filename_i}', index=False)
    
    # Update progress
    completed += 1
    current_percentage = (completed * 100) // total_soundings
    if current_percentage >= last_printed_percentage + 10 and current_percentage <= 100:
        print_(f'Progress: {current_percentage}% ({completed}/{total_soundings} soundings)')
        last_printed_percentage = current_percentage

df_sondes.to_csv(f'{interpolated_folder}/{ID}__availability.csv',index=False)

print_(f'Interpolation completed! Processed {completed} soundings.')

[2025-07-28 11:32:29]: Starting interpolation of 9261 soundings to standard pressure levels...
[2025-07-28 11:32:33]: Progress: 10% (927/9261 soundings)
[2025-07-28 11:32:37]: Progress: 20% (1853/9261 soundings)
[2025-07-28 11:32:41]: Progress: 30% (2779/9261 soundings)
[2025-07-28 11:32:44]: Progress: 40% (3705/9261 soundings)
[2025-07-28 11:32:48]: Progress: 50% (4631/9261 soundings)
[2025-07-28 11:32:52]: Progress: 60% (5557/9261 soundings)
[2025-07-28 11:32:56]: Progress: 70% (6483/9261 soundings)
[2025-07-28 11:32:59]: Progress: 80% (7409/9261 soundings)
[2025-07-28 11:33:03]: Progress: 90% (8335/9261 soundings)
[2025-07-28 11:33:07]: Progress: 100% (9261/9261 soundings)
[2025-07-28 11:33:07]: Interpolation completed! Processed 9261 soundings.
