# Combined Data Extraction Workflow

This notebook creates a single merged NetCDF file that combines the precipitation and snow water equivalent (SWE) data extracted as 2D spatial windows around each elevation point.

## Workflow Steps:

1. **File Combination** → Combined NetCDF files
2. **Spatial Window Extraction** → NetCDF spatial windows
3. **Create Merged 2D NetCDF** → Single file with combined data arrays

**Output**: Single merged NetCDF file with 2D precipitation and SWE arrays

In [None]:
import sys
import os
from pathlib import Path
import numpy as np
import xarray as xr
import logging

# Add project root to path.
# The root is resolved as two levels above the kernel's working directory, so
# the working directory must sit two levels below the project root.
project_root = Path.cwd().parent.parent
# Guard the insertion so that re-running this cell does not keep appending
# duplicate entries to sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Import required classes (these imports rely on the path entry added above,
# which is why they cannot be moved above it)
from combine_casr_swe_files import CaSRFileCombiner
from extract_elevation_data_optimized import OptimizedElevationDataExtractor

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

print("✓ Modules imported")
print(f"Project root: {project_root}")

## Configuration

In [None]:
# Configuration: input/output locations (all below project_root) plus the
# window size and time sampling used when extracting spatial windows.
config = {
    'casr_input_dir': project_root / 'data' / 'input_data' / 'CaSR_SWE',
    'elevation_dir': project_root / 'data' / 'input_data' / 'Elevation',
    'combined_casr_dir': project_root / 'data' / 'output_data' / 'combined_casr',
    'elevation_windows_dir': project_root / 'data' / 'output_data' / 'elevation',
    'merged_output_dir': project_root / 'data' / 'output_data' / 'merged_2d',
    'window_size': 3,
    'time_sampling': 'monthly'
}

# Create output directories. All three output locations are created up front
# (not only the final one) so later steps never depend on another component
# creating them; exist_ok=True makes this safe to re-run.
for output_dir_key in ('combined_casr_dir', 'elevation_windows_dir', 'merged_output_dir'):
    config[output_dir_key].mkdir(parents=True, exist_ok=True)

print("Configuration set")
print(f"Output directory: {config['merged_output_dir']}")

## Step 1: Combine CaSR Files

In [None]:
print("Step 1: Combining CaSR files...")

# The combiner takes its directories as plain strings, so the configured
# Path objects are converted before being handed over.
combiner_dirs = {
    'input_dir': str(config['casr_input_dir']),
    'output_dir': str(config['combined_casr_dir']),
}
combiner = CaSRFileCombiner(**combiner_dirs)

# Temporal combination only; spatial combination is switched off.
combiner.combine_by_variable(combine_temporal=True, combine_spatial=False)

print("✓ Files combined")

## Step 2: Extract Spatial Windows

In [None]:
print("Step 2: Extracting spatial windows...")

# Directories are passed to the extractor as strings.
extractor_dirs = {
    'elevation_dir': str(config['elevation_dir']),
    'combined_casr_dir': str(config['combined_casr_dir']),
    'output_dir': str(config['elevation_windows_dir']),
}
extractor = OptimizedElevationDataExtractor(**extractor_dirs)

# Elevation data is loaded first, then the temporal files are processed
# using the sampling and window size from the configuration cell.
extractor.load_elevation_data()
extraction_options = {
    'file_types': ['temporal'],
    'time_sampling': config['time_sampling'],
    'window_size': config['window_size'],
    'output_format': 'netcdf',
}
results = extractor.process_all_files(**extraction_options)

# Persist the extracted windows as NetCDF.
extractor.save_results(results, format='netcdf')

print("✓ Spatial windows extracted")

## Step 3: Create Merged 2D NetCDF File

In [None]:
print("Step 3: Creating merged 2D NetCDF file...")

# Variables in the window files that are excluded when looking for the
# extracted field (window coordinates and elevation statistics).
AUXILIARY_VARS = frozenset({
    'lon_windows', 'lat_windows', 'original_lon', 'original_lat',
    'window_center_lon', 'window_center_lat', 'elevation_mean',
    'elevation_min', 'elevation_max', 'elevation_median',
})


def _first_field_var(ds):
    """Return the name of the first data variable of ``ds`` that is not in
    ``AUXILIARY_VARS``, or ``None`` when there is no such variable."""
    return next((name for name in ds.data_vars if name not in AUXILIARY_VARS), None)


# Find the extracted NetCDF files. The listing is sorted so that the choice is
# deterministic when several files match the same pattern (the last match in
# name order wins); unsorted glob order depends on the file system.
elevation_nc_files = sorted(config['elevation_windows_dir'].glob("*.nc"))
precip_file = None
swe_file = None

for nc_path in elevation_nc_files:
    if "A_PR24_SFC" in nc_path.name:
        precip_file = nc_path
    elif "P_SWE_LAND" in nc_path.name:
        swe_file = nc_path

if not precip_file or not swe_file:
    raise FileNotFoundError("Could not find both precipitation and SWE NetCDF files")

print(f"Loading precipitation: {precip_file.name}")
print(f"Loading SWE: {swe_file.name}")

output_file = config['merged_output_dir'] / 'merged_casr_elevation_2d.nc'

# Context managers close both source files even if merging or writing fails.
# The write has to happen inside the block: the merged dataset references
# variables of the still-open source datasets.
with xr.open_dataset(precip_file) as precip_ds, xr.open_dataset(swe_file) as swe_ds:
    # Get data variables (exclude coordinate variables)
    precip_var = _first_field_var(precip_ds)
    swe_var = _first_field_var(swe_ds)

    print(f"Precipitation variable: {precip_var}")
    print(f"SWE variable: {swe_var}")

    # Dimension lengths come from Dataset.sizes; mapping-style access on
    # Dataset.dims is deprecated in recent xarray releases.
    n_points = precip_ds.sizes['elevation_points']
    n_times = precip_ds.sizes.get('time', 1)
    window_size = precip_ds.sizes['window_lat']
    # Take the longitude extent from its own dimension instead of assuming a
    # square window; fall back to the latitude extent if it is absent.
    window_size_lon = precip_ds.sizes.get('window_lon', window_size)

    print(f"Creating merged dataset: {n_points} points, {n_times} times, {window_size}x{window_size_lon} windows")

    # Create coordinate arrays (plain integer indices along each dimension)
    coords = {
        'elevation_points': np.arange(n_points),
        'window_lat': np.arange(window_size),
        'window_lon': np.arange(window_size_lon)
    }

    if 'time' in precip_ds.sizes:
        coords['time'] = precip_ds.time

    # Location/elevation variables are copied from the precipitation file.
    data_vars = {
        name: precip_ds[name]
        for name in ('lon_windows', 'lat_windows', 'original_lon',
                     'original_lat', 'elevation_mean')
    }

    # Add precipitation and SWE data under fixed names; a field is skipped
    # when its file contains no non-auxiliary variable.
    if precip_var:
        data_vars['precipitation'] = precip_ds[precip_var]
    if swe_var:
        data_vars['swe'] = swe_ds[swe_var]

    # Create merged dataset
    merged_ds = xr.Dataset(data_vars, coords=coords)

    # Add metadata
    merged_ds.attrs['title'] = 'Merged CaSR precipitation and SWE data at elevation points'
    merged_ds.attrs['description'] = 'Combined 2D arrays of precipitation and snow water equivalent'
    merged_ds.attrs['window_size'] = f'{window_size}x{window_size_lon}'
    merged_ds.attrs['n_elevation_points'] = n_points
    merged_ds.attrs['source_files'] = [precip_file.name, swe_file.name]

    # Save merged NetCDF file
    merged_ds.to_netcdf(output_file, engine='netcdf4')

    print(f"✓ Merged NetCDF saved: {output_file}")
    print(f"Dataset shape: {dict(merged_ds.sizes)}")
    print(f"Variables: {list(merged_ds.data_vars.keys())}")

    merged_ds.close()

## Results

In [None]:
print("=" * 50)
print("WORKFLOW COMPLETED")
print("=" * 50)

# Recomputed here so this cell only needs `config`, not variables from Step 3.
output_file = config['merged_output_dir'] / 'merged_casr_elevation_2d.nc'

if output_file.exists():
    print(f"✓ Merged NetCDF file created: {output_file.name}")
    print(f"  File size: {output_file.stat().st_size / (1024*1024):.1f} MB")

    # Quick preview of the merged file
    with xr.open_dataset(output_file) as ds:
        # Dataset.sizes is used for the dimension lengths; reading lengths
        # through Dataset.dims is deprecated in recent xarray releases.
        print(f"  Dimensions: {dict(ds.sizes)}")
        print(f"  Variables: {list(ds.data_vars.keys())}")
        print(f"  Coordinates: {list(ds.coords.keys())}")

        # Same shape/range summary for each merged field that is present.
        for var_name, label in (('precipitation', 'Precipitation'), ('swe', 'SWE')):
            if var_name in ds.data_vars:
                field = ds[var_name]
                print(f"  {label} shape: {field.shape}")
                print(f"  {label} range: {float(field.min().values):.3f} - {float(field.max().values):.3f}")

    print("\nThe merged 2D NetCDF file is ready for analysis!")
else:
    print("✗ Merged NetCDF file not found")