# SWEDataset Class Example

This notebook demonstrates how to use the SWEDataset class for managing Snow Water Equivalent (SWE) data. The SWEDataset class provides an object-oriented interface to the data preparation and gap filling functionality.

In [None]:
# Import required packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import xarray as xr
import os

# Import snowdroughtindex package
from snowdroughtindex.core.dataset import SWEDataset
from snowdroughtindex.utils import visualization

## 1. Loading SWE Data

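First, let's load some SWE data using the SWEDataset class. If the data file is not found, a synthetic sample dataset is generated instead so that the remaining cells can still be run.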

In [None]:
# Load SWE data from disk when the file is available; otherwise build a
# synthetic demonstration dataset so the rest of the notebook can still run.
data_file = '../data/swe_data.nc'  # Update with your actual data file path

if os.path.exists(data_file):
    # Only construct the empty dataset when there is a file to load into it;
    # the fallback branch below builds its own instance from sample data.
    swe_dataset = SWEDataset()
    swe_dataset.load_from_file(data_file)
    print(f"Loaded data from {data_file}")
else:
    print(f"File {data_file} not found. Loading sample data...")

    # Daily timestamps for the synthetic record and five station labels
    dates = pd.date_range(start='2010-01-01', end='2020-12-31', freq='D')
    stations = [f'station_{i}' for i in range(1, 6)]

    np.random.seed(42)  # For reproducibility

    # The seasonal cycle is the same for every station, so compute it once
    # instead of once per loop iteration. 100 * sin(...) + 100 always lies in
    # [0, 200], so the cycle itself never needs clamping.
    days = np.arange(len(dates))
    seasonal = 100 * np.sin(2 * np.pi * days / 365.25 - np.pi/2) + 100

    # One column per station: shared seasonal cycle plus independent noise.
    # Noise is drawn station by station, in the same order as the station list.
    data = {}
    for station in stations:
        noise = np.random.normal(0, 10, len(dates))
        values = seasonal + noise
        values[values < 0] = 0  # No negative SWE values
        data[station] = values

    df = pd.DataFrame(data, index=dates)

    # Random station metadata (drawn after the SWE values, from the same seed)
    station_info = pd.DataFrame({
        'station_id': stations,
        'lat': np.random.uniform(40, 45, len(stations)),
        'lon': np.random.uniform(-120, -115, len(stations)),
        'elevation': np.random.uniform(1500, 3000, len(stations))
    })

    # Create SWEDataset with sample data
    swe_dataset = SWEDataset(df, station_info)
    print("Sample data created successfully.")

## 2. Exploring the Data

Let's explore the SWE data.

In [None]:
# Summarise the dataset through its printed representation
print(swe_dataset)

# Preview the leading rows of the SWE table
print("\nFirst few rows of the data:")
swe_preview = swe_dataset.data.head()
display(swe_preview)

# Station metadata is optional, so only show it when it is present
if swe_dataset.stations is None:
    pass
else:
    print("\nStation information:")
    display(swe_dataset.stations)

## 3. Visualizing the Data

Let's visualize the SWE data using the visualization module.

In [None]:
# Hand the SWE table to the package's time-series plotter and render the result
swe_frame = swe_dataset.data
fig = visualization.plot_swe_timeseries(swe_frame)
plt.show()

## 4. Creating Artificial Gaps

Let's create artificial gaps in the data to demonstrate the gap filling functionality.

In [None]:
# Introduce the gaps on a copy rather than on swe_dataset.data itself
data_with_gaps = swe_dataset.data.copy()

# For every station column, blank out the entries whose uniform random draw
# falls below 0.2 (about 20% of the values)
np.random.seed(42)  # For reproducibility
n_records = len(data_with_gaps)
for column_name in data_with_gaps.columns:
    is_gap = np.random.random(n_records) < 0.2
    data_with_gaps.loc[is_gap, column_name] = np.nan

# Wrap the gappy table in a new SWEDataset, reusing the original station info
swe_dataset_with_gaps = SWEDataset(data_with_gaps, swe_dataset.stations)

# Show the series again, now with the artificial gaps
fig = visualization.plot_swe_timeseries(swe_dataset_with_gaps.data)
plt.title("SWE Data with Artificial Gaps")
plt.show()

## 5. Gap Filling

Now, let's fill the gaps in the data using the gap filling functionality.

In [None]:
# Options forwarded unchanged to SWEDataset.gap_fill
gap_fill_options = dict(
    window_days=15,
    min_obs_corr=5,
    min_obs_cdf=5,
    min_corr=0.5,
)

# Gap filling is attempted on a best-effort basis: any failure is reported
# instead of stopping the notebook.
try:
    swe_dataset_filled = swe_dataset_with_gaps.gap_fill(**gap_fill_options)

    # Show the series after gap filling
    fig = visualization.plot_swe_timeseries(swe_dataset_filled.data)
    plt.title("Gap-Filled SWE Data")
    plt.show()

    # Compare missing-value counts before and after filling
    total_gaps = data_with_gaps.isna().sum().sum()
    remaining_gaps = swe_dataset_filled.data.isna().sum().sum()
    filled_gaps = total_gaps - remaining_gaps
    if total_gaps > 0:
        percent_filled = (filled_gaps / total_gaps) * 100
    else:
        # Nothing was missing to begin with; avoid dividing by zero
        percent_filled = 0

    print(f"Total gaps: {total_gaps}")
    print(f"Filled gaps: {filled_gaps}")
    print(f"Percentage of gaps filled: {percent_filled:.2f}%")
except Exception as exc:
    print(f"Error during gap filling: {exc}")
    print("Note: Gap filling may not work with the sample data due to insufficient correlations between stations.")

## 6. Evaluating Gap Filling Performance

Let's evaluate the performance of the gap filling algorithm using artificial gaps.

In [None]:
# Options forwarded unchanged to SWEDataset.evaluate_gap_filling
evaluation_options = dict(
    iterations=2,
    artificial_gap_perc=10,
    window_days=15,
    min_obs_corr=5,
    min_obs_cdf=5,
    min_corr=0.5,
    min_obs_KGE=5,
    plot=True,
)

# The evaluation is attempted on a best-effort basis: any failure is reported
# instead of stopping the notebook.
try:
    # The call returns a pair: the evaluation results and a figure
    evaluation, fig = swe_dataset.evaluate_gap_filling(**evaluation_options)
    plt.show()

    # Draw the evaluation results with the dataset's own plotting helper
    fig = swe_dataset.plot_gap_filling_evaluation(evaluation)
    plt.show()
except Exception as exc:
    print(f"Error during gap filling evaluation: {exc}")
    print("Note: Gap filling evaluation may not work with the sample data due to insufficient correlations between stations.")

## 7. Extracting Monthly Data

Let's extract the data for the first day of a given month (January in this example).

In [None]:
# Pull out the January subset (month=1) and let the method plot it;
# any failure is reported instead of stopping the notebook.
month_number = 1
try:
    january_data = swe_dataset.extract_monthly_data(month=month_number, plot=True)

    # Preview the leading rows of the extracted subset
    print("\nJanuary data:")
    display(january_data.data.head())
except Exception as exc:
    print(f"Error during monthly data extraction: {exc}")

## 8. Calculating Daily Mean SWE

Let's calculate the daily mean SWE across all stations.

In [None]:
# Calculate daily mean SWE
daily_mean = swe_dataset.calculate_daily_mean()

# Display the first few rows
print("Daily mean SWE:")
display(daily_mean.head())

# Plot daily mean SWE on an explicit Figure/Axes pair rather than through the
# implicit pyplot "current figure", so the plot does not depend on whatever
# figure state earlier cells left behind.
# NOTE(review): the result is indexed by 'date' and 'mean_SWE' below, so it is
# assumed to expose both columns; confirm against calculate_daily_mean.
mean_fig, mean_ax = plt.subplots(figsize=(12, 6))
mean_ax.plot(daily_mean['date'], daily_mean['mean_SWE'])
mean_ax.set_title('Daily Mean SWE Across All Stations')
mean_ax.set_xlabel('Date')
mean_ax.set_ylabel('Mean SWE (mm)')
mean_ax.grid(True, alpha=0.3)
plt.show()

## 9. Saving Data

Let's save the data to a file.

In [None]:
# Destination of the exported CSV
output_file = '../data/processed/swe_data_processed.csv'

# Make sure the parent folder is there; exist_ok avoids an error if it already is
output_dir = os.path.dirname(output_file)
os.makedirs(output_dir, exist_ok=True)

# Write the dataset in CSV format and confirm where it went
swe_dataset.save(output_file, format='csv')
print(f"Data saved to {output_file}")

## 10. Summary

In this notebook, we've demonstrated how to use the SWEDataset class for managing Snow Water Equivalent (SWE) data. We've covered:

1. Loading SWE data
2. Exploring the data
3. Visualizing the data
4. Creating artificial gaps
5. Gap filling
6. Evaluating gap filling performance
7. Extracting monthly data
8. Calculating daily mean SWE
9. Saving data

The SWEDataset class provides a convenient object-oriented interface to the data preparation and gap filling functionality, making it easier to work with SWE data.