In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gudhi as gd

from datetime import datetime
import matplotlib.dates as mdates

In [3]:
# Snowfall CSV inputs, one per month tag, listed in the order they are loaded
file_paths = [f"snow_{month}.csv" for month in ("oct", "nov", "dec", "jan")]

# Function to process each file
def process_file(file_path):
    """Read one wide snowfall CSV and return it in long format.

    The first four lines of the file are skipped and the following line is
    used as the header. Every column that is not one of the seven station
    descriptor columns is unpivoted into a (Date, Snowfall) pair.

    Parameters
    ----------
    file_path : path or file-like accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` (the former column header), ``Station`` (a label of
        the form ``"<Station Name> (<County>)"``) and ``Snowfall`` (the cell
        value, left exactly as parsed by ``read_csv``).
    """
    station_columns = [
        "GHCN ID",
        "Station Name",
        "County",
        "State",
        "Elevation",
        "Latitude",
        "Longitude",
    ]
    output_columns = ["Date", "Station", "Snowfall"]

    # Skip the four leading lines that precede the header row
    wide = pd.read_csv(file_path, skiprows=4)

    # Wide -> long: one row per (station, remaining column) combination
    long_form = wide.melt(
        id_vars=station_columns,
        var_name="Date",
        value_name="Snowfall",
    )

    # Combine station name and county into a single label column
    county_suffix = " (" + long_form["County"] + ")"
    long_form["Station"] = long_form["Station Name"] + county_suffix

    return long_form[output_columns]

# Process every input file and stack the long-format results into one table
data_long = pd.concat([process_file(path) for path in file_paths], ignore_index=True)

# Pivot so that each station label becomes a column and each date label a row.
# NOTE: DataFrame.pivot raises ValueError if the same (Date, Station) pair
# appears more than once across the input files.
data_pivot = data_long.pivot(index="Date", columns="Station", values="Snowfall")

# Clean up the index and column names (optional)
data_pivot.index.name = "Date"
data_pivot.columns.name = None

# Handle flag values: "M" (presumably "missing" - confirm against the data
# documentation) becomes NaN and "T" (presumably "trace") becomes 0.01.
# Columns containing these flags are read as strings, so after substituting
# the flags every column is converted to a numeric dtype; without this step the
# table would still hold text such as "1.5" and numeric operations would fail.
# errors="coerce" turns any other non-numeric entry into NaN instead of raising.
# The result is reassigned (no inplace=True) so re-running the cell is safe.
data_pivot = data_pivot.replace({"M": np.nan, "T": 0.01}).apply(
    pd.to_numeric, errors="coerce"
)



In [None]:
def aggregate_stations(data_file, county_areas):
    """Placeholder: not implemented yet.

    Both arguments are currently ignored and the function returns ``None``.

    TODO: implement the aggregation and document the expected inputs.
    """
    return None
