In [1]:
import joblib
import json
import pandas as pd
import plotly.express as px
from shapely.geometry import shape, Point

ModuleNotFoundError: No module named 'plotly'

In [None]:
# Restore the persisted model and the two scalers from their joblib pickles.
# NOTE: joblib.load unpickles arbitrary objects - only load files you trust.
model = joblib.load('random_forest_model.pkl')
X_scaler, Y_scaler = (joblib.load(pkl_path) for pkl_path in ('X_scaler.pkl', 'Y_scaler.pkl'))

# Parse the ski-area GeoJSON and keep the feature named 'Crystal Mountain'.
with open('ski_areas.geojson') as geojson_file:
    ski_areas = json.load(geojson_file)
matching_sites = [
    feature
    for feature in ski_areas['features']
    if feature['properties']['name'] == 'Crystal Mountain'
]
crystal_mountain_area = matching_sites[0]  # IndexError if no feature matches

# Turn the GeoJSON geometry into a shapely object; the bare name renders it as cell output.
crystal_polygon = shape(crystal_mountain_area['geometry'])
crystal_polygon

In [None]:
# Copy results.parquet from the S3 bucket into the current working directory.
!aws s3 cp s3://aimees-snow-project/results.parquet .

In [None]:
# Load the results table.
results_df = pd.read_parquet('results.parquet')

# Blank out fsca values outside the accepted 0-1000 range *before* dropping
# missing rows. Doing it the other way round re-introduces NaN fsca values
# after the dropna, and those NaNs would then flow into the scaler and model.
# Series.where keeps values where the condition holds and writes NaN elsewhere;
# values that are already missing fail `between` and therefore stay missing.
# NOTE(review): the 0-1000 bound is taken from the original code - confirm the fsca scale.
fsca_in_range = results_df['fsca'].between(0, 1000)
results_df = (
    results_df
    .assign(fsca=results_df['fsca'].where(fsca_in_range))
    .dropna(subset=['fsca'])
)

# Derive calendar parts from the `time` column; later cells group on them.
timestamps = pd.to_datetime(results_df['time'])
results_df = results_df.assign(
    datetime=timestamps,
    day=timestamps.dt.day,
    month=timestamps.dt.month,
    year=timestamps.dt.year,
)
results_df.head()

In [None]:
# Feature columns handed to X_scaler, in this order.
# NOTE(review): presumably this must match the column order used when the scaler was fitted - confirm.
input_columns = [
    "red", "green", "blue", "coastal",
    "nir08", "swir16", "swir22",
    "fsca", "latitude", "longitude", "month",
]
x_input_data = results_df.loc[:, input_columns]
x_scaled = X_scaler.transform(x_input_data)

In [None]:
%%time
# Run the loaded model on the scaled feature matrix; %%time reports how long the cell took.
y_pred = model.predict(x_scaled)
# Bare expression: show the prediction array as the cell output.
y_pred

In [None]:
# Undo the target scaling. reshape(-1, 1) produces the (n_samples, 1) column
# that inverse_transform expects without building a Python list of lists.
y_scaled = Y_scaler.inverse_transform(y_pred.reshape(-1, 1))

# Store the predictions as a flat 1-D column rather than relying on pandas
# accepting an (n, 1) array for a single column.
results_df['snow_depth_prediction'] = y_scaled.ravel()
results_df.shape

### Only represent one pixel per day

Reduce overlapping pixels: group by date (year, month, day), latitude and longitude, and keep the highest predicted snow depth for each pixel.

In [None]:
# One row per (date, pixel): keep the largest predicted depth among duplicates,
# then move latitude/longitude out of the index so only (year, month, day) remain as index levels.
pixel_day_keys = ['year', 'month', 'day', 'latitude', 'longitude']
max_by_day = (
    results_df
    .groupby(pixel_day_keys)[['snow_depth_prediction']]
    .max()
    .reset_index(level=['latitude', 'longitude'])
)
max_by_day.head()

### Check each day has pixels that cover the entire polygon

In [None]:
def check_daily_coverage(rows, target_polygon):
    """Return the percentage of points in `rows` that lie within `target_polygon`.

    Note that this measures how many of the day's points fall inside the
    polygon, not how much of the polygon's area the points cover.

    Args:
        rows: Table-like object whose 'longitude' and 'latitude' entries are
            equal-length iterables of coordinates (e.g. a DataFrame).
        target_polygon: Geometry handed to ``Point.within``.

    Returns:
        A float between 0 and 100. Returns 0.0 when `rows` contains no points
        (previously this raised ZeroDivisionError).
    """
    # Points are built as (x=longitude, y=latitude).
    inside_flags = [
        Point(lon, lat).within(target_polygon)
        for lon, lat in zip(rows['longitude'], rows['latitude'])
    ]

    # A day without any points has no coverage at all.
    if not inside_flags:
        return 0.0

    return sum(inside_flags) / len(inside_flags) * 100

In [None]:
%%time
for daily_index in max_by_day.index.unique():
    rows = max_by_day.loc[daily_index]
    coverage_percentage = check_daily_coverage(rows, crystal_polygon)
    if coverage_percentage < 99:
        print(f"Day {daily_index} has {coverage_percentage}% coverage")
        # remove from group if <99% coverage

### Calculate the snow volume

In [None]:
# Each row of max_by_day is one pixel on one day with a predicted snow depth.
# Multiplying that depth by the ground area of a pixel gives a per-pixel snow
# volume, which the cells below aggregate by month and by season.
area_of_each_pixel = 30 * 30  # presumably a 30 x 30 pixel footprint in metres - TODO confirm units
max_by_day['snow_volume_prediction'] = max_by_day['snow_depth_prediction'].mul(area_of_each_pixel)
max_by_day['snow_volume_prediction']

In [None]:
# Bare expression: render max_by_day as the cell output for inspection.
max_by_day

In [None]:
# year/month/day are index levels of max_by_day, not columns, so assemble the
# dates from the index. (The original referenced an undefined `df`.)
date_parts = max_by_day.index.to_frame(index=False)[['year', 'month', 'day']]
# .to_numpy() assigns positionally; date_parts has a fresh RangeIndex that
# would not align with max_by_day's (year, month, day) index.
max_by_day['date'] = pd.to_datetime(date_parts).to_numpy()

# Plotly needs plain columns: flatten the index and add a sortable text label
# for the animation slider.
plot_df = max_by_day.reset_index().sort_values('date')
plot_df['date_label'] = plot_df['date'].dt.strftime('%Y-%m-%d')

# Animated scatter map, one frame per day.
fig = px.scatter_mapbox(
    plot_df,
    lat="latitude",
    lon="longitude",
    color="snow_volume_prediction",  # was "value", which is not a column of this frame
    animation_frame="date_label",  # this creates the time slider
    hover_data=["snow_depth_prediction", "snow_volume_prediction"],
    color_continuous_scale="Viridis",
    size_max=15,
    zoom=3,
    mapbox_style="open-street-map"
)

fig.update_layout(
    title="Time-series Map of snow_volume_prediction",
    height=600
)

fig.show()

In [2]:
!pip3 install plotly

Defaulting to user installation because normal site-packages is not writeable


### Calculate the monthly average across the daily max volume

In [None]:
# Mean of snow_volume_prediction over all rows that fall in each (year, month).
avg_by_month = max_by_day.groupby(['year', 'month'])[['snow_volume_prediction']].mean()
avg_by_month

### Calculate the sum for each season

In [None]:
def get_snow_season(month, year):
    """Map a calendar month and year to a 'YYYY-YYYY' snow-season label.

    November and December belong to the season that starts in `year`;
    January through April belong to the season that started in the previous
    year. May through October are outside the snow season and yield None.
    """
    if month >= 11:
        first_year = int(year)  # season opens in this calendar year
    elif month <= 4:
        first_year = int(year) - 1  # season opened in the previous calendar year
    else:
        return None  # May-Oct: not in a snow season

    return f"{first_year}-{first_year + 1}"

# Bring year/month back as ordinary columns so each row can be labelled.
avg_by_month = avg_by_month.reset_index()
avg_by_month['snow_season'] = avg_by_month.apply(
    lambda monthly: get_snow_season(monthly['month'], monthly['year']),
    axis=1,
)

# Keep only months that belong to a snow season, then group them per season.
in_season = avg_by_month['snow_season'].notna()
snow_season_data = avg_by_month[in_season]
grouped = snow_season_data.groupby('snow_season')

In [None]:
def _is_complete_season(season_rows):
    """True when the season has all six monthly rows (November through April)."""
    return len(season_rows) == 6


# Drop seasons with missing months, then total the monthly means per season.
filter_incomplete_seasons = grouped.filter(_is_complete_season, dropna=True)
sum_by_season = (
    filter_incomplete_seasons
    .groupby('snow_season')[['snow_volume_prediction']]
    .sum()
)
sum_by_season

In [None]:
# Line plot of the per-season totals with one tick per season.
# Only the Axes API is used: the original called `plt`, which this notebook
# never imports, so the cell raised NameError. In Jupyter the figure is
# rendered automatically when the cell finishes.
ax = sum_by_season.plot(figsize=(12, 6))
ax.set_xticks(range(len(sum_by_season.index)))
ax.set_xticklabels(sum_by_season.index, rotation=45)  # rotate so the season labels do not overlap
ax.set(
    title="Predicted snow volume per snow season",
    xlabel="Snow season",
    ylabel="Sum of monthly mean snow_volume_prediction",
);