In [None]:
# First, install required packages
!pip install pykrige geopandas geopy google-cloud-storage plotly ipywidgets

import pandas as pd
import numpy as np
from io import StringIO
from pykrige.ok import OrdinaryKriging
from google.cloud import storage
from geopy.distance import geodesic
import geopandas as gpd
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display, clear_output
import os

# Authenticate GCP (only once per session).
# NOTE(review): google.colab is presumably only importable on Google Colab, so
# this cell would fail in other Jupyter environments — confirm the notebook is
# meant to be Colab-only.
from google.colab import auth
auth.authenticate_user()


# Set up GCS Access and Load Data

In [None]:
# Define a function to get the CSV file from GCS
def get_csv_from_gcs(bucket_name, file_name):
    """Download a CSV object from Google Cloud Storage into a DataFrame.

    Args:
        bucket_name: Name of the GCS bucket that holds the object.
        file_name: Path of the object inside the bucket.

    Returns:
        A pandas DataFrame parsed from the object's text with ``pd.read_csv``.

    Raises:
        FileNotFoundError: If the object does not exist in the bucket.
    """
    storage_client = storage.Client()
    blob = storage_client.bucket(bucket_name).blob(file_name)
    if not blob.exists():
        # Name the missing object so the failure is actionable.
        raise FileNotFoundError(f"gs://{bucket_name}/{file_name} does not exist")
    return pd.read_csv(StringIO(blob.download_as_text()))

# Fetch the daily PM2.5 table from GCS and parse its date column to datetimes.
sjv_pm25 = get_csv_from_gcs("sjv_pm25", "sjv_pm25_daily_df.csv")
sjv_pm25 = sjv_pm25.assign(date_local=pd.to_datetime(sjv_pm25["date_local"]))


# Helper Functions

In [None]:
def create_grid(min_lat, max_lat, min_lon, max_lon, resolution=0.05):
    """Build a regular latitude/longitude grid over a bounding box.

    Grid coordinates are generated with ``np.arange``, so each axis starts at
    its minimum and advances by ``resolution``; the upper bounds are treated
    as a half-open limit and are generally not included.

    Args:
        min_lat: Lower latitude bound (first latitude in the grid).
        max_lat: Upper latitude bound (exclusive).
        min_lon: Lower longitude bound (first longitude in the grid).
        max_lon: Upper longitude bound (exclusive).
        resolution: Grid spacing, in the same units as the bounds. Must be
            positive.

    Returns:
        A list of ``(lat, lon)`` tuples, ordered latitude-major (all
        longitudes for the first latitude, then the next latitude, ...).

    Raises:
        ValueError: If ``resolution`` is not a positive number.
    """
    # `not resolution > 0` also rejects NaN, which would otherwise reach
    # np.arange and fail with a less helpful message.
    if not resolution > 0:
        raise ValueError(f"resolution must be positive, got {resolution!r}")
    latitudes = np.arange(min_lat, max_lat, resolution)
    longitudes = np.arange(min_lon, max_lon, resolution)
    return [(lat, lon) for lat in latitudes for lon in longitudes]

def is_within_distance(lon, lat, longitudes, latitudes, threshold_km=200):
    """Report whether a point lies within a distance of any reference point.

    Args:
        lon: Longitude of the point being tested.
        lat: Latitude of the point being tested.
        longitudes: Iterable of reference longitudes.
        latitudes: Iterable of reference latitudes, paired with ``longitudes``.
        threshold_km: Maximum geodesic distance, in kilometres, to count as
            "within distance".

    Returns:
        True if at least one reference point is no farther than
        ``threshold_km`` from ``(lat, lon)``; False otherwise, including when
        no reference points are given.
    """
    target = (lat, lon)
    # any() stops at the first reference point that is close enough.
    return any(
        geodesic(target, (ref_lat, ref_lon)).km <= threshold_km
        for ref_lon, ref_lat in zip(longitudes, latitudes)
    )


# Create Date Picker

In [None]:
# Output area that captures whatever the prediction prints or plots.
output = widgets.Output()

# Button that triggers the prediction for the selected date.
button = widgets.Button(description="Predict AQI Grid")

# Calendar input, pre-set to 2024-01-01.
date_picker = widgets.DatePicker(
    value=pd.to_datetime("2024-01-01"),
    description='Pick a Date',
    disabled=False,
)

def on_button_clicked(b):
    """Button callback: clear the output area and run the prediction.

    Args:
        b: The clicked button, passed in by ipywidgets; not used.
    """
    with output:
        clear_output()
        selected_date = date_picker.value
        if selected_date is None:
            # The picker's value is None when the field has been cleared;
            # calling .strftime on it would raise AttributeError.
            print("Please pick a date first.")
            return
        run_prediction(selected_date.strftime('%Y-%m-%d'))

# Register the click handler. It calls run_prediction, which is defined in a
# later cell, so that cell must have been run before the button is clicked.
button.on_click(on_button_clicked)

# Show the date picker, the button, and the output area.
display(date_picker, button, output)


# Run Kriging Prediction and Plot Map

In [None]:
def run_prediction(date_str):
    """Krige one day's station AQI onto a grid and plot it on a map.

    Rows of ``sjv_pm25`` whose ``date_local`` equals ``date_str`` are used as
    observations. A 0.05-spaced grid is laid over the stations' bounding box,
    grid points with no station within the default distance threshold of
    ``is_within_distance`` are dropped, and ordinary kriging predicts AQI at
    the remaining points.

    Args:
        date_str: Date to predict for, in a form comparable with the
            ``date_local`` column (the date picker passes ``YYYY-MM-DD``).

    Returns:
        None. Prints a message and returns early when the date has no rows;
        otherwise shows a Plotly map of station AQI with the predicted grid
        overlaid. If kriging fails, the error is printed and only the
        stations are plotted.
    """
    subset = sjv_pm25[sjv_pm25["date_local"] == date_str]
    if subset.empty:
        print("No data available for this date.")
        return

    station_lons = subset["longitude"].values
    station_lats = subset["latitude"].values

    min_lat, max_lat = subset["latitude"].min(), subset["latitude"].max()
    min_lon, max_lon = subset["longitude"].min(), subset["longitude"].max()
    grid_points = create_grid(min_lat, max_lat, min_lon, max_lon, resolution=0.05)

    # Keep only grid points that have at least one station nearby.
    nearby = [
        (lat, lon)
        for lat, lon in grid_points
        if is_within_distance(lon, lat, station_lons, station_lats)
    ]
    grid_lats = np.array([lat for lat, _ in nearby], dtype=float)
    grid_lons = np.array([lon for _, lon in nearby], dtype=float)
    predicted = np.array([], dtype=float)

    if nearby:
        try:
            # The model depends only on the station data, so fit it once and
            # predict every grid point in a single call instead of rebuilding
            # it per point.
            kriging_model = OrdinaryKriging(
                station_lons,
                station_lats,
                subset["aqi"].values,
                variogram_model="spherical",
                # NOTE(review): coordinates are passed as raw lon/lat, so the
                # range is presumably interpreted in degrees; 3500 looks very
                # large for that — confirm the intended units.
                variogram_parameters={"sill": 60, "range": 3500.0, "nugget": 5},
            )
            pred, _ = kriging_model.execute("points", grid_lons, grid_lats)
            predicted = np.asarray(pred, dtype=float)
        except Exception as exc:
            # Stay best-effort (still plot the stations) but report the
            # failure instead of silently swallowing it.
            print(f"Kriging failed; showing stations only: {exc}")
            grid_lats = np.array([], dtype=float)
            grid_lons = np.array([], dtype=float)
            predicted = np.array([], dtype=float)

    grid_df = pd.DataFrame({
        "latitude": grid_lats,
        "longitude": grid_lons,
        "predicted_aqi": predicted,
    })

    # Measured station values, drawn as large markers.
    fig = px.scatter_mapbox(
        subset, lat="latitude", lon="longitude", color="aqi",
        hover_name="site_number", zoom=6, height=600
    )

    fig.update_traces(marker=dict(size=18))
    fig.update_layout(mapbox_style="open-street-map")

    # Predicted grid, drawn as small semi-transparent markers.
    fig.add_trace(go.Scattermapbox(
        lat=grid_df["latitude"].tolist(),
        lon=grid_df["longitude"].tolist(),
        mode="markers",
        marker=dict(
            size=6,
            color=grid_df["predicted_aqi"],
            colorscale="Viridis",
            showscale=True,
            opacity=0.4
        ),
        name="Predicted AQI Grid"
    ))

    fig.show()
