Import Packages

In [None]:
!pip install uv
!uv pip install  -r requirements.txt 

In [None]:
import snowflake
from snowflake.snowpark.context import get_active_session
session = get_active_session()

import warnings
warnings.filterwarnings("ignore")

# Data manipulation and analysis
import numpy as np
import pandas as pd

# Multi-dimensional arrays and datasets (e.g., NetCDF, Zarr)
import xarray as xr

from scipy.spatial import cKDTree

# Planetary Computer tools for STAC API access and authentication
import pystac_client
import planetary_computer as pc

from datetime import date
from tqdm import tqdm
import os

Load Data

In [None]:
# Read the table stored in landsat_raw.parquet (path is relative to the
# notebook's working directory) into a pandas DataFrame.
path = "landsat_raw.parquet"
landsat_raw = pd.read_parquet(path)

In [None]:
# Render the loaded table in the cell output for a visual check.
display(landsat_raw)

# Define Indices

In [None]:
# List the column names available in the loaded table.
landsat_raw.columns

In [None]:
import numpy as np
import pandas as pd

def median_indices_calculation(df: pd.DataFrame) -> pd.DataFrame:
    """Add per-band median columns and spectral-index columns to a copy of *df*.

    For each of the band columns ``blue``, ``green``, ``red``, ``nir``,
    ``swir16`` and ``swir22`` that is present in *df*, a ``med_<band>`` column
    is added. Each cell is reduced as follows:

    * ``None`` or a scalar missing value becomes NaN;
    * any other scalar is converted to ``float``;
    * an array-like has its zeros and NaNs removed and the median of the
      remaining values is taken (NaN when nothing remains).

    Five index columns are then derived from the medians: ``med_NDMI``,
    ``med_MNDWI``, ``med_NDVI``, ``med_NDTI`` and ``med_EVI``. When a band
    needed by an index is absent from *df*, that index is filled with NaN
    rather than raising ``KeyError``.

    Args:
        df: Table whose band columns hold a scalar or an array-like per row.

    Returns:
        A new DataFrame with the extra columns; *df* itself is not reassigned
        or extended.
    """
    df_out = df.copy()
    band_cols = ['blue', 'green', 'red', 'nir', 'swir16', 'swir22']

    def clean_median(val):
        """Collapse one cell (None, scalar, or array-like) to a float."""
        if val is None:
            return np.nan

        if np.isscalar(val):
            # NOTE(review): a scalar 0 is kept as 0.0, whereas zeros inside an
            # array-like are dropped below -- confirm this asymmetry is wanted.
            return np.nan if pd.isna(val) else float(val)

        arr = np.asarray(val, dtype=float)
        # Drop zeros (filtered by the original code, presumably as no-data)
        # and NaNs, so a single invalid value cannot turn the median into NaN.
        arr = arr[(arr != 0) & ~np.isnan(arr)]
        return float(np.median(arr)) if arr.size else np.nan

    def band_median(name):
        """Return the ``med_<name>`` column, or an all-NaN stand-in if absent."""
        key = f"med_{name}"
        if key in df_out.columns:
            return df_out[key]
        # Same index as df_out so the arithmetic below aligns row by row.
        return pd.Series(np.nan, index=df_out.index, dtype=float)

    # medians
    for col in band_cols:
        if col in df_out.columns:
            df_out[f"med_{col}"] = df_out[col].apply(clean_median)
            print(f"Complete median calculation for med_{col}!")

    # indices
    print("Start calculating indices.")
    eps = 1e-10  # keeps the denominators from being exactly zero
    b = band_median('blue')
    g = band_median('green')
    r = band_median('red')
    n = band_median('nir')
    s16 = band_median('swir16')

    df_out['med_NDMI'] = (n - s16) / (n + s16 + eps)
    df_out['med_MNDWI'] = (g - s16) / (g + s16 + eps)
    df_out['med_NDVI'] = (n - r) / (n + r + eps)
    df_out['med_NDTI'] = (r - g) / (r + g + eps)
    df_out['med_EVI'] = 2.5 * (n - r) / (n + 6*r - 7.5*b + 1 + eps)

    print("Finish calculating indices.")
    return df_out


In [None]:
# Build a copy of the loaded table extended with the med_<band> columns and
# the derived index columns; landsat_raw itself is left as loaded.
df_med = median_indices_calculation(landsat_raw)

In [None]:
# Preview the first rows of the result.
df_med.head()