In [None]:
-- Egress network rule: permits outbound connections to the NOAA CPC host
-- on ports 443 and 80.
CREATE OR REPLACE NETWORK RULE noaa_cpc_rule
    MODE = EGRESS
    TYPE = HOST_PORT
    VALUE_LIST = ('ftp.cpc.ncep.noaa.gov:443', 'ftp.cpc.ncep.noaa.gov:80');

-- External access integration named in the EXTERNAL_ACCESS_INTEGRATIONS
-- clause of the Python procedures in this notebook.
-- NOTE: a later cell ALTERs this integration to also allow the RMA rule;
-- re-running this CREATE OR REPLACE resets the list to noaa_cpc_rule only,
-- so that ALTER must be re-run afterwards.
CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION noaa_cpc_access
    ALLOWED_NETWORK_RULES = (noaa_cpc_rule)
    ENABLED = TRUE;

In [None]:
-- Target table for the CPC rainfall ingest procedures in this notebook:
-- one row per (observation_date, latitude, longitude) grid cell.
--   precip_mm / precip_in : precipitation as computed by the loading procedure
--   gauge_count           : value from the file's second grid, when present
--   file_type             : source tag written by the loader (e.g. 'RT')
--   ingested_at           : load timestamp set by the loader
-- NOTE(review): Snowflake standard tables presumably do not enforce PRIMARY KEY;
-- uniqueness then depends on the loaders' delete-then-append per date — confirm.
CREATE TABLE IF NOT EXISTS PRF_RAINFALL_REALTIME (
    observation_date    DATE            NOT NULL,
    latitude            FLOAT           NOT NULL,
    longitude           FLOAT           NOT NULL,
    precip_mm           FLOAT,
    precip_in           FLOAT,
    gauge_count         INT,
    file_type           VARCHAR(5),
    ingested_at         TIMESTAMP_NTZ,
    
    CONSTRAINT pk_prf_rain PRIMARY KEY (observation_date, latitude, longitude)
);

In [None]:
CREATE OR REPLACE PROCEDURE ingest_cpc_realtime(target_date VARCHAR)
RETURNS VARCHAR
LANGUAGE PYTHON
RUNTIME_VERSION = '3.11'
PACKAGES = ('requests', 'numpy', 'snowflake-snowpark-python', 'pandas')
HANDLER = 'run'
EXTERNAL_ACCESS_INTEGRATIONS = (noaa_cpc_access)
AS
$$
from datetime import datetime

import requests
import numpy as np
import pandas as pd

def run(session, target_date: str) -> str:
    """
    Downloads one day of CPC Unified Gauge-Based precipitation data,
    parses the binary .lnx file, and (re)loads that day into
    PRF_RAINFALL_REALTIME.

    Args:
        session: Snowpark session supplied by Snowflake.
        target_date: YYYYMMDD string (e.g. '20260211').

    Returns:
        An "OK: ..." summary on success, or an "ERROR: ..." message when the
        date is malformed, the file size is unexpected, or no cell has a
        value >= 0.

    Raises:
        requests exceptions (e.g. HTTPError via raise_for_status) when the
        download fails.
    """

    # ── Validate the date BEFORE it is used in the URL or in SQL text ──
    # The value is later interpolated into a DELETE statement, so only a
    # strict 8-digit calendar date is accepted.
    target_date = (target_date or "").strip()
    if len(target_date) != 8 or not (target_date.isascii() and target_date.isdigit()):
        return f"ERROR: target_date must be YYYYMMDD, got {target_date!r}"
    try:
        parsed = datetime.strptime(target_date, "%Y%m%d")
    except ValueError:
        return f"ERROR: target_date is not a valid calendar date: {target_date!r}"
    obs_date = parsed.strftime("%Y-%m-%d")   # normalised, safe to embed in SQL

    # ── Grid constants (from NOAA .ctl control file) ──
    NROWS     = 120
    NCOLS     = 300
    LON_START = -129.875
    LAT_START =   20.125
    STEP      =    0.25

    # ── Build URL and download ──
    year = target_date[:4]
    url = (
        f"https://ftp.cpc.ncep.noaa.gov/precip/CPC_UNI_PRCP/"
        f"GAUGE_CONUS/RT/{year}/"
        f"PRCP_CU_GAUGE_V1.0CONUS_0.25deg.lnx.{target_date}.RT"
    )

    resp = requests.get(url, timeout=60)
    resp.raise_for_status()
    raw = resp.content

    # ── Validate file size: one float32 grid, or two (precip + gauge count) ──
    grid_floats = NROWS * NCOLS
    grid_bytes = grid_floats * 4
    if len(raw) == grid_bytes * 2:
        two_grids = True
    elif len(raw) == grid_bytes:
        two_grids = False
    else:
        return f"ERROR: Unexpected file size {len(raw)} bytes for {target_date}"

    # ── Parse binary (little-endian float32, row-major) ──
    precip_grid = np.frombuffer(raw[:grid_bytes], dtype='<f4').reshape(NROWS, NCOLS)

    gauge_grid = None
    if two_grids:
        gauge_grid = np.frombuffer(
            raw[grid_bytes : grid_bytes * 2], dtype='<f4'
        ).reshape(NROWS, NCOLS)

    # ── Convert to rows ──
    # NOTE(review): values are treated as millimetres here, but the
    # backfill_cpc_rainfall cell in this notebook states the file stores
    # tenths of mm. Confirm the unit against the NOAA .ctl file.
    now = pd.Timestamp.now()
    rows = []

    for r in range(NROWS):
        lat = round(LAT_START + r * STEP, 4)
        for c in range(NCOLS):
            val = precip_grid[r, c]
            if not val >= 0:
                # Negative values are skipped (NaN also fails the comparison).
                continue

            gc = None
            if gauge_grid is not None and gauge_grid[r, c] >= 0:
                gc = int(gauge_grid[r, c])

            rows.append({
                'OBSERVATION_DATE': obs_date,
                'LATITUDE':         lat,
                'LONGITUDE':        round(LON_START + c * STEP, 4),
                'PRECIP_MM':        round(float(val), 2),
                'PRECIP_IN':        round(float(val) / 25.4, 4),
                'GAUGE_COUNT':      gc,
                'FILE_TYPE':        'RT',
                'INGESTED_AT':      now,
            })

    if not rows:
        return f"ERROR: No land cells found for {target_date}"

    # ── Delete existing rows for this date (idempotent re-load) ──
    # obs_date was produced by strftime above, so it cannot carry SQL text.
    session.sql(
        f"DELETE FROM PRF_RAINFALL_REALTIME WHERE observation_date = '{obs_date}'"
    ).collect()

    # ── Load into Snowflake ──
    df = pd.DataFrame(rows)
    df['OBSERVATION_DATE'] = pd.to_datetime(df['OBSERVATION_DATE'])

    snowpark_df = session.create_dataframe(df)
    snowpark_df.write.mode("append").save_as_table("PRF_RAINFALL_REALTIME")

    # ── Summary ──
    rain_count = sum(1 for row in rows if row['PRECIP_MM'] > 0)
    max_mm = max(row['PRECIP_MM'] for row in rows)

    return (
        f"OK: Loaded {len(rows):,} rows for {obs_date} | "
        f"{rain_count:,} cells with rain | "
        f"max {max_mm:.1f} mm ({max_mm/25.4:.3f} in)"
    )
$$;

In [None]:
-- Load a single day (YYYYMMDD) of CPC real-time rainfall; existing rows for
-- that date are deleted and replaced by the procedure.
CALL ingest_cpc_realtime('20260211');

In [None]:
-- Sanity check for one loaded day: cell count, number of cells with
-- rain, and average / maximum precipitation.
SELECT
    observation_date,
    COUNT(*)                     AS total_cells,
    COUNT_IF(precip_mm > 0)      AS cells_with_rain,
    ROUND(AVG(precip_mm), 2)     AS avg_mm,
    ROUND(MAX(precip_mm), 2)     AS max_mm,
    ROUND(MAX(precip_in), 3)     AS max_in
FROM PRF_RAINFALL_REALTIME
WHERE observation_date = '2026-02-11'
GROUP BY observation_date;

In [None]:
-- The 20 grid cells with the highest precipitation for the day.
SELECT TOP 20 *
FROM PRF_RAINFALL_REALTIME
WHERE observation_date = '2026-02-11'
ORDER BY precip_mm DESC;

In [None]:
-- Rainfall at a hand-picked list of grid cells. The label and the
-- coordinates live in a single VALUES list, so a location is defined once.
SELECT
    loc.location_name,
    r.latitude,
    r.longitude,
    r.precip_mm,
    r.precip_in,
    r.gauge_count
FROM PRF_RAINFALL_REALTIME r
INNER JOIN (
    VALUES
        ('King Ranch, TX',           27.375,  -97.875),
        ('Amarillo, TX',             35.125, -101.875),
        ('Hill Country (Llano)',     30.625,  -98.625),
        ('East TX (Nacogdoches)',    31.625,  -94.625),
        ('South TX (Laredo)',        27.625,  -99.375),
        ('Haakon, SD (PRM example)', 43.875, -101.125),
        ('SW Oklahoma',              34.875,  -99.625),
        ('S Colorado',               37.375, -105.125),
        ('Central Arkansas',         34.375,  -92.625),
        ('Baton Rouge, LA',          30.375,  -91.125)
) AS loc (location_name, latitude, longitude)
    ON  r.latitude  = loc.latitude
    AND r.longitude = loc.longitude
WHERE r.observation_date = '2026-02-11'
ORDER BY loc.location_name;

In [None]:
-- Kenedy County, TX (center approx. 26.875°N, 97.625°W)
-- Check the grid cells nearest to that point
-- All grid cells inside a 0.5° x 0.5° window (bounds inclusive) for the day.
SELECT
    r.observation_date,
    r.latitude,
    r.longitude,
    r.precip_mm,
    r.precip_in,
    r.gauge_count
FROM PRF_RAINFALL_REALTIME r
WHERE r.observation_date = '2026-02-11'
  AND r.latitude  >= 26.625  AND r.latitude  <= 27.125
  AND r.longitude >= -97.875 AND r.longitude <= -97.375
ORDER BY r.latitude, r.longitude;

In [None]:
-- Create the RMA network rule: egress to the USDA RMA public file server
-- on ports 443 and 80 (used by the grid-reference download procedure).
CREATE OR REPLACE NETWORK RULE rma_ftp_rule
    MODE = EGRESS
    TYPE = HOST_PORT
    VALUE_LIST = ('pubfs-rma.fpac.usda.gov:443', 'pubfs-rma.fpac.usda.gov:80');

-- Show what rules NOAA_CPC_ACCESS currently uses (inspect before altering it)
DESCRIBE EXTERNAL ACCESS INTEGRATION NOAA_CPC_ACCESS;

In [None]:
-- Replace the integration's rule list with both the NOAA and the RMA rule.
-- NOTE(review): the rules are referenced by a fully qualified name
-- (CAPITAL_MARKETS_SANDBOX.PUBLIC) while the CREATE statements are unqualified;
-- this presumably assumes that database/schema is the session context — confirm.
ALTER EXTERNAL ACCESS INTEGRATION NOAA_CPC_ACCESS
    SET ALLOWED_NETWORK_RULES = (CAPITAL_MARKETS_SANDBOX.PUBLIC.NOAA_CPC_RULE, CAPITAL_MARKETS_SANDBOX.PUBLIC.RMA_FTP_RULE);

In [None]:
CREATE OR REPLACE PROCEDURE ingest_prf_grid_reference()
RETURNS STRING
LANGUAGE PYTHON
RUNTIME_VERSION = '3.11'
PACKAGES = ('snowflake-snowpark-python', 'requests', 'geopandas', 'pandas', 'fiona', 'pyproj', 'shapely')
EXTERNAL_ACCESS_INTEGRATIONS = (NOAA_CPC_ACCESS)
HANDLER = 'run'
AS
$$
def run(session):
    """
    Download the RMA rainfall-index grid shapefile archive, derive each
    polygon's centroid and bounding box in EPSG:4326, and overwrite the
    PRF_GRID_REFERENCE table with the result.

    Args:
        session: Snowpark session supplied by Snowflake.

    Returns:
        A one-line load summary, or a message when the download returns a
        non-200 status or the archive contains no .shp file.

    Raises:
        requests exceptions on connection failure or timeout, and
        zipfile.BadZipFile when the payload is not a zip archive.
    """
    import requests
    import zipfile
    import io
    import tempfile
    import os
    import geopandas as gpd
    import pandas as pd

    url = "https://pubfs-rma.fpac.usda.gov/pub/Miscellaneous_Files/VI_RI_Data/rainfall_index_grids.zip"
    # A timeout keeps the procedure from hanging forever on a stalled server.
    resp = requests.get(url, timeout=120)
    if resp.status_code != 200:
        return f"Download failed: HTTP {resp.status_code}"

    # The temp directory and the archive handle are both released on exit.
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(io.BytesIO(resp.content)) as archive:
            archive.extractall(tmpdir)

        # First .shp encountered anywhere in the extracted tree
        # (extension matched case-insensitively).
        shp_path = next(
            (
                os.path.join(root, name)
                for root, _dirs, files in os.walk(tmpdir)
                for name in files
                if name.lower().endswith('.shp')
            ),
            None,
        )

        if not shp_path:
            return "No .shp file found in archive"

        # read_file loads the data into memory, so the files can go afterwards.
        gdf = gpd.read_file(shp_path)

    original_crs = str(gdf.crs)

    # Reproject to WGS84 lat/lon unless the file already uses it (or has no CRS).
    if gdf.crs and gdf.crs.to_epsg() != 4326:
        gdf = gdf.to_crs(epsg=4326)

    # Compute centroids and bounds once and reuse them.
    centroids = gdf.geometry.centroid
    bounds = gdf.geometry.bounds
    gdf['CENTER_LAT'] = centroids.y.round(6)
    gdf['CENTER_LON'] = centroids.x.round(6)
    gdf['MIN_LAT']    = bounds['miny'].round(6)
    gdf['MAX_LAT']    = bounds['maxy'].round(6)
    gdf['MIN_LON']    = bounds['minx'].round(6)
    gdf['MAX_LON']    = bounds['maxx'].round(6)

    df = pd.DataFrame(gdf.drop(columns='geometry'))
    df.columns = [c.upper() for c in df.columns]
    df['INGESTED_AT'] = pd.Timestamp.now()

    sp_df = session.create_dataframe(df)
    sp_df.write.mode("overwrite").save_as_table("PRF_GRID_REFERENCE")

    total = len(df)
    cols = df.columns.tolist()
    lat_min = df['CENTER_LAT'].min()
    lat_max = df['CENTER_LAT'].max()
    lon_min = df['CENTER_LON'].min()
    lon_max = df['CENTER_LON'].max()

    return (
        f"PRF_GRID_REFERENCE loaded: {total:,} grids | "
        f"CRS: {original_crs} | "
        f"Columns: {cols} | "
        f"Lat: {lat_min:.3f} to {lat_max:.3f} | "
        f"Lon: {lon_min:.3f} to {lon_max:.3f}"
    )
$$;

In [None]:
-- Download the RMA grid shapefile and overwrite PRF_GRID_REFERENCE.
CALL ingest_prf_grid_reference();

In [None]:
-- Preview the loaded grid reference rows (columns come from the shapefile
-- plus the derived CENTER_/MIN_/MAX_ fields and INGESTED_AT).
SELECT * FROM PRF_GRID_REFERENCE LIMIT 20;

In [None]:
-- Wettest 20 (grid, MAP_YTD row) combinations for state code '48' on one
-- day. Rainfall cells are matched to RMA grids on centre coordinates
-- rounded to 3 decimals.
WITH grid_rain AS (
    SELECT
        r.observation_date,
        g.GRIDCODE AS grid_id,
        r.precip_mm,
        r.precip_in,
        r.gauge_count
    FROM PRF_RAINFALL_REALTIME r
    INNER JOIN PRF_GRID_REFERENCE g
        ON  ROUND(r.latitude, 3)  = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.longitude, 3) = ROUND(g.CENTER_LON, 3)
    WHERE r.observation_date = '2026-02-11'
),
tx_map AS (
    SELECT
        TRY_TO_NUMBER(SUB_COUNTY_CODE) AS grid_id,
        STATE_CODE,
        COUNTY_CODE,
        INTERVAL_CODE,
        PRACTICE_CODE
    FROM MAP_YTD
    WHERE INSURANCE_PLAN_CODE = '13'
      AND DELETED_DATE IS NULL
      AND STATE_CODE = '48'
)
SELECT
    gr.observation_date,
    gr.grid_id,
    tm.STATE_CODE,
    tm.COUNTY_CODE,
    tm.INTERVAL_CODE,
    tm.PRACTICE_CODE,
    gr.precip_mm,
    gr.precip_in,
    gr.gauge_count
FROM grid_rain gr
INNER JOIN tx_map tm
    ON tm.grid_id = gr.grid_id
ORDER BY gr.precip_mm DESC
LIMIT 20;

In [None]:
-- Same join as the detail query, collapsed to one row per grid/county with
-- the number of distinct interval codes present in MAP_YTD.
WITH grid_rain AS (
    SELECT
        r.observation_date,
        g.GRIDCODE AS grid_id,
        r.precip_mm,
        r.precip_in,
        r.gauge_count
    FROM PRF_RAINFALL_REALTIME r
    INNER JOIN PRF_GRID_REFERENCE g
        ON  ROUND(r.latitude, 3)  = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.longitude, 3) = ROUND(g.CENTER_LON, 3)
    WHERE r.observation_date = '2026-02-11'
),
tx_map AS (
    SELECT
        TRY_TO_NUMBER(SUB_COUNTY_CODE) AS grid_id,
        STATE_CODE,
        COUNTY_CODE,
        INTERVAL_CODE
    FROM MAP_YTD
    WHERE INSURANCE_PLAN_CODE = '13'
      AND DELETED_DATE IS NULL
      AND STATE_CODE = '48'
)
SELECT
    gr.observation_date,
    gr.grid_id,
    tm.STATE_CODE,
    tm.COUNTY_CODE,
    gr.precip_mm,
    gr.precip_in,
    gr.gauge_count,
    COUNT(DISTINCT tm.INTERVAL_CODE) AS intervals_insured
FROM grid_rain gr
INNER JOIN tx_map tm
    ON tm.grid_id = gr.grid_id
GROUP BY
    gr.observation_date,
    gr.grid_id,
    tm.STATE_CODE,
    tm.COUNTY_CODE,
    gr.precip_mm,
    gr.precip_in,
    gr.gauge_count
ORDER BY gr.precip_mm DESC
LIMIT 20;

In [None]:
CREATE OR REPLACE PROCEDURE backfill_cpc_rainfall(start_date STRING, end_date STRING)
RETURNS STRING
LANGUAGE PYTHON
RUNTIME_VERSION = '3.11'
PACKAGES = ('snowflake-snowpark-python', 'requests', 'numpy', 'pandas')
EXTERNAL_ACCESS_INTEGRATIONS = (NOAA_CPC_ACCESS)
HANDLER = 'run'
AS
$$
def run(session, start_date, end_date):
    """
    Load every day from start_date to end_date (inclusive, YYYYMMDD) of CPC
    gauge-based precipitation into PRF_RAINFALL_REALTIME.

    For each day the UPDATED file is tried first, then the RT file under the
    same year, then the RT file under the previous year's directory. A day
    with no usable file is reported as SKIP; a day whose parse or load fails
    is reported as ERR. In both cases the loop moves on to the next day.

    NOTE: later cells in this notebook CREATE OR REPLACE a procedure with the
    same name, so whichever definition was executed last is the active one.

    Args:
        session: Snowpark session supplied by Snowflake.
        start_date: first day, 'YYYYMMDD'.
        end_date: last day, 'YYYYMMDD'.

    Returns:
        Newline-joined report: a "DONE: ..." summary line followed by one
        OK / SKIP / ERR line per day.

    Raises:
        ValueError if start_date or end_date is not a valid YYYYMMDD date.
    """
    import math
    import requests
    import numpy as np
    import pandas as pd
    from datetime import datetime, timedelta

    NROWS, NCOLS = 120, 300
    LAT_START, LON_START, STEP = 20.125, -129.875, 0.25
    GRID_FLOATS = NROWS * NCOLS
    GRID_BYTES = GRID_FLOATS * 4          # one float32 grid
    BASE = "https://ftp.cpc.ncep.noaa.gov/precip/CPC_UNI_PRCP/GAUGE_CONUS"

    first_day = datetime.strptime(start_date, '%Y%m%d')
    last_day  = datetime.strptime(end_date,   '%Y%m%d')

    results = []
    total_ok = 0
    total_skip = 0

    # range() is empty when end_date precedes start_date.
    for offset in range((last_day - first_day).days + 1):
        current = first_day + timedelta(days=offset)
        dt   = current.strftime('%Y%m%d')
        year = current.strftime('%Y')
        prev = str(int(year) - 1)
        obs  = current.strftime('%Y-%m-%d')   # generated here, safe to embed in SQL

        url_candidates = [
            (f"{BASE}/UPDATED/{year}/PRCP_CU_GAUGE_V1.0CONUS_0.25deg.lnx.{dt}.UPDATED", 'UPDATED'),
            (f"{BASE}/RT/{year}/PRCP_CU_GAUGE_V1.0CONUS_0.25deg.lnx.{dt}.RT", 'RT'),
            (f"{BASE}/RT/{prev}/PRCP_CU_GAUGE_V1.0CONUS_0.25deg.lnx.{dt}.RT", 'RT'),
        ]

        raw = None
        source = None
        for url, src in url_candidates:
            try:
                resp = requests.get(url, timeout=30)
            except requests.RequestException:
                # Network-level failure: fall through to the next candidate.
                continue
            if resp.status_code == 200 and len(resp.content) >= GRID_BYTES:
                raw, source = resp.content, src
                break

        if raw is None:
            results.append(f"SKIP {dt}")
            total_skip += 1
            continue

        try:
            precip = np.frombuffer(raw[:GRID_BYTES], dtype='<f4').reshape(NROWS, NCOLS)

            # The second grid (gauge counts) is optional.
            gauge = None
            if len(raw) >= GRID_BYTES * 2:
                gauge = np.frombuffer(raw[GRID_BYTES:GRID_BYTES * 2], dtype='<f4').reshape(NROWS, NCOLS)

            ingested_at = pd.Timestamp.now()   # one timestamp per daily batch
            rows = []
            for r_i in range(NROWS):
                lat = round(LAT_START + r_i * STEP, 3)
                for c_i in range(NCOLS):
                    val = float(precip[r_i, c_i])
                    if not val >= 0:
                        continue               # negative or NaN: no data

                    # Negative / non-finite gauge counts are stored as NULL,
                    # matching ingest_cpc_realtime.
                    gc = None
                    if gauge is not None:
                        g = float(gauge[r_i, c_i])
                        if math.isfinite(g) and g >= 0:
                            gc = int(g)

                    # ─── UNIT FIX ───────────────────────────────
                    # NOAA CPC binary stores precipitation as
                    # tenths of mm (0.1 mm units), NOT millimeters.
                    # val=110 means 11.0 mm, not 110 mm.
                    # ─────────────────────────────────────────────
                    rows.append({
                        'OBSERVATION_DATE': obs,
                        'LATITUDE':   lat,
                        'LONGITUDE':  round(LON_START + c_i * STEP, 3),
                        'PRECIP_MM':  round(val / 10, 2),       # tenths of mm → mm
                        'PRECIP_IN':  round(val / 254, 4),      # tenths of mm → inches
                        'GAUGE_COUNT': gc,
                        'FILE_TYPE':  source,
                        'INGESTED_AT': ingested_at
                    })

            # Bail out BEFORE deleting, so an all-undefined file never wipes
            # a previously loaded day.
            if not rows:
                raise ValueError("no cells with precipitation >= 0")

            df = pd.DataFrame(rows)

            session.sql(f"DELETE FROM PRF_RAINFALL_REALTIME WHERE observation_date = '{obs}'").collect()
            session.create_dataframe(df).write.mode("append").save_as_table("PRF_RAINFALL_REALTIME")

            rain_cells = int((df['PRECIP_MM'] > 0).sum())
            results.append(f"OK {dt}: {len(df):,} cells, {rain_cells:,} with rain [{source}]")
            total_ok += 1

        except Exception as e:
            # Best-effort backfill: record the failure and keep going.
            results.append(f"ERR {dt}: {str(e)[:100]}")
            total_skip += 1

    summary = f"DONE: {total_ok} days loaded, {total_skip} skipped"
    results.insert(0, summary)
    return "\n".join(results)
$$;

In [None]:
-- DESTRUCTIVE: removes every row from PRF_RAINFALL_REALTIME so the table can
-- be reloaded from scratch by the backfill calls that follow.
TRUNCATE TABLE PRF_RAINFALL_REALTIME;


In [None]:

-- Reload the Jan-Feb window for each year; both arguments are inclusive
-- YYYYMMDD dates.
-- 2023 Jan-Feb
CALL backfill_cpc_rainfall('20230101', '20230228');

-- 2024 Jan-Feb
CALL backfill_cpc_rainfall('20240101', '20240229');

-- 2025 Jan-Feb
CALL backfill_cpc_rainfall('20250101', '20250228');

-- 2026 Jan 1 through today
-- (the end date is hard-coded; update it when re-running on a later day)
CALL backfill_cpc_rainfall('20260101', '20260213');

In [None]:
-- For each grid: Jan-Feb 2024 rainfall total alongside the RMA index for the
-- same interval, and the "normal" implied by total / (index / 100).
WITH grid_rain AS (
    SELECT
        g.GRIDCODE AS grid_id,
        r.OBSERVATION_DATE,
        r.PRECIP_IN
    FROM PRF_RAINFALL_REALTIME r
    INNER JOIN PRF_GRID_REFERENCE g
        ON  ROUND(r.LATITUDE, 3)  = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.LONGITUDE, 3) = ROUND(g.CENTER_LON, 3)
    WHERE r.OBSERVATION_DATE BETWEEN '2024-01-01' AND '2024-02-29'
),
index_2024 AS (
    SELECT GRID_ID, INTERVAL_CODE, INTERVAL_NAME, YEAR, INDEX_VALUE
    FROM RAIN_INDEX_PLATINUM_ENHANCED
    WHERE INTERVAL_CODE = '625'          -- Jan-Feb
      AND YEAR = 2024
)
SELECT
    gr.grid_id,
    ix.INTERVAL_CODE,
    ix.INTERVAL_NAME,
    ix.YEAR,
    ix.INDEX_VALUE                        AS rma_index,
    COUNT(DISTINCT gr.OBSERVATION_DATE)   AS days_with_data,
    ROUND(SUM(gr.PRECIP_IN), 4)           AS total_precip_in,
    -- Reverse-engineer the historical normal
    ROUND(SUM(gr.PRECIP_IN) / NULLIF(ix.INDEX_VALUE / 100, 0), 4) AS implied_normal_in
FROM grid_rain gr
INNER JOIN index_2024 ix
    ON ix.GRID_ID = gr.grid_id
GROUP BY
    gr.grid_id,
    ix.INTERVAL_CODE,
    ix.INTERVAL_NAME,
    ix.YEAR,
    ix.INDEX_VALUE
ORDER BY grid_id
LIMIT 50;

In [None]:
-- Step 1: 2024 baseline → implied normal
-- Step 2: 2025 rainfall → predicted index
-- Step 3: Compare to actual 2025 index

-- Single-grid check (grid 7328): derive the implied normal from 2024, use it
-- to predict the 2025 index, and compare with the published 2025 index.
WITH grid_rain AS (
    SELECT
        r.OBSERVATION_DATE AS obs_date,
        r.PRECIP_IN        AS precip_in
    FROM PRF_RAINFALL_REALTIME r
    INNER JOIN PRF_GRID_REFERENCE g
        ON  ROUND(r.LATITUDE, 3)  = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.LONGITUDE, 3) = ROUND(g.CENTER_LON, 3)
    WHERE g.GRIDCODE = 7328
),
rainfall_2024 AS (
    SELECT ROUND(SUM(precip_in), 4) AS total_in
    FROM grid_rain
    WHERE obs_date BETWEEN '2024-01-01' AND '2024-02-29'
),
rainfall_2025 AS (
    SELECT ROUND(SUM(precip_in), 4) AS total_in
    FROM grid_rain
    WHERE obs_date BETWEEN '2025-01-01' AND '2025-02-28'
),
rma_index AS (
    SELECT YEAR, INDEX_VALUE
    FROM RAIN_INDEX_PLATINUM_ENHANCED
    WHERE GRID_ID = 7328
      AND INTERVAL_CODE = '625'
      AND YEAR IN (2024, 2025)
),
inputs AS (
    SELECT
        r24.total_in                                       AS rain_2024_in,
        i24.INDEX_VALUE                                    AS rma_index_2024,
        -- unrounded normal; the prediction must use this, not the rounded display value
        r24.total_in / NULLIF(i24.INDEX_VALUE / 100, 0)    AS normal_raw,
        r25.total_in                                       AS rain_2025_in,
        i25.INDEX_VALUE                                    AS actual_2025_index
    FROM rainfall_2024 r24
    CROSS JOIN rainfall_2025 r25
    INNER JOIN rma_index i24 ON i24.YEAR = 2024
    INNER JOIN rma_index i25 ON i25.YEAR = 2025
),
predicted AS (
    SELECT
        i.*,
        ROUND((i.rain_2025_in / NULLIF(i.normal_raw, 0)) * 100, 1) AS predicted_2025_index
    FROM inputs i
)
SELECT
    7328                                                  AS grid_id,
    p.rain_2024_in,
    p.rma_index_2024,
    ROUND(p.normal_raw, 4)                                AS implied_normal_in,
    p.rain_2025_in,
    p.predicted_2025_index,
    p.actual_2025_index,
    ROUND(p.predicted_2025_index - p.actual_2025_index, 1) AS difference
FROM predicted p;

In [None]:
-- All grids: predicted vs. actual 2025 index, worst 100 by absolute error.
WITH grid_rain AS (
    SELECT
        g.GRIDCODE         AS grid_id,
        r.OBSERVATION_DATE AS obs_date,
        r.PRECIP_IN        AS precip_in
    FROM PRF_RAINFALL_REALTIME r
    INNER JOIN PRF_GRID_REFERENCE g
        ON  ROUND(r.LATITUDE, 3)  = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.LONGITUDE, 3) = ROUND(g.CENTER_LON, 3)
),
rainfall_2024 AS (
    SELECT grid_id, ROUND(SUM(precip_in), 4) AS total_in
    FROM grid_rain
    WHERE obs_date BETWEEN '2024-01-01' AND '2024-02-29'
    GROUP BY grid_id
),
rainfall_2025 AS (
    SELECT grid_id, ROUND(SUM(precip_in), 4) AS total_in
    FROM grid_rain
    WHERE obs_date BETWEEN '2025-01-01' AND '2025-02-28'
    GROUP BY grid_id
),
rma_index AS (
    SELECT GRID_ID, YEAR, INDEX_VALUE
    FROM RAIN_INDEX_PLATINUM_ENHANCED
    WHERE INTERVAL_CODE = '625'
      AND YEAR IN (2024, 2025)
),
paired AS (
    SELECT
        r24.grid_id,
        r24.total_in                                       AS rain_2024_in,
        i24.INDEX_VALUE                                    AS rma_index_2024,
        -- unrounded normal; rounded only for display
        r24.total_in / NULLIF(i24.INDEX_VALUE / 100, 0)    AS normal_raw,
        r25.total_in                                       AS rain_2025_in,
        i25.INDEX_VALUE                                    AS actual_2025_index
    FROM rainfall_2024 r24
    INNER JOIN rainfall_2025 r25 ON r25.grid_id = r24.grid_id
    INNER JOIN rma_index i24     ON i24.GRID_ID = r24.grid_id AND i24.YEAR = 2024
    INNER JOIN rma_index i25     ON i25.GRID_ID = r24.grid_id AND i25.YEAR = 2025
    WHERE i24.INDEX_VALUE > 0
),
predicted AS (
    SELECT
        p.*,
        ROUND((p.rain_2025_in / NULLIF(p.normal_raw, 0)) * 100, 1) AS predicted_2025_index
    FROM paired p
),
scored AS (
    SELECT
        q.*,
        ROUND(q.predicted_2025_index - q.actual_2025_index, 1) AS difference
    FROM predicted q
)
SELECT
    s.grid_id,
    s.rain_2024_in,
    s.rma_index_2024,
    ROUND(s.normal_raw, 4)   AS implied_normal_in,
    s.rain_2025_in,
    s.predicted_2025_index,
    s.actual_2025_index,
    s.difference,
    ABS(s.difference)        AS abs_difference
FROM scored s
ORDER BY abs_difference DESC
LIMIT 100;

In [None]:
-- Error distribution of the predicted-vs-actual 2025 index across all grids.
WITH grid_rain AS (
    SELECT
        g.GRIDCODE         AS grid_id,
        r.OBSERVATION_DATE AS obs_date,
        r.PRECIP_IN        AS precip_in
    FROM PRF_RAINFALL_REALTIME r
    INNER JOIN PRF_GRID_REFERENCE g
        ON  ROUND(r.LATITUDE, 3)  = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.LONGITUDE, 3) = ROUND(g.CENTER_LON, 3)
),
rainfall_2024 AS (
    SELECT grid_id, ROUND(SUM(precip_in), 4) AS total_in
    FROM grid_rain
    WHERE obs_date BETWEEN '2024-01-01' AND '2024-02-29'
    GROUP BY grid_id
),
rainfall_2025 AS (
    SELECT grid_id, ROUND(SUM(precip_in), 4) AS total_in
    FROM grid_rain
    WHERE obs_date BETWEEN '2025-01-01' AND '2025-02-28'
    GROUP BY grid_id
),
rma_index AS (
    SELECT GRID_ID, YEAR, INDEX_VALUE
    FROM RAIN_INDEX_PLATINUM_ENHANCED
    WHERE INTERVAL_CODE = '625'
      AND YEAR IN (2024, 2025)
),
paired AS (
    SELECT
        r24.total_in / NULLIF(i24.INDEX_VALUE / 100, 0)    AS normal_raw,
        r25.total_in                                       AS rain_2025_in,
        i25.INDEX_VALUE                                    AS actual_2025_index
    FROM rainfall_2024 r24
    INNER JOIN rainfall_2025 r25 ON r25.grid_id = r24.grid_id
    INNER JOIN rma_index i24     ON i24.GRID_ID = r24.grid_id AND i24.YEAR = 2024
    INNER JOIN rma_index i25     ON i25.GRID_ID = r24.grid_id AND i25.YEAR = 2025
    WHERE i24.INDEX_VALUE > 0
),
predicted AS (
    SELECT
        p.actual_2025_index,
        ROUND((p.rain_2025_in / NULLIF(p.normal_raw, 0)) * 100, 1) AS predicted_2025_index
    FROM paired p
),
errors AS (
    SELECT
        ROUND(q.predicted_2025_index - q.actual_2025_index, 1)      AS difference,
        ABS(ROUND(q.predicted_2025_index - q.actual_2025_index, 1)) AS abs_difference
    FROM predicted q
)
SELECT
    COUNT(*)                               AS total_grids,
    ROUND(AVG(abs_difference), 2)          AS avg_abs_error,
    ROUND(MEDIAN(abs_difference), 2)       AS median_abs_error,
    SUM(IFF(abs_difference <= 1, 1, 0))    AS within_1pt,
    SUM(IFF(abs_difference <= 5, 1, 0))    AS within_5pt,
    SUM(IFF(abs_difference <= 10, 1, 0))   AS within_10pt,
    ROUND(MIN(difference), 1)              AS worst_under,
    ROUND(MAX(difference), 1)              AS worst_over
FROM errors;

In [None]:
-- Grids whose predicted 2025 index misses the actual index by more than
-- 20 points, worst first.
WITH grid_rain AS (
    SELECT
        g.GRIDCODE         AS grid_id,
        r.OBSERVATION_DATE AS obs_date,
        r.PRECIP_IN        AS precip_in
    FROM PRF_RAINFALL_REALTIME r
    INNER JOIN PRF_GRID_REFERENCE g
        ON  ROUND(r.LATITUDE, 3)  = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.LONGITUDE, 3) = ROUND(g.CENTER_LON, 3)
),
rainfall_2024 AS (
    SELECT grid_id, ROUND(SUM(precip_in), 4) AS total_in
    FROM grid_rain
    WHERE obs_date BETWEEN '2024-01-01' AND '2024-02-29'
    GROUP BY grid_id
),
rainfall_2025 AS (
    SELECT grid_id, ROUND(SUM(precip_in), 4) AS total_in
    FROM grid_rain
    WHERE obs_date BETWEEN '2025-01-01' AND '2025-02-28'
    GROUP BY grid_id
),
rma_index AS (
    SELECT GRID_ID, YEAR, INDEX_VALUE
    FROM RAIN_INDEX_PLATINUM_ENHANCED
    WHERE INTERVAL_CODE = '625'
      AND YEAR IN (2024, 2025)
),
paired AS (
    SELECT
        r24.grid_id,
        r24.total_in                                       AS rain_2024_in,
        r24.total_in / NULLIF(i24.INDEX_VALUE / 100, 0)    AS normal_raw,
        r25.total_in                                       AS rain_2025_in,
        i25.INDEX_VALUE                                    AS actual_2025_index
    FROM rainfall_2024 r24
    INNER JOIN rainfall_2025 r25 ON r25.grid_id = r24.grid_id
    INNER JOIN rma_index i24     ON i24.GRID_ID = r24.grid_id AND i24.YEAR = 2024
    INNER JOIN rma_index i25     ON i25.GRID_ID = r24.grid_id AND i25.YEAR = 2025
    WHERE i24.INDEX_VALUE > 0
),
predicted AS (
    SELECT
        p.*,
        ROUND((p.rain_2025_in / NULLIF(p.normal_raw, 0)) * 100, 1) AS predicted_2025_index
    FROM paired p
),
scored AS (
    SELECT
        q.*,
        ROUND(q.predicted_2025_index - q.actual_2025_index, 1) AS difference
    FROM predicted q
)
SELECT
    s.grid_id,
    s.rain_2024_in,
    ROUND(s.normal_raw, 4) AS implied_normal_in,
    s.rain_2025_in,
    s.predicted_2025_index,
    s.actual_2025_index,
    s.difference
FROM scored s
WHERE ABS(s.difference) > 20
ORDER BY ABS(s.difference) DESC
LIMIT 20;

In [None]:
-- Where is grid 14793?
-- Returns the centre point and bounding box stored for that grid code.
SELECT GRIDCODE, CENTER_LAT, CENTER_LON, MIN_LAT, MAX_LAT, MIN_LON, MAX_LON
FROM PRF_GRID_REFERENCE
WHERE GRIDCODE = 14793;

In [None]:
-- Texas grids (state code '48', interval '625') whose predicted 2025 index is
-- off by more than 10 points, with average gauge counts and the minimum
-- file-type tag per period to help explain the misses.
WITH grid_rain AS (
    SELECT
        g.GRIDCODE         AS grid_id,
        r.OBSERVATION_DATE AS obs_date,
        r.PRECIP_IN        AS precip_in,
        r.GAUGE_COUNT      AS gauge_count,
        r.FILE_TYPE        AS file_type
    FROM PRF_RAINFALL_REALTIME r
    INNER JOIN PRF_GRID_REFERENCE g
        ON  ROUND(r.LATITUDE, 3)  = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.LONGITUDE, 3) = ROUND(g.CENTER_LON, 3)
),
rainfall_2024 AS (
    SELECT
        grid_id,
        ROUND(SUM(precip_in), 4)   AS total_in,
        ROUND(AVG(gauge_count), 1) AS avg_gauges,
        MIN(file_type)             AS file_type
    FROM grid_rain
    WHERE obs_date BETWEEN '2024-01-01' AND '2024-02-29'
    GROUP BY grid_id
),
rainfall_2025 AS (
    SELECT
        grid_id,
        ROUND(SUM(precip_in), 4)   AS total_in,
        ROUND(AVG(gauge_count), 1) AS avg_gauges,
        MIN(file_type)             AS file_type
    FROM grid_rain
    WHERE obs_date BETWEEN '2025-01-01' AND '2025-02-28'
    GROUP BY grid_id
),
rma_index AS (
    SELECT GRID_ID, YEAR, INDEX_VALUE
    FROM RAIN_INDEX_PLATINUM_ENHANCED
    WHERE INTERVAL_CODE = '625'
      AND YEAR IN (2024, 2025)
),
tx_grids AS (
    SELECT DISTINCT TRY_TO_NUMBER(SUB_COUNTY_CODE) AS grid_id
    FROM MAP_YTD
    WHERE INSURANCE_PLAN_CODE = '13'
      AND INTERVAL_CODE = '625'
      AND STATE_CODE = '48'
      AND DELETED_DATE IS NULL
),
paired AS (
    SELECT
        r24.grid_id,
        r24.total_in                                       AS rain_2024,
        r25.total_in                                       AS rain_2025,
        -- unrounded normal; rounded only for display
        r24.total_in / NULLIF(i24.INDEX_VALUE / 100, 0)    AS normal_raw,
        i25.INDEX_VALUE                                    AS actual,
        r24.avg_gauges                                     AS gauges_2024,
        r25.avg_gauges                                     AS gauges_2025,
        r24.file_type                                      AS file_type_2024,
        r25.file_type                                      AS file_type_2025
    FROM rainfall_2024 r24
    INNER JOIN rainfall_2025 r25 ON r25.grid_id = r24.grid_id
    INNER JOIN rma_index i24     ON i24.GRID_ID = r24.grid_id AND i24.YEAR = 2024
    INNER JOIN rma_index i25     ON i25.GRID_ID = r24.grid_id AND i25.YEAR = 2025
    INNER JOIN tx_grids t        ON t.grid_id   = r24.grid_id
    WHERE i24.INDEX_VALUE > 0
),
with_prediction AS (
    SELECT
        p.*,
        ROUND((p.rain_2025 / NULLIF(p.normal_raw, 0)) * 100, 1) AS predicted
    FROM paired p
),
scored AS (
    SELECT
        q.*,
        ROUND(q.predicted - q.actual, 1) AS difference
    FROM with_prediction q
)
SELECT
    s.grid_id,
    s.rain_2024,
    s.rain_2025,
    ROUND(s.normal_raw, 4) AS implied_normal,
    s.predicted,
    s.actual,
    s.difference,
    ABS(s.difference)      AS abs_diff,
    s.gauges_2024,
    s.gauges_2025,
    s.file_type_2024,
    s.file_type_2025
FROM scored s
WHERE ABS(s.difference) > 10
ORDER BY abs_diff DESC;

In [None]:
CREATE OR REPLACE PROCEDURE backfill_cpc_rainfall(start_date STRING, end_date STRING)
RETURNS STRING
LANGUAGE PYTHON
RUNTIME_VERSION = '3.11'
PACKAGES = ('snowflake-snowpark-python', 'requests', 'numpy', 'pandas')
EXTERNAL_ACCESS_INTEGRATIONS = (NOAA_CPC_ACCESS)
HANDLER = 'run'
AS
$$
def run(session, start_date, end_date):
    """
    Load every day from start_date to end_date (inclusive, YYYYMMDD) of CPC
    gauge-based precipitation into PRF_RAINFALL_REALTIME, choosing the source
    by age: days more than 60 days old try the V1.0 file first, recent days
    try the RT file first.

    Unlike the other backfill variants in this notebook, parse/load errors
    are NOT caught: the first failing day aborts the whole call.

    NOTE: this notebook issues CREATE OR REPLACE for this procedure name in
    several cells; whichever definition was executed last is the active one.
    NOTE(review): values are stored as millimetres as-is here, while another
    variant in this notebook divides by 10 ("tenths of mm"). Confirm the unit
    against the NOAA .ctl file before relying on PRECIP_MM / PRECIP_IN.

    Args:
        session: Snowpark session supplied by Snowflake.
        start_date: first day, 'YYYYMMDD'.
        end_date: last day, 'YYYYMMDD'.

    Returns:
        Newline-joined report: a "DONE: ..." summary line followed by one
        OK / SKIP / ERR line per day.

    Raises:
        ValueError if a date argument is not valid YYYYMMDD; any exception
        raised while writing a day to Snowflake.
    """
    import math
    import requests
    import numpy as np
    import pandas as pd
    from datetime import datetime, timedelta

    NROWS, NCOLS = 120, 300
    LAT_START, LON_START, STEP = 20.125, -129.875, 0.25
    GRID_FLOATS = NROWS * NCOLS
    GRID_BYTES = GRID_FLOATS * 4          # one float32 grid
    BASE = "https://ftp.cpc.ncep.noaa.gov/precip/CPC_UNI_PRCP/GAUGE_CONUS"

    first_day = datetime.strptime(start_date, '%Y%m%d')
    last_day  = datetime.strptime(end_date,   '%Y%m%d')
    today     = datetime.now()

    results = []
    total_ok = 0
    total_skip = 0

    # range() is empty when end_date precedes start_date.
    for offset in range((last_day - first_day).days + 1):
        current = first_day + timedelta(days=offset)
        dt   = current.strftime('%Y%m%d')
        year = current.strftime('%Y')
        prev = str(int(year) - 1)
        obs  = current.strftime('%Y-%m-%d')   # generated here, safe to embed in SQL

        v1_url      = (f"{BASE}/V1.0/{year}/PRCP_CU_GAUGE_V1.0CONUS_0.25deg.lnx.{dt}", 'V1.0')
        rt_url      = (f"{BASE}/RT/{year}/PRCP_CU_GAUGE_V1.0CONUS_0.25deg.lnx.{dt}.RT", 'RT')
        rt_prev_url = (f"{BASE}/RT/{prev}/PRCP_CU_GAUGE_V1.0CONUS_0.25deg.lnx.{dt}.RT", 'RT')

        # Smart priority: V1.0 first for older data, RT first for recent
        days_ago = (today - current).days
        if days_ago > 60:
            # Historical — prefer QC'd V1.0
            url_candidates = [v1_url, rt_url, rt_prev_url]
        else:
            # Recent — RT likely only option
            url_candidates = [rt_url, rt_prev_url, v1_url]

        raw = None
        source = None
        for url, src in url_candidates:
            try:
                resp = requests.get(url, timeout=30)
            except requests.RequestException:
                # Network-level failure: fall through to the next candidate.
                continue
            if resp.status_code == 200 and len(resp.content) >= GRID_BYTES:
                raw, source = resp.content, src
                break

        if raw is None:
            results.append(f"SKIP {dt}")
            total_skip += 1
            continue

        precip = np.frombuffer(raw[:GRID_BYTES], dtype='<f4').reshape(NROWS, NCOLS)
        # The second grid (gauge counts) is optional.
        gauge = None
        if len(raw) >= GRID_BYTES * 2:
            gauge = np.frombuffer(raw[GRID_BYTES:GRID_BYTES * 2], dtype='<f4').reshape(NROWS, NCOLS)

        ingested_at = pd.Timestamp.now()   # one timestamp per daily batch
        rows = []
        for r_i in range(NROWS):
            lat = round(LAT_START + r_i * STEP, 3)
            for c_i in range(NCOLS):
                val = float(precip[r_i, c_i])
                if not val >= 0:
                    continue               # negative or NaN: no data

                # Negative / non-finite gauge counts are stored as NULL
                # (int() would raise on NaN and abort the whole backfill).
                gc = None
                if gauge is not None:
                    g = float(gauge[r_i, c_i])
                    if math.isfinite(g) and g >= 0:
                        gc = int(g)

                rows.append({
                    'OBSERVATION_DATE': obs,
                    'LATITUDE':   lat,
                    'LONGITUDE':  round(LON_START + c_i * STEP, 3),
                    'PRECIP_MM':  round(val, 2),
                    'PRECIP_IN':  round(val / 25.4, 4),
                    'GAUGE_COUNT': gc,
                    'FILE_TYPE':  source,
                    'INGESTED_AT': ingested_at
                })

        # Check BEFORE deleting, so an all-undefined file never wipes a
        # previously loaded day.
        if not rows:
            results.append(f"ERR {dt}: no cells with precipitation >= 0 [{source}]")
            total_skip += 1
            continue

        df = pd.DataFrame(rows)
        session.sql(f"DELETE FROM PRF_RAINFALL_REALTIME WHERE observation_date = '{obs}'").collect()
        session.create_dataframe(df).write.mode("append").save_as_table("PRF_RAINFALL_REALTIME")

        rain_cells = int((df['PRECIP_MM'] > 0).sum())
        results.append(f"OK {dt}: {len(df):,} cells, {rain_cells:,} with rain [{source}]")
        total_ok += 1

    summary = f"DONE: {total_ok} days loaded, {total_skip} skipped"
    results.insert(0, summary)
    return "\n".join(results)
$$;

In [None]:
CREATE OR REPLACE PROCEDURE backfill_cpc_rainfall(start_date STRING, end_date STRING)
RETURNS STRING
LANGUAGE PYTHON
RUNTIME_VERSION = '3.11'
PACKAGES = ('snowflake-snowpark-python', 'requests', 'numpy', 'pandas')
EXTERNAL_ACCESS_INTEGRATIONS = (NOAA_CPC_ACCESS)
HANDLER = 'run'
AS
$$
import math
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import requests

# Grid layout of one CPC CONUS 0.25-degree file: NROWS x NCOLS little-endian
# float32 values, first cell centred on (LAT_START, LON_START).
NROWS, NCOLS = 120, 300
LAT_START, LON_START, STEP = 20.125, -129.875, 0.25
GRID_FLOATS = NROWS * NCOLS
GRID_BYTES = GRID_FLOATS * 4

BASE = "https://ftp.cpc.ncep.noaa.gov/precip/CPC_UNI_PRCP/GAUGE_CONUS"
FILE_STEM = "PRCP_CU_GAUGE_V1.0CONUS_0.25deg.lnx"
TARGET_TABLE = "PRF_RAINFALL_REALTIME"


def _fetch_grid_file(dt, year):
    """Download the first usable file for day `dt` (YYYYMMDD).

    Tries the UPDATED file, then the RT file under the same year, then the RT
    file under the previous year's directory. Returns (raw_bytes, source_tag),
    or (None, None) when no candidate answers 200 with at least one full grid.
    """
    prev_year = str(int(year) - 1)
    candidates = [
        (f"{BASE}/UPDATED/{year}/{FILE_STEM}.{dt}.UPDATED", 'UPDATED'),
        (f"{BASE}/RT/{year}/{FILE_STEM}.{dt}.RT", 'RT'),
        (f"{BASE}/RT/{prev_year}/{FILE_STEM}.{dt}.RT", 'RT'),
    ]
    for url, tag in candidates:
        try:
            resp = requests.get(url, timeout=30)
        except requests.RequestException:
            # Network-level failure for this candidate only; try the next one.
            continue
        if resp.status_code == 200 and len(resp.content) >= GRID_BYTES:
            return resp.content, tag
    return None, None


def _grid_to_frame(raw, obs, source):
    """Turn one raw grid file into a pandas frame of non-negative cells.

    Column order matches the positional layout of PRF_RAINFALL_REALTIME.
    Raises ValueError when the file contains no non-negative precipitation
    cell, so the caller never deletes a day it cannot replace.
    """
    precip = (
        np.frombuffer(raw[:GRID_BYTES], dtype='<f4')
        .reshape(NROWS, NCOLS)
        .astype('float64')
    )
    # Negative values are skipped, as before; NaN compares False and is
    # skipped too.
    keep = precip >= 0
    row_idx, col_idx = np.nonzero(keep)   # row-major, same order as a nested loop
    if row_idx.size == 0:
        raise ValueError("no non-negative precipitation cells in file")
    values = precip[keep]

    gauge_count = None
    if len(raw) >= GRID_BYTES * 2:
        gauge = np.frombuffer(raw[GRID_BYTES:GRID_BYTES * 2], dtype='<f4').reshape(NROWS, NCOLS)
        # Non-finite gauge values cannot be converted to int; store NULL instead.
        gauge_count = [
            int(g) if math.isfinite(g) else None
            for g in gauge[keep].tolist()
        ]

    # NOTE(review): a later cell in this notebook concludes that PRECIP_IN is
    # 10x too large (the source values appear to be in 0.1 mm) and rescales
    # the table with a one-off UPDATE. The conversion is deliberately left
    # unchanged here so that correction is not applied twice; change both
    # together once confirmed.
    return pd.DataFrame({
        'OBSERVATION_DATE': obs,
        'LATITUDE':    np.round(LAT_START + row_idx * STEP, 3),
        'LONGITUDE':   np.round(LON_START + col_idx * STEP, 3),
        'PRECIP_MM':   np.round(values, 2),
        'PRECIP_IN':   np.round(values / 25.4, 4),
        'GAUGE_COUNT': gauge_count,
        'FILE_TYPE':   source,
        'INGESTED_AT': pd.Timestamp.now(),   # one timestamp per day, not per row
    })


def run(session, start_date, end_date):
    """Load CPC gauge rainfall for every day in [start_date, end_date].

    Args:
        session: Snowpark session supplied by Snowflake.
        start_date: first day to load, 'YYYYMMDD'.
        end_date: last day to load (inclusive), 'YYYYMMDD'.

    Returns:
        A newline-joined report: a 'DONE: ...' summary line followed by one
        'OK', 'SKIP' or 'ERR' line per day. Days that fail are counted as
        skipped and do not stop the run.

    Raises:
        ValueError: if either date does not match '%Y%m%d'.
    """
    current = datetime.strptime(start_date, '%Y%m%d')
    end = datetime.strptime(end_date, '%Y%m%d')

    results = []
    total_ok = 0
    total_skip = 0

    while current <= end:
        dt = current.strftime('%Y%m%d')
        obs = current.strftime('%Y-%m-%d')

        raw, source = _fetch_grid_file(dt, current.strftime('%Y'))
        if raw is None:
            results.append(f"SKIP {dt}")
            total_skip += 1
        else:
            try:
                frame = _grid_to_frame(raw, obs, source)
                # Build the Snowpark DataFrame BEFORE deleting, so a parsing or
                # upload failure cannot leave the day without rows.
                staged = session.create_dataframe(frame)
                # `obs` is produced by strftime above, never by caller text.
                session.sql(
                    f"DELETE FROM {TARGET_TABLE} WHERE observation_date = '{obs}'"
                ).collect()
                staged.write.mode("append").save_as_table(TARGET_TABLE)

                rain_cells = int((frame['PRECIP_MM'] > 0).sum())
                results.append(
                    f"OK {dt}: {len(frame):,} cells, {rain_cells:,} with rain [{source}]"
                )
                total_ok += 1
            except Exception as e:
                # Best-effort backfill: report the day and keep going.
                results.append(f"ERR {dt}: {str(e)[:100]}")
                total_skip += 1

        current += timedelta(days=1)

    summary = f"DONE: {total_ok} days loaded, {total_skip} skipped"
    results.insert(0, summary)
    return "\n".join(results)
$$;

In [None]:
-- FILE_TYPE was created as VARCHAR(5); the backfill writes 'UPDATED' (7 chars),
-- so widen the column before loading.
ALTER TABLE PRF_RAINFALL_REALTIME
    ALTER COLUMN FILE_TYPE SET DATA TYPE VARCHAR(10);

In [None]:
-- Load 2026-01-01 through 2026-02-11 (both bounds inclusive).
CALL backfill_cpc_rainfall(
    start_date => '20260101',
    end_date   => '20260211'
);

In [None]:
-- Back-test for Texas grids, interval 625:
--   implied normal = 2024 Jan-Feb rain / (2024 index / 100)
--   predicted 2025 = 2025 Jan-Feb rain / implied normal * 100
-- and compare the prediction with the published 2025 index.
WITH rainfall_2024 AS (
    SELECT
        g.GRIDCODE                 AS grid_id,
        ROUND(SUM(r.PRECIP_IN), 4) AS total_in
    FROM PRF_RAINFALL_REALTIME r
    JOIN PRF_GRID_REFERENCE g
        ON ROUND(r.LATITUDE, 3)  = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.LONGITUDE, 3) = ROUND(g.CENTER_LON, 3)
    WHERE r.OBSERVATION_DATE BETWEEN '2024-01-01' AND '2024-02-29'
    GROUP BY g.GRIDCODE
),
rainfall_2025 AS (
    SELECT
        g.GRIDCODE                 AS grid_id,
        ROUND(SUM(r.PRECIP_IN), 4) AS total_in
    FROM PRF_RAINFALL_REALTIME r
    JOIN PRF_GRID_REFERENCE g
        ON ROUND(r.LATITUDE, 3)  = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.LONGITUDE, 3) = ROUND(g.CENTER_LON, 3)
    WHERE r.OBSERVATION_DATE BETWEEN '2025-01-01' AND '2025-02-28'
    GROUP BY g.GRIDCODE
),
index_2024 AS (
    SELECT GRID_ID, INDEX_VALUE
    FROM RAIN_INDEX_PLATINUM_ENHANCED
    WHERE INTERVAL_CODE = '625' AND YEAR = 2024
),
index_2025 AS (
    SELECT GRID_ID, INDEX_VALUE
    FROM RAIN_INDEX_PLATINUM_ENHANCED
    WHERE INTERVAL_CODE = '625' AND YEAR = 2025
),
tx_grids AS (
    SELECT DISTINCT TRY_TO_NUMBER(SUB_COUNTY_CODE) AS grid_id
    FROM MAP_YTD
    WHERE INSURANCE_PLAN_CODE = '13'
      AND INTERVAL_CODE = '625'
      AND STATE_CODE = '48'
      AND DELETED_DATE IS NULL
),
predictions AS (
    -- The prediction is computed once here and reused below.
    SELECT
        r24.grid_id,
        ROUND((r25.total_in / NULLIF(r24.total_in / NULLIF(i24.INDEX_VALUE / 100, 0), 0)) * 100, 1) AS predicted,
        i25.INDEX_VALUE AS actual
    FROM rainfall_2024 r24
    JOIN rainfall_2025 r25 ON r24.grid_id = r25.grid_id
    JOIN index_2024 i24    ON r24.grid_id = i24.GRID_ID
    JOIN index_2025 i25    ON r24.grid_id = i25.GRID_ID
    JOIN tx_grids t        ON r24.grid_id = t.grid_id
    WHERE i24.INDEX_VALUE > 0
),
comparisons AS (
    SELECT
        grid_id,
        predicted,
        actual,
        ABS(ROUND(predicted - actual, 1)) AS abs_diff
    FROM predictions
)
SELECT
    COUNT(*)                                                          AS total_grids,
    ROUND(AVG(abs_diff), 2)                                           AS avg_abs_error,
    ROUND(MEDIAN(abs_diff), 2)                                        AS median_abs_error,
    COUNT_IF(abs_diff <= 1)                                           AS within_1pt,
    -- NULLIF keeps the query from failing with division by zero when no grid matches.
    ROUND(COUNT_IF(abs_diff <= 1)  * 100.0 / NULLIF(COUNT(*), 0), 1)  AS pct_within_1,
    COUNT_IF(abs_diff <= 5)                                           AS within_5pt,
    ROUND(COUNT_IF(abs_diff <= 5)  * 100.0 / NULLIF(COUNT(*), 0), 1)  AS pct_within_5,
    COUNT_IF(abs_diff <= 10)                                          AS within_10pt,
    ROUND(COUNT_IF(abs_diff <= 10) * 100.0 / NULLIF(COUNT(*), 0), 1)  AS pct_within_10,
    COUNT_IF(abs_diff > 10)                                           AS outliers_gt_10
FROM comparisons;

In [None]:
-- Single-grid walk-through (grid 7928) of the implied-normal prediction:
-- 2024 rain and index give an implied normal, which converts 2025 rain
-- into a predicted 2025 index for comparison with the published value.
WITH r24 AS (
    SELECT ROUND(SUM(r.PRECIP_IN), 4) AS total_in
    FROM PRF_RAINFALL_REALTIME r
    JOIN PRF_GRID_REFERENCE g
        ON ROUND(r.LATITUDE, 3) = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.LONGITUDE, 3) = ROUND(g.CENTER_LON, 3)
    WHERE g.GRIDCODE = 7928
      AND r.OBSERVATION_DATE BETWEEN '2024-01-01' AND '2024-02-29'
),
r25 AS (
    SELECT ROUND(SUM(r.PRECIP_IN), 4) AS total_in
    FROM PRF_RAINFALL_REALTIME r
    JOIN PRF_GRID_REFERENCE g
        ON ROUND(r.LATITUDE, 3) = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.LONGITUDE, 3) = ROUND(g.CENTER_LON, 3)
    WHERE g.GRIDCODE = 7928
      AND r.OBSERVATION_DATE BETWEEN '2025-01-01' AND '2025-02-28'
),
i24 AS (
    SELECT INDEX_VALUE FROM RAIN_INDEX_PLATINUM_ENHANCED
    WHERE GRID_ID = 7928 AND INTERVAL_CODE = '625' AND YEAR = 2024
),
i25 AS (
    SELECT INDEX_VALUE FROM RAIN_INDEX_PLATINUM_ENHANCED
    WHERE GRID_ID = 7928 AND INTERVAL_CODE = '625' AND YEAR = 2025
),
calc AS (
    SELECT
        r24.total_in    AS rain_2024_in,
        i24.INDEX_VALUE AS rma_index_2024,
        r25.total_in    AS rain_2025_in,
        i25.INDEX_VALUE AS actual_2025,
        -- Guarded the same way as implied_normal_in: a zero index or zero
        -- 2024 rainfall yields NULL instead of a division-by-zero error.
        ROUND((r25.total_in / NULLIF(r24.total_in / NULLIF(i24.INDEX_VALUE / 100, 0), 0)) * 100, 1) AS predicted_2025
    FROM r24, r25, i24, i25
)
SELECT
    7928                                                          AS grid_id,
    rain_2024_in,
    rma_index_2024,
    ROUND(rain_2024_in / NULLIF(rma_index_2024 / 100, 0), 4)      AS implied_normal_in,
    rain_2025_in,
    predicted_2025,
    actual_2025,
    ROUND(predicted_2025 - actual_2025, 1)                        AS difference
FROM calc;

In [None]:
-- In-season projection for four grids: average the per-year implied normals
-- (2023-2025, Jan-Feb), then scale the 2026 rain collected so far to the full
-- 59-day interval and express it as an index against that normal.
WITH yearly_implied AS (
    SELECT
        ref.GRIDCODE AS grid_id,
        idx.YEAR,
        ROUND(SUM(obs.PRECIP_IN), 4) / NULLIF(idx.INDEX_VALUE / 100, 0) AS normal_in
    FROM PRF_RAINFALL_REALTIME obs
    JOIN PRF_GRID_REFERENCE ref
        ON ROUND(obs.LATITUDE, 3) = ROUND(ref.CENTER_LAT, 3)
        AND ROUND(obs.LONGITUDE, 3) = ROUND(ref.CENTER_LON, 3)
    JOIN RAIN_INDEX_PLATINUM_ENHANCED idx
        ON idx.GRID_ID = ref.GRIDCODE
        AND idx.INTERVAL_CODE = '625'
        AND idx.YEAR = YEAR(obs.OBSERVATION_DATE)
    WHERE ref.GRIDCODE IN (7929, 7930, 8230, 8231)
      AND (
            (obs.OBSERVATION_DATE BETWEEN '2023-01-01' AND '2023-02-28' AND idx.YEAR = 2023)
         OR (obs.OBSERVATION_DATE BETWEEN '2024-01-01' AND '2024-02-29' AND idx.YEAR = 2024)
         OR (obs.OBSERVATION_DATE BETWEEN '2025-01-01' AND '2025-02-28' AND idx.YEAR = 2025)
      )
    GROUP BY ref.GRIDCODE, idx.INDEX_VALUE, idx.YEAR
),
implied_normals AS (
    SELECT grid_id, ROUND(AVG(normal_in), 4) AS normal_in
    FROM yearly_implied
    GROUP BY grid_id
),
rain_2026 AS (
    SELECT
        ref.GRIDCODE                         AS grid_id,
        ROUND(SUM(obs.PRECIP_IN), 4)         AS rain_so_far,
        COUNT(DISTINCT obs.OBSERVATION_DATE) AS days_collected,
        MAX(obs.OBSERVATION_DATE)            AS last_day
    FROM PRF_RAINFALL_REALTIME obs
    JOIN PRF_GRID_REFERENCE ref
        ON ROUND(obs.LATITUDE, 3) = ROUND(ref.CENTER_LAT, 3)
        AND ROUND(obs.LONGITUDE, 3) = ROUND(ref.CENTER_LON, 3)
    WHERE ref.GRIDCODE IN (7929, 7930, 8230, 8231)
      AND obs.OBSERVATION_DATE BETWEEN '2026-01-01' AND '2026-02-28'
    GROUP BY ref.GRIDCODE
),
projected AS (
    -- Straight-line extrapolation: daily average so far times 59 days.
    SELECT
        n.grid_id,
        n.normal_in,
        r.days_collected,
        r.rain_so_far,
        ROUND(((r.rain_so_far / r.days_collected) * 59 / NULLIF(n.normal_in, 0)) * 100, 1) AS projected_final_index
    FROM implied_normals n
    JOIN rain_2026 r ON n.grid_id = r.grid_id
)
SELECT
    grid_id,
    normal_in                                               AS hist_normal_in,
    days_collected                                          AS days_so_far,
    59                                                      AS total_days,
    ROUND(rain_so_far, 4)                                   AS rain_so_far_in,
    ROUND((rain_so_far / NULLIF(normal_in, 0)) * 100, 1)    AS partial_index,
    ROUND((rain_so_far / days_collected) * 59, 4)           AS projected_rain_in,
    projected_final_index,
    CASE
        WHEN projected_final_index < 75 THEN 'LIKELY INDEMNITY'
        WHEN projected_final_index < 90 THEN 'WATCH'
        ELSE 'OK'
    END AS signal
FROM projected
ORDER BY grid_id;

In [None]:
-- Grid 7929 validation: per-year Jan-Feb rainfall vs. published index, and the
-- index a fixed normal would have predicted.
WITH assumed_normal AS (
    -- Hard-coded normal (inches) used for every year below.
    -- NOTE(review): the original comment called this "the OTHER years' avg
    -- normal", but the same constant is applied to all years; confirm how it
    -- was derived and refresh it whenever PRECIP_IN is rescaled.
    SELECT 25.9033 AS normal_in
),
years AS (
    SELECT
        idx.YEAR,
        idx.INDEX_VALUE              AS actual_index,
        ROUND(SUM(obs.PRECIP_IN), 4) AS total_rain
    FROM PRF_RAINFALL_REALTIME obs
    JOIN PRF_GRID_REFERENCE ref
        ON ROUND(obs.LATITUDE, 3) = ROUND(ref.CENTER_LAT, 3)
        AND ROUND(obs.LONGITUDE, 3) = ROUND(ref.CENTER_LON, 3)
    JOIN RAIN_INDEX_PLATINUM_ENHANCED idx
        ON idx.GRID_ID = ref.GRIDCODE
        AND idx.INTERVAL_CODE = '625'
        AND idx.YEAR = YEAR(obs.OBSERVATION_DATE)
    WHERE ref.GRIDCODE = 7929
      AND (
            (obs.OBSERVATION_DATE BETWEEN '2023-01-01' AND '2023-02-28' AND idx.YEAR = 2023)
         OR (obs.OBSERVATION_DATE BETWEEN '2024-01-01' AND '2024-02-29' AND idx.YEAR = 2024)
         OR (obs.OBSERVATION_DATE BETWEEN '2025-01-01' AND '2025-02-28' AND idx.YEAR = 2025)
      )
    GROUP BY idx.YEAR, idx.INDEX_VALUE
)
SELECT
    y.YEAR,
    y.total_rain,
    y.actual_index,
    ROUND(y.total_rain / NULLIF(y.actual_index / 100, 0), 4)      AS implied_normal,
    ROUND((y.total_rain / a.normal_in) * 100, 1)                  AS predicted_using_avg_normal,
    ROUND((y.total_rain / a.normal_in) * 100 - y.actual_index, 1) AS diff
FROM years y
CROSS JOIN assumed_normal a
ORDER BY y.YEAR;

In [None]:
-- Grid 8230 validation: per-year Jan-Feb rainfall vs. published index, and the
-- index a fixed normal would have predicted.
WITH assumed_normal AS (
    -- Hard-coded normal (inches) applied to every year below.
    -- NOTE(review): confirm how this constant was derived; it needs refreshing
    -- whenever PRECIP_IN is rescaled.
    SELECT 28.4183 AS normal_in
),
years AS (
    SELECT
        idx.YEAR,
        idx.INDEX_VALUE              AS actual_index,
        ROUND(SUM(obs.PRECIP_IN), 4) AS total_rain
    FROM PRF_RAINFALL_REALTIME obs
    JOIN PRF_GRID_REFERENCE ref
        ON ROUND(obs.LATITUDE, 3) = ROUND(ref.CENTER_LAT, 3)
        AND ROUND(obs.LONGITUDE, 3) = ROUND(ref.CENTER_LON, 3)
    JOIN RAIN_INDEX_PLATINUM_ENHANCED idx
        ON idx.GRID_ID = ref.GRIDCODE
        AND idx.INTERVAL_CODE = '625'
        AND idx.YEAR = YEAR(obs.OBSERVATION_DATE)
    WHERE ref.GRIDCODE = 8230
      AND (
            (obs.OBSERVATION_DATE BETWEEN '2023-01-01' AND '2023-02-28' AND idx.YEAR = 2023)
         OR (obs.OBSERVATION_DATE BETWEEN '2024-01-01' AND '2024-02-29' AND idx.YEAR = 2024)
         OR (obs.OBSERVATION_DATE BETWEEN '2025-01-01' AND '2025-02-28' AND idx.YEAR = 2025)
      )
    GROUP BY idx.YEAR, idx.INDEX_VALUE
)
SELECT
    y.YEAR,
    y.total_rain,
    y.actual_index,
    ROUND(y.total_rain / NULLIF(y.actual_index / 100, 0), 4)      AS implied_normal,
    ROUND((y.total_rain / a.normal_in) * 100, 1)                  AS predicted_using_avg_normal,
    ROUND((y.total_rain / a.normal_in) * 100 - y.actual_index, 1) AS diff
FROM years y
CROSS JOIN assumed_normal a
ORDER BY y.YEAR;

In [None]:
-- Build PRF_GRID_NORMALS: per grid, the average implied Jan-Feb normal over
-- 2023-2025, with a confidence tier based on the average gauge count.
-- A later cell in this notebook re-creates the same table with CV-based tiers.
CREATE OR REPLACE TABLE PRF_GRID_NORMALS AS
WITH yearly_normals AS (
    SELECT
        ref.GRIDCODE                  AS grid_id,
        idx.YEAR,
        idx.INTERVAL_CODE,
        idx.INTERVAL_NAME,
        idx.INDEX_VALUE               AS actual_index,
        ROUND(SUM(obs.PRECIP_IN), 4)  AS total_rain_in,
        ROUND(SUM(obs.PRECIP_IN) / NULLIF(idx.INDEX_VALUE / 100, 0), 4) AS implied_normal_in,
        ROUND(AVG(obs.GAUGE_COUNT), 1) AS avg_gauges,
        MIN(obs.FILE_TYPE)            AS file_type
    FROM PRF_RAINFALL_REALTIME obs
    JOIN PRF_GRID_REFERENCE ref
        ON ROUND(obs.LATITUDE, 3) = ROUND(ref.CENTER_LAT, 3)
        AND ROUND(obs.LONGITUDE, 3) = ROUND(ref.CENTER_LON, 3)
    JOIN RAIN_INDEX_PLATINUM_ENHANCED idx
        ON idx.GRID_ID = ref.GRIDCODE
        AND idx.INTERVAL_CODE = '625'
        AND idx.YEAR = YEAR(obs.OBSERVATION_DATE)
    WHERE (obs.OBSERVATION_DATE BETWEEN '2023-01-01' AND '2023-02-28' AND idx.YEAR = 2023)
       OR (obs.OBSERVATION_DATE BETWEEN '2024-01-01' AND '2024-02-29' AND idx.YEAR = 2024)
       OR (obs.OBSERVATION_DATE BETWEEN '2025-01-01' AND '2025-02-28' AND idx.YEAR = 2025)
    GROUP BY
        ref.GRIDCODE,
        idx.YEAR,
        idx.INTERVAL_CODE,
        idx.INTERVAL_NAME,
        idx.INDEX_VALUE
)
SELECT
    grid_id,
    interval_code,
    interval_name,
    COUNT(*)                            AS years_used,
    ROUND(AVG(implied_normal_in), 4)    AS normal_in,
    ROUND(MIN(implied_normal_in), 4)    AS normal_min,
    ROUND(MAX(implied_normal_in), 4)    AS normal_max,
    ROUND(STDDEV(implied_normal_in), 4) AS normal_stddev,
    ROUND(AVG(avg_gauges), 1)           AS avg_gauges,
    CASE
        WHEN AVG(avg_gauges) >= 3 THEN 'HIGH'
        WHEN AVG(avg_gauges) >= 1 THEN 'MEDIUM'
        ELSE 'LOW'
    END                                 AS confidence_tier,
    CURRENT_TIMESTAMP()                 AS created_at
FROM yearly_normals
-- Years with a zero/NULL index or no rain give no usable normal.
WHERE implied_normal_in > 0
GROUP BY grid_id, interval_code, interval_name;

In [None]:
-- How many grids fall in each confidence tier, and what do they look like?
SELECT
    confidence_tier,
    COUNT(*),
    ROUND(AVG(normal_in), 2)  AS avg_normal,
    ROUND(AVG(years_used), 1) AS avg_years
FROM PRF_GRID_NORMALS
GROUP BY confidence_tier;

In [None]:
-- Rebuild PRF_GRID_NORMALS with confidence tiers based on the coefficient of
-- variation (stddev / mean) of the per-year implied normals.
CREATE OR REPLACE TABLE PRF_GRID_NORMALS AS
WITH yearly_normals AS (
    SELECT
        ref.GRIDCODE                 AS grid_id,
        idx.YEAR,
        idx.INTERVAL_CODE,
        idx.INTERVAL_NAME,
        idx.INDEX_VALUE,
        ROUND(SUM(obs.PRECIP_IN), 4) AS total_rain_in
    FROM PRF_RAINFALL_REALTIME obs
    JOIN PRF_GRID_REFERENCE ref
        ON ROUND(obs.LATITUDE, 3) = ROUND(ref.CENTER_LAT, 3)
        AND ROUND(obs.LONGITUDE, 3) = ROUND(ref.CENTER_LON, 3)
    JOIN RAIN_INDEX_PLATINUM_ENHANCED idx
        ON idx.GRID_ID = ref.GRIDCODE
        AND idx.INTERVAL_CODE = '625'
        AND idx.YEAR = YEAR(obs.OBSERVATION_DATE)
    WHERE (obs.OBSERVATION_DATE BETWEEN '2023-01-01' AND '2023-02-28' AND idx.YEAR = 2023)
       OR (obs.OBSERVATION_DATE BETWEEN '2024-01-01' AND '2024-02-29' AND idx.YEAR = 2024)
       OR (obs.OBSERVATION_DATE BETWEEN '2025-01-01' AND '2025-02-28' AND idx.YEAR = 2025)
    GROUP BY
        ref.GRIDCODE,
        idx.YEAR,
        idx.INTERVAL_CODE,
        idx.INTERVAL_NAME,
        idx.INDEX_VALUE
),
with_normals AS (
    SELECT
        grid_id,
        YEAR,
        INTERVAL_CODE,
        INTERVAL_NAME,
        INDEX_VALUE,
        total_rain_in,
        ROUND(total_rain_in / NULLIF(INDEX_VALUE / 100, 0), 4) AS implied_normal_in
    FROM yearly_normals
    WHERE INDEX_VALUE > 0
),
grid_stats AS (
    -- Aggregate once; the final SELECT only formats and classifies.
    SELECT
        grid_id,
        interval_code,
        interval_name,
        COUNT(*)                  AS years_used,
        AVG(implied_normal_in)    AS mean_normal,
        STDDEV(implied_normal_in) AS sd_normal
    FROM with_normals
    WHERE implied_normal_in > 0
    GROUP BY grid_id, interval_code, interval_name
)
SELECT
    grid_id,
    interval_code,
    interval_name,
    years_used,
    ROUND(mean_normal, 4)                                  AS normal_in,
    ROUND(sd_normal, 4)                                    AS normal_stddev,
    ROUND(sd_normal / NULLIF(mean_normal, 0) * 100, 1)     AS cv_pct,
    CASE
        WHEN sd_normal / NULLIF(mean_normal, 0) < 0.05 THEN 'HIGH'
        WHEN sd_normal / NULLIF(mean_normal, 0) < 0.15 THEN 'MEDIUM'
        ELSE 'LOW'   -- also reached when the ratio is NULL
    END                                                    AS confidence_tier,
    CURRENT_TIMESTAMP()                                    AS created_at
FROM grid_stats;

In [None]:
-- Spot-check the rebuilt normals for grids 8230 and 8231.
SELECT
    grid_id,
    normal_in,
    cv_pct,
    confidence_tier
FROM PRF_GRID_NORMALS
WHERE grid_id IN (8230, 8231);

In [None]:
-- Which Texas counties / intervals does grid 7929 appear under (plan 13)?
SELECT DISTINCT
    map.COUNTY_CODE,
    cty.COUNTY_NAME,
    map.INTERVAL_CODE
FROM MAP_YTD map
LEFT JOIN COUNTY_YTD cty
    ON  cty.COUNTY_CODE      = map.COUNTY_CODE
    AND cty.STATE_CODE       = '48'
    AND cty.REINSURANCE_YEAR = 2025
    AND cty.DELETED_DATE IS NULL
WHERE map.STATE_CODE = '48'
  AND map.INSURANCE_PLAN_CODE = '13'
  AND map.DELETED_DATE IS NULL
  AND TRY_TO_NUMBER(map.SUB_COUNTY_CODE) = 7929;

In [None]:
-- CELL 1: Location of grid 8231, raw and rounded to the 3 decimals used as the
-- join key against the rainfall table.
SELECT
    ref.GRIDCODE,
    ref.CENTER_LAT,
    ref.CENTER_LON,
    ROUND(ref.CENTER_LAT, 3) AS LAT_ROUNDED,
    ROUND(ref.CENTER_LON, 3) AS LON_ROUNDED
FROM PRF_GRID_REFERENCE ref
WHERE ref.GRIDCODE = 8231;

In [None]:
-- CELL 2: Which CPC cell is matched to grid 8231 for Jan-Feb of 2023-2025,
-- per year and file type, and how far is it from the grid centre?
WITH grid AS (
    SELECT GRIDCODE, CENTER_LAT, CENTER_LON
    FROM PRF_GRID_REFERENCE
    WHERE GRIDCODE = 8231
)
SELECT
    YEAR(obs.OBSERVATION_DATE)               AS YEAR,
    obs.FILE_TYPE,
    COUNT(DISTINCT obs.OBSERVATION_DATE)     AS DAYS,
    MIN(obs.OBSERVATION_DATE)                AS FIRST_DAY,
    MAX(obs.OBSERVATION_DATE)                AS LAST_DAY,
    ROUND(SUM(obs.PRECIP_IN), 4)             AS TOTAL_RAIN_IN,
    obs.LATITUDE                             AS CPC_LAT,
    obs.LONGITUDE                            AS CPC_LON,
    grid.CENTER_LAT                          AS GRID_LAT,
    grid.CENTER_LON                          AS GRID_LON,
    ROUND(obs.LATITUDE - grid.CENTER_LAT, 4)  AS LAT_DIFF,
    ROUND(obs.LONGITUDE - grid.CENTER_LON, 4) AS LON_DIFF
FROM PRF_RAINFALL_REALTIME obs
JOIN grid
    ON ROUND(obs.LATITUDE, 3) = ROUND(grid.CENTER_LAT, 3)
    AND ROUND(obs.LONGITUDE, 3) = ROUND(grid.CENTER_LON, 3)
WHERE YEAR(obs.OBSERVATION_DATE) IN (2023, 2024, 2025)
  AND MONTH(obs.OBSERVATION_DATE) IN (1, 2)
GROUP BY
    YEAR(obs.OBSERVATION_DATE),
    obs.FILE_TYPE,
    obs.LATITUDE,
    obs.LONGITUDE,
    grid.CENTER_LAT,
    grid.CENTER_LON
ORDER BY 1, 2;

In [None]:
-- CELL 3: Published index rows for grid 8231, interval 625, 2023-2025.
SELECT
    idx.REINSURANCE_YEAR,
    idx.GRID_ID,
    idx.INTERVAL_CODE,
    idx.COUNTY_CODE,
    idx.FINAL_INDEX,
    idx.FINAL_GRID_INDEX,
    idx.INDEX_VALUE,
    idx.RAIN_INDEX
FROM RAIN_INDEX_PLATINUM_ENHANCED idx
WHERE idx.INTERVAL_CODE = '625'
  AND idx.GRID_ID = 8231
  AND idx.REINSURANCE_YEAR IN (2023, 2024, 2025)
ORDER BY idx.REINSURANCE_YEAR;

In [None]:
-- CELL 4: Implied normal calculation, year by year, with a sanity check.
-- Uses only UPDATED files for grid 8231, Jan-Feb 2023-2025.
WITH grid AS (
    SELECT GRIDCODE, CENTER_LAT, CENTER_LON
    FROM PRF_GRID_REFERENCE WHERE GRIDCODE = 8231
),
rain_by_year AS (
    SELECT
        YEAR(r.OBSERVATION_DATE) AS YR,
        ROUND(SUM(r.PRECIP_IN), 4) AS RAIN_IN
    FROM PRF_RAINFALL_REALTIME r
    JOIN grid g
        ON ROUND(r.LATITUDE, 3) = ROUND(g.CENTER_LAT, 3)
        AND ROUND(r.LONGITUDE, 3) = ROUND(g.CENTER_LON, 3)
    WHERE MONTH(r.OBSERVATION_DATE) IN (1, 2)
      AND YEAR(r.OBSERVATION_DATE) IN (2023, 2024, 2025)
      AND r.FILE_TYPE = 'UPDATED'
    GROUP BY 1
),
index_by_year AS (
    SELECT
        REINSURANCE_YEAR AS YR,
        FINAL_INDEX,
        FINAL_GRID_INDEX
    FROM RAIN_INDEX_PLATINUM_ENHANCED
    WHERE GRID_ID = 8231
      AND INTERVAL_CODE = '625'
      AND REINSURANCE_YEAR IN (2023, 2024, 2025)
)
SELECT
    r.YR,
    r.RAIN_IN,
    i.FINAL_INDEX,
    i.FINAL_GRID_INDEX,
    -- NULLIF: a published index of 0 yields NULL here instead of aborting the
    -- whole query with a division-by-zero error.
    ROUND(r.RAIN_IN / NULLIF(i.FINAL_INDEX / 100, 0), 4)      AS IMPLIED_NORMAL_FROM_FINAL_INDEX,
    ROUND(r.RAIN_IN / NULLIF(i.FINAL_GRID_INDEX / 100, 0), 4) AS IMPLIED_NORMAL_FROM_GRID_INDEX,
    -- If normal were ~3" (reasonable Jan-Feb Kenedy), what index?
    ROUND(r.RAIN_IN / 3.0 * 100, 1) AS INDEX_IF_NORMAL_WERE_3IN
FROM rain_by_year r
LEFT JOIN index_by_year i ON r.YR = i.YR
ORDER BY r.YR;

In [None]:
-- CELL 5: Normals for every grid mapped to Kenedy County (TX, plan 13),
-- largest first, to see whether they are all inflated.
SELECT
    nrm.GRID_ID,
    nrm.NORMAL_IN,
    nrm.CV_PCT,
    ref.CENTER_LAT,
    ref.CENTER_LON
FROM PRF_GRID_NORMALS nrm
LEFT JOIN PRF_GRID_REFERENCE ref
    ON ref.GRIDCODE = nrm.GRID_ID
WHERE nrm.GRID_ID IN (
    SELECT TRY_TO_NUMBER(map.SUB_COUNTY_CODE)
    FROM MAP_YTD map
    WHERE map.STATE_CODE = '48'
      AND map.INSURANCE_PLAN_CODE = '13'
      AND map.DELETED_DATE IS NULL
      AND map.COUNTY_CODE IN (
          SELECT cty.COUNTY_CODE
          FROM COUNTY_YTD cty
          WHERE cty.STATE_CODE = '48'
            AND cty.COUNTY_NAME = 'Kenedy'
            AND cty.REINSURANCE_YEAR = 2025
      )
)
ORDER BY nrm.NORMAL_IN DESC;

In [None]:
-- CELL 6: Raw daily values for grid 8231, 2025-01-01..2025-01-10, to judge
-- whether PRECIP_IN looks like plausible daily inches.
WITH grid AS (
    SELECT CENTER_LAT, CENTER_LON
    FROM PRF_GRID_REFERENCE
    WHERE GRIDCODE = 8231
)
SELECT
    obs.OBSERVATION_DATE,
    obs.LATITUDE,
    obs.LONGITUDE,
    obs.PRECIP_IN,
    obs.FILE_TYPE
FROM PRF_RAINFALL_REALTIME obs
JOIN grid
    ON ROUND(obs.LATITUDE, 3) = ROUND(grid.CENTER_LAT, 3)
    AND ROUND(obs.LONGITUDE, 3) = ROUND(grid.CENTER_LON, 3)
WHERE obs.OBSERVATION_DATE BETWEEN '2025-01-01' AND '2025-01-10'
ORDER BY obs.OBSERVATION_DATE, obs.FILE_TYPE;

In [None]:
-- 1. Fix the rainfall table
--    CAUTION: this UPDATE has no WHERE clause and is NOT idempotent. It
--    rescales PRECIP_IN on every row, and every additional execution divides
--    by 10 again. Run it exactly once. PRECIP_MM is not modified by it.
--    backfill_cpc_rainfall as defined in this notebook still divides by 25.4,
--    so any rows it loads after this UPDATE are back on the old (10x) scale
--    until step 2 is done.
UPDATE PRF_RAINFALL_REALTIME 
SET PRECIP_IN = PRECIP_IN / 10;

-- 2. Fix the ingestion procedure so new data comes in correct
--    (divide by 254 instead of 25.4 in backfill_cpc_rainfall)

-- 3. Rebuild PRF_GRID_NORMALS with corrected rainfall
--    (normals will automatically come out right)

In [None]:
-- CELL A: Values BEFORE the rescale for grids 8230 and 8231: current vs.
-- corrected rainfall and normal, and the index computed both ways.
WITH grid AS (
    SELECT GRIDCODE, CENTER_LAT, CENTER_LON
    FROM PRF_GRID_REFERENCE
    WHERE GRIDCODE IN (8230, 8231)
)
SELECT
    grid.GRIDCODE                                  AS GRID_ID,
    YEAR(obs.OBSERVATION_DATE)                     AS YR,
    obs.FILE_TYPE,
    COUNT(DISTINCT obs.OBSERVATION_DATE)           AS DAYS,
    ROUND(SUM(obs.PRECIP_IN), 4)                   AS RAIN_IN_CURRENT,
    ROUND(SUM(obs.PRECIP_IN) / 10, 4)              AS RAIN_IN_CORRECTED,
    nrm.NORMAL_IN                                  AS NORMAL_CURRENT,
    ROUND(nrm.NORMAL_IN / 10, 4)                   AS NORMAL_CORRECTED,
    -- Dividing rain and normal by the same factor leaves the ratio unchanged.
    ROUND(SUM(obs.PRECIP_IN) / nrm.NORMAL_IN * 100, 1)               AS INDEX_CURRENT,
    ROUND((SUM(obs.PRECIP_IN) / 10) / (nrm.NORMAL_IN / 10) * 100, 1) AS INDEX_CORRECTED
FROM PRF_RAINFALL_REALTIME obs
JOIN grid
    ON ROUND(obs.LATITUDE, 3) = ROUND(grid.CENTER_LAT, 3)
    AND ROUND(obs.LONGITUDE, 3) = ROUND(grid.CENTER_LON, 3)
JOIN PRF_GRID_NORMALS nrm
    ON nrm.GRID_ID = grid.GRIDCODE
WHERE YEAR(obs.OBSERVATION_DATE) IN (2025, 2026)
  AND MONTH(obs.OBSERVATION_DATE) IN (1, 2)
  AND obs.FILE_TYPE IN ('UPDATED', 'RT')
GROUP BY
    grid.GRIDCODE,
    YEAR(obs.OBSERVATION_DATE),
    obs.FILE_TYPE,
    nrm.NORMAL_IN
ORDER BY grid.GRIDCODE, YR, obs.FILE_TYPE;

In [None]:
-- GET_DDL identifies a procedure by name plus argument types; the backfill
-- procedure takes two string arguments, so an empty "()" does not resolve it.
SELECT GET_DDL('PROCEDURE', 'BACKFILL_CPC_RAINFALL(VARCHAR, VARCHAR)');