In [2]:
import ibis
from ibis import _
import sys
sys.path.append("../cng-python/")
from utils import set_secrets, source_secrets

# Open a DuckDB connection through ibis, requesting the `spatial` extension
# (needed for the ST_* geometry functions used in the SQL cells below).
con = ibis.duckdb.connect(extensions=["spatial"])
# Project helpers imported from ../cng-python/utils.
# NOTE(review): presumably these register the S3/MinIO credentials needed to read
# the s3:// paths below — confirm against utils.py.
set_secrets(con)
source_secrets(con)


In [3]:
# Two candidate locations for the protected-areas parquet file. Previously both were
# assigned to `url`, so the first was silently overwritten; the alternative now has
# its own name so it stays available without being a dead store.
# NOTE(review): the two file names differ (cpad-stats vs ca_areas) — confirm they hold
# the same data before switching sources.
hf_url = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/cpad-stats.parquet"
url = "s3://public-biodiversity/ca30x30/ca_areas.parquet"  # local MINIO copy (the one actually used)

# Register the file as "ca_areas" and keep only the columns used by the overlap query.
# `table_name` is passed by keyword: this works on ibis releases that accept it
# positionally as well as on ones that require the keyword.
ca_areas = con.read_parquet(url, table_name="ca_areas").select(_.id, _.geom)


In [4]:
# Register the fire-perimeter parquet file as "ca_fire"; only its geometry column is
# needed for the overlap calculation.
# `table_name` is passed by keyword so the call does not depend on the installed ibis
# release accepting it positionally.
ca_fire = con.read_parquet(
    "s3://public-fire/calfire-2023.parquet",
    table_name="ca_fire",
).select(_.geometry)

In [6]:
# To keep the SQL in later cells clean, materialise both inputs as DuckDB tables with
# repaired (valid) geometries, a common geometry column name (`geom`) and short
# placeholder table names (t1 = areas, t2 = fires).
q1 = """
CREATE OR REPLACE TABLE t1 AS SELECT id, ST_MakeValid(geom) as geom FROM "ca_areas"
"""

q2 = """
CREATE OR REPLACE TABLE t2 AS SELECT ST_MakeValid(geometry) as geom FROM "ca_fire"
"""

# Run each statement, terminated by a semicolon.
con.raw_sql(q1 + ";")
con.raw_sql(q2 + ";")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<duckdb.duckdb.DuckDBPyConnection at 0x75b49c0677b0>

In [42]:
# Verify that table t2 exists and that its geometry column is a geospatial type.
# (ibis converts it to a WKB blob, which geopandas understands but DuckDB does not.)
con.table("t2")

Okay, here we go. We'll do the operation in SQL because it's not yet obvious to me how to do it in ibis. This solution comes from Llama.


- The `COALESCE` may be unnecessary; apparently it just converts areas with no overlap to 0 rather than leaving them as NULL / NA.
 
 Llama3.3 thinks this is the same as:

 ```
 SELECT 
    t1.id,
    ST_Area(ST_Intersection(t1.geom, (SELECT ST_Union(geom) FROM t2))) / ST_Area(t1.geom) AS coverage_fraction
FROM 
    t1;
```

but suggests that the join approach should scale much better. 

In [None]:
 
# Compute, for every polygon in t1, the fraction of its area covered by polygons in t2,
# and write the result (columns: id, overlap_fraction) to overlap.parquet.
#
# The spatial LEFT JOIN keeps every t1 row and pairs it only with the t2 polygons that
# intersect it. The matched t2 geometries are unioned per t1 polygon *before*
# intersecting: summing per-polygon intersection areas instead would count any region
# covered by several overlapping t2 polygons more than once and could yield a
# "fraction" greater than 1.
#
# For t1 rows with no match the aggregate sees only NULLs, so the area expression is
# NULL and COALESCE turns it into 0.
# NOTE(review): relies on ST_Union_Agg returning NULL when every input is NULL — confirm
# on the installed DuckDB spatial version.
con.raw_sql('''
COPY (
SELECT 
    t1.id,
    COALESCE(
        ST_Area(ST_Intersection(t1.geom, ST_Union_Agg(t2.geom))) / ST_Area(t1.geom),
        0
    ) as overlap_fraction
FROM t1
LEFT JOIN t2 ON ST_Intersects(t1.geom, t2.geom)
GROUP BY t1.id, t1.geom
) TO 
'overlap.parquet' (FORMAT 'parquet');
''')

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))