In [1]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Analyzing a GEOGRAPHY column with `bigframes.geopandas.GeoSeries`

In [2]:
import bigframes
import bigframes.geopandas
import bigframes.pandas as bpd
# Setting the option to None disables the progress bar, so cell outputs in this
# notebook show only the results.
bpd.options.display.progress_bar = None

### 1. Load the Counties table from the Census Bureau US Boundaries dataset

In [3]:
# Load the public US counties boundaries table as a BigQuery DataFrames DataFrame.
df = bpd.read_gbq(
    "bigquery-public-data.geo_us_boundaries.counties",
)

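DefaultIndexWarning: Table 'bigquery-public-data.geo_us_boundaries.counties' is clustered
and/or partitioned, but BigQuery DataFrames was not able to find a
suitable index. To avoid this warning, set at least one of:
`index_col` or `filters`.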


### 2. Create a series from the int_point_geom column

In [4]:
# Select the GEOGRAPHY column that holds one interior point per county.
point_geom_series = df["int_point_geom"]

## The `GeoSeries` constructor accepts local data or a `bigframes.pandas.Series` object.

### 1. Fetch five points as local data with `peek`

In [5]:
# Preview five rows of the point column; the row labels in the output are the
# original table positions, not 0..4.
five_geo_points = point_geom_series.peek(n=5)
five_geo_points

163      POINT (-83.11922 35.9162)
143     POINT (-84.06423 38.51347)
96       POINT (-96.9712 28.79637)
310     POINT (-90.45393 37.11107)
38     POINT (-106.31668 38.73822)
Name: int_point_geom, dtype: geometry

### 2. Convert the five geo points to `GeoSeries`

In [6]:
# Pass the five points as a plain list so the new GeoSeries gets a fresh 0..4 index.
geo_points = bigframes.geopandas.GeoSeries(list(five_geo_points))
geo_points

0      POINT (-83.11922 35.9162)
1     POINT (-84.06423 38.51347)
2      POINT (-96.9712 28.79637)
3     POINT (-90.45393 37.11107)
4    POINT (-106.31668 38.73822)
dtype: geometry

### 3. Retrieve the x (longitude) and y (latitude) from the GeoSeries with `.x` and `.y`.

#### Note: TypeError is raised if `.x` and `.y` are used with a geometry type other than `Point`.

### `.x`

In [7]:
# Longitude of each point, read through the GeoSeries `.x` property.
geo_points.x

0    -83.119224
1     -84.06423
2    -96.971198
3    -90.453931
4   -106.316683
dtype: Float64

### `.y`

In [8]:
# Latitude of each point, read through the GeoSeries `.y` property.
geo_points.y

0    35.916198
1    38.513473
2     28.79637
3    37.111074
4    38.738223
dtype: Float64

### 4. Alternatively, use the `.geo` accessor to access GeoSeries methods from a `bigframes.pandas.Series` object.

#### `geo.x`

In [9]:
# Longitude via the `.geo` accessor on the original column, without building a
# GeoSeries first.
point_geom_series.geo.x

0    -101.298265
1     -99.111085
2      -66.58687
3    -102.601791
4     -71.578625
5     -88.961529
6     -87.492986
7     -82.422666
8    -100.208166
9     -85.815939
10   -101.681133
11   -119.516659
12    -89.398306
13    -107.78848
14    -91.159306
15   -113.887042
16    -83.470416
17    -98.520146
18    -83.911718
19    -87.321865
20    -91.727626
21    -93.466093
22   -101.143324
23    -78.657634
24    -94.272323
dtype: Float64

#### `geo.y`

In [10]:
# Latitude via the `.geo` accessor on the original column, without building a
# GeoSeries first.
point_geom_series.geo.y

0     46.710819
1     29.353661
2     18.211152
3     38.835646
4     41.869768
5     39.860237
6     36.892059
7     38.143642
8     34.524623
9     30.862007
10    40.180165
11    46.228125
12    36.054196
13    38.154731
14    38.761902
15    44.928506
16    30.447232
17    29.448671
18    42.602532
19    34.529776
20    33.957675
21    42.037538
22    29.875285
23    36.299884
24    44.821657
dtype: Float64

## Retrieve the `area` of different geometry shapes.

### 1. Fetch five county geometries as local data with `peek`

In [11]:
# Preview five county boundary polygons from the `county_geom` column.
county_geom_column = "county_geom"
geom_series = df[county_geom_column].peek(n=5)
geom_series

115    POLYGON ((-86.69516 40.3012, -86.69515 40.3011...
28     POLYGON ((-94.76099 39.04366, -94.75875 39.043...
173    POLYGON ((-76.98439 40.51456, -76.98403 40.513...
52     POLYGON ((-90.87722 35.44364, -90.87858 35.443...
3      POLYGON ((-102.57685 39.04068, -102.57696 39.0...
Name: county_geom, dtype: geometry

### 2. Convert the geometry collection to `GeoSeries`

In [12]:
# Rebuild the five local county polygons as a GeoSeries with a fresh 0..4 index.
five_geom = bigframes.geopandas.GeoSeries(list(geom_series))
five_geom

0    POLYGON ((-86.69516 40.3012, -86.69515 40.3011...
1    POLYGON ((-94.76099 39.04366, -94.75875 39.043...
2    POLYGON ((-76.98439 40.51456, -76.98403 40.513...
3    POLYGON ((-90.87722 35.44364, -90.87858 35.443...
4    POLYGON ((-102.57685 39.04068, -102.57696 39.0...
dtype: geometry

## Note: `GeoSeries.area` raises NotImplementedError.  

In [13]:
# `GeoSeries.area` deliberately raises NotImplementedError in BigQuery DataFrames.
# The error is the point of this cell, so catch and print it instead of letting it
# propagate: an uncaught exception would stop "Restart Kernel -> Run All" here.
try:
    five_geom.area
except NotImplementedError as exc:
    print(f"{type(exc).__name__}: {exc}")

NotImplementedError: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.40.0.

### 3. Use `bigframes.bigquery.st_area` to retrieve the `area` in square meters instead. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_area

In [14]:
import bigframes.bigquery as bbq

In [15]:
# Compute the area of each county polygon with `st_area`; results are in square meters.
geom_area = bbq.st_area(five_geom)
geom_area

0    1048396641.043185
1     404195271.072038
2    1443999689.714014
3    1611776377.004328
4    4610240226.664809
dtype: Float64

##  Use `GeoSeries.from_xy()` to create a GeoSeries of `Point` geometries. 

### 1. Reuse the `geo_points.x` and `geo_points.y` results by passing them to `.from_xy()` 

In [16]:
# Rebuild the point geometries from their longitude (x) and latitude (y) values.
bigframes.geopandas.GeoSeries.from_xy(
    x=geo_points.x,
    y=geo_points.y,
)

0      POINT (-83.11922 35.9162)
1     POINT (-84.06423 38.51347)
2      POINT (-96.9712 28.79637)
3     POINT (-90.45393 37.11107)
4    POINT (-106.31668 38.73822)
dtype: geometry

## Use `GeoSeries.to_wkt()` to convert geo points from geometry data type to Well-Known Text (WKT).

### 1. Reuse the `geo_points`

In [17]:
# Convert each point geometry to its Well-Known Text (WKT) string.
# `to_wkt()` is an instance method, so call it on the GeoSeries itself rather than
# unbound through the class with the series passed as `self`.
geo_to_wkts = geo_points.to_wkt()
geo_to_wkts

0      POINT(-83.1192242 35.916198)
1       POINT(-84.06423 38.5134727)
2     POINT(-96.9711977 28.7963699)
3     POINT(-90.4539307 37.1110737)
4    POINT(-106.3166833 38.7382234)
dtype: string

## Use `GeoSeries.from_wkt()` to convert geo points from Well-Known Text (WKT) to geometry data type.

### 1. Reuse the `geo_to_wkts` result from `GeoSeries.to_wkt()`

In [18]:
# Parse the WKT strings back into geometry values (round trip of the WKT conversion).
# Despite its name, `wkts_from_geo` holds geometries (dtype: geometry), not WKT strings.
wkts_from_geo = bigframes.geopandas.GeoSeries.from_wkt(geo_to_wkts)
wkts_from_geo

0      POINT (-83.11922 35.9162)
1     POINT (-84.06423 38.51347)
2      POINT (-96.9712 28.79637)
3     POINT (-90.45393 37.11107)
4    POINT (-106.31668 38.73822)
dtype: geometry

## Discover the set-theoretic boundary of geometry objects with `GeoSeries.boundary`

In [19]:
from shapely.geometry import Polygon, LineString, Point
# Build a small GeoSeries that mixes geometry types: three polygons, one line
# string, and one point.
geom_obj = bigframes.geopandas.GeoSeries(
    [
        Polygon([(0, 0), (1, 1), (0, 1)]),
        Polygon([(10, 0), (10, 5), (0, 0)]),
        Polygon([(0, 0), (2, 2), (2, 0)]),
        LineString([(0, 0), (1, 1), (0, 1)]),
        Point(0, 1),
    ]
)
geom_obj

0       POLYGON ((0 0, 1 1, 0 1, 0 0))
1    POLYGON ((10 0, 10 5, 0 0, 10 0))
2       POLYGON ((0 0, 2 2, 2 0, 0 0))
3           LINESTRING (0 0, 1 1, 0 1)
4                          POINT (0 1)
dtype: geometry

In [20]:
# Boundary of each geometry, read through the `.geo` accessor. In the recorded output
# polygons yield their outline as a LINESTRING, the line string yields its two end
# points, and the point yields an empty geometry collection.
geom_obj.geo.boundary

0       LINESTRING (0 0, 1 1, 0 1, 0 0)
1    LINESTRING (10 0, 10 5, 0 0, 10 0)
2       LINESTRING (0 0, 2 2, 2 0, 0 0)
3                 MULTIPOINT (0 0, 0 1)
4              GEOMETRYCOLLECTION EMPTY
dtype: geometry

## Find the difference between two `GeoSeries` 

### Note: GeoSeries.difference raises a `NotImplementedError`.

#### Reuse `wkts_from_geo` and `geom_obj`

In [21]:
# `GeoSeries.difference()` deliberately raises NotImplementedError in BigQuery
# DataFrames. The error is the point of this cell, so catch and print it instead of
# letting it propagate: an uncaught exception would stop "Restart Kernel -> Run All".
try:
    wkts_from_geo.difference(geom_obj)
except NotImplementedError as exc:
    print(f"{type(exc).__name__}: {exc}")

NotImplementedError: GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.40.0.

### Use `bigframes.bigquery.st_difference()` to find the difference between two GeoSeries. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_difference

In [22]:
# Subtract `geom_obj` from `wkts_from_geo`; the result has one geometry per row of
# the inputs.
bbq.st_difference(wkts_from_geo, geom_obj)

0      POINT (-83.11922 35.9162)
1     POINT (-84.06423 38.51347)
2       GEOMETRYCOLLECTION EMPTY
3     POINT (-90.45393 37.11107)
4    POINT (-106.31668 38.73822)
dtype: geometry

### Find the difference between a `GeoSeries` and a single geometry shape.

In [23]:
# NOTE(review): the second argument is a one-element list. In the recorded output only
# row 0 has a result and rows 1-4 are None, which suggests the list is aligned by
# index rather than applied to every row -- confirm this is the intended demonstration.
bbq.st_difference(wkts_from_geo, [Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])

0    POINT (-83.11922 35.9162)
1                         None
2                         None
3                         None
4                         None
dtype: geometry

### Find the difference between two identical `GeoSeries`

In [24]:
# Subtract the GeoSeries from itself; every row of the recorded output is an empty
# geometry collection.
bbq.st_difference(geom_obj, geom_obj)

0    GEOMETRYCOLLECTION EMPTY
1    GEOMETRYCOLLECTION EMPTY
2    GEOMETRYCOLLECTION EMPTY
3    GEOMETRYCOLLECTION EMPTY
4    GEOMETRYCOLLECTION EMPTY
dtype: geometry