In [1]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Analyzing a GEOGRAPHY column with `bigframes.geopandas.GeoSeries`

In [2]:
import bigframes
import bigframes.geopandas
import bigframes.pandas as bpd
# Set the progress-bar display option to None
# (presumably silences progress output in the cells below — confirm).
bpd.options.display.progress_bar = None

### 1. Load the Counties table from the Census Bureau US Boundaries dataset

In [3]:
# Load the public US counties boundaries table through `bpd.read_gbq`.
df = bpd.read_gbq("bigquery-public-data.geo_us_boundaries.counties")

and/or partitioned, but BigQuery DataFrames was not able to find a
`index_col` or `filters`.[0m


### 2. Create a series from the int_point_geom column

In [4]:
# Select the `int_point_geom` column of the counties table as a series.
point_geom_series = df["int_point_geom"]

## The `GeoSeries` constructor accepts local data or a `bigframes.pandas.Series` object.

### 1. Create a GeoSeries from local data with `peek`

In [5]:
# Peek at five rows of the point column; the bare name on the last line
# displays the result.
five_geo_points = point_geom_series.peek(n=5)
five_geo_points

217    POINT (-86.80185 38.70532)
16     POINT (-83.47042 30.44723)
40     POINT (-94.33925 38.25722)
139    POINT (-78.88532 38.50758)
400      POINT (-95.6191 41.0337)
Name: int_point_geom, dtype: geometry

### 2. Convert the five geo points to `GeoSeries`

In [6]:
# Build a GeoSeries from the five peeked points, passed as a plain Python list.
geo_points = bigframes.geopandas.GeoSeries(list(five_geo_points))
geo_points

0    POINT (-86.80185 38.70532)
1    POINT (-83.47042 30.44723)
2    POINT (-94.33925 38.25722)
3    POINT (-78.88532 38.50758)
4      POINT (-95.6191 41.0337)
dtype: geometry

### 3. Retrieve the x (longitude) and y (latitude) from the GeoSeries with `.x` and `.y`.

#### Note: TypeError is raised if `.x` and `.y` are used with a geometry type other than `Point`.

### `.x`

In [7]:
# x coordinate of each point in `geo_points`.
geo_points.x

0   -86.801847
1   -83.470416
2   -94.339246
3   -78.885321
4   -95.619101
dtype: Float64

### `.y`

In [8]:
# y coordinate of each point in `geo_points`.
geo_points.y

0    38.705322
1    30.447232
2    38.257217
3    38.507585
4    41.033703
dtype: Float64

### 4. Alternatively, use the `.geo` accessor to access GeoSeries methods from a `bigframes.pandas.Series` object.

#### `geo.x`

In [9]:
# Same `.x` lookup, reached through the `.geo` accessor on the original
# (non-local) series rather than on the five-row GeoSeries.
point_geom_series.geo.x

0    -101.298265
1     -99.111085
2      -66.58687
3    -102.601791
4     -71.578625
5     -88.961529
6     -87.492986
7     -82.422666
8    -100.208166
9     -85.815939
10   -101.681133
11   -119.516659
12    -89.398306
13    -107.78848
14    -91.159306
15   -113.887042
16    -83.470416
17    -98.520146
18    -83.911718
19    -87.321865
20    -91.727626
21    -93.466093
22   -101.143324
23    -78.657634
24    -94.272323
dtype: Float64

#### `geo.y`

In [10]:
# Same `.y` lookup, reached through the `.geo` accessor on the original
# (non-local) series rather than on the five-row GeoSeries.
point_geom_series.geo.y

0     46.710819
1     29.353661
2     18.211152
3     38.835646
4     41.869768
5     39.860237
6     36.892059
7     38.143642
8     34.524623
9     30.862007
10    40.180165
11    46.228125
12    36.054196
13    38.154731
14    38.761902
15    44.928506
16    30.447232
17    29.448671
18    42.602532
19    34.529776
20    33.957675
21    42.037538
22    29.875285
23    36.299884
24    44.821657
dtype: Float64

## Retrieve the `area` of different geometry shapes.

### 1. Create a geometry collection from local data with `peek`

In [11]:
# Peek at five rows of the county polygon column; the bare name on the last
# line displays the result.
geom_series = df["county_geom"].peek(n=5)
geom_series

214    POLYGON ((-79.36704 34.96248, -79.36696 34.962...
161    POLYGON ((-89.08844 33.53252, -89.08843 33.532...
57     POLYGON ((-110.75069 35.50001, -110.75069 35.4...
46     POLYGON ((-94.6865 39.04405, -94.68764 39.0440...
260    POLYGON ((-100.53965 34.99391, -100.53966 34.9...
Name: county_geom, dtype: geometry

### 2. Convert the geometry collection to `GeoSeries`

In [12]:
# Build a GeoSeries from the five peeked county geometries, passed as a
# plain Python list.
five_geom = bigframes.geopandas.GeoSeries(list(geom_series))
five_geom

0    POLYGON ((-79.36704 34.96248, -79.36696 34.962...
1    POLYGON ((-89.08844 33.53252, -89.08843 33.532...
2    POLYGON ((-110.75069 35.50001, -110.75069 35.4...
3    POLYGON ((-94.6865 39.04405, -94.68764 39.0440...
4    POLYGON ((-100.53965 34.99391, -100.53966 34.9...
dtype: geometry

## Note: `GeoSeries.area` raises NotImplementedError.  

In [13]:
# `GeoSeries.area` raises NotImplementedError. Catch it and print the message
# so the error is still demonstrated while "Restart Kernel -> Run All" can
# continue past this cell instead of stopping here.
try:
    five_geom.area
except NotImplementedError as exc:
    print(f"{type(exc).__name__}: {exc}")

NotImplementedError: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.40.0.

### 3. Use `bigframes.bigquery.st_area` to retrieve the `area` in square meters instead. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_area

In [14]:
import bigframes.bigquery as bbq

In [15]:
# Compute the area of each of the five geometries with `bbq.st_area`,
# then display the result.
geom_area = bbq.st_area(five_geom)
geom_area

0     1014426111.476457
1     1196896004.730286
2    25794235993.165642
3     1242002056.351685
4     2381217221.963739
dtype: Float64

##  Use `GeoSeries.from_xy()` to create a GeoSeries of `Point` geometries. 

### 1. Reuse the `geo_points.x` and `geo_points.y` results by passing them to `.from_xy()` 

In [16]:
# Rebuild point geometries from the x and y coordinate series of `geo_points`.
bigframes.geopandas.GeoSeries.from_xy(geo_points.x, geo_points.y)

0    POINT (-86.80185 38.70532)
1    POINT (-83.47042 30.44723)
2    POINT (-94.33925 38.25722)
3    POINT (-78.88532 38.50758)
4      POINT (-95.6191 41.0337)
dtype: geometry

## Use `GeoSeries.to_wkt()` to convert geo points from geometry data type to Well-Known Text (WKT).

### 1. Reuse the `geo_points`

In [17]:
# Convert the five points to their WKT string form by calling `to_wkt`
# directly on the GeoSeries instance.
geo_to_wkts = geo_points.to_wkt()
geo_to_wkts

0     POINT(-86.8018468 38.705322)
1    POINT(-83.4704159 30.4472325)
2    POINT(-94.3392459 38.2572171)
3    POINT(-78.8853213 38.5075848)
4     POINT(-95.619101 41.0337028)
dtype: string

## Use `GeoSeries.from_wkt()` to convert geo points from Well-Known Text (WKT) to geometry data type.

### 1. Reuse `geo_to_wkts` results from `GeoSeries.to_wkt`

In [18]:
# Parse the WKT strings in `geo_to_wkts` back into geometries,
# then display the result.
wkts_from_geo = bigframes.geopandas.GeoSeries.from_wkt(geo_to_wkts)
wkts_from_geo

0    POINT (-86.80185 38.70532)
1    POINT (-83.47042 30.44723)
2    POINT (-94.33925 38.25722)
3    POINT (-78.88532 38.50758)
4      POINT (-95.6191 41.0337)
dtype: geometry

## Discover the set-theoretic boundary of geometry objects with `GeoSeries.boundary`

In [19]:
from shapely.geometry import Polygon, LineString, Point
# Mixed local geometries: three triangles, one two-segment line, and one point.
geom_obj = bigframes.geopandas.GeoSeries(
    [
        Polygon([(0, 0), (1, 1), (0, 1)]),
        Polygon([(10, 0), (10, 5), (0, 0)]),
        Polygon([(0, 0), (2, 2), (2, 0)]),
        LineString([(0, 0), (1, 1), (0, 1)]),
        Point(0, 1),
    ]
)
geom_obj

0       POLYGON ((0 0, 1 1, 0 1, 0 0))
1    POLYGON ((10 0, 10 5, 0 0, 10 0))
2       POLYGON ((0 0, 2 2, 2 0, 0 0))
3           LINESTRING (0 0, 1 1, 0 1)
4                          POINT (0 1)
dtype: geometry

In [20]:
# Boundary of every geometry in `geom_obj`, reached through the `.geo` accessor.
geom_obj.geo.boundary

0       LINESTRING (0 0, 1 1, 0 1, 0 0)
1    LINESTRING (10 0, 10 5, 0 0, 10 0)
2       LINESTRING (0 0, 2 2, 2 0, 0 0)
3                 MULTIPOINT (0 0, 0 1)
4              GEOMETRYCOLLECTION EMPTY
dtype: geometry

## Find the `difference` between two `GeoSeries` 

#### Reuse `wkts_from_geo` and `geom_obj` to find the difference between the geometry objects

In [21]:
# Row-by-row difference: each point in `wkts_from_geo` minus the geometry at
# the same position in `geom_obj`.
wkts_from_geo.difference(geom_obj)

0    POINT (-86.80185 38.70532)
1    POINT (-83.47042 30.44723)
2      GEOMETRYCOLLECTION EMPTY
3    POINT (-78.88532 38.50758)
4      POINT (-95.6191 41.0337)
dtype: geometry

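### Find the difference between a `GeoSeries` and a one-element list holding a single geometry shape.

Only row 0 has a counterpart in the list, so the remaining rows are `None`.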

In [22]:
# `other` is a one-element list wrapping a single triangle Polygon.
wkts_from_geo.difference([Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])

0    POINT (-86.80185 38.70532)
1                          None
2                          None
3                          None
4                          None
dtype: geometry

### Find the difference in `GeoSeries` with the same shapes

In [23]:
# Difference of `geom_obj` with itself.
geom_obj.difference(geom_obj)

0    GEOMETRYCOLLECTION EMPTY
1    GEOMETRYCOLLECTION EMPTY
2    GEOMETRYCOLLECTION EMPTY
3    GEOMETRYCOLLECTION EMPTY
4    GEOMETRYCOLLECTION EMPTY
dtype: geometry

## You can also use `bigframes.bigquery.st_difference()` to find the difference between two `GeoSeries`. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_difference

In [24]:
# Function form of the same operation: `wkts_from_geo` minus `geom_obj`.
bbq.st_difference(wkts_from_geo, geom_obj)

0    POINT (-86.80185 38.70532)
1    POINT (-83.47042 30.44723)
2      GEOMETRYCOLLECTION EMPTY
3    POINT (-78.88532 38.50758)
4      POINT (-95.6191 41.0337)
dtype: geometry

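### Find the difference between a `GeoSeries` and a one-element list holding a single geometry shape.

Only row 0 has a counterpart in the list, so the remaining rows are `None`.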

In [25]:
# The second argument is a one-element list wrapping a single triangle Polygon.
bbq.st_difference(wkts_from_geo, [Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])

0    POINT (-86.80185 38.70532)
1                          None
2                          None
3                          None
4                          None
dtype: geometry

### Find the difference in GeoSeries with the same shapes

In [26]:
# Function form of the difference of `geom_obj` with itself.
bbq.st_difference(geom_obj, geom_obj)

0    GEOMETRYCOLLECTION EMPTY
1    GEOMETRYCOLLECTION EMPTY
2    GEOMETRYCOLLECTION EMPTY
3    GEOMETRYCOLLECTION EMPTY
4    GEOMETRYCOLLECTION EMPTY
dtype: geometry