In [2]:
import boto3
import configparser
import os
import urllib3

import folium
import geopandas as gpd
import rasterio
from rasterio.plot import show
import numpy as np
from matplotlib import pyplot

import tempfile

In [3]:
# The S3 client in this notebook is created with certificate verification
# turned off, which makes urllib3 emit an InsecureRequestWarning on every
# request. Silence only that category rather than every urllib3 HTTPWarning
# (the default of disable_warnings), so unrelated warnings stay visible.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

### Connection with S3 Bucket
All Census GRID datasets are available in an S3 bucket. The configuration below lets you list and download selected datasets from it.

In [4]:
def s3_connection(credentials: dict, verify: bool = False) -> boto3.session.Session:
    """Creates an S3 client for the endpoint described by the credentials.

    Args:
        credentials (dict): A dictionary containing AWS S3 credentials with keys
                            'host_base', 'access_key', and 'secret_key'.
                            'host_base' is passed to boto3 as the endpoint URL.
        verify (bool): Whether to verify the endpoint's TLS certificate.
                       Defaults to False to keep the previous behaviour of this
                       helper; pass True to enable verification.

    Returns:
        The object returned by ``boto3.client('s3', ...)``, configured with the
        provided credentials. NOTE(review): this is an S3 client rather than a
        ``boto3.session.Session``; the return annotation is left unchanged so
        the signature stays compatible with existing callers.

    Raises:
        KeyError: If any of the required keys is missing from ``credentials``.
    """
    # Fail early and name every missing key, instead of surfacing a bare
    # KeyError for just the first one while building the client arguments.
    required_keys = ('host_base', 'access_key', 'secret_key')
    missing_keys = [key for key in required_keys if key not in credentials]
    if missing_keys:
        raise KeyError(
            "Missing S3 credential key(s): " + ", ".join(missing_keys)
        )

    s3 = boto3.client('s3',
                      endpoint_url=credentials['host_base'],
                      aws_access_key_id=credentials['access_key'],
                      aws_secret_access_key=credentials['secret_key'],
                      use_ssl=True,
                      verify=verify)
    return s3

# Load s3 credentials
s3_config_path = '/home/eouser/.s3cfg'
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means the credentials file is
# missing or unreadable. Report that directly instead of failing later with an
# opaque KeyError on the section lookup.
if not config.read(s3_config_path):
    raise FileNotFoundError(
        "S3 configuration file not found or unreadable: " + s3_config_path
    )
if not config.has_section('default'):
    raise KeyError(
        "Section [default] is missing from S3 configuration file: " + s3_config_path
    )
credentials = dict(config['default'].items())

# Connection with S3 eodata
s3 = s3_connection(credentials)

### Browsing S3 bucket content

In [5]:
# A single list_objects_v2 call returns only one page of keys, so larger
# prefixes would be silently truncated. Walk every page with the paginator and
# collect the entries under 'Contents', mirroring the shape of a single
# response: the key is present only when at least one object was found.
response = {}
for page in s3.get_paginator('list_objects_v2').paginate(Bucket='ESTAT', Prefix='Field_boundaries'):
    if 'Contents' in page:
        response.setdefault('Contents', []).extend(page['Contents'])

In [6]:
# The listing carries a 'Contents' entry only when objects matched the prefix.
if 'Contents' not in response:
    print("No objects found in the bucket.")
else:
    print("Objects in bucket:")
    # One object key per line
    for entry in response['Contents']:
        print(entry['Key'])

Objects in bucket:
Field_boundaries/field_boundaries.parquet


### Reading Parquet file to GeoDataFrame

In [7]:
object_path = 'Census_GRID/2021/ESTAT_Census_2021_V2.parquet'

# The parquet file is only needed on disk while it is being parsed, so it is
# downloaded into a temporary directory that is cleaned up when the block exits.
with tempfile.TemporaryDirectory() as tmpdirname:
    # Reuse the object's own file name (last component of the S3 key) locally
    local_parquet_path = os.path.join(tmpdirname, object_path.rsplit('/', 1)[-1])

    # Fetch the object from the 'ESTAT' bucket ...
    s3.download_file(Bucket='ESTAT', Key=object_path, Filename=local_parquet_path)

    # ... and load it into a GeoDataFrame before the directory disappears
    gdf = gpd.read_parquet(local_parquet_path)

ImportError: Missing optional dependency 'pyarrow.parquet'. pyarrow is required for Parquet support.  "
        "Use pip or conda to install pyarrow.parquet.

### Displaying geometries on basemap

To display vector geometries on a map, we recommend folium. Folium can display different geometry types such as polygons, lines and points. <br>
IMPORTANT: Every geometry shown on the map must first be transformed to the EPSG:4326 coordinate reference system.

In [18]:
# Filtering many polygons
gdf_filter = gdf.loc[:100]

In [25]:
# Draw every selected polygon on a folium map, each with its GRD_ID as popup

def _orange_fill(feature):
    """Style callback for folium.GeoJson: fill every feature in orange."""
    return {"fillColor": "orange"}


m1 = folium.Map(location=[28.730442, -13.911504], zoom_start=13)

# folium expects geographic coordinates, so reproject to EPSG:4326 first
gdf_wgs84 = gdf_filter.to_crs(4326)

for _, row in gdf_wgs84.iterrows():
    # Simplify the outline to lighten the GeoJSON handed to the map
    outline = gpd.GeoSeries(row["geometry"]).simplify(tolerance=0.001)
    layer = folium.GeoJson(data=outline.to_json(), style_function=_orange_fill)
    folium.Popup(row["GRD_ID"]).add_to(layer)
    layer.add_to(m1)

m1