In [1]:
import os
import sys
import pandas as pd
import pyarrow as pa
import awswrangler as wr
from datetime import datetime, date, timedelta
from dateutil.relativedelta import relativedelta
from dataclasses import dataclass, asdict

from shapely import wkt
from shapely.geometry import Polygon

In [2]:
# os.listdir("/tmp")

In [4]:
def athena_read_sql(query: str) -> pd.DataFrame:
    return wr.athena.read_sql_query(
        sql=query,
        database="db_ice",
        ctas_approach=False,
        s3_output="s3://aws-athena-query-results-891377318910-us-east-1/output/",
    )


def get_polygon_record_by_day(id_poligono: str, dt_ref: str) -> dict:
    # date_reference = datetime.today().strftime("%Y%m%d")
    query = f"""
    SELECT *
    FROM db_ice.tb_poligono
    WHERE anomesdia = {dt_ref}
        AND id_poligono = '{id_poligono}'
    """
    df = athena_read_sql(query)
    return df
    # return df.iloc[0, :].to_dict()

In [5]:
def get_polygon_from_geometry(geometry: dict) -> Polygon:
    return wkt.loads(geometry)

In [6]:
# ref = datetime.today().strftime("%Y%m%d")
ref = "20250810"
ref

'20250810'

In [7]:
id_poligono = "id_001"

In [8]:
record = get_polygon_record_by_day(id_poligono, ref)

In [11]:
# record.iloc[0, :].to_dict()

In [47]:
# record['poligono']

In [53]:
polygon = get_polygon_from_geometry(record['poligono'])

In [20]:
query = """
SELECT id_poligono, data_plantio, anomesdia
FROM db_ice.tb_poligono
-- WHERE id_poligono = 'id_001'
"""
df = athena_read_sql(query)


In [21]:
df.shape[0]

5

In [22]:
df.head()

Unnamed: 0,id_poligono,data_plantio,anomesdia
0,id_002,2023-07-10,20250810
1,id_003,2023-08-01,20250810
2,id_004,2023-08-01,20250810
3,id_005,2023-08-01,20250810
4,id_001,2023-06-21,20250810


In [11]:
try:
    x = df.iloc[0, :].to_dict()
except IndexError:
    print("Oi")

In [12]:
x

{'id_poligono': 'id_001',
 'data_penultima_ingestao': None,
 'data_ultima_ingestao': '2025-08-29'}

In [None]:
# orient: Literal['dict', 'list', 'series', 'index']
for idx, item in df.to_dict(orient="index").items():
    print(item)

{'id_poligono': 'id_002', 'data_plantio': '2023-07-10', 'anomesdia': 20250810}
{'id_poligono': 'id_003', 'data_plantio': '2023-08-01', 'anomesdia': 20250810}
{'id_poligono': 'id_004', 'data_plantio': '2023-08-01', 'anomesdia': 20250810}
{'id_poligono': 'id_005', 'data_plantio': '2023-08-01', 'anomesdia': 20250810}
{'id_poligono': 'id_001', 'data_plantio': '2023-06-21', 'anomesdia': 20250810}


In [None]:
df.to_records()

rec.array([(0, 'id_002', '2023-07-10', 20250810),
           (1, 'id_003', '2023-08-01', 20250810),
           (2, 'id_004', '2023-08-01', 20250810),
           (3, 'id_005', '2023-08-01', 20250810),
           (4, 'id_001', '2023-06-21', 20250810)],
          dtype=[('index', '<i8'), ('id_poligono', 'O'), ('data_plantio', 'O'), ('anomesdia', 'O')])

In [30]:
# pd.DataFrame.from_records(df.to_records()).reset_index(drop=True)
# pd.DataFrame.from_records(df.to_dict())

In [31]:
df.head()

Unnamed: 0,id_poligono,data_plantio,anomesdia
0,id_002,2023-07-10,20250810
1,id_003,2023-08-01,20250810
2,id_004,2023-08-01,20250810
3,id_005,2023-08-01,20250810
4,id_001,2023-06-21,20250810


In [33]:
arrow_table = pa.Table.from_pandas(df)

In [34]:
arrow_table

pyarrow.Table
id_poligono: string
data_plantio: string
anomesdia: int32
----
id_poligono: [["id_002","id_003","id_004","id_005","id_001"]]
data_plantio: [["2023-07-10","2023-08-01","2023-08-01","2023-08-01","2023-06-21"]]
anomesdia: [[20250810,20250810,20250810,20250810,20250810]]

In [45]:
values = []

for item in df.to_records(index=False):
    print(item)
    values.append(item)

('id_002', '2023-07-10', 20250810)
('id_003', '2023-08-01', 20250810)
('id_004', '2023-08-01', 20250810)
('id_005', '2023-08-01', 20250810)
('id_001', '2023-06-21', 20250810)


In [48]:
values

[('id_002', '2023-07-10', 20250810),
 ('id_003', '2023-08-01', 20250810),
 ('id_004', '2023-08-01', 20250810),
 ('id_005', '2023-08-01', 20250810),
 ('id_001', '2023-06-21', 20250810)]

In [46]:
schema: pa.schema = pa.schema([
    pa.field("id_poligono", pa.string(), nullable=False),
    pa.field("data_plantio", pa.string(), nullable=False),
    pa.field("anomesdia", pa.uint32(), nullable=True),
])


arrow_table = pa.Table.from_pylist(
    values,
    schema=schema
)

In [47]:
arrow_table.to_pandas()

Unnamed: 0,id_poligono,data_plantio,anomesdia
0,,,
1,,,
2,,,
3,,,
4,,,


In [60]:
@dataclass
class ControleIngestao:
    id_poligono: str
    data_plantio: str
    anomesdia: int


values_class = []


for idx, item in df.to_dict(orient="index").items():
    # values_class.append(ControleIngestao(**item))
    values_class.append(item)

In [61]:
values_class[0]

{'id_poligono': 'id_002', 'data_plantio': '2023-07-10', 'anomesdia': 20250810}

In [33]:
date_reference = datetime.today().strftime("%Y-%m-%d")
date_reference

'2025-08-24'

In [34]:
dt_ref = datetime.strptime(date_reference, "%Y-%m-%d")
dt_ref

datetime.datetime(2025, 8, 24, 0, 0)

In [42]:
dt_ref - relativedelta(months=1)

datetime.datetime(2025, 7, 24, 0, 0)

In [14]:
last_ingestion = "2025-08-13T13:18:59.0Z"  # "2025-08-13T13:18:59.003000Z"
datetime.strptime(last_ingestion, "%Y-%m-%dT%H:%M:%S.%fZ")

datetime.datetime(2025, 8, 13, 13, 18, 59)

## s3fs

In [31]:
from s3fs import S3FileSystem
from pyarrow import fs
from obstore.store import S3Store
import boto3

In [20]:
my_session = boto3.session.Session(region_name="us-east-1")

In [21]:
s3_fs = S3FileSystem(anon=False)

id_poligono = "id_001"
zarr_path = "/".join([
    "s3://data-us-east-1-891377318910/mvp_sensoriamento/images",
    f"sentinel-2-l2a/{id_poligono}.zarr"
])
print(f"zarr_path: {zarr_path}")

zarr_path: s3://data-us-east-1-891377318910/mvp_sensoriamento/images/sentinel-2-l2a/id_001.zarr


In [22]:
if s3_fs.exists("s3://data-us-east-1-891377318910/geospatial/starage/zarr/lafaiete.zarr"):
    print("Existe")
else:
    print("Nao existe")

Existe


In [23]:
s3 = fs.S3FileSystem(region='us-east-1')

In [27]:
def s3_path_exists(bucket_name: str, s3_path: str) -> bool:
    client_s3 = boto3.client(service_name="s3")

    # Remove barra inicial, se existir
    s3_path = s3_path.lstrip("/")

    # Usa list_objects_v2 para verificar se o caminho existe
    response = client_s3.list_objects_v2(
        Bucket=bucket_name,
        Prefix=s3_path,
        MaxKeys=1
    )

    return "Contents" in response

In [29]:
s3_path_exists("data-us-east-1-891377318910", "/geospatial/starage/zarr/lafaiete.zarr")

True