In [None]:
import ee
import os
import pandas as pd
import geopandas as gpd
import numba
import numpy as np
from tqdm.notebook import tqdm


In [None]:
# Authenticate with Earth Engine and open a session bound to the project below.
ee.Authenticate()
ee.Initialize(project='ee-gtikhonov')

# Round-trip a server-side string through getInfo() as a quick connectivity check.
greeting = ee.String('Hello from the Earth Engine servers!')
print(greeting.getInfo())

In [None]:
# Root folder of the local dataset copy. This is a machine-specific absolute
# path and must be adjusted when the notebook runs elsewhere.
path_data = "/home/gt/DATA/geolifeclef-2025"
# NOTE(review): the file name contains "P0" (digit zero) while the variable
# names use "po" — confirm this matches the actual file name on disk.
file_path_po = os.path.join(path_data, "GLC25_P0_metadata_train.csv")
# Load the metadata table; later cells read its lat, lon, surveyId and
# speciesId columns.
df_po = pd.read_csv(file_path_po)


In [None]:
def _join_unique_survey_ids(values):
    """Return the distinct survey ids of one group as a comma-separated string of integers."""
    return ",".join(values.unique().astype(int).astype(str))


def _join_unique_species_ids(values):
    """Return the distinct species ids of one group as a comma-separated string."""
    return ",".join(values.unique().astype(str))


# Collapse the records to one row per distinct (lat, lon) pair, keeping the
# ids observed at that location as comma-separated strings.
df_un = (
    df_po.groupby(["lat", "lon"])
    .agg({"surveyId": _join_unique_survey_ids, "speciesId": _join_unique_species_ids})
    .reset_index()
)

# Sequential integer key per location; used later to join sampled values back.
df_un["id"] = np.arange(df_un.shape[0])

In [None]:
# Build point geometries from the lon/lat columns (declared as EPSG:4326) and
# reproject the resulting frame to EPSG:3857. The lon/lat columns themselves
# are left unchanged by the reprojection.
points_lonlat = gpd.points_from_xy(x=df_un["lon"], y=df_un["lat"])
gdf_un = gpd.GeoDataFrame(df_un, geometry=points_lonlat, crs="EPSG:4326").to_crs(epsg=3857)

In [None]:
def row_to_feature(row):
    """Convert one table row into an Earth Engine point feature.

    Args:
        row: Mapping-like object exposing "lon", "lat" and "id" entries.

    Returns:
        An ``ee.Feature`` whose geometry is the point ``[lon, lat]`` and whose
        only property is ``"id"``.
    """
    lon_lat = [row["lon"], row["lat"]]
    properties = {"id": row["id"]}
    return ee.Feature(ee.Geometry.Point(lon_lat), properties)

# One Earth Engine feature per row of the per-location table, in row order.
grouped = gdf_un
row_iter = (row for _, row in grouped.iterrows())
features = list(map(row_to_feature, tqdm(row_iter, total=len(grouped))))

In [None]:
# Split the features into fixed-size batches so each request stays small.
# NOTE(review): 4500 is presumably chosen to stay under an Earth Engine
# per-request element limit — confirm.
chunk_size = 4500
chunks = [
    features[start:start + chunk_size]
    for start in range(0, len(features), chunk_size)
]

# Load WorldCover and keep only its "Map" band.
worldcover = ee.Image("ESA/WorldCover/v100/2020")
cover = worldcover.select("Map")

# Maps feature "id" -> sampled "Map" value, filled batch by batch.
results = {}
for i, chunk in enumerate(tqdm(chunks, desc="batch-requests")):
    sampled = cover.sampleRegions(
        collection=ee.FeatureCollection(chunk),
        scale=10,
        geometries=False,
    )
    try:
        data = sampled.getInfo()
    except Exception as e:
        # Best effort: report the failed batch and move on. Its points get
        # no entry in `results` and therefore end up as NaN below.
        print(f"error chunk {i}: {e}")
    else:
        sampled_props = [feature["properties"] for feature in data["features"]]
        results.update({props["id"]: props["Map"] for props in sampled_props})

# Ids missing from `results` (failed batch or not returned by the sampler) map to NaN.
grouped["class"] = grouped["id"].map(results)

In [None]:
# Attach the sampled class to the per-location table and export it as CSV.
# `res` is an alias of `df_un`, so the "class" column is added to `df_un` too.
res = df_un
res["class"] = res["id"].map(results)

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing.
out_dir = os.path.join(path_data, "worldcover")
os.makedirs(out_dir, exist_ok=True)

# The helper "id" column is dropped from the export. The DataFrame index is
# still written as the first (unnamed) CSV column, because `index` is left at
# its default.
res.drop("id", axis=1).to_csv(os.path.join(out_dir, "po_train_survey_points_with_worldcover.csv"))

In [None]:
# Inspect the comma-joined survey ids per location.
res["surveyId"]

In [None]:
# Number of rows per surveyId, most frequent first.
# NOTE(review): `gdf_points_po` is not assigned in any cell visible here —
# confirm it is defined before this cell runs on a fresh kernel.
gdf_points_po.value_counts(subset=["surveyId"])

In [None]:
# Number of rows per distinct (lat, lon) pair, most frequent first.
tmp = gdf_points_po.value_counts(subset=["lat", "lon"])

In [None]:
# Show at most the first 10000 entries of `tmp`.
tmp.head(10000)

In [None]:
# First index label of `tmp`, i.e. the key of its first row.
tmp.index[0]

In [None]:
# Display the frame.
# NOTE(review): `gdf_points_po` is not assigned in any cell visible here —
# confirm it is defined before this cell runs on a fresh kernel.
gdf_points_po

In [None]:
# Select every row whose (lat, lon) pair equals the first key in `tmp`.
first_coords = tmp.index[0]
is_first_location = (gdf_points_po.loc[:, ["lat", "lon"]] == first_coords).all(axis=1)
gdf_points_po.loc[is_first_location]

In [None]:
# Element-wise comparison of the lat/lon columns against the first key in `tmp`.
lat_lon = gdf_points_po.loc[:, ["lat", "lon"]]
first_coords = tmp.index[0]
lat_lon == first_coords