In [5]:
import os
from pathlib import Path

import geopandas as gpd
import pandas as pd

In [6]:
# Folder that holds the input shapefile. The default is the original local
# folder; set the COPERNICUS_DATA_DIR environment variable to run the notebook
# on another machine without editing this cell.
DATA_DIR = Path(
    os.environ.get(
        "COPERNICUS_DATA_DIR",
        r"C:\Users\aitor.herran\Desktop\data\data-copernicus",
    )
)
SHP_FILE = DATA_DIR / "modis.ba.poly.shp"

# Output folder for the files written further down. parents=True keeps the
# call from failing with FileNotFoundError when intermediate folders are
# missing; exist_ok=True keeps the cell safe to re-run.
OUT_DIR = DATA_DIR / "output"
OUT_DIR.mkdir(parents=True, exist_ok=True)

In [7]:
# Load the shapefile into a GeoDataFrame. The attribute table is decoded as
# Latin-1 so province/commune names do not come out as "?????".
gdf = gpd.read_file(SHP_FILE, encoding="latin1")

# Row count of the full table, then a preview of the first rows.
print("Total incendios (Europa):", len(gdf))
gdf.head()

Total incendios (Europa): 87704


Unnamed: 0,id,FIREDATE,LASTUPDATE,COUNTRY,PROVINCE,COMMUNE,AREA_HA,BROADLEA,CONIFER,MIXED,SCLEROPH,TRANSIT,OTHERNATLC,AGRIAREAS,ARTIFSURF,OTHERLC,PERCNA2K,CLASS,geometry
0,2,2016-08-28 00:00:00,2022-01-26 11:57:54.973474,AL,Korcë,Konispol,67,0.0,0.0,0.0,19.402985074597908,16.417910447736688,10.447761194014255,53.731343283501886,0.0,0,0.0,FireSeason,"POLYGON ((20.18901 39.74908, 20.18909 39.75006..."
1,888,2016-08-08 00:00:00,2022-01-26 11:57:54.973474,PT,Área Metropolitana do Porto,Covelo de Paivó e Janarde,26593,0.6020922706404734,0.2558892150222012,0.2220215247986746,0.0,56.893956498833234,38.66184992850141,3.2174305712350297,0.1467599909686154,0,9.843358251326316,FireSeason,"POLYGON ((-8.06251 40.84474, -8.06033 40.8436,..."
2,19,2016-08-11 00:00:00,2022-01-26 11:57:54.973474,PT,Tâmega e Sousa,Caçarilhe e Infesta,81,0.0,1.17647058823391,0.0,0.0,67.05882352933287,19.99999999997647,11.7647058823391,0.0,0,0.0,FireSeason,"POLYGON ((-8.05657 41.40623, -8.05945 41.4048,..."
3,87,2016-07-27 00:00:00,2022-01-26 11:57:54.973474,TR,??rnak,N.A.,72,0.0,0.0,0.0,0.0,70.83333333323495,22.222222222191355,0.0,6.9444444444348,0,0.0,FireSeason,"POLYGON ((42.50247 37.53773, 42.5029 37.53451,..."
4,368,2016-08-24 00:00:00,2022-01-26 11:57:54.973474,IT,Genova,Bogliasco,267,11.654135338341485,5.639097744358783,0.3759398496239188,0.0,7.894736842102295,74.06015037591202,0.3759398496239188,0.0,0,5.453094633641743,FireSeason,"POLYGON ((9.06157 44.39255, 9.06266 44.39417, ..."


In [8]:
# Parse the fire date column; values that cannot be parsed become NaT
# instead of raising.
fire_dates = pd.to_datetime(gdf["FIREDATE"], errors="coerce")
gdf["FIREDATE"] = fire_dates

# Calendar year of each fire, taken from the parsed dates.
gdf["YEAR"] = fire_dates.dt.year

gdf[["FIREDATE", "YEAR"]].head()

Unnamed: 0,FIREDATE,YEAR
0,2016-08-28,2016.0
1,2016-08-08,2016.0
2,2016-08-11,2016.0
3,2016-07-27,2016.0
4,2016-08-24,2016.0


In [9]:
# Keep only the rows whose COUNTRY code is "ES". .copy() detaches the subset
# from gdf so later column assignments work on an independent frame.
gdf_es = gdf[gdf["COUNTRY"] == "ES"].copy()

# Message text repaired: the accented character had been stored mis-encoded.
print("Incendios en España:", len(gdf_es))
gdf_es[["COUNTRY", "PROVINCE", "COMMUNE", "YEAR"]].head()

Incendios en España: 7314


Unnamed: 0,COUNTRY,PROVINCE,COMMUNE,YEAR
15,ES,A Coruña,Vimianzo,2016.0
31,ES,Ávila,Serranillos,2016.0
39,ES,Ourense,Muíños,2016.0
40,ES,Ourense,Oímbra,2016.0
69,ES,Cáceres,San Martín de Trevejo,2016.0


In [10]:
# Drop fires without a year. The filtered result is copied so that the column
# assignments below act on an independent frame: assigning into a
# boolean-filtered frame directly can raise SettingWithCopyWarning and relies
# on pandas' view/copy heuristics.
gdf_es = gdf_es[gdf_es["YEAR"].notna()].copy()

# With the missing years removed, YEAR can be stored as an integer.
gdf_es["YEAR"] = gdf_es["YEAR"].astype(int)

# Burnt area as a number; values that cannot be parsed become NaN.
gdf_es["AREA_HA"] = pd.to_numeric(gdf_es["AREA_HA"], errors="coerce")

gdf_es.dtypes

id                    object
FIREDATE      datetime64[ns]
LASTUPDATE            object
COUNTRY               object
PROVINCE              object
COMMUNE               object
AREA_HA                int64
BROADLEA              object
CONIFER               object
MIXED                 object
SCLEROPH              object
TRANSIT               object
OTHERNATLC            object
AGRIAREAS             object
ARTIFSURF             object
OTHERLC               object
PERCNA2K              object
CLASS                 object
geometry            geometry
YEAR                   int64
dtype: object

In [11]:
# Write the cleaned Spanish subset to a GeoPackage inside the output folder.
OUT_FILE = OUT_DIR.joinpath("copernicus_es_clean.gpkg")
gdf_es.to_file(OUT_FILE, driver="GPKG")

print("Guardado:", OUT_FILE)

Guardado: C:\Users\aitor.herran\Desktop\data\data-copernicus\output\copernicus_es_clean.gpkg


In [12]:
# =========================================
# 8. SIMPLIFY GEOMETRY
# =========================================

# The simplification tolerance is a distance, so it has to be applied in a CRS
# whose units are real metres. EPSG:3857 (Web Mercator) stretches lengths by
# roughly 1/cos(latitude), i.e. about 30 % at Spain's latitudes, so a "100 m"
# tolerance there is noticeably less than 100 m on the ground.
# EPSG:3035 (ETRS89-extended / LAEA Europe) is a metric CRS intended for
# Europe-wide data. NOTE(review): confirm it suits every territory present
# in the dataset.
METRIC_EPSG = 3035
OUTPUT_EPSG = 4326          # lat/lon CRS of the simplified result
SIMPLIFY_TOLERANCE_M = 100  # metres; 50–200 usually works well

# Reproject to the metric CRS before simplifying.
gdf_es_m = gdf_es.to_crs(epsg=METRIC_EPSG)

# preserve_topology=True is kept from the original call.
gdf_es_m["geometry"] = gdf_es_m.geometry.simplify(
    tolerance=SIMPLIFY_TOLERANCE_M,
    preserve_topology=True,
)

# Back to lat/lon.
gdf_es_simpl = gdf_es_m.to_crs(epsg=OUTPUT_EPSG)

# Message text repaired: the accented character had been stored mis-encoded.
print("Geometría simplificada")

Geometría simplificada


In [13]:
# =========================================
# 9. SAVE FINAL DATASET
# =========================================
# Write the simplified frame to its own GeoPackage in the output folder.
OUT_FILE_FINAL = OUT_DIR.joinpath("copernicus_es_final.gpkg")
gdf_es_simpl.to_file(OUT_FILE_FINAL, driver="GPKG")

print("Guardado final:", OUT_FILE_FINAL)

Guardado final: C:\Users\aitor.herran\Desktop\data\data-copernicus\output\copernicus_es_final.gpkg
