In [1]:
import pandas as pd
import geopandas as gpd

In [2]:
# Charge le shapefile
gdf = gpd.read_file("data/RPG_2023/PARCELLES_GRAPHIQUES.gpkg")

In [4]:
gdf["SURF_PARC"].median()

np.float32(1.24)

In [5]:
# Charger le fichier Excel 
correspondance = pd.read_excel("data/correspondances.xlsx", sheet_name="1")

# 'code_culture' et 'nom_culture'
correspondance.columns = correspondance.columns.str.strip()  # nettoyage des noms de colonnes

# Ajouter la colonne 'NOM_CULTURE' 
gdf = gdf.merge(correspondance, how="left", left_on="CODE_CULTU", right_on="code_culture")

gdf = gdf.rename(columns={"nom_culture": "NOM_CULTURE"})

gdf

Unnamed: 0,ID_PARCEL,SURF_PARC,CODE_CULTU,CODE_GROUP,CULTURE_D1,CULTURE_D2,geometry,code_culture,NOM_CULTURE
0,1,5.01,ORH,3,,,"POLYGON ((620859.121 7049174.538, 621155.742 7...",ORH,Orge d’hiver
1,2,0.43,TRN,6,,,"POLYGON ((518271.056 6380847.362, 518271.088 6...",TRN,Tournesol
2,3,2.34,BTH,1,,,"POLYGON ((653603.846 6971494.032, 653548.732 6...",BTH,Blé tendre d’hiver
3,4,2.31,VRC,21,,,"POLYGON ((450066.555 6519505.157, 449911.365 6...",VRC,Vigne (sauf vigne rouge)
4,5,0.14,SNE,28,,,"POLYGON ((542585.535 6905398.389, 542579.8 690...",SNE,Surface agricole temporairement non admissible...
...,...,...,...,...,...,...,...,...,...
9797400,9866940,0.06,SPH,17,,,"POLYGON ((941769.023 6407193.483, 941769.35 64...",SPH,Prairie avec herbe prédominante et ressources ...
9797401,9866941,0.17,BOR,28,,,"POLYGON ((749684.059 6617162.615, 749683.959 6...",BOR,Bordure de champ
9797402,9866942,3.78,MIS,2,,,"POLYGON ((640902.575 6310402.873, 640902.561 6...",MIS,Maïs (hors maïs doux)
9797403,9866943,3.44,MIS,2,,,"POLYGON ((889837.386 6687978.699, 889853.479 6...",MIS,Maïs (hors maïs doux)


In [6]:
gdf['NOM_CULTURE'].value_counts()


NOM_CULTURE
Prairie de 6 ans et plus (couvert herbacé)                                                                           3043081
Blé tendre d’hiver                                                                                                    885083
Maïs (hors maïs doux)                                                                                                 710194
Prairie temporaire de 5 ans ou moins et autre mélange avec graminées                                                  703112
Jachère (terre arable)                                                                                                572050
                                                                                                                      ...   
Autre culture pérenne et jachère dans les bananeraies                                                                    103
Cultures conduites en inter-rangs (bandes de cultures différentes) – 3 cultures représentant chacune plus de 25 %

In [8]:
gdf_filtered = gdf[(gdf['SURF_PARC'] > 2)]


counts = gdf_filtered['CODE_CULTU'].value_counts()
valid_classes = counts[counts > 15000].index
gdf_filtered = gdf_filtered[gdf_filtered['CODE_CULTU'].isin(valid_classes)]


print(gdf_filtered['NOM_CULTURE'].value_counts(), len(gdf_filtered['CODE_CULTU'].value_counts()), sum(gdf_filtered['CODE_CULTU'].value_counts()))

NOM_CULTURE
Prairie de 6 ans et plus (couvert herbacé)                                              1053420
Blé tendre d’hiver                                                                       609231
Maïs (hors maïs doux)                                                                    395267
Prairie temporaire de 5 ans ou moins et autre mélange avec graminées                     244777
Orge d’hiver                                                                             182955
Colza d’hiver                                                                            164131
Tournesol                                                                                110096
Prairie avec herbe prédominante et ressources fourragères ligneuses présentes             81319
Vigne (sauf vigne rouge)                                                                  74223
Luzerne                                                                                   71774
Triticale d’hiver           

In [59]:
n = 1  # nombre par culture
sample = (
    gdf_filtered.groupby("CODE_CULTU", group_keys=False)
    .apply(lambda x: x.sample(min(len(x), n), random_state=42))
)

In [60]:
sample["SURF_PARC"].sum()

np.float32(150.16)

In [61]:
sample.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 21 entries, 6747702 to 5088367
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   ID_PARCEL     21 non-null     object  
 1   SURF_PARC     21 non-null     float32 
 2   CODE_CULTU    21 non-null     object  
 3   CODE_GROUP    21 non-null     object  
 4   CULTURE_D1    21 non-null     object  
 5   CULTURE_D2    21 non-null     object  
 6   geometry      21 non-null     geometry
 7   code_culture  21 non-null     object  
 8   NOM_CULTURE   21 non-null     object  
dtypes: float32(1), geometry(1), object(7)
memory usage: 1.6+ KB


In [66]:
sampled = sample.head(1)
sampled

Unnamed: 0,ID_PARCEL,SURF_PARC,CODE_CULTU,CODE_GROUP,CULTURE_D1,CULTURE_D2,geometry,code_culture,NOM_CULTURE
6747702,6795423,21.370001,BDH,4,,,"POLYGON ((517492.146 6321037.224, 517497.528 6...",BDH,Blé dur d’hiver


In [12]:
import ee 
import geemap

  import pkg_resources


In [13]:
ee.Authenticate()


Successfully saved authorization token.


In [14]:
ee.Initialize()

In [67]:
# reprojettection si nécessaire
if sampled.crs is None or sampled.crs.to_epsg() != 4326:
    sampled = sampled.to_crs(epsg=4326)
    
# Convertir en GeoJSON pour l'affichage
geojson = sampled.__geo_interface__

# Centrer la carte sur les parcelles
center = [sampled.geometry.centroid.y.mean(), sampled.geometry.centroid.x.mean()]

# Créer une carte interactive
Map = geemap.Map(center=center, zoom=5)

# Ajouter les polygones du shapefile
Map.add_geojson(geojson, layer_name="Parcelles")

# Afficher la carte


In [68]:
Map

Map(center=[np.float64(43.96338082681801), np.float64(0.7298515231406117)], controls=(WidgetControl(options=['…

In [85]:
# Convertir en GEE FeatureCollection
fc = geemap.geopandas_to_ee(sampled, geodesic=False)

In [86]:
start_date = '2023-01-01'
end_date = '2023-12-31'

# Créer une ImageCollection NDVI Sentinel-2
s2 = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED") \
    .filterBounds(fc.geometry()) \
    .filterDate(start_date, end_date) \
    .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 10)) \
    .map(lambda img: img.clip(fc.geometry()))  # Clip aux parcelles

In [87]:

def add_ndvi(img):
    ndvi = img.normalizedDifference(['B8', 'B4']).rename('NDVI')
    return img.addBands(ndvi).clip(fc.geometry())

s2_with_ndvi = s2.map(add_ndvi)

In [88]:
s2_with_ndvi

In [73]:
sample_img = s2_with_ndvi.first()
print(sample_img.getInfo())

{'type': 'Image', 'bands': [{'id': 'B1', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [13, 13], 'origin': [1648, 418], 'crs': 'EPSG:32630', 'crs_transform': [60, 0, 699960, 0, -60, 4900020]}, {'id': 'B2', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [72, 72], 'origin': [9892, 2513], 'crs': 'EPSG:32630', 'crs_transform': [10, 0, 699960, 0, -10, 4900020]}, {'id': 'B3', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [72, 72], 'origin': [9892, 2513], 'crs': 'EPSG:32630', 'crs_transform': [10, 0, 699960, 0, -10, 4900020]}, {'id': 'B4', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [72, 72], 'origin': [9892, 2513], 'crs': 'EPSG:32630', 'crs_transform': [10, 0, 699960, 0, -10, 4900020]}, {'id': 'B5', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 65535}, 'dimensions': [36, 37], 

In [84]:
# Itère et ajoute chaque image à la carte
image_list = s2_with_ndvi.toList(s2_with_ndvi.size())
for i in range(s2_with_ndvi.size().getInfo()):
    img = ee.Image(image_list.get(i))
    date_str = ee.Date(img.get('system:time_start')).format('YYYY-MM-dd').getInfo()
    Map.addLayer(img.select('NDVI'), {'min': 0, 'max': 0.8}, f'NDVI {date_str}')

# Centrer la carte
Map.centerObject(sample_img, 12)


TypeError: 'NoneType' object is not subscriptable

In [6]:
from sklearn.model_selection import train_test_split



gdf_train, gdf_test = train_test_split(
    gdf_filtered,
    test_size=0.2,
    stratify=gdf_filtered['CODE_CULTU'],
    random_state=42
)

print(gdf['CODE_CULTU'].value_counts(normalize=True))
print(gdf_train['CODE_CULTU'].value_counts(normalize=True))
print(gdf_test['CODE_CULTU'].value_counts(normalize=True))