In [33]:
import pandas as pd
import numpy as np
from config import SOM_DATA_URL, AWC_DATA_URL, CURRENT_SURFACE_URL
from functools import reduce
from datetime import datetime

In [34]:
# Departement names listed in ascending code order: the name at position i
# (1-based) has code i. Used to recover the numeric code for datasets that
# only carry the departement name.
_DEPARTEMENT_NAMES_BY_CODE = (
    # 1-10
    'Ain', 'Aisne', 'Allier', 'Alpes-de-Haute-Provence', 'Hautes-Alpes',
    'Alpes-Maritimes', 'Ardèche', 'Ardennes', 'Ariège', 'Aube',
    # 11-20
    'Aude', 'Aveyron', 'Bouches-du-Rhône', 'Calvados', 'Cantal',
    'Charente', 'Charente-Maritime', 'Cher', 'Corrèze', 'Corse',
    # 21-30
    "Côte-d'Or", "Côtes-d'Armor", 'Creuse', 'Dordogne', 'Doubs',
    'Drôme', 'Eure', 'Eure-et-Loir', 'Finistère', 'Gard',
    # 31-40
    'Haute-Garonne', 'Gers', 'Gironde', 'Hérault', 'Ille-et-Vilaine',
    'Indre', 'Indre-et-Loire', 'Isère', 'Jura', 'Landes',
    # 41-50
    'Loir-et-Cher', 'Loire', 'Haute-Loire', 'Loire-Atlantique', 'Loiret',
    'Lot', 'Lot-et-Garonne', 'Lozère', 'Maine-et-Loire', 'Manche',
    # 51-60
    'Marne', 'Haute-Marne', 'Mayenne', 'Meurthe-et-Moselle', 'Meuse',
    'Morbihan', 'Moselle', 'Nièvre', 'Nord', 'Oise',
    # 61-70
    'Orne', 'Pas-de-Calais', 'Puy-de-Dôme', 'Pyrénées-Atlantiques', 'Hautes-Pyrénées',
    'Pyrénées-Orientales', 'Bas-Rhin', 'Haut-Rhin', 'Rhône', 'Haute-Saône',
    # 71-80
    'Saône-et-Loire', 'Sarthe', 'Savoie', 'Haute-Savoie', 'Paris',
    'Seine-Maritime', 'Seine-et-Marne', 'Yvelines', 'Deux-Sèvres', 'Somme',
    # 81-90
    'Tarn', 'Tarn-et-Garonne', 'Var', 'Vaucluse', 'Vendée',
    'Vienne', 'Haute-Vienne', 'Vosges', 'Yonne', 'Territoire de Belfort',
    # 91-95
    'Essonne', 'Hauts-de-Seine', 'Seine-Saint-Denis', 'Val-de-Marne', "Val-d'Oise",
)

# Mapping of departement name -> departement code (1..95).
departements = {
    name: code for code, name in enumerate(_DEPARTEMENT_NAMES_BY_CODE, start=1)
}

In [38]:
def _read_departement_json(json_url):
    """Read a per-departement JSON table and key it by departement code.

    The 'nom' column (departement name) is mapped to its code through the
    `departements` dict and stored in a new 'DEP' column; 'nom' is then dropped.
    """
    table = pd.read_json(json_url)
    table['DEP'] = table['nom'].map(departements)
    return table.drop("nom", axis=1)


def _without_month(frame, month):
    """Return the rows of `frame` whose MONTH value differs from `month`."""
    return frame[frame["MONTH"] != month]


# Historical monthly means. They stand in for months that cannot be predicted
# (EVI cannot be forecast, and neither can weather nine months ahead).
hist_temp = pd.read_csv("historic_mean_weather.csv")
hist_ndvi = pd.read_csv("ndvi_mean_historical.csv")
hist_vpd = pd.read_csv("vpd_mean_historical.csv")

# Observations for the current market year.
current_weather = pd.read_csv("current_mean_weather.csv")
current_ndvi = pd.read_csv("ndvi_mean_current.csv")
current_vpd = pd.read_csv("vpd_mean_current.csv")

# Weather forecast.
forecast_weather = pd.read_csv("weather_forecast.csv")

# Soil organic matter (POM and MAOM fractions) and available water capacity,
# each keyed by departement code.
som_pom = _read_departement_json(f"{SOM_DATA_URL}/pom.json")
maom_pom = _read_departement_json(f"{SOM_DATA_URL}/maom.json")
awc = _read_departement_json(f"{AWC_DATA_URL}/AWC.json")

# Drop the month in progress from the current-year observations.
this_month = datetime.today().month
current_weather = _without_month(current_weather, this_month)
current_ndvi = _without_month(current_ndvi, this_month)
current_vpd = _without_month(current_vpd, this_month)

In [39]:
# For each series, find the months that the current-year observations do not
# cover and keep only those months from the fallback source.
#
# np.setdiff1d(a, b) returns the values of `a` that are not in `b`. The previous
# np.setxor1d (symmetric difference) also returned months present only in the
# current data, which is not "a month we are missing" and could let a forecast
# month overlap an observed one.

# NDVI/EVI: historical means fill the months without current observations.
hist_ndvi_month = np.setdiff1d(hist_ndvi["MONTH"].unique(), current_ndvi["MONTH"].unique())
hist_ndvi = hist_ndvi[hist_ndvi["MONTH"].isin(hist_ndvi_month)]

# Weather: the forecast fills the months without current observations. The
# reference list of months is taken from the historical weather table.
forecast_weather_month = np.setdiff1d(hist_temp["MONTH"].unique(), current_weather["MONTH"].unique())
forecast_weather = forecast_weather[forecast_weather["MONTH"].isin(forecast_weather_month)]

# VPD: historical means fill the months without current observations.
hist_vpd_month = np.setdiff1d(hist_vpd["MONTH"].unique(), current_vpd["MONTH"].unique())
hist_vpd = hist_vpd[hist_vpd["MONTH"].isin(hist_vpd_month)]

In [40]:
# Sanity check of month coverage, printed in this order:
#   1. months observed in the current market year
#   2. months taken from the weather forecast
#   3. months filled with historical mean NDVI
#   4. months filled with historical mean VPD
for coverage_frame in (current_weather, forecast_weather, hist_ndvi, hist_vpd):
    print(coverage_frame["MONTH"].unique())

[ 1  2  3  9 10 11 12]
[4 5 6 7 8]
[4 5 6 7 8]
[4 5 6 7 8]


In [None]:
# TODO: add pivot tables for the historical and current VPD data, and remove NaNs (the NDVI data contains some).

In [None]:
def _pivot_by_month(frame, value_columns):
    """Reshape a long (DEP, MONTH) table into one row per departement.

    Parameters
    ----------
    frame : pandas.DataFrame
        Long table with 'DEP' and 'MONTH' columns plus the columns named in
        `value_columns`.
    value_columns : list of str
        Columns to spread across months.

    Returns
    -------
    pandas.DataFrame
        One row per DEP, with 'DEP' as a regular column and one column per
        (value, month) pair named "<value><month>" (e.g. "RR4"). Several rows
        for the same (DEP, MONTH) are combined by pivot_table's default
        aggregation.
    """
    wide = frame.pivot_table(index=['DEP'], columns='MONTH', values=value_columns)
    # pivot_table returns (value, month) column tuples; flatten them into single names.
    wide.columns = [f"{value}{month}" for value, month in wide.columns]
    return wide.reset_index()


WEATHER_VALUES = ['RR', 'TN', 'TX', 'TM']
EVI_VALUES = ['evi']

# Current-year and forecast weather.
# NOTE(review): the VPD frames (current_vpd / hist_vpd) are not pivoted here,
# although the column-ordering cell further down selects vpd_* columns.
current_weather_pivot = _pivot_by_month(current_weather, WEATHER_VALUES)
forecast_weather_pivot = _pivot_by_month(forecast_weather, WEATHER_VALUES)

# Current-year and historical EVI. The *_ndvi_pivot names are the ones merged
# later, so they now hold the flattened frame with 'DEP' as a column, like the
# weather pivots. The *_evi_pivot names are kept as aliases for existing users.
current_ndvi_pivot = _pivot_by_month(current_ndvi, EVI_VALUES)
current_evi_pivot = current_ndvi_pivot

hist_ndvi_pivot = _pivot_by_month(hist_ndvi, EVI_VALUES)
hist_evi_pivot = hist_ndvi_pivot

In [87]:
# Cultivated area of soft wheat ("Blé tendre") per departement for the target year.
AREA_YEAR = 2025

current_area = pd.read_csv(f"{CURRENT_SURFACE_URL}/SCR-GRC-hist_dep_surface_prod_cult_cer-A25.csv")
current_area = current_area[(current_area["ESPECES"] == "Blé tendre") & (current_area["ANNEE"] == AREA_YEAR)]

# The DEPARTEMENT values are whitespace-padded in the file, so compare on the
# stripped name instead of relying on an exact amount of padding.
departement_name = current_area["DEPARTEMENT"].str.strip()
is_corse_sud = departement_name == "CORSE-DU-SUD"
is_haute_corse = departement_name == "HAUTE-CORSE"

corseSud = current_area[is_corse_sud]
corseHaute = current_area[is_haute_corse]
if corseSud.empty or corseHaute.empty:
    raise IndexError(
        f"Expected CORSE-DU-SUD and HAUTE-CORSE rows for 'Blé tendre' in {AREA_YEAR}"
    )

# The two Corsican departements are folded into a single "Corse" row with
# code 20, the code used for 'Corse' in the `departements` mapping.
corseArea = corseSud["CULT_SURF"].iloc[0] + corseHaute["CULT_SURF"].iloc[0]
corse = pd.DataFrame({"ESPECES": "Blé tendre", "DEPARTEMENT": "Corse", "DEP": 20, "ANNEE": AREA_YEAR, "CULT_REND": 0, "CULT_SURF": corseArea, "CULT_PROD": 0}, index=[0])

# Replace the two original rows with the combined one (appended last).
current_area = pd.concat([current_area[~(is_corse_sud | is_haute_corse)], corse]).reset_index(drop=True)

# The Corsican rows are removed before this cast, so every remaining DEP value
# is expected to be convertible to an integer.
current_area["DEP"] = current_area["DEP"].astype(int)

In [None]:
# Per-departement pivot tables to be joined on DEP, in merge order.
dfs = [
    current_weather_pivot,
    forecast_weather_pivot,
    current_ndvi_pivot,
    hist_ndvi_pivot,
]

In [89]:
def _inner_join_on_dep(left, right):
    """Inner-join two tables on their DEP key."""
    return pd.merge(left, right, on="DEP", how="inner")


# Fold the pivot tables together pairwise: dfs[0] with dfs[1], that result
# with dfs[2], and so on until the list is exhausted.
merged = reduce(_inner_join_on_dep, dfs)

# Attach soil organic matter (POM, MAOM), available water capacity and the
# cultivated area. Left joins keep every departement already in `merged`.
for extra_table in (som_pom, maom_pom, awc, current_area[["DEP", "CULT_SURF"]]):
    merged = merged.merge(extra_table, on="DEP", how="left")

In [94]:
# Match the training-data column order:
#   DEP, year, CULT_SURF, then twelve monthly columns for each feature prefix
#   (in the order below), then pom, maom, awc.
monthly_feature_prefixes = ["RR", "TM", "TN", "TX", "vpd_max", "vpd_mean", "vpd_min", "evi"]
training_columns = (
    ["DEP", "year", "CULT_SURF"]
    + [f"{prefix}{month}" for prefix in monthly_feature_prefixes for month in range(1, 13)]
    + ["pom", "maom", "awc"]
)

# "year" is part of the selection below, so it has to exist before selecting;
# otherwise this cell fails on a fresh kernel.
merged['year'] = 2025

# Fail with the full list of absent columns rather than a bare selection error.
missing_columns = [column for column in training_columns if column not in merged.columns]
if missing_columns:
    raise KeyError(f"merged is missing expected training columns: {missing_columns}")

merged = merged[training_columns]

In [95]:
# Stamp every row with the year these features describe.
# NOTE(review): 2025 is hard-coded here and in the area-loading cell; update both together.
merged['year'] = 2025

In [96]:
# Drop every row that has at least one missing value.
# Per the original author: POM and MAOM are NaN for departement 75 (Paris),
# which has no fields, so removing it is acceptable.
# NOTE(review): this also drops any other departement with a missing feature — confirm that is intended.
merged = merged.dropna()

In [97]:
# Display the final feature table for a visual check.
merged

Unnamed: 0,DEP,year,CULT_SURF,RR1,RR2,RR3,RR4,RR5,RR6,RR7,...,evi6,evi7,evi8,evi9,evi10,evi11,evi12,pom,maom,awc
0,1,2025,31300.0,175.760000,41.340000,59.114778,96.880738,104.501967,117.572765,94.841166,...,0.519922,0.512620,0.492009,0.468340,0.393781,0.346867,0.289146,16.789692,26.925089,0.107088
1,2,2025,184000.0,123.192857,40.557143,37.928176,55.103448,60.566481,70.319262,60.907001,...,0.568780,0.471959,0.409176,0.418794,0.391760,0.347499,0.322178,5.252623,18.222063,0.095255
2,3,2025,46800.0,68.440000,30.380000,45.588658,62.542539,81.465384,86.955897,66.725420,...,0.553037,0.469106,0.449425,0.464130,0.488228,0.462617,0.403730,6.453841,21.526911,0.089040
3,4,2025,2994.0,76.387500,10.325000,74.259985,70.608596,91.482681,95.681591,61.591945,...,0.379838,0.378907,0.347075,0.342470,0.295121,0.252091,0.221568,16.788250,28.354742,0.118781
4,5,2025,2909.0,105.975000,16.866667,72.009111,81.025938,102.588014,114.329146,84.262117,...,0.333960,0.362402,0.334166,0.313995,0.250363,0.208731,0.168434,29.324879,31.984188,0.119110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,88,2025,27000.0,138.311111,70.233333,41.246545,67.595001,78.294175,88.211332,81.584326,...,0.549811,0.508733,0.483523,0.496765,0.423578,0.366557,0.312296,19.340321,29.435806,0.112435
88,89,2025,111200.0,79.933333,45.300000,41.762829,59.141364,71.220132,74.364962,60.633347,...,0.552851,0.422868,0.382229,0.414454,0.374777,0.323499,0.261036,5.579002,19.594700,0.102762
89,90,2025,2900.0,102.700000,49.200000,46.007624,79.070080,93.474239,105.864451,101.803921,...,0.553085,0.525909,0.504568,0.503770,0.383430,0.314863,0.298320,16.129911,28.980818,0.105598
90,91,2025,26825.0,96.000000,42.200000,38.307286,51.439575,57.604330,192.076377,54.653940,...,0.526223,0.398204,0.348727,0.385912,0.351164,0.175606,0.257704,4.623960,16.732876,0.088218


In [98]:
# Write the final feature table to CSV, two directories above the notebook.
# NOTE(review): no `index` argument is passed, so pandas' default applies and the row
# index is presumably written as an unnamed first column — confirm the consumer expects it.
merged.to_csv("../../wheat_model_current.csv")