In [8]:
import pandas as pd
import numpy as np
from config import SOM_DATA_URL, AWC_DATA_URL, CURRENT_SURFACE_URL
from functools import reduce
from datetime import datetime

In [9]:
# Lookup table: department name -> numeric department code.
# Used to fill in the DEP code for datasets that only carry the department name.
# Pairs are listed in ascending code order, five per row.
departements = dict([
    ('Ain', 1), ('Aisne', 2), ('Allier', 3), ('Alpes-de-Haute-Provence', 4), ('Hautes-Alpes', 5),
    ('Alpes-Maritimes', 6), ('Ardèche', 7), ('Ardennes', 8), ('Ariège', 9), ('Aube', 10),
    ('Aude', 11), ('Aveyron', 12), ('Bouches-du-Rhône', 13), ('Calvados', 14), ('Cantal', 15),
    ('Charente', 16), ('Charente-Maritime', 17), ('Cher', 18), ('Corrèze', 19), ('Corse', 20),
    ("Côte-d'Or", 21), ("Côtes-d'Armor", 22), ('Creuse', 23), ('Dordogne', 24), ('Doubs', 25),
    ('Drôme', 26), ('Eure', 27), ('Eure-et-Loir', 28), ('Finistère', 29), ('Gard', 30),
    ('Haute-Garonne', 31), ('Gers', 32), ('Gironde', 33), ('Hérault', 34), ('Ille-et-Vilaine', 35),
    ('Indre', 36), ('Indre-et-Loire', 37), ('Isère', 38), ('Jura', 39), ('Landes', 40),
    ('Loir-et-Cher', 41), ('Loire', 42), ('Haute-Loire', 43), ('Loire-Atlantique', 44), ('Loiret', 45),
    ('Lot', 46), ('Lot-et-Garonne', 47), ('Lozère', 48), ('Maine-et-Loire', 49), ('Manche', 50),
    ('Marne', 51), ('Haute-Marne', 52), ('Mayenne', 53), ('Meurthe-et-Moselle', 54), ('Meuse', 55),
    ('Morbihan', 56), ('Moselle', 57), ('Nièvre', 58), ('Nord', 59), ('Oise', 60),
    ('Orne', 61), ('Pas-de-Calais', 62), ('Puy-de-Dôme', 63), ('Pyrénées-Atlantiques', 64), ('Hautes-Pyrénées', 65),
    ('Pyrénées-Orientales', 66), ('Bas-Rhin', 67), ('Haut-Rhin', 68), ('Rhône', 69), ('Haute-Saône', 70),
    ('Saône-et-Loire', 71), ('Sarthe', 72), ('Savoie', 73), ('Haute-Savoie', 74), ('Paris', 75),
    ('Seine-Maritime', 76), ('Seine-et-Marne', 77), ('Yvelines', 78), ('Deux-Sèvres', 79), ('Somme', 80),
    ('Tarn', 81), ('Tarn-et-Garonne', 82), ('Var', 83), ('Vaucluse', 84), ('Vendée', 85),
    ('Vienne', 86), ('Haute-Vienne', 87), ('Vosges', 88), ('Yonne', 89), ('Territoire de Belfort', 90),
    ('Essonne', 91), ('Hauts-de-Seine', 92), ('Seine-Saint-Denis', 93), ('Val-de-Marne', 94), ("Val-d'Oise", 95),
])

In [10]:
# Historical monthly means. They stand in for the months that cannot be
# predicted (vegetation index, weather many months ahead, ...).
hist_temp = pd.read_csv("historic_mean_weather.csv")
hist_ndvi = pd.read_csv("ndvi_mean_historical.csv")
hist_vpd = pd.read_csv("vpd_mean_historical.csv")

# Observations for the current market year (the current month is removed below).
current_weather = pd.read_csv("current_mean_weather.csv")
current_ndvi = pd.read_csv("ndvi_mean_current.csv")
current_vpd = pd.read_csv("vpd_mean_current.csv")

# Weather forecast.
forecast_weather = pd.read_csv("weather_forecast.csv")


def _load_by_department(url):
    """Read a per-department JSON file and key it by numeric department code.

    The file's ``nom`` column (department name) is translated to a code through
    ``departements``, stored in a new ``DEP`` column, and then dropped.

    Parameters
    ----------
    url : str
        Location of the JSON file, passed straight to ``pd.read_json``.

    Returns
    -------
    pandas.DataFrame
        The file's columns without ``nom``, plus ``DEP``. Names that are not a
        key of ``departements`` get NaN in ``DEP``.
    """
    frame = pd.read_json(url)
    frame["DEP"] = frame["nom"].map(departements)
    return frame.drop("nom", axis=1)


# Soil organic matter (pom / maom files) and available water capacity.
som_pom = _load_by_department(f"{SOM_DATA_URL}/pom.json")
maom_pom = _load_by_department(f"{SOM_DATA_URL}/maom.json")
awc = _load_by_department(f"{AWC_DATA_URL}/AWC.json")

# Read the clock once so the three filters below always exclude the same month,
# even if the cell happens to run across a month boundary.
current_month = datetime.today().month
current_weather = current_weather[current_weather["MONTH"] != current_month]
current_ndvi = current_ndvi[current_ndvi["MONTH"] != current_month]
current_vpd = current_vpd[current_vpd["MONTH"] != current_month]

# The NDVI and VPD files name the department column "dep"; align it with "DEP".
current_ndvi = current_ndvi.rename(columns={"dep": "DEP"})
hist_ndvi = hist_ndvi.rename(columns={"dep": "DEP"})
current_vpd = current_vpd.rename(columns={"dep": "DEP"})
hist_vpd = hist_vpd.rename(columns={"dep": "DEP"})

In [11]:
# For each source, find the months that exist in the historical means but are
# absent from the current-year data: only those need a stand-in.
# np.setdiff1d (rather than np.setxor1d) is used on purpose: a symmetric
# difference would also return months that exist ONLY in the current data,
# which is not what "months we do not have" means.
hist_ndvi_month = np.setdiff1d(hist_ndvi["MONTH"].unique(), current_ndvi["MONTH"].unique())
hist_ndvi = hist_ndvi[hist_ndvi["MONTH"].isin(hist_ndvi_month)]  # keep only the months missing from current NDVI

# Same logic for the weather: the forecast only supplies the months that the
# current-year observations do not cover.
forecast_weather_month = np.setdiff1d(hist_temp["MONTH"].unique(), current_weather["MONTH"].unique())
forecast_weather = forecast_weather[forecast_weather["MONTH"].isin(forecast_weather_month)]

hist_vpd_month = np.setdiff1d(hist_vpd["MONTH"].unique(), current_vpd["MONTH"].unique())
hist_vpd = hist_vpd[hist_vpd["MONTH"].isin(hist_vpd_month)]  # keep only the months missing from current VPD

In [12]:
# Sanity check of the months each source contributes, printed in this order:
# current weather, weather forecast, historical NDVI, historical VPD.
for checked_frame in (current_weather, forecast_weather, hist_ndvi, hist_vpd):
    print(checked_frame["MONTH"].unique())

[ 1  2  3  9 10 11 12]
[4 5 6 7 8]
[4 5 6 7 8]
[4 5 6 7 8]


In [13]:
def _pivot_by_month(frame, values):
    """Reshape a long (DEP, MONTH) table into one row per DEP.

    Each requested value column is spread over the months and the resulting
    columns are named ``<value><month>`` (for example ``RR4``).

    Parameters
    ----------
    frame : pandas.DataFrame
        Long table containing ``DEP``, ``MONTH`` and every column in ``values``.
    values : list of str
        Columns to spread across months. Must be a list so that the pivot
        yields two-level (value, month) column labels.

    Returns
    -------
    pandas.DataFrame
        ``DEP`` as a regular column followed by the flattened value/month
        columns. Duplicate (DEP, MONTH) rows are combined with
        ``pivot_table``'s default aggregation (mean).
    """
    wide = frame.pivot_table(index=['DEP'], columns='MONTH', values=values)
    # Flatten the (value, month) column labels into single strings.
    wide.columns = [f"{value_name}{month}" for value_name, month in wide.columns]
    return wide.reset_index()


WEATHER_VALUES = ['RR', 'TN', 'TX', 'TM']
NDVI_VALUES = ['ndvi_mean']
VPD_VALUES = ['vpd_mean', 'vpd_max', 'vpd_min']

# Weather: current-year observations and forecast.
current_weather_pivot = _pivot_by_month(current_weather, WEATHER_VALUES)
forecast_weather_pivot = _pivot_by_month(forecast_weather, WEATHER_VALUES)

# NDVI: current-year observations and historical means.
current_ndvi_pivot = _pivot_by_month(current_ndvi, NDVI_VALUES)
hist_ndvi_pivot = _pivot_by_month(hist_ndvi, NDVI_VALUES)

# VPD: current-year observations and historical means.
current_vpd_pivot = _pivot_by_month(current_vpd, VPD_VALUES)
hist_vpd_pivot = _pivot_by_month(hist_vpd, VPD_VALUES)

In [7]:
# Inspect the pivoted forecast: one row per DEP, one column per variable and month.
forecast_weather_pivot

Unnamed: 0,DEP,RR4,RR5,RR6,RR7,RR8,TM4,TM5,TM6,TM7,...,TN4,TN5,TN6,TN7,TN8,TX4,TX5,TX6,TX7,TX8
0,1,81.232589,118.093751,97.990148,60.714709,63.801718,8.104298,12.497284,16.753302,19.011011,...,-1.848504,3.974780,8.319371,10.812179,9.909310,19.258594,24.210653,28.209111,30.910529,30.429091
1,2,46.002930,72.351152,53.408238,39.734786,38.959923,8.845982,13.195575,16.636154,18.641242,...,-0.803804,4.531824,8.785764,11.178397,10.371056,19.483982,23.912374,27.136910,29.710625,29.442271
2,3,62.827392,94.360099,66.374677,41.375955,44.226455,9.371968,13.687729,17.180168,19.709084,...,-1.351845,4.308318,8.731300,14.208774,10.062853,20.109698,45.654902,28.794436,42.773462,31.543259
3,4,89.419619,104.406527,62.686330,29.133583,35.127103,5.659040,10.672032,15.658575,18.706517,...,-5.239765,1.598547,6.748795,9.883512,9.447941,15.669755,20.795894,26.337378,28.233101,29.914286
4,5,136.834148,123.607652,86.196035,45.508704,50.037390,5.129718,8.870906,13.348925,22.031392,...,-20.318850,-3.843945,5.900523,11.311833,8.581830,15.959362,19.360213,24.170899,26.577577,26.611266
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,91,122.081341,66.391751,47.935473,33.091919,31.721262,9.493925,13.711799,17.542363,19.273052,...,0.196196,4.711477,9.043188,11.869942,11.274839,19.483982,24.395736,27.839468,30.440731,30.501414
91,92,45.999243,70.483265,50.659089,34.619002,31.721262,9.493925,13.711799,16.666137,19.239511,...,0.196196,4.531824,9.043188,11.192535,11.274839,19.483982,24.313560,27.136909,30.440731,29.821863
92,93,113.115040,70.483265,50.659089,34.619002,31.721262,9.493925,13.711799,16.778908,19.239511,...,0.125952,4.531824,9.043188,11.192535,11.264690,19.483982,24.407122,27.139050,30.440731,29.821863
93,94,127.296414,70.483265,50.659089,34.619002,31.721262,9.493925,13.711799,17.562905,19.239511,...,0.196196,4.531824,9.043188,11.199302,11.274839,19.483982,24.407122,27.554796,30.440731,29.896642


In [47]:
# Species and year of the cultivated-area rows to keep.
AREA_SPECIES = "Blé tendre"
AREA_YEAR = 2025

current_area = pd.read_csv(f"{CURRENT_SURFACE_URL}/SCR-GRC-hist_dep_surface_prod_cult_cer-A25.csv", encoding='utf-8')
is_target_row = (current_area["ESPECES"] == AREA_SPECIES) & (current_area["ANNEE"] == AREA_YEAR)
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of the raw table (avoids pandas' SettingWithCopyWarning).
current_area = current_area[is_target_row].copy()
current_area["DEPARTEMENT"] = current_area["DEPARTEMENT"].str.strip()

# The file lists Corsica as two departments; they are folded into a single
# "Corse" row with DEP 20, whose cultivated area is the sum of the two.
corseSud = current_area[current_area['DEPARTEMENT'] == "CORSE-DU-SUD"]
corseHaute = current_area[current_area['DEPARTEMENT'] == "HAUTE-CORSE"]
if corseSud.empty or corseHaute.empty:
    # Fail with a readable message instead of an out-of-bounds IndexError.
    raise ValueError(
        f"Expected CORSE-DU-SUD and HAUTE-CORSE rows for {AREA_SPECIES} in {AREA_YEAR}"
    )
corseArea = corseSud["CULT_SURF"].iloc[0] + corseHaute["CULT_SURF"].iloc[0]
corse = pd.DataFrame({"ESPECES": AREA_SPECIES, "DEPARTEMENT": "Corse", "DEP": 20, "ANNEE": AREA_YEAR, "CULT_REND": 0, "CULT_SURF": corseArea, "CULT_PROD": 0}, index=[0])
current_area = pd.concat([current_area, corse])

# Remove the two original Corsican rows now that the combined row is present.
is_split_corsica = current_area['DEPARTEMENT'].isin(["CORSE-DU-SUD", "HAUTE-CORSE"])
current_area = current_area[~is_split_corsica].reset_index(drop=True)

# Integer codes, so DEP can be used as the merge key with the other tables.
current_area["DEP"] = current_area["DEP"].astype(int)

In [49]:
# Per-department monthly tables to join on DEP, in join order.
dfs = [
    current_weather_pivot,
    forecast_weather_pivot,
    current_ndvi_pivot,
    hist_ndvi_pivot,
    current_vpd_pivot,
    hist_vpd_pivot,
]

In [61]:
def _join_on_dep(how):
    """Build a two-argument function that merges two frames on DEP using `how`."""
    def _join(left, right):
        return left.merge(right, on="DEP", how=how)
    return _join


# Fold the monthly tables together pairwise: (((df1 + df2) + df3) + ...).
# Inner join: a department must be present in every table to be kept.
merged = reduce(_join_on_dep("inner"), dfs)

# Attach soil organic matter, available water capacity and cultivated area.
# Left join: departments missing from one of these tables are kept, with NaN.
per_department_extras = [som_pom, maom_pom, awc, current_area[["DEP", "CULT_SURF"]]]
merged = reduce(_join_on_dep("left"), per_department_extras, merged)

merged = merged.assign(year=2025)

In [63]:
# Arrange the columns in the training-data order: identifiers first, then the
# twelve monthly columns of each variable, then the soil columns.
monthly_variables = ["RR", "TM", "TN", "TX", "vpd_max", "vpd_mean", "vpd_min", "ndvi_mean"]
training_columns = (
    ["DEP", "year", "CULT_SURF"]
    + [f"{variable}{month}" for variable in monthly_variables for month in range(1, 13)]
    + ["pom", "maom", "awc"]
)
merged = merged[training_columns]

In [65]:
# Discard every department row that still has a missing value in any column.
merged = merged.dropna(axis=0, how="any")

In [66]:
# Inspect the final table: one row per DEP with all feature columns.
merged

Unnamed: 0,DEP,year,CULT_SURF,RR1,RR2,RR3,RR4,RR5,RR6,RR7,...,ndvi_mean6,ndvi_mean7,ndvi_mean8,ndvi_mean9,ndvi_mean10,ndvi_mean11,ndvi_mean12,pom,maom,awc
0,1,2025,31300.0,164.560000,43.906667,58.228571,81.232589,118.093751,97.990148,60.714709,...,0.700331,0.714235,0.693309,0.739466,0.643126,0.615236,0.549458,16.789692,26.925089,0.107088
1,2,2025,184000.0,126.800000,42.330769,11.707692,46.002930,72.351152,53.408238,39.734786,...,0.720327,0.616498,0.583740,0.562799,0.616162,0.473378,0.473378,5.252623,18.222063,0.095255
2,3,2025,46800.0,68.745455,34.690909,52.622727,62.827392,94.360099,66.374677,41.375955,...,0.716515,0.686770,0.644032,0.714644,0.749227,0.695991,0.596333,6.453841,21.526911,0.089040
3,4,2025,2994.0,95.166667,11.683333,113.169048,89.419619,104.406527,62.686330,29.133583,...,0.652279,0.614843,0.581315,0.620449,0.563894,0.513561,0.468704,16.788250,28.354742,0.118781
4,5,2025,2909.0,130.337476,19.483234,86.903580,136.834148,123.607652,86.196035,45.508704,...,0.716793,0.682611,0.642635,0.649130,0.589804,0.525526,0.474256,29.324879,31.984188,0.119110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,89,2025,111200.0,88.915000,47.490000,31.580000,51.254029,79.889287,58.134781,39.754453,...,0.674758,0.554936,0.524461,0.554821,0.596817,0.556510,0.454919,5.579002,19.594700,0.102762
88,90,2025,2900.0,150.842857,67.957143,34.371429,71.879419,108.083307,103.911655,71.560859,...,0.785406,0.781311,0.768241,0.806960,0.735240,0.611620,0.569550,16.129911,28.980818,0.105598
89,91,2025,26825.0,99.683333,41.833333,23.883333,122.081341,66.391751,47.935473,33.091919,...,0.719650,0.615899,0.591865,0.665300,0.668850,0.544350,0.544350,4.623960,16.732876,0.088218
90,94,2025,248.0,116.300000,42.100000,20.600000,127.296414,70.483265,50.659089,34.619002,...,0.629448,0.596490,0.590676,0.569400,0.552260,0.464000,0.464000,5.895328,20.545574,0.084148


In [67]:
# Write the final table to a CSV two directories above the notebook.
# NOTE(review): to_csv also writes the row index as an unnamed first column by
# default; pass index=False if the consumer expects only the feature columns — confirm.
merged.to_csv("../../wheat_model_current.csv")