In [62]:
import requests
from config import DATA_DOWNLOAD_URL
import pandas as pd
import io
import gzip
import numpy as np
from datetime import datetime

In [63]:
df = pd.DataFrame(columns=["DATE", "DEP", "TN", "TX", "TM", "UM"])
df2024 = pd.DataFrame(columns=["DATE", "DEP", "TN", "TX", "TM", "UM"])

In [64]:
#data for current market year
for i in range(95):
    urlTP = f"https://object.files.data.gouv.fr/meteofrance/data/synchro_ftp/BASE/QUOT/Q_{i+1:02d}_previous-1950-2023_RR-T-Vent.csv.gz"
    responseTP = requests.get(urlTP)

    urlRH = f"https://object.files.data.gouv.fr/meteofrance/data/synchro_ftp/BASE/QUOT/Q_{i+1:02d}_previous-1950-2023_autres-parametres.csv.gz" 
    responseRH = requests.get(urlRH)

    compressedDataTP = io.BytesIO(responseTP.content)
    decompressedDataTP = gzip.GzipFile(fileobj=compressedDataTP)
    compressedDataRH = io.BytesIO(responseRH.content)
    decompressedDataRH = gzip.GzipFile(fileobj=compressedDataRH)

    tmpTP = pd.read_csv(decompressedDataTP, sep=";")
    tmpRH = pd.read_csv(decompressedDataRH, sep=";")
    tmp = pd.merge(tmpTP, tmpRH, on=["NUM_POSTE", "AAAAMMJJ"], how="outer")
    tmp["DATE"] = pd.to_datetime(tmp["AAAAMMJJ"], format="%Y%m%d") #string to date
    #tmp = tmp[tmp["DATE"] >= "2020-01-01"].reset_index(drop=True) #filter by date
    tmp['DEP'] = i+1
    tmp = tmp[["DATE", "DEP", "TN", "TX", "TM", "UM"]] #keep only, in order : Date, departement code, temp min, temp max, moy temp and relative humidity
    df = pd.concat([df, tmp])
    
df['YEAR-MONTH'] = pd.to_datetime(df["DATE"]).dt.to_period('M') #new column with YYYY-MM format
df = df.dropna()
df = df.sort_values(by="DATE").reset_index(drop=True)

svpMin = 610.78 * np.exp(df["TN"] / (df["TN"] + 237.3) * 17.2694)
svpMean = 610.78 * np.exp(df["TM"] / (df["TM"] + 237.3) * 17.2694)
svpMax = 610.78 * np.exp(df["TX"] / (df["TX"] + 237.3) * 17.2694)
vpdMin = svpMin * (1 - df["UM"]/100) / 1000 #divided by 1000 to convert to kPa
vpdMean = svpMean * (1 - df["UM"]/100) / 1000
vpdMax = svpMax * (1 - df["UM"]/100) / 1000
df["vpd_min"] = vpdMin
df["vpd_mean"] = vpdMean
df["vpd_max"] = vpdMax

  df = pd.concat([df, tmp])


In [71]:
for i in range(95):
    urlTP = f"https://object.files.data.gouv.fr/meteofrance/data/synchro_ftp/BASE/QUOT/Q_{i+1:02d}_latest-2024-2025_RR-T-Vent.csv.gz"
    responseTP = requests.get(urlTP)

    urlRH = f"https://object.files.data.gouv.fr/meteofrance/data/synchro_ftp/BASE/QUOT/Q_{i+1:02d}_latest-2024-2025_autres-parametres.csv.gz" 
    responseRH = requests.get(urlRH)

    compressedDataTP = io.BytesIO(responseTP.content)
    decompressedDataTP = gzip.GzipFile(fileobj=compressedDataTP)
    compressedDataRH = io.BytesIO(responseRH.content)
    decompressedDataRH = gzip.GzipFile(fileobj=compressedDataRH)

    tmpTP = pd.read_csv(decompressedDataTP, sep=";")
    tmpRH = pd.read_csv(decompressedDataRH, sep=";")
    tmp = pd.merge(tmpTP, tmpRH, on=["NUM_POSTE", "AAAAMMJJ"], how="outer")
    tmp["DATE"] = pd.to_datetime(tmp["AAAAMMJJ"], format="%Y%m%d") #string to date
    tmp = tmp[tmp["DATE"] <= "2024-12-31"].reset_index(drop=True) #filter by date
    tmp['DEP'] = i+1
    tmp = tmp[["DATE", "DEP", "TN", "TX", "TM", "UM"]] #keep only, in order : Date, departement code, précipitations, temp min, temp max, moy temp and relative humidity
    df2024 = pd.concat([df2024, tmp])

df2024['YEAR-MONTH'] = pd.to_datetime(df2024["DATE"]).dt.to_period('M') #new column with YYYY-MM format
df2024 = df2024.dropna()
df2024 = df2024.sort_values(by="DATE").reset_index(drop=True)

svpMin = 610.78 * np.exp(df2024["TN"] / (df2024["TN"] + 237.3) * 17.2694)
svpMean = 610.78 * np.exp(df2024["TM"] / (df2024["TM"] + 237.3) * 17.2694)
svpMax = 610.78 * np.exp(df2024["TX"] / (df2024["TX"] + 237.3) * 17.2694)
vpdMin = svpMin * (1 - df2024["UM"]/100) / 1000 #divided by 1000 to convert to kPa
vpdMean = svpMean * (1 - df2024["UM"]/100) / 1000
vpdMax = svpMax * (1 - df2024["UM"]/100) / 1000
df2024["vpd_min"] = vpdMin
df2024["vpd_mean"] = vpdMean
df2024["vpd_max"] = vpdMax

In [72]:
fullDf = pd.concat([df, df2024])

In [73]:
fullDf

Unnamed: 0,DATE,DEP,TN,TX,TM,UM,YEAR-MONTH,vpd_min,vpd_mean,vpd_max
0,1950-01-01,6,4.2,15.2,8.4,79.0,1950-01,0.173196,0.231481,0.362735
1,1950-01-01,21,-3.0,2.3,-0.7,73.0,1950-01,0.132196,0.156696,0.194645
2,1950-01-01,42,-0.6,0.6,-0.2,91.0,1950-01,0.052616,0.054175,0.057417
3,1950-01-01,21,-7.5,2.0,-3.0,85.0,1950-01,0.052143,0.073442,0.105842
4,1950-01-01,11,4.2,9.8,7.0,88.0,1950-01,0.098969,0.120217,0.145384
...,...,...,...,...,...,...,...,...,...,...
269386,2024-12-31,45,1.4,3.6,2.4,98.0,2024-12,0.013518,0.014521,0.015812
269387,2024-12-31,51,-0.9,2.7,1.0,100.0,2024-12,0.000000,0.000000,0.000000
269388,2024-12-31,57,-3.0,-0.6,-2.2,100.0,2024-12,0.000000,0.000000,0.000000
269389,2024-12-31,58,-1.4,0.7,-0.7,97.0,2024-12,0.016538,0.017411,0.019278


In [74]:
yearlyVpd = fullDf[["DEP", "YEAR-MONTH", "vpd_min", "vpd_max", "vpd_mean"]].groupby(["DEP", "YEAR-MONTH"]).mean()[["vpd_min", "vpd_max", "vpd_mean"]].reset_index()
yearlyVpd

Unnamed: 0,DEP,YEAR-MONTH,vpd_min,vpd_max,vpd_mean
0,1,1950-01,0.065089,0.105477,0.082978
1,1,1950-02,0.165983,0.314905,0.218654
2,1,1950-03,0.158750,0.392141,0.248884
3,1,1950-04,0.188112,0.410970,0.275093
4,1,1950-05,0.339302,0.746062,0.509773
...,...,...,...,...,...
75150,95,2024-08,0.517661,1.078706,0.740100
75151,95,2024-09,0.276332,0.478038,0.353738
75152,95,2024-10,0.136135,0.221634,0.169487
75153,95,2024-11,0.112626,0.168526,0.137836


In [75]:
yearlyVpd.to_csv("../YieldModel/data/forecast/currentYear/vpd_historical_1950_2024.csv")