In [1]:
import pandas as pd
import folium

# load csv file in dataframe
df = pd.read_csv('data/weather/postesSynop.csv', sep=";")

# filter to keep only station on metropole
df_metro = df[(df['Latitude'] >= 41) & (df['Latitude'] <= 51) & (df['Longitude'] >= -5) & (df['Longitude'] <= 10)]

# create map of France center on middle of France
map_of_france = folium.Map(location=[46.603354, 1.888334], zoom_start=6)  # Ce sont les coordonnées approximatives du centre de la France

# add station to map
for _, row in df_metro.iterrows():
    lat, lon = row['Latitude'], row['Longitude']  # Remplacez 'latitude' et 'longitude' par les noms exacts des colonnes si elles sont différentes
    folium.Marker([lat, lon], tooltip=row['Nom']).add_to(map_of_france)  # Remplacez 'nom_de_la_station' par le nom exact de la colonne contenant le nom de la station si elle est différente

# plot map
map_of_france.save('stations_meteo_france.html')  # Sauvegarde la carte dans un fichier HTML
map_of_france


In [2]:
import pandas as pd



# files list to read
files = [f"data/weather/data/synop.{year}{month:02d}.csv" for year in range(2017, 2024) for month in range(1, 13)]

# list to stock temporary dataframe
dfs = []

for file in files:
    try:
        df = pd.read_csv(file, sep=";")

        # filter on selected column
        cols_of_interest = ['numer_sta', 'date', 't', 'u', 'pres', 'etat_sol', 'rr3']
        df = df[cols_of_interest]

        #transform numerical value in float or Nan if no value
        for col in ['t', 'u', 'pres', 'rr3']:
            df[col] = pd.to_numeric(df[col], errors = 'coerce')

        #convert temperature from Kelvin to Celsius
        df['t'] = df['t'] - 273.15

        # filter based on station existing in df_metro
        df = df[df['numer_sta'].isin(df_metro['ID'])]

        # data column transformation
        df['date'] = df['date'].astype(str).str[:8]  # keep only yyyymmdd 

        # group by dates and make statistics
        grouped = df.groupby(['numer_sta', 'date'])
        daily = grouped.agg({
            't': ['mean', 'min', 'max'],
            'u': 'mean',
            'pres': 'mean',
            'etat_sol': 'max',
            'rr3': 'sum'
        }).reset_index()



        # renamed column
        daily.columns = ['numer_sta', 'date', 't_mean', 't_min', 't_max', 'u_mean', 'pres_mean', 'etat_sol_max', 'rr3_sum']
        #round temperature to 1
        daily['t_mean'] = daily['t_mean'].round(1)
        dfs.append(daily)
        
    except FileNotFoundError:
        # if files doesn't exist, continue
        continue

# concatenante all dataframe in one
df_weather = pd.concat(dfs, ignore_index=True)




In [16]:
import pandas as pd


# replace station id by name
df_weather = df_weather.merge(df_metro[['ID', 'Nom']], left_on='numer_sta', right_on='ID', how='left')
df_weather.drop(columns=['numer_sta', 'ID'], inplace=True)
df_weather.rename(columns={'Name': 'station'}, inplace=True)

# convert date type in datetime
df_weather['date'] = pd.to_datetime(df_weather['date'], format='%Y%m%d')

# define data as index
df_weather.set_index('date', inplace=True)


print(df_weather)

# save dataframe
df_weather.to_csv("data/weather/weather_data.csv", index=False)

            t_mean  t_min  t_max     u_mean      pres_mean etat_sol_max  \
date                                                                      
2017-01-01    -1.9   -5.1    0.6  96.875000  101242.500000           mq   
2017-01-02     1.6    0.0    4.4  94.625000  101682.500000           mq   
2017-01-03     2.1   -1.9    6.0  88.500000  101971.250000           mq   
2017-01-04     4.9    0.3    6.8  84.375000  101523.750000           mq   
2017-01-05     2.1    0.2    5.2  91.375000  102366.250000           mq   
...            ...    ...    ...        ...            ...          ...   
2023-08-20    28.3   21.8   34.0  51.500000  101581.250000           mq   
2023-08-21    28.8   22.5   34.9  52.125000  101682.500000           mq   
2023-08-22    29.5   24.6   34.1  47.125000  101487.500000           mq   
2023-08-23    28.7   23.4   33.3  55.500000  101345.000000           mq   
2023-08-24    23.8   22.6   25.3  56.333333  101383.333333           mq   

            rr3_sum     