In [None]:
import pandas as pd
import glob
import os
import numpy as np

# Folder containing the input CSV files
folder = "./data"

# Collect the hot-days and hot-nights CSV files found in the folder.
# glob returns matches in arbitrary (filesystem-dependent) order, so the
# lists are sorted to make load order and printed output reproducible.
files_hot_days = sorted(glob.glob(os.path.join(folder, "*hot_days*.csv")))
files_hot_nights = sorted(glob.glob(os.path.join(folder, "*hot_nights*.csv")))

print(files_hot_days)
print(files_hot_nights)

In [None]:
def _load_model_results(paths, variable):
    """Load the result CSVs in `paths` and stack them into one DataFrame.

    For every file, the model name is taken from the file name (the text
    between "resultats_" and "_hot") and stored in a 'model_name' column
    inserted just before the last column. After concatenation a constant
    'variable' column holding `variable` is inserted just before the last
    column as well.

    Parameters
    ----------
    paths : list of str
        Paths of the CSV files to load.
    variable : str
        Label written to the 'variable' column of every row.

    Returns
    -------
    pandas.DataFrame
        All rows of all files, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If `paths` is empty, or if a file name does not contain "resultats_".
    """
    if not paths:
        raise ValueError(f"No input CSV files found for variable '{variable}'")

    frames = []
    for path in paths:
        # id_ter is read as text so identifiers keep any leading zeros
        frame = pd.read_csv(path, sep=",", dtype={"id_ter": str})

        filename = os.path.basename(path)
        name_parts = filename.split("resultats_")
        if len(name_parts) < 2:
            raise ValueError(
                f"Cannot extract the model name from '{filename}': "
                "expected 'resultats_<model>_hot...' in the file name"
            )
        model_name = name_parts[1].split("_hot")[0]
        frame.insert(len(frame.columns) - 1, 'model_name', model_name)

        frames.append(frame)

    combined = pd.concat(frames, ignore_index=True)
    combined.insert(len(combined.columns) - 1, "variable", variable)
    return combined


# Load and concatenate all CSVs - hot days
df_all_hot_days = _load_model_results(files_hot_days, "TasMaxAdjust_noSup35")

# Load and concatenate all CSVs - hot nights
df_all_hot_nights = _load_model_results(files_hot_nights, "TasMinAdjust_noSup20")

# Merge both indicators into a single table
df_all = pd.concat([df_all_hot_days, df_all_hot_nights], ignore_index=True)

# Scenario code ("ssp" followed by three digits) found in 'modalite';
# rows without such a code get NaN. expand=False returns a Series.
df_all['experiment'] = df_all['modalite'].str.extract(r'(ssp\d{3})', expand=False)

print(df_all)

In [None]:
# Assign warming levels

# One row per (warming level, experiment, model name fragment, period).
# Keeping the label next to its criteria means the condition list and the
# value list can no longer drift out of sync.
# The model fragment is matched as a plain substring of 'model_name'.
warming_level_periods = [
    # +2°C
    ('TRACC_2', 'ssp370', 'CNRM-ESM2-1', '2037-2056'),
    ('TRACC_2', 'ssp585', 'CNRM-ESM2-1', '2032-2051'),
    ('TRACC_2', 'ssp370', 'NorESM2', '2037-2056'),
    ('TRACC_2', 'ssp585', 'NorESM2', '2030-2049'),
    ('TRACC_2', 'ssp370', 'EC-Earth3-Veg', '2027-2046'),
    ('TRACC_2', 'ssp585', 'EC-Earth3-Veg', '2022-2041'),
    # +2.7°C
    ('TRACC_2.7', 'ssp370', 'CNRM-ESM2-1', '2063-2082'),
    ('TRACC_2.7', 'ssp585', 'CNRM-ESM2-1', '2052-2071'),
    # NOTE(review): this period spans 10 years while every other one spans
    # 20 - possibly a typo variant of '2052-2071'; confirm against the data.
    ('TRACC_2.7', 'ssp585', 'CNRM-ESM2-1', '2052-2061'),
    ('TRACC_2.7', 'ssp370', 'NorESM2', '2058-2077'),
    ('TRACC_2.7', 'ssp585', 'NorESM2', '2049-2068'),
    ('TRACC_2.7', 'ssp370', 'EC-Earth3-Veg', '2044-2063'),
    ('TRACC_2.7', 'ssp585', 'EC-Earth3-Veg', '2037-2056'),
    # +4°C
    ('TRACC_4', 'ssp585', 'CNRM-ESM2-1', '2072-2091'),
    ('TRACC_4', 'ssp585', 'NorESM2', '2069-2088'),
    ('TRACC_4', 'ssp370', 'EC-Earth3-Veg', '2065-2084'),
    ('TRACC_4', 'ssp585', 'EC-Earth3-Veg', '2055-2074'),
]

# The reference period applies to every model and experiment and is tested
# first: np.select keeps the first matching condition for each row.
conditions = [df_all['period'] == '1985-2014']
values = ['Reference']

for level, experiment, model_fragment, period in warming_level_periods:
    conditions.append(
        (df_all['experiment'] == experiment)
        # regex=False: the model name is a literal, not a pattern
        & df_all['model_name'].str.contains(model_fragment, regex=False)
        & (df_all['period'] == period)
    )
    values.append(level)

# Rows matching no condition are labelled 'Other'
df_all['niveau_rechauff'] = np.select(conditions, values, default='Other')

print(df_all)

In [None]:
# Sanity check: every row must have received a warming level.
# Rows still labelled 'Other' matched none of the conditions.
is_unassigned = df_all['niveau_rechauff'].eq('Other')
other_rows = df_all.loc[is_unassigned]

if len(other_rows) > 0:
    # Show the offending rows before aborting
    print("Rows with 'Other' found:")
    print(other_rows)
    raise ValueError("Some entries haven't been assigned a warming level")

# Only reached when the check passed: prints the (empty) selection
print(other_rows)

In [None]:
# Ensure "valeur" is numeric.
# errors="coerce" turns unparsable entries into NaN; those are skipped by the
# aggregations that follow, so the number of coerced values is reported
# instead of being dropped silently.
valeur_numeric = pd.to_numeric(df_all["valeur"], errors="coerce")

# Entries that were present before the conversion but are NaN afterwards
n_coerced = int((valeur_numeric.isna() & df_all["valeur"].notna()).sum())
if n_coerced:
    print(f"Warning: {n_coerced} non-numeric 'valeur' entries were converted to NaN")

df_all["valeur"] = valeur_numeric

print(df_all)

In [None]:
# Summary statistics of "valeur" (mean, min, max) for each
# territory / variable / warming level combination.
group_keys = ["id_ter", "variable", "niveau_rechauff"]
grouped_valeur = df_all.groupby(group_keys)["valeur"]

# One output column per statistic; the group keys become regular columns again
stats = grouped_valeur.agg(["mean", "min", "max"]).reset_index()

print(stats)

In [None]:
# Reshape to long format: one row per group and per statistic, with the
# statistic name appended to the variable name (_mean, _min, _max).
key_columns = ["id_ter", "variable", "niveau_rechauff"]
stat_columns = ["mean", "min", "max"]

long_df = stats.melt(
    id_vars=key_columns,
    value_vars=stat_columns,
    var_name="variable_type",
    value_name="valeur",
)

# Keep one decimal place
long_df['valeur'] = long_df['valeur'].round(1)

# "<variable>_<statistic>"
long_df["variable"] = long_df["variable"].str.cat(long_df["variable_type"], sep="_")

print(long_df)

In [None]:
# Shape the table for export:
#  - drop the helper column "variable_type" (ignored if already absent),
#  - rename "variable" to "modalite" and "niveau_rechauff" to "annee".
long_df = long_df.drop(columns=["variable_type"], errors="ignore")
long_df = long_df.rename(columns={"variable": "modalite", "niveau_rechauff": "annee"})

# Display names of the warming levels
annee_labels = {
    'Reference': 'Référence',
    'TRACC_2': '+2°C - TRACC 2030',
    'TRACC_2.7': '+2.7°C - TRACC 2050',
    'TRACC_4': '+4°C - TRACC 2100',
}

# Output names of the variables (lower-case form of the internal names)
modalite_labels = {
    'TasMaxAdjust_noSup35_mean': 'tasmaxadjust_nosup35_mean',
    'TasMaxAdjust_noSup35_min': 'tasmaxadjust_nosup35_min',
    'TasMaxAdjust_noSup35_max': 'tasmaxadjust_nosup35_max',
    'TasMinAdjust_noSup20_mean': 'tasminadjust_nosup20_mean',
    'TasMinAdjust_noSup20_min': 'tasminadjust_nosup20_min',
    'TasMinAdjust_noSup20_max': 'tasminadjust_nosup20_max',
}

# Exact-match replacement; values missing from a mapping are left unchanged
long_df['annee'] = long_df['annee'].replace(annee_labels)
long_df['modalite'] = long_df['modalite'].replace(modalite_labels)

print(long_df)

In [None]:
# Write the final long-format table as "stats.csv" inside the data folder
# (no index column), then echo the path and the saved table.
stats_filename = "stats.csv"
output_file = os.path.join(folder, stats_filename)

long_df.to_csv(path_or_buf=output_file, index=False)

print("Done! Results saved to:", output_file)
print(long_df)