In [1]:
import os
os.getcwd()

'/home/jovyan/ArditArifi'

In [2]:
source_path="/home/jovyan/prepared-data/POLAR_EMISS_DATA/CMIP6"
output_path="/home/jovyan/student-storages/GROUP3/ArditArifi/output"

! mkdir -p output_path

Analysis

In [3]:
import glob
# ANALYSIS
import xarray as xr
import pandas as p
import numpy as np
# PLOTTING
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import matplotlib.colors as colors
# USER 
import area_weight as aw


In [4]:

# REGIONS
region_label = ["ARC", "ANT"]
arctic_lat = [60, 90]
antarctic_lat = [-90, -60]

# SEASONS
seasons_label = ["DJF", "MAM", "JJA", "SON"]
seasons = {
    "DJF": [12, 1, 2],   # Dec, Jan, Feb
    "MAM": [3, 4, 5],    # Mar, Apr, May
    "JJA": [6, 7, 8],    # Jun, Jul, Aug
    "SON": [9, 10, 11],  # Sep, Oct, Nov
}

# Create combined region-season labels
region_season_label = [f"{region} {season}" for region in region_label for season in seasons_label]

# EXPERIMENTS
experiments = ["2xss"]#, "2xdust", "2xfire", "2xDMS"]

# MODELS
models = ["IPSL-CM6A-LR-INCA", "UKESM1-0-LL", "NorESM2-LM"]

# PATTERN FUNCTION
patter = lambda model, var, exp: f"{source_path}/{var}*{model}*-{exp}*.nc"

In [5]:
# CONSTRUCT TABLE
mean_values = np.full(
    shape=(len(region_season_label), len(experiments), len(models)),
    fill_value=np.nan
)
std_values = np.full(
    shape=(len(region_season_label), len(experiments), len(models)),
    fill_value=np.nan
)


# LOAD DATA
model = ""
var = "rtmt"
weight_function = aw.area_weight_haversine

# Calculate total steps for progress
total_steps = len(models) * len(experiments) * len(seasons_label)
current_step = 0

In [6]:
# MAIN LOOP
for idx_model, model in enumerate(models):
    print('hi')
    try:
        # Open control dataset for this model
        ds_ctl = xr.open_mfdataset(patter(model, var, "control"), combine="by_coords")
    except Exception as e:
        print(f"Error opening control files {patter(model, var, 'control')}: {e}")
        # Skip all seasons for this model if control is missing
        current_step += len(seasons_label) * len(experiments)
        continue
    
    for idx_exp, exp in enumerate(experiments):
        # Try opening the experiment dataset
        try:
            ds_exp = xr.open_mfdataset(patter(model, var, exp), combine="by_coords")
        except Exception as e:
            print(f"Error opening experiment files {patter(model, var, exp)}: {e}")
            current_step += len(seasons_label)
            continue

        # Compute bias as experiment minus control
        ds_bias = ds_exp[var] - ds_ctl[var]

        for idx_season, season in enumerate(seasons_label):
            # Increment progress
            current_step += 1
            pct = (current_step / total_steps) * 100
            print(f"Progress: {current_step}/{total_steps} -> {pct:.2f}%")

            # 1) Select months for this season
            ds_season = ds_bias.sel(time=ds_bias.time.dt.month.isin(seasons[season]))

            # ---------------------
            # ARCTIC
            # ---------------------
            ds_season_arctic = ds_season.sel(
                lat=slice(arctic_lat[0], arctic_lat[1]),
            )
            # First, take time mean (removes the 'time' dimension)
            ds_arctic_time_mean = ds_season_arctic.mean(dim="time")

            # Define weights after subsetting region
            weights_arctic_2d = aw.area_weight_haversine( ds_season_arctic.lat.values , ds_season_arctic.lon.values  )
            ds_arctic_time_mean = ds_arctic_time_mean * weights_arctic_2d
            
            # Now do the weighted mean/std over lat/lon
            arc_mean = ds_arctic_time_mean.mean(dim=("lat","lon")).values
            arc_std  = ds_arctic_time_mean.std(dim=("lat","lon")).values
            print(arc_mean)

            mean_values[idx_season, idx_exp, idx_model] = arc_mean
            std_values[idx_season, idx_exp, idx_model]  = arc_std
            
            # ---------------------
            # ANTARCTIC
            # ---------------------
            ds_season_antarctic = ds_season.sel(
                lat=slice(antarctic_lat[0], antarctic_lat[1]),
            )
            # Time mean first
            ds_antarctic_time_mean = ds_season_antarctic.mean(dim="time")

            # Define weights after subsetting region
            weights_antarctic_2d = aw.area_weight_haversine( ds_season_antarctic.lat.values , ds_season_antarctic.lon.values  )
            ds_antarctic_time_mean = ds_antarctic_time_mean * weights_antarctic_2d

            ant_mean = ds_antarctic_time_mean.mean(dim=("lat","lon")).values
            ant_std  = ds_antarctic_time_mean.std(dim=("lat","lon")).values

            mean_values[idx_season + 4, idx_exp, idx_model] = ant_mean
            print(ant_mean)
            std_values[idx_season + 4, idx_exp, idx_model]  = ant_std
            
# Finally, display the populated array
print("Shape of mean_values:", mean_values.shape)
print(mean_values)

Progress: 1/12 -> 8.33%
0.1760444846214725
-0.33843733467809894
Progress: 2/12 -> 16.67%
-0.27108387318322547
-0.07892540282463568
Progress: 3/12 -> 25.00%
-0.4867704097039545
0.014462311218400779
Progress: 4/12 -> 33.33%
-0.10809622689804982
-0.024917476263721
Progress: 5/12 -> 41.67%
-0.3314394852181358
-0.3736942795446541
Progress: 6/12 -> 50.00%
0.14247118344685075
-0.18367146334947282
Progress: 7/12 -> 58.33%
-0.23740043691358328
0.0899258598975459
Progress: 8/12 -> 66.67%
-0.08501090363067462
-0.31398046627966114
Progress: 9/12 -> 75.00%
-0.6142359111554327
-1.0149925489307943
Progress: 10/12 -> 83.33%
-0.16232835782899752
-0.25854572717065183
Progress: 11/12 -> 91.67%
-0.298342368100128
-0.1189752583983575
Progress: 12/12 -> 100.00%
-0.13904817291858132
-0.59636298166125
Shape of mean_values: (8, 1, 3)
[[[ 0.17604448 -0.33143949 -0.61423591]]

 [[-0.27108387  0.14247118 -0.16232836]]

 [[-0.48677041 -0.23740044 -0.29834237]]

 [[-0.10809623 -0.0850109  -0.13904817]]

 [[-0.33843

In [7]:
import pandas as pd

bias = mean_values
for idx_exp in range(len(experiments)):
    # This will skip the control experiment at index 0
    exp_name = experiments[idx_exp]
    print("Experiment:", exp_name)
    
    # bias[:, idx_exp, :].shape -> (len(region_season_label), len(models))
    # bias[:, idx_exp, :].transpose().shape -> (len(models), len(region_season_label))
    # We want models as rows and region_season_label as columns
    df = pd.DataFrame(
        data=bias[:, idx_exp, :].transpose(),
        index=models,
        columns=region_season_label
    )

    print(df)
    print()  # Blank line for readability

Experiment: 2xss
                    ARC DJF   ARC MAM   ARC JJA   ARC SON   ANT DJF   ANT MAM  \
IPSL-CM6A-LR-INCA  0.176044 -0.271084 -0.486770 -0.108096 -0.338437 -0.078925   
UKESM1-0-LL       -0.331439  0.142471 -0.237400 -0.085011 -0.373694 -0.183671   
NorESM2-LM        -0.614236 -0.162328 -0.298342 -0.139048 -1.014993 -0.258546   

                    ANT JJA   ANT SON  
IPSL-CM6A-LR-INCA  0.014462 -0.024917  
UKESM1-0-LL        0.089926 -0.313980  
NorESM2-LM        -0.118975 -0.596363  



In [23]:
import pandas as pd
import numpy as np

# Suppose the below variables are already defined
# bias : shape (len(region_season_label), len(experiments)-1, len(models))
# region_season_label : list of region/season names
# experiments : list of experiments, with experiment[0] = "control"
# models : list of model names

all_data = []  # Will hold the melted DataFrame for each experiment

for idx_exp in range(len(experiments) - 1):
    exp_name = experiments[idx_exp + 1]
    print("Experiment:", exp_name)

    # bias[:, idx_exp, :].shape -> (n_region_season, n_models)
    # transpose -> (n_models, n_region_season)
    df = pd.DataFrame(
        data=bias[:, idx_exp, :].transpose(),
        index=models,
        columns=region_season_label
    )

    # Reshape df to "long" format with columns [Model, region_season_label, value]
    df_long = df.reset_index().melt(
        id_vars="index",                 # "index" column is the model
        var_name="RegionSeason",         # name for the melted columns
        value_name="Bias"               # name for the melted values
    )

    # Rename "index" -> "Model"
    df_long.rename(columns={"index": "Model"}, inplace=True)

    # Add a column for Experiment
    df_long["Experiment"] = exp_name

    # Append to the list
    all_data.append(df_long)

# Concatenate all experiments into one DataFrame
final_df = pd.concat(all_data, ignore_index=True)

# Optionally reorder columns (Experiment, Model, RegionSeason, Bias)
final_df = final_df[["Experiment", "Model", "RegionSeason", "Bias"]]

# Save to CSV
final_df.to_csv("all_bias_results.csv", index=False)

print("CSV file saved: all_bias_results.csv")


Experiment: 2xss
Experiment: 2xdust
Experiment: 2xfire
Experiment: 2xDMS
CSV file saved: all_bias_results.csv
