## Regional Analysis

### Steps

#### Step 1: Selection of regions of interest

In this step we rank regions by the temporal variance of their CO2 flux seasonal amplitude.
For each region, the temporal variance of the CO2 flux seasonal amplitude and the total area are calculated.
Regions to analyze further can then be selected by ranking them according to variance or area-weighted variance.

1. Load regions data
2. Loop over regions. For each:
    - Calculate the variance in co2 flux seasonal amplitude
    - Calculate the total area
3. Calculate global sum of variance*area
4. Replace raster region indices with variance values and plot maps

### Notes:

Regions are currently taken from the Koeppen-Geiger climate classification data available at http://koeppen-geiger.vu-wien.ac.at/


In [None]:
import os
import pandas as pd
import rioxarray as rio
import xarray as xr
import geopandas as gpd
import matplotlib as mpl
from matplotlib.colors import ListedColormap
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs

In [None]:
# Read the Koeppen-Geiger region raster, turn the ocean class into missing data
# and reproject it to EPSG:4326.

# Alternative input file, kept for reference (not used):
# kopp = rio.open_rasterio('../data_input/KoeppenGeiger3_KG3_CRUTS32_Hist_7100.tif', mask_and_scale=False)
kopp = rio.open_rasterio(
    '../data_input/VU-VIENA/KG_1986-2010.grd', mask_and_scale=False)
# Class 32 is the ocean layer (see the color/label lists defined further down);
# declare it as the nodata value ...
kopp.rio.write_nodata(32, inplace=True)
# ... and replace those cells with NaN so the np.isnan-based filters used later skip them.
kopp = kopp.where(kopp != kopp.rio.nodata)
# The in-memory data now uses NaN for missing cells; record 32 as the encoded nodata value.
kopp.rio.write_nodata(32, encoded=True, inplace=True)
kopp = kopp.rio.reproject("EPSG:4326")
# Debug helpers to inspect the nodata settings:
# print(f"nodata: {kopp.rio.nodata}")
# print(f"encoded_nodata: {kopp.rio.encoded_nodata}")

In [None]:

# Lookup tables shared by the plotting and analysis cells

# Clip box per continent, ordered [minx, miny, maxx, maxy]
cont_bounds = {
    'Asia': [24, 0, 190, 81],
    'Europe': [-31, 35, 69, 81],
    'North America': [-178, 0, -15, 84],
}

# Region codes present in the raster (NaN cells are left out)
kopp_ind = np.unique(kopp.data[~np.isnan(kopp.data)]).astype(int)

# One color and one label per class code; the last entry (layer 32) is the ocean
kopp_cols = np.array([
    # A
    "#960000", "#FF0000", "#FF6E6E", "#FFCCCC",
    # B
    "#CC8D14", "#CCAA54", "#FFCC00", "#FFFF64",
    # C
    "#007800", "#005000", "#003200", "#96FF00", "#00D700", "#00AA00",
    "#BEBE00", "#8C8C00", "#5A5A00",
    # D
    "#550055", "#820082", "#C800C8", "#FF6EFF", "#646464", "#8C8C8C",
    "#BEBEBE", "#E6E6E6", "#6E28B4", "#B464FA", "#C89BFA", "#C8C8FF",
    # E
    "#6496FF", "#64FFFF",
    # Ocean
    "#F5FFFF",
])

kopp_labels = np.array([
    'Af', 'Am', 'As', 'Aw',
    'BSh', 'BSk', 'BWh', 'BWk',
    'Cfa', 'Cfb', 'Cfc', 'Csa', 'Csb', 'Csc', 'Cwa', 'Cwb', 'Cwc',
    'Dfa', 'Dfb', 'Dfc', 'Dfd', 'Dsa', 'Dsb', 'Dsc', 'Dsd',
    'Dwa', 'Dwb', 'Dwc', 'Dwd',
    'EF', 'ET',
    'Ocean',
])

# The ocean layer 32 is treated as missing, so drop its color and label
kopp_cols = kopp_cols[:-1]
kopp_labels = kopp_labels[:-1]


In [None]:
# Plotting function: map plot of a categorical (region) raster with a labelled colorbar

def plot_global(rast, cols, ticks, labels, fig_size, title='Koeppen-Geiger Regions'):
    """Draw a categorical raster on a Plate Carree map with one colorbar box per class.

    Parameters
    ----------
    rast : object
        Raster exposing ``.plot(ax=..., transform=..., cmap=..., norm=...)``;
        the notebook passes xarray DataArrays.
    cols : str or sequence of colors
        Either the name of a built-in matplotlib colormap or a list of colors
        (one per entry in ``ticks``).
    ticks : array-like of numbers
        Class values in ascending order; must not be empty.
    labels : sequence of str
        Colorbar labels, one per entry in ``ticks``.
    fig_size : sequence of two numbers
        Figure size passed to ``plt.figure``.
    title : str, optional
        Axes title. Defaults to the title the notebook has always used.

    Returns
    -------
    None
        The figure is drawn as a side effect.
    """
    # Accept plain lists as well as arrays; the arithmetic below needs an ndarray.
    ticks = np.asarray(ticks)

    if isinstance(cols, str):
        # matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
        # pyplot.get_cmap performs the same lookup and is still supported.
        cmap = plt.get_cmap(cols)
    else:
        cmap = ListedColormap(cols)

    # One bin per class: the class values plus a closing edge one unit above the last.
    boundaries = np.append(ticks, ticks[-1] + 1)
    norm = mpl.colors.BoundaryNorm(boundaries, ncolors=cmap.N)

    fig = plt.figure(figsize=fig_size)
    ax = fig.add_subplot(
        111,
        projection=ccrs.PlateCarree(),
        facecolor="white"
    )
    p = rast.plot(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cmap=cmap,
        norm=norm,
        add_colorbar=False,
    )
    ax.coastlines()
    ax.set_title(title)

    # Put each tick in the middle of its colorbar box: half-way to the next class
    # value, and half a unit above the last one (its box is one unit wide).
    tick_centres = ticks + np.append(np.diff(ticks) / 2, 0.5)
    cbar = fig.colorbar(p, label=None, ticks=tick_centres)
    cbar.ax.set_yticklabels(labels)


In [None]:
# Sanity check: map the full region raster with its class colors and labels.

plot_global(rast=kopp, cols=kopp_cols, ticks=kopp_ind, labels=kopp_labels, fig_size=[17, 7])

## Regional analysis

In [None]:
# read in files: co2 data and continent vector file
conts = gpd.read_file('../data_input/continents.geojson')
co2fluxamp = rio.open_rasterio('../data_output/co2fluxAmp_Inv1999.nc')


# Create a template dataframe to save results: one row per region code present in kopp.
# NOTE(review): the constructor needs regind, kopp_labels and kopp_cols to have the
# same length, i.e. every one of the 31 land classes must occur in the raster.
# NOTE(review): reg_analysis writes the variance into an 'ampvar' column, so the
# 'co2amp_var' column created here stays NaN.
regind = np.unique(kopp.data[~np.isnan(kopp.data)]).astype(int)
df_regions = pd.DataFrame({'regind': regind, 'regname': kopp_labels,
                          'regcolor': kopp_cols, 'area': np.nan, 'co2amp_var': np.nan})
# Create a dictionary to hold dataframes for each continent
conts_dict = dict()

# Create area grid for co2fluxamp grid cells
# cos(latitude) weights; reg_analysis uses them to weight the per-cell area by latitude
latcos = np.cos(np.deg2rad(co2fluxamp.y))
latcos.name = "weights"
# Use one slice of the flux data (index 1 along the first dimension) as the grid template
area_co2res = co2fluxamp[1].squeeze()
# Approximate grid cell area at the Equator: (2 * 111) * (2.5 * 111) -- presumably a
# 2 x 2.5 degree cell at ~111 km per degree, giving km^2 (TODO confirm).
# where() leaves null cells untouched and sets every other cell to that constant.
area_co2res = area_co2res.where(area_co2res.isnull(), other=(2 * 111) * (2.5 * 111))

# Reproject kopp to co2 resolution
kopp_rp = kopp.rio.reproject_match(co2fluxamp)


In [None]:
# Function to get region area and variance for a continent

def reg_analysis(cont_name):
    """Compute per-region area and CO2 amplitude variance for one continent.

    The region raster is clipped to the continent polygon and to its bounding
    box, plotted, and then every region code found in the clip is summarised.
    Three exploratory bar charts and two summary lines are produced as side
    effects.

    Parameters
    ----------
    cont_name : str
        Continent name as found in the ``CONTINENT`` column of ``conts``;
        must also be a key of ``cont_bounds``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df_regions`` restricted to the regions found in the
        continent, sorted by ``ampvararea`` (descending), with the added
        columns ``ampvar``, ``ampvararea``, ``ampvararea_sum_cont`` and
        ``continent``.

    Raises
    ------
    KeyError
        If ``cont_name`` has no entry in ``cont_bounds``.
    """
    # Unpack first so that an unknown continent fails right away with a KeyError.
    minx, miny, maxx, maxy = cont_bounds[cont_name]

    cont = conts[conts['CONTINENT'] == cont_name]
    kopp_sel = kopp_rp.rio.clip(cont.geometry, cont.crs, all_touched=True).rio.clip_box(
        minx=minx,
        miny=miny,
        maxx=maxx,
        maxy=maxy,
        crs="EPSG:4326"
    ).squeeze()  # Get rid of the 'band' dimension
    del kopp_sel['band']

    # Work on a copy of the template and create the result column up front,
    # instead of letting the first .loc assignment in the loop create it.
    df_results = df_regions.copy()
    df_results['ampvar'] = np.nan

    # Region codes present in the clipped raster
    region_index = np.unique(kopp_sel.data[~np.isnan(kopp_sel.data)]).astype(int)

    # Colors and labels are looked up by position, i.e. code k uses entry k - 1.
    plot_global(kopp_sel, kopp_cols[region_index - 1], region_index,
                kopp_labels[region_index - 1], [20, 7])

    # Per region: mean over cells of the variance along 'year', and the
    # cos(latitude)-weighted sum of the per-cell area.
    for roi in region_index:
        in_region = kopp_sel == roi
        row = df_results['regind'] == roi
        mean_var = co2fluxamp.where(in_region).var(dim='year').mean().data
        totarea = area_co2res.where(in_region).weighted(latcos).sum().data
        df_results.loc[row, 'ampvar'] = np.round(mean_var, 3)
        df_results.loc[row, 'area'] = np.round(totarea)

    # Keep only regions that occur in this continent. The explicit copy makes the
    # column assignments below operate on an independent frame rather than on a
    # slice of the previous one (avoids SettingWithCopyWarning).
    df_results = df_results.loc[df_results['area'].notna(), :].copy()
    df_results['ampvararea'] = np.round(
        df_results['area'] * df_results['ampvar'])
    df_results = df_results.sort_values('ampvararea', ascending=False)
    df_results['ampvararea_sum_cont'] = df_results['ampvararea'].cumsum()
    amparea_sum = df_results['ampvararea'].sum()

    print('Total amparea sum = : ', amparea_sum, '\n')
    print('80% of amparea sum = : ', amparea_sum * 0.8, '\n')

    # Explore data
    for column in ('area', 'ampvar', 'ampvararea'):
        df_results.sort_values(column, ascending=False).plot.bar('regname', column)

    df_results['continent'] = cont_name

    return df_results


### Analysis for each NH continent

In [None]:
df_co2_as = reg_analysis(cont_name="Asia")

In [None]:
df_co2_eu = reg_analysis(cont_name="Europe")

In [None]:
df_co2_na = reg_analysis(cont_name="North America")

In [None]:
# Stack the per-continent tables, rank all rows by area-weighted variance
# and add the running total along that ranking
df_co2_glob = pd.concat((df_co2_as, df_co2_eu, df_co2_na)).sort_values(
    'ampvararea', ascending=False)
df_co2_glob['ampvararea_sum_glob'] = df_co2_glob['ampvararea'].cumsum()

In [None]:
# Total of the area-weighted variance ('ampvararea') over all three continents,
# and 80% of that total. Note this is not a total area.
amparea_sum = df_co2_glob['ampvararea'].sum()
print('Total amparea sum = : ', amparea_sum, '\n')
print('80% of amparea sum = : ', amparea_sum * 0.8, '\n')

In [None]:
# Create rasters with amp and area*amp values instead of region indices

ds_kopp_co2 = xr.Dataset({'region_index': kopp_rp})

def recodeConts():
    """Add 'ampvararea' and 'ampvar' rasters to the global ``ds_kopp_co2``.

    For Asia, Europe and North America the region raster is clipped to the
    continent (polygon plus bounding box) and every region code is replaced
    by the value stored for that continent/region pair in ``df_co2_glob``.
    The per-continent rasters are then merged with ``combine_first`` in the
    order listed, so earlier continents win where rasters overlap.

    Cells whose region has no row in ``df_co2_glob`` are left as NaN.

    Returns
    -------
    None
        ``ds_kopp_co2`` is modified in place.
    """
    cont_names = ['Asia', 'Europe', 'North America']
    value_cols = ['ampvararea', 'ampvar']
    merged = dict.fromkeys(value_cols)  # column name -> merged raster (None until first continent)

    for cont_name in cont_names:
        minx, miny, maxx, maxy = cont_bounds[cont_name]
        cont = conts[conts['CONTINENT'] == cont_name]
        kopp_sel = kopp_rp.rio.clip(cont.geometry, cont.crs, all_touched=True).rio.clip_box(
            minx=minx,
            miny=miny,
            maxx=maxx,
            maxy=maxy,
            crs="EPSG:4326"
        ).squeeze()  # Get rid of the 'band' dimension
        del kopp_sel['band']

        # Region codes present in this continent, and this continent's result rows
        region_index = np.unique(kopp_sel.data[~np.isnan(kopp_sel.data)]).astype(int)
        cont_rows = df_co2_glob[df_co2_glob['continent'] == cont_name]

        # Start from all-NaN rasters and always test against the ORIGINAL codes in
        # kopp_sel. Recoding in place would let an already substituted value that
        # happens to equal a later region code be recoded a second time.
        recoded = {col: xr.full_like(kopp_sel, np.nan, dtype=float) for col in value_cols}
        for code in region_index:
            match = cont_rows.loc[cont_rows['regind'] == code, value_cols]
            if match.empty:
                continue  # no statistics for this region: leave its cells as NaN
            in_region = kopp_sel == code
            for col in value_cols:
                # Use a scalar so the fill value broadcasts cleanly over the raster.
                recoded[col] = xr.where(in_region, float(match[col].iloc[0]), recoded[col])

        for col in value_cols:
            if merged[col] is None:
                merged[col] = recoded[col]
            else:
                merged[col] = merged[col].combine_first(recoded[col])

    for col in value_cols:
        ds_kopp_co2[col] = merged[col]

recodeConts()

In [None]:
# Make map plots of (area weighted) seasonal co2 flux amp variance

def plotGradMap(rast, cols, title, fig_size=(30, 7)):
    """Draw a continuous-valued raster on a Plate Carree map with coastlines.

    Parameters
    ----------
    rast : object
        Raster exposing ``.plot(ax=..., transform=..., cmap=...)``; the
        notebook passes xarray DataArrays. Its default colorbar is kept.
    cols : str
        Name of a built-in matplotlib colormap.
    title : str
        Axes title.
    fig_size : sequence of two numbers, optional
        Figure size passed to ``plt.figure``; defaults to the size the
        notebook has always used.

    Returns
    -------
    None
        The figure is drawn as a side effect.
    """
    # matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap performs the same lookup and is still supported.
    cmap = plt.get_cmap(cols)
    fig = plt.figure(figsize=fig_size)
    ax = fig.add_subplot(
        111,
        projection=ccrs.PlateCarree(),
        facecolor="white"
    )
    rast.plot(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cmap=cmap,
    )
    ax.coastlines()
    ax.set_title(title)

# Give each layer a descriptive long_name, then map it
ds_kopp_co2['ampvararea'].attrs.update(
    long_name='CO2 Flux Amplitude Variance * Region Area (sqkm)')
plotGradMap(
    rast=ds_kopp_co2['ampvararea'],
    cols='Oranges',
    title='Regional Area-Weighted Temporal Variance in CO2 Seasonal Amplitude',
)

ds_kopp_co2['ampvar'].attrs.update(long_name='CO2 Flux Amplitude Variance')
plotGradMap(
    rast=ds_kopp_co2['ampvar'],
    cols='Oranges',
    title='Regional Temporal Variance in CO2 Seasonal Amplitude',
)