In [1]:
import pyam
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from pathlib import Path

<IPython.core.display.Javascript object>

Step 1: Let us read in the necessary data.

In [2]:
# Load the scenario data (timeseries plus meta indicators) into a pyam object
df = pyam.IamDataFrame(Path('../data/101_data.xlsx'))

pyam - INFO: Running in a notebook, setting up a basic logging at level INFO
pyam.core - INFO: Reading file ../data/101_data.xlsx
pyam.core - INFO: Reading meta indicators


In [3]:
# Select the carbon-removal variables and multiply their values by -1.
# The "conversion" keeps the same unit label and only applies the factor.
df_cdr = df.filter(variable='*Carbon Removal*').convert_unit(
    current='Gt CO2/yr', to='Gt CO2/yr', factor=-1
)

In [4]:
# Recombine the sign-flipped removals with every variable that is NOT a
# carbon-removal variable (keep=False inverts the filter).
df_compiled = pyam.concat(
    [df_cdr, df.filter(variable='*Carbon Removal*', keep=False)]
)

Step 2: We first want to identify the regional contributions to the total mitigation burden (i.e., net GHG emission reductions) in the following time periods:
* 2020 - 2030
* 2030 - 2050
* 2050 - 2100

In [5]:
# (start_year, end_year) windows over which contributions are computed
time_periods = [(2020, 2030), (2030, 2050), (2050, 2100)]

In [6]:
# Variable whose change over each period is used both as the World total
# and as the per-region numerator in the contribution calculation below.
variable = 'AR6 Reanalysis|OSCARv3.2|Emissions|Kyoto Gases|Direct Only'

In [7]:
for start, end in time_periods:
    total_col = f'total_mitigation_{start}_{end}'
    # Restrict to the period and offset all values against the `start` year
    df_net = df_compiled.filter(year=range(start, end + 1)).offset(year=start)

    # The World value of `variable` at `end` is stored as the period total,
    # indexed by (model, scenario) once the other index levels are dropped.
    world_change = (
        df_net
        .filter(variable=variable, year=end, region='World')
        .timeseries()
        .droplevel(['region', 'variable', 'unit'])
    )
    df_compiled.set_meta(meta=world_change[end], name=total_col)

    # Express each region's value as a whole-number percentage of that total
    for region in df_compiled.region:
        regional_change = (
            df_net
            .filter(region=region, variable=variable)
            .timeseries()
        )
        ts = regional_change.apply(
            # row.name[0:2] is the (model, scenario) key into the meta table
            lambda row: row * 100 / df_compiled.meta.loc[row.name[0:2], total_col],
            axis=1
        ).round(0)
        df_compiled.set_meta(
            meta=ts.droplevel(['region', 'variable', 'unit'])[end],
            name=f'contribution_{region}_{start}_{end}'
        )

In [8]:
# Names of all meta columns holding a regional contribution
cols = [name for name in df_compiled.meta.columns if 'contribution' in name]


In [9]:
# 'Category' is used downstream as an identifier/grouping column. The guard
# keeps this cell idempotent: re-running it must not append a duplicate entry.
if 'Category' not in cols:
    cols.append('Category')

In [10]:
# Keep only the contribution columns (plus 'Category') from the meta table
data_net = df_compiled.meta[cols]

In [11]:
# Move the meta index into ordinary columns. The frame is rebuilt from the
# meta table instead of from `data_net` itself so that re-running this cell
# does not reset the index a second time (which would add an 'index' column).
data_net = df_compiled.meta.loc[:, cols].reset_index()

In [12]:
def format_output_data_frame(df, cols_select,  variable_name, summarise=True):
    """Reshape indicator columns to long format and optionally summarise them.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'model', 'scenario', 'Category' and every
        indicator column named in `cols_select`.
    cols_select : iterable of str
        Indicator columns to melt, named ``<label>_<region>_<start>_<end>``
        (the label itself must not contain an underscore). A 'Category'
        entry is accepted and ignored, since it is an identifier column.
    variable_name : str
        Value written to the 'variable' column of both outputs.
    summarise : bool, default True
        If True, also build the per-(Category, region) quartile summary.

    Returns
    -------
    melted_data : pandas.DataFrame
        Long-format data with the identifier columns, 'variable', 'value',
        'region' and 'timeframe'.
    summarised_data_clean : pandas.DataFrame or None
        One row per (Category, region), one column per timeframe holding
        ``"<median>% (<25%>-<75%>)"``, plus a 'variable' column.
        ``None`` when `summarise` is False.
    """
    # Exclude the identifier column by name. (set('Category') would be a set
    # of single characters and remove nothing.) A list also keeps the column
    # order deterministic.
    cols_to_melt = [col for col in cols_select if col != 'Category']
    melted_data = pd.melt(
        frame=df,
        id_vars=['model', 'scenario', 'Category'],
        value_vars=cols_to_melt
    )
    # Split '<label>_<region>_<start>_<end>'. The last two pieces are the
    # years; everything between the label and the years is the region, so
    # region names containing underscores survive intact.
    name_parts = melted_data['variable'].apply(lambda name: name.split('_'))
    melted_data['region'] = name_parts.apply(
        lambda parts: '_'.join(parts[1:-2])
    )
    melted_data['timeframe'] = name_parts.apply(
        lambda parts: f'{parts[-2]}-{parts[-1]}'
    )
    melted_data['variable'] = variable_name

    # Defined up front so that summarise=False returns cleanly
    summarised_data_clean = None
    if summarise:
        quartiles = (
            melted_data
            .groupby(['Category', 'region', 'timeframe'])['value']
            .describe()
            .loc[:, ['25%', '50%', '75%']]
        )
        summary_text = quartiles.apply(
            lambda q: f"{q['50%']}% ({q['25%']}-{q['75%']})",
            axis=1
        )
        # Pivot the innermost index level (timeframe) into columns
        summarised_data_clean = summary_text.unstack(level=-1)
        summarised_data_clean.loc[:, 'variable'] = variable_name
    return melted_data, summarised_data_clean

In [13]:
# Long-format data and quartile summary of the net Kyoto-gas contributions
formatted_net_data, summary_net_data = format_output_data_frame(
    df=data_net,
    cols_select=cols,
    variable_name='contribution_net_kyoto',
)

In [14]:
# Print the summary as a Markdown-formatted table
print(summary_net_data.to_markdown())

|                       | 2020-2030            | 2030-2050            | 2050-2100            | variable               |
|:----------------------|:---------------------|:---------------------|:---------------------|:-----------------------|
| ('C1', 'R5ASIA')      | 45.0% (40.0-48.0)    | 35.0% (32.0-39.0)    | 47.0% (26.5-85.75)   | contribution_net_kyoto |
| ('C1', 'R5LAM')       | 11.0% (9.0-12.25)    | 12.0% (11.0-13.0)    | 7.5% (-41.0-20.25)   | contribution_net_kyoto |
| ('C1', 'R5MAF')       | 9.5% (8.0-11.0)      | 15.0% (13.0-16.0)    | 14.5% (0.75-48.75)   | contribution_net_kyoto |
| ('C1', 'R5OECD90+EU') | 25.0% (24.0-28.0)    | 30.0% (25.0-31.0)    | 19.5% (4.75-26.0)    | contribution_net_kyoto |
| ('C1', 'R5REF')       | 8.0% (7.0-8.0)       | 8.0% (6.0-9.0)       | 9.0% (5.75-12.0)     | contribution_net_kyoto |
| ('C1', 'World')       | 100.0% (100.0-100.0) | 100.0% (100.0-100.0) | 100.0% (100.0-100.0) | contribution_net_kyoto |
| ('C2', 'R5ASIA')      | 41.5% (28.25-4

In [15]:
# Write the net-contribution summary table to disk
summary_net_data.to_excel(Path('figures/SI_table2.xlsx'))

Step 3: Now, for each region, we want to take a closer look at the roles of gross CO2 emission reductions, non-CO2 emissions, and carbon dioxide removal (CDR).

In [16]:
def crunch_through_and_summarise_per_region(df, region):
    """Summarise how gross CO2, non-CO2 and CDR contribute to one region's net change.

    For every (start, end) pair in the notebook-level `time_periods`, the
    region's Kyoto-gas value at `end` (offset against `start`) is stored as
    the period total, and each component variable is expressed as a
    whole-number percentage of that total. The resulting meta columns are
    then reshaped with `format_output_data_frame`.

    Parameters
    ----------
    df : pyam.IamDataFrame
        Data containing the total and component variables for `region`.
    region : str
        Region to filter on; also embedded in the generated meta column names.

    Returns
    -------
    compiled : pandas.DataFrame
        Long-format contributions for all three components, concatenated.
    summary : pandas.DataFrame
        Concatenated quartile summaries with 'variable' appended to the
        index and sorted on the first index level.

    Notes
    -----
    Relies on the globals `time_periods` and `format_output_data_frame`.
    """
    total_variable = 'AR6 Reanalysis|OSCARv3.2|Emissions|Kyoto Gases|Direct Only'
    component_variables = [
        ('AR6 Reanalysis|OSCARv3.2|Emissions|CO2|Gross', 'gross'),
        ('AR6 Reanalysis|OSCARv3.2|Emissions|NonCO2|Direct Only', 'nonco2'),
        ('AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total', 'cdr')
    ]
    # Step 1: Pull out the regional data
    # NOTE(review): assumes filter() returns a new object, so the set_meta
    # calls below do not modify the caller's `df` - confirm against pyam docs.
    regional_data = df.filter(region=region)

    # Step 2: per period, store the total and each component's share as meta
    for start, end in time_periods:
        total_col = f'total_mitigation_{start}_{end}'
        # Restrict to the period and offset all values against the start year
        df_net = (
            regional_data
            .filter(year=range(start, end+1))
            .offset(year=start)
        )
        # Calculate the total mitigation burden
        regional_data.set_meta(
            meta=(
                df_net
                .filter(variable=total_variable, year=end)
                .timeseries()
                .droplevel(['region', 'variable', 'unit'])[end]
            ),
            name=total_col
        )
        # Per variable calculate the contributions to these reductions
        for variable, label in component_variables:
            ts = (
                df_net
                .filter(variable=variable)
                .timeseries()
                .apply(
                    # x.name[0:2] is the (model, scenario) key into meta
                    lambda x: x * 100 / regional_data.meta.loc[x.name[0:2], total_col],
                    axis=1
                )
                .round(0)
            )
            regional_data.set_meta(
                meta=ts.droplevel(['region', 'variable', 'unit'])[end],
                name=f'{label}_{region}_{start}_{end}'
            )

    # Step 3: reshape and summarise each component's meta columns
    meta_table = regional_data.meta.reset_index()
    compiled_parts = []
    summary_parts = []
    for var, label in component_variables:
        # Match on the exact '<label>_<region>_' prefix written above, so
        # unrelated meta columns that merely contain the text are not picked up
        prefix = f'{label}_{region}_'
        cols = [x for x in regional_data.meta if x.startswith(prefix)]
        cols.append('Category')
        out, out_sum = format_output_data_frame(
            meta_table,
            cols_select=cols,
            variable_name=var
        )
        compiled_parts.append(out)
        summary_parts.append(out_sum)

    compiled = pd.concat(compiled_parts)
    summary = (
        pd.concat(summary_parts)
        .set_index('variable', append=True)
        .sort_index(level=0)
    )
    return compiled, summary

In [17]:
# All regions except the global aggregate
regs = [name for name in df_compiled.region if 'World' not in name]

In [18]:
# Display the regions that the per-region summary loop will iterate over
regs

['R5ASIA', 'R5LAM', 'R5MAF', 'R5OECD90+EU', 'R5REF']

In [19]:
# Collect the quartile summary of each region; the long-format data is not
# needed here and is discarded. The loop variable is deliberately not called
# `sum`, which would overwrite the builtin for the rest of the notebook.
final_reg_summary = []
for r in regs:
    _, region_summary = crunch_through_and_summarise_per_region(
        df_compiled,
        r
    )
    final_reg_summary.append(region_summary)

In [21]:
# Stack the per-region summaries into a single table
final_reg_summary_pd = pd.concat(final_reg_summary)

In [24]:
# Write the combined regional summary table to disk
final_reg_summary_pd.to_excel(Path('figures/SI_table3.xlsx'))