In [1]:
import pyam
import pandas as pd
import numpy as np
import copy

import matplotlib.pyplot as plt

from pathlib import Path

<IPython.core.display.Javascript object>

In [2]:
%load_ext nb_black

<IPython.core.display.Javascript object>

Step 1: Let us read in the necessary data.

In [3]:
df = pyam.IamDataFrame(
    Path(
        '../data/101_data.xlsx'
    )
)

pyam - INFO: Running in a notebook, setting up a basic logging at level INFO
pyam.core - INFO: Reading file ../data/101_data.xlsx
pyam.core - INFO: Reading meta indicators


<IPython.core.display.Javascript object>

In [4]:
df_cdr = (
    df
    .filter(
        variable='*Carbon Removal*'
    )
    .convert_unit(
        current='Gt CO2/yr',
        to='Gt CO2/yr',
        factor=-1
    )
)

<IPython.core.display.Javascript object>

In [5]:
df_compiled = pyam.concat(
    [
        df_cdr,
        df.filter(
            variable='*Carbon Removal*',
            keep=False
        )
    ]
)

<IPython.core.display.Javascript object>

Step 2: Now we want to assess the following elements for all the regions (including the World region):
* Cumulative gross CO2 emissions between 2020 - netzero and netzero - 2100
* Cumulative CDR between 2020 - netzero and netzero - 2100
We want to assess this for each pathway category separately.

In [6]:
plot_data_panel_a = dict()

<IPython.core.display.Javascript object>

In [7]:
df_compiled.region

['R5ASIA', 'R5LAM', 'R5MAF', 'R5OECD90+EU', 'R5REF', 'World']

<IPython.core.display.Javascript object>

In [8]:
variable_name_maps = [
    ('AR6 Reanalysis|OSCARv3.2|Emissions|CO2|Gross', 'gross'),
    ('AR6 Reanalysis|OSCARv3.2|Emissions|NonCO2|Direct Only', 'nonco2'),
    ('AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total', 'cdr')    
]

<IPython.core.display.Javascript object>

In [9]:
df_compiled.meta['Year of netzero CO2 emissions (Harm-Infilled) table'].unique()

array([2090, 2077, 2064, 2046, 2100, 2067, 2060, 2070, 2074, 2075, 2055,
       2059, 2062, 2069, 2065, 2072, 2080, 2085, 2061, 2045, 2049, 2030,
       2035, 2033, 2040, 2037, 2041, 2050, 2054, 2058, 2066, 2078, 2094,
       2079, 2073, 2071, 2052, 2086, 2087, 2051, 2048, 2047, 2043, 2082,
       2068, 2056, 2088, 2076, 2063, 2053, 2057, 2095, 2097, 2081, 2099,
       2096, 2044, 2092, 2083, 2084, 2089])

<IPython.core.display.Javascript object>

In [10]:
df_compiled_alt = copy.deepcopy(df_compiled)

<IPython.core.display.Javascript object>

In [11]:
cols_panel_a = ['Category']
for reg in df_compiled.region:
    for var, name in variable_name_maps:
        df_to_assess = (
            df_compiled
            .filter(
                region=reg,
                variable=var
            )
            .timeseries()
        )
        # First set the cumulative value between 2020 and net zero
        cum_2020_netzero = (
            df_to_assess.apply(
                lambda x: pyam.timeseries.cumulative(
                    x,
                    first_year=2020,
                    last_year=df_compiled.meta.loc[x.name[0:2], 'Year of netzero CO2 emissions (Harm-Infilled) table']
                ),
                axis=1
            )
            .round(2)
        )
        # Then set the cumulative value between net zero and 2100
        cum_netzero_2100 = (
            df_to_assess.apply(
                lambda x: pyam.timeseries.cumulative(
                    x,
                    first_year=df_compiled.meta.loc[x.name[0:2], 'Year of netzero CO2 emissions (Harm-Infilled) table'],
                    last_year=2100
                ),
                axis=1
            )
            .round(2)
        )
        # Now assign both of these to the metadata
        df_compiled_alt.set_meta(
            meta=cum_2020_netzero,
            name=f'{name}_{reg}_2020-netzero'
        )
        df_compiled_alt.set_meta(
            meta=cum_netzero_2100,
            name=f'{name}_{reg}_netzero-2100'
        )
        cols_panel_a.append(f'{name}_{reg}_2020-netzero')
        cols_panel_a.append(f'{name}_{reg}_netzero-2100')

<IPython.core.display.Javascript object>

In [12]:
plot_data_panel_a = df_compiled_alt.meta[cols_panel_a]

<IPython.core.display.Javascript object>

Now, we want to add the two gross components to get the total gross emissions.

After this, we want to find the proportion of CO2 versus non-CO2 emissions.

In [13]:
for reg in df_compiled.region:
    for year in ['2020-netzero', 'netzero-2100']:
        plot_data_panel_a.loc[:, f'total-gross_{reg}_{year}'] = (
            plot_data_panel_a.loc[:, f'gross_{reg}_{year}']
            + 
            plot_data_panel_a.loc[:, f'nonco2_{reg}_{year}']
        )
        plot_data_panel_a.loc[:, f'share-grossco2_{reg}_{year}'] = (
            plot_data_panel_a.loc[:, f'gross_{reg}_{year}']
            / 
            plot_data_panel_a.loc[:, f'total-gross_{reg}_{year}']
        )
        plot_data_panel_a.loc[:, f'share-nonco2_{reg}_{year}'] = (
            plot_data_panel_a.loc[:, f'nonco2_{reg}_{year}']
            / 
            plot_data_panel_a.loc[:, f'total-gross_{reg}_{year}']
        )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plot_data_panel_a.loc[:, f'total-gross_{reg}_{year}'] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plot_data_panel_a.loc[:, f'share-grossco2_{reg}_{year}'] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plot_data_panel_a.loc[:, f'share-nonco2_{reg}_{year}'] = (
A value is trying to be set

<IPython.core.display.Javascript object>

Step 3: Now, reformat the data so we have the variable, region, and timeframe in a long dataframe.

In [14]:
plot_data_panel_a = (
    plot_data_panel_a
    .set_index('Category', append=True)
)

<IPython.core.display.Javascript object>

In [15]:
panel_a_reshaped = pd.melt(
    plot_data_panel_a.reset_index(), 
    id_vars=['model', 'scenario', 'Category'], 
    var_name='agg_variable', 
    value_name='Value'
)

<IPython.core.display.Javascript object>

In [16]:
panel_a_reshaped['agg_variable'].str.split('_')[0]

['gross', 'R5ASIA', '2020-netzero']

<IPython.core.display.Javascript object>

In [17]:
panel_a_reshaped.loc[:, 'variable'] = (
    panel_a_reshaped['agg_variable']
    .apply(
        lambda x: x.split('_')[0]
    )
)

<IPython.core.display.Javascript object>

In [18]:
panel_a_reshaped.loc[:, 'region'] = (
    panel_a_reshaped['agg_variable']
    .apply(
        lambda x: x.split('_')[1]
    )
)

<IPython.core.display.Javascript object>

In [19]:
panel_a_reshaped.loc[:, 'timeframe'] = (
    panel_a_reshaped['agg_variable']
    .apply(
        lambda x: x.split('_')[2]
    )
)

<IPython.core.display.Javascript object>

In [20]:
panel_a_reshaped.drop(
    columns='agg_variable',
    inplace=True
)

<IPython.core.display.Javascript object>

In [21]:
(
    panel_a_reshaped
    .groupby(
        ['Category', 'region', 'variable', 'timeframe']
    )
    .describe()
    .round(2)
    .loc[
        :, pd.IndexSlice[:, ['count', '25%', '50%', '75%']]
    ]
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Value,Value,Value,Value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,count,25%,50%,75%
Category,region,variable,timeframe,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
C1,R5ASIA,cdr,2020-netzero,70.0,-100.33,-66.16,-52.99
C1,R5ASIA,cdr,netzero-2100,70.0,-194.53,-141.68,-123.41
C1,R5ASIA,gross,2020-netzero,70.0,233.99,276.80,311.16
C1,R5ASIA,gross,netzero-2100,70.0,84.14,121.16,190.51
C1,R5ASIA,nonco2,2020-netzero,68.0,102.67,119.64,139.50
...,...,...,...,...,...,...,...
C3,World,share-grossco2,netzero-2100,229.0,0.49,0.59,0.65
C3,World,share-nonco2,2020-netzero,229.0,0.27,0.30,0.33
C3,World,share-nonco2,netzero-2100,229.0,0.35,0.41,0.51
C3,World,total-gross,2020-netzero,229.0,1506.77,1674.83,1860.66


<IPython.core.display.Javascript object>

In [22]:
panel_a_reshaped.to_csv(
    Path(
        '../data/203_regional_data_panel_a.csv'
    )
)

<IPython.core.display.Javascript object>

Step 4: We want to identify the regional patterns in contributions to the regional net GHG reductions:
* 2020 - 2030
* 2030 - 2050
* 2050 - 2100

In [23]:
time_periods = [
    (2020, 2030),
    (2030, 2050),
    (2050, 2100)
]

<IPython.core.display.Javascript object>

In [24]:
def format_output_data_frame(df, cols_select,  variable_name, summarise=True):
    """Function to help format the output dataframe"""
    cols_to_melt = set(cols_select) - set('Category')
    melted_data = pd.melt(
        frame=df,
        id_vars=['model', 'scenario', 'Category'],
        value_vars=cols_to_melt
    )
    #Now, we split the variable column
    melted_data.loc[:, 'region'] = (
        melted_data.loc[:,'variable'].apply(
            lambda x: x.split('_')[1]
        )
    )
    melted_data.loc[:, 'timeframe'] = (
        melted_data.loc[:,'variable'].apply(
            lambda x: x.split('_')[2] + '-' + x.split('_')[3]
        )
    )
    melted_data.loc[:, 'variable'] = variable_name
    if summarise:
        summarised_data_raw = (
            melted_data
            .groupby(['Category', 'region', 'timeframe'])['value']
            .describe()
            .loc[:, ['25%', '50%', '75%']]
        )
        summarised_data_raw.loc[:, 'summary'] = (
            summarised_data_raw.apply(
                lambda x: f"{x['50%']}% ({x['25%']}-{x['75%']})",
                axis=1
            )
            
        )
        summarised_data_clean = summarised_data_raw.loc[:, 'summary'].unstack(level=-1)
        summarised_data_clean.loc[:, 'variable'] = variable_name
    return melted_data, summarised_data_clean

<IPython.core.display.Javascript object>

Step 5: Now, for each region, we want to take a closer look at the role of gross reductions, non CO2 and CDR.

In [25]:
def crunch_through_and_summarise_per_region(df, region):
    """Helper function to crunch through the regional summaries"""
    total_variable = 'AR6 Reanalysis|OSCARv3.2|Emissions|Kyoto Gases|Direct Only'
    component_variables = [
        ('AR6 Reanalysis|OSCARv3.2|Emissions|CO2|Gross', 'gross'),
        ('AR6 Reanalysis|OSCARv3.2|Emissions|NonCO2|Direct Only', 'nonco2'),
        ('AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total', 'cdr')    
    ]
    # Step 1: Pull out the regional data
    regional_data = (
        df
        .filter(
            region=region
        )
    )
    # Step 2
    for start, end in time_periods:
        # First calculate the total mitigation
        df_net = (
            regional_data
            .filter(year=range(start, end+1))
            .offset(year=start)
        )
        # Calculate the total mitigation burden
        regional_data.set_meta(
            meta=(
                df_net
                .filter(variable=total_variable, year=end)
                .timeseries()
                .droplevel(['region', 'variable', 'unit'])[end]
            ),
            name=f'total_mitigation_{start}_{end}'
        )
        # Per variable calculate the contributions to these reductions
        for variable, label in component_variables:
            ts = (
                df_net
                .filter(variable=variable)
                .timeseries()
                .apply(
                    lambda x: x * 100 / regional_data.meta.loc[x.name[0:2], f'total_mitigation_{start}_{end}'],
                    axis=1
                )
                .round(0)
            )
            regional_data.set_meta(
                meta=ts.droplevel(['region', 'variable', 'unit'])[end],
                name=f'{label}_{region}_{start}_{end}'
            )
    # Step 3
    cols = [x[1] for x in component_variables]
    df = []
    df_sum = []
    for var, label in component_variables:
        cols = [x for x in regional_data.meta if f'{label}_{region}' in x]
        cols.append('Category')
        out, out_sum = (
            format_output_data_frame(
                regional_data.meta.reset_index(),
                cols_select=cols,
                variable_name=var
            )
        )
        df.append(out)
        df_sum.append(out_sum)
    
    compiled = pd.concat(df)
    summary = (
        pd.concat(df_sum)
        .set_index('variable', append=True)
        .sort_index(level=0)
    )
    return compiled, summary

<IPython.core.display.Javascript object>

In [26]:
regs = [x for x in df_compiled.region if 'World' not in x]

<IPython.core.display.Javascript object>

In [27]:
regs

['R5ASIA', 'R5LAM', 'R5MAF', 'R5OECD90+EU', 'R5REF']

<IPython.core.display.Javascript object>

In [28]:
final_reg_summary = []
final_reg_comp = []
for r in regs:
    comp, sum = crunch_through_and_summarise_per_region(
        df_compiled,
        r
    )
    final_reg_summary.append(sum)
    final_reg_comp.append(comp)

<IPython.core.display.Javascript object>

In [29]:
final_reg_summary_pd = pd.concat(final_reg_summary)

<IPython.core.display.Javascript object>

In [30]:
final_reg_summary_pd.to_excel(
    Path(
        'figures/SI_table2.xlsx'
    )
)

<IPython.core.display.Javascript object>

Statements: Patterns for the R5ASIA region.

For the Asia region, we note a dominant contribution (median of around 80%) from gross CO2 emission reductions across all three time periods in the C1 pathways with the balance largely coming from non-CO2 reductions followed by CDR. However, in C2 and C3 pathways where there is slower global near-term action, CDR plays a larger role beyond 2050, accounting for over 20% of the regional net GHG reductions.

In [31]:
final_reg_summary_pd.loc[
    pd.IndexSlice['C1', 'R5ASIA', 'AR6 Reanalysis|OSCARv3.2|Emissions|CO2|Gross'],
    :
]

timeframe
2020-2030    78.0% (66.75-82.0)
2030-2050    79.0% (71.0-85.25)
2050-2100    80.0% (60.0-123.5)
Name: (C1, R5ASIA, AR6 Reanalysis|OSCARv3.2|Emissions|CO2|Gross), dtype: object

<IPython.core.display.Javascript object>

In [32]:
final_reg_summary_pd.loc[
    pd.IndexSlice[:, 'R5ASIA', 'AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total'],
    :
]

Unnamed: 0_level_0,Unnamed: 1_level_0,timeframe,2020-2030,2030-2050,2050-2100
Category,region,variable,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
C1,R5ASIA,AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total,7.0% (3.0-17.0),12.5% (4.0-20.0),-5.5% (-30.0-18.5)
C2,R5ASIA,AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total,4.0% (2.0-11.0),9.0% (4.0-18.25),27.0% (10.75-32.0)
C3,R5ASIA,AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total,4.0% (-0.0-15.0),7.0% (4.0-12.0),21.0% (11.0-42.0)


<IPython.core.display.Javascript object>

Statement: Patterns for OECD+EU

In [33]:
final_reg_summary_pd.loc[
    pd.IndexSlice[:, 'R5OECD90+EU', :],
    :
].sort_index(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,timeframe,2020-2030,2030-2050,2050-2100
Category,region,variable,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
C1,R5OECD90+EU,AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total,3.0% (2.0-6.0),21.5% (13.0-33.0),42.5% (-28.25-73.75)
C2,R5OECD90+EU,AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total,2.0% (-0.0-4.0),15.0% (8.75-25.5),48.0% (34.0-66.0)
C3,R5OECD90+EU,AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total,2.0% (1.0-4.0),11.0% (7.0-24.5),35.0% (24.0-66.0)
C1,R5OECD90+EU,AR6 Reanalysis|OSCARv3.2|Emissions|CO2|Gross,76.5% (73.0-82.0),71.0% (62.75-77.0),52.0% (22.0-105.0)
C2,R5OECD90+EU,AR6 Reanalysis|OSCARv3.2|Emissions|CO2|Gross,78.0% (75.0-84.0),73.0% (67.0-79.0),47.0% (30.75-60.0)
C3,R5OECD90+EU,AR6 Reanalysis|OSCARv3.2|Emissions|CO2|Gross,74.0% (68.0-78.0),74.0% (66.0-80.0),58.0% (29.0-69.0)
C1,R5OECD90+EU,AR6 Reanalysis|OSCARv3.2|Emissions|NonCO2|Direct Only,17.5% (14.0-23.0),7.0% (5.0-14.0),8.0% (2.0-19.25)
C2,R5OECD90+EU,AR6 Reanalysis|OSCARv3.2|Emissions|NonCO2|Direct Only,18.0% (15.0-21.0),11.0% (9.0-14.0),6.0% (2.0-8.0)
C3,R5OECD90+EU,AR6 Reanalysis|OSCARv3.2|Emissions|NonCO2|Direct Only,23.0% (19.0-28.5),10.0% (7.0-15.0),5.0% (2.0-8.0)


<IPython.core.display.Javascript object>

Statement: Patterns for Latin America

In [34]:
final_reg_summary_pd.loc[
    pd.IndexSlice['C1', 'R5LAM', 'AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total'],
    :
]

timeframe
2020-2030      18.0% (14.0-28.5)
2030-2050      32.0% (22.0-42.0)
2050-2100    79.5% (48.75-136.0)
Name: (C1, R5LAM, AR6 Reanalysis|OSCARv3.2|Carbon Removal|Total), dtype: object

<IPython.core.display.Javascript object>