In [1]:
import pyam
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

<IPython.core.display.Javascript object>

In [2]:
%load_ext nb_black

<IPython.core.display.Javascript object>

Step 1: Read in the compiled CDR dataset.

In [3]:
# Load the compiled CDR dataset into a pyam IamDataFrame.
df = pyam.IamDataFrame(Path('../data/101_data.xlsx'))

pyam - INFO: Running in a notebook, setting up a basic logging at level INFO
pyam.core - INFO: Reading file ../data/101_data.xlsx
pyam.core - INFO: Reading meta indicators


<IPython.core.display.Javascript object>

Step 2: Read in the filter sets for land and pe bio that we prepared in the previous notebook.

In [4]:
# Land filter set; the first two CSV columns become the row MultiIndex
# (presumably model and scenario - TODO confirm against the previous notebook).
filter_set_land = pd.read_csv(Path('../data/103_filter_set_land.csv'), index_col=[0, 1])

<IPython.core.display.Javascript object>

In [5]:
# Bioenergy filter set; the first two CSV columns become the row MultiIndex
# (presumably model and scenario - TODO confirm against the previous notebook).
filter_set_bio = pd.read_csv(Path('../data/103_filter_set_bio.csv'), index_col=[0, 1])

<IPython.core.display.Javascript object>

Step 3: Concatenate the two so that we can assign this to the global CDR dataset.

In [6]:
# Place the land and bioenergy filter sets side by side, aligned on their index.
# NOTE(review): the combined frame ends up with two columns named 'exclude',
# so only select columns from it by names that are unique.
filter_all = pd.concat([filter_set_land, filter_set_bio], axis='columns')

<IPython.core.display.Javascript object>

In [7]:
# Keep only the filter rows whose index labels also occur in the pyam metadata.
filter_all_overlap = filter_all.loc[
    df.meta.index.intersection(filter_all.index)
]

<IPython.core.display.Javascript object>

Step 4: Create two new metadata columns and assign the land-cover and bioenergy net-zero values to them.

In [8]:
# Inspect the available columns. Note that 'exclude' is listed twice
# (presumably once per filter set), so it cannot be selected unambiguously by name.
filter_all_overlap.columns

Index(['exclude', 'land_cover_max', 'land_cover_netzero', 'land_sustainable',
       'exclude', 'pe_bio_max', 'pe_bio_netzero', 'bio_sustainable'],
      dtype='object')

<IPython.core.display.Javascript object>

In [9]:
# Filter-set columns that are copied into the pyam metadata in the next cell.
cols = ['land_cover_netzero', 'pe_bio_netzero']

<IPython.core.display.Javascript object>

In [10]:
# Attach each selected filter column to the metadata under its own name.
for column_name in cols:
    df.set_meta(name=column_name, meta=filter_all_overlap[column_name])

<IPython.core.display.Javascript object>

Step 5: For each region other than `World`, we now want to calculate the cumulative conventional (land-based) and novel CDR over two time periods:
* 2020 to global net zero CO2
* Global net zero CO2 to 2100

In [11]:
# Per-scenario year of net-zero CO2 emissions, taken from the metadata.
netzero_year = df.meta['Year of netzero CO2 emissions (Harm-Infilled) table']

<IPython.core.display.Javascript object>

In [12]:
# Quick check of the distinct net-zero years present in the metadata.
netzero_year.unique()

array([2090, 2077, 2064, 2046, 2100, 2067, 2060, 2070, 2074, 2075, 2055,
       2059, 2062, 2069, 2065, 2072, 2080, 2085, 2061, 2045, 2049, 2030,
       2035, 2033, 2040, 2037, 2041, 2050, 2054, 2058, 2066, 2078, 2094,
       2079, 2073, 2071, 2052, 2086, 2087, 2051, 2048, 2047, 2043, 2082,
       2068, 2056, 2088, 2076, 2063, 2053, 2057, 2095, 2097, 2081, 2099,
       2096, 2044, 2092, 2083, 2084, 2089])

<IPython.core.display.Javascript object>

In [13]:
# Variable names used below to select the timeseries for each CDR type:
# 'Non-Land' removals are treated as novel CDR, 'Land' removals as conventional CDR.
novel_cdr = 'AR6 Reanalysis|OSCARv3.2|Carbon Removal|Non-Land'
conventional_cdr = 'AR6 Reanalysis|OSCARv3.2|Carbon Removal|Land'

<IPython.core.display.Javascript object>

In [14]:
# Per-region cumulative CDR, stored as metadata columns named
# '<cdr type>_<region>_<period>'. The 'World' region is skipped here.
# NOTE(review): both periods use the net-zero year as an endpoint, so that year
# appears to be included in each total - confirm before summing the two columns.
for reg in (name for name in df.region if name != 'World'):
    for var, meta_prefix in (
        (novel_cdr, 'novel_cdr'),
        (conventional_cdr, 'conventional_cdr'),
    ):
        ts_data = df.filter(variable=var, region=reg).timeseries()
        # Period 1: 2020 up to the scenario's net-zero year. `row.name[0:2]` is
        # the leading pair of the row label, used as the key into `netzero_year`.
        df.set_meta(
            name=f'{meta_prefix}_{reg}_2020_netzero',
            meta=ts_data.apply(
                lambda row: pyam.cumulative(
                    row,
                    first_year=2020,
                    last_year=netzero_year[row.name[0:2]],
                ),
                axis=1,
            ),
        )
        # Period 2: the scenario's net-zero year up to 2100.
        df.set_meta(
            name=f'{meta_prefix}_{reg}_netzero_2100',
            meta=ts_data.apply(
                lambda row: pyam.cumulative(
                    row,
                    first_year=netzero_year[row.name[0:2]],
                    last_year=2100,
                ),
                axis=1,
            ),
        )

<IPython.core.display.Javascript object>

Step 6: Now, we want a processing function that takes the following information:
* The column to categorise by
* Bins
* Bin labels

And then returns a melted dataframe with the categories that are ready to plot.

In [15]:
def _split_cdr_column(name):
    """Split a '<cdr_type>_<region>_<timeframe>' column name into its parts.

    The CDR type is the first two '_'-separated tokens and the timeframe the
    last two. Everything in between is the region, so region names may contain
    underscores. A name without a region part is attributed to 'World'.

    Raises
    ------
    ValueError
        If the name has fewer than four '_'-separated tokens.
    """
    tokens = name.split('_')
    if len(tokens) < 4:
        raise ValueError(
            f"Cannot split '{name}' into cdr type, region and timeframe"
        )
    region = '_'.join(tokens[2:-2]) or 'World'
    return '_'.join(tokens[:2]), region, '_'.join(tokens[-2:])


def process_data(
        col,
        bins,
        bin_labels,
        meta=None
):
    """Bin one metadata column and return the CDR columns in long format.

    Parameters
    ----------
    col : str
        Metadata column to categorise by.
    bins
        Bins passed on to `pd.cut`. Values that fall outside every bin end up
        without a category (NaN).
    bin_labels : dict
        Mapping from each bin (as produced by `pd.cut`) to its display label.
    meta : pandas.DataFrame, optional
        Metadata table to work on. Defaults to the notebook-level `df.meta`.

    Returns
    -------
    pandas.DataFrame
        One row per index entry and CDR column, with the columns 'model',
        'scenario', 'Category', f'category_{col}', 'variable', 'value',
        'cdr_type', 'region' and 'timeframe'.

    Notes
    -----
    Side effect: the column f'category_{col}' is written into `meta` in place.
    Later cells read these category columns back from `df.meta`.
    """
    if meta is None:
        meta = df.meta
    meta.loc[:, f'category_{col}'] = (
        pd.cut(meta.loc[:, col], bins=bins).map(bin_labels)
    )
    # Step 6.1: Decide which columns need to be melted.
    cols_to_melt = [
        x for x in meta.columns if 'novel_cdr' in x or 'conventional_cdr' in x
    ]
    # Parse every column name once, up front, so a malformed name fails early
    # instead of producing silently misaligned region/timeframe labels.
    parts_by_column = {name: _split_cdr_column(name) for name in cols_to_melt}
    # Step 6.2: Melt the dataframe
    melted_data = pd.melt(
        frame=meta.reset_index(),
        id_vars=['model', 'scenario', 'Category', f'category_{col}'],
        value_vars=cols_to_melt
    )
    # Step 6.3: Split the variable column
    parts = [parts_by_column[name] for name in melted_data['variable']]
    # Re-joining the parts inserts 'World' into names that carried no region.
    melted_data['variable'] = ['_'.join(p) for p in parts]
    melted_data['cdr_type'] = [p[0] for p in parts]
    melted_data['region'] = [p[1] for p in parts]
    melted_data['timeframe'] = [p[2] for p in parts]
    return melted_data

<IPython.core.display.Javascript object>

Step 7: Let us start with the land area filters.

In [16]:
# Land-cover bins; `from_tuples` builds right-closed intervals by default,
# so a value of exactly 100 falls in the first bin.
bins_for_land = pd.IntervalIndex.from_tuples(
    [(0, 100), (100, 400), (400, 2000)]
)
# Display label for each interval, in the same order as the bins.
land_labels = {
    interval: label
    for interval, label in zip(bins_for_land, ['0-100', '100-400', '400+'])
}
land_estimates = process_data(
    bin_labels=land_labels,
    bins=bins_for_land,
    col='land_cover_netzero',
)

<IPython.core.display.Javascript object>

Step 8: Now let us move on to the bioenergy demand filters.

In [17]:
# Bioenergy bins; `from_tuples` builds right-closed intervals by default,
# so a value of exactly 100 falls in the first bin.
bins_for_pe = pd.IntervalIndex.from_tuples(
    [(0, 100), (100, 200), (200, 1000)]
)
# Display label for each interval, in the same order as the bins.
pe_labels = {
    interval: label
    for interval, label in zip(bins_for_pe, ['0-100', '100-200', '200+'])
}
bio_estimates = process_data(
    bin_labels=pe_labels,
    bins=bins_for_pe,
    col='pe_bio_netzero',
)

<IPython.core.display.Javascript object>

Step 9: Write this out for further processing.

In [18]:
# Persist the melted land-category table for the downstream notebooks.
land_estimates.to_csv(Path('../data/105_land_categories.csv'))

<IPython.core.display.Javascript object>

In [19]:
# Persist the melted bioenergy-category table for the downstream notebooks.
bio_estimates.to_csv(Path('../data/105_bio_categories.csv'))

<IPython.core.display.Javascript object>

Step 10: Calculate the global cumulative removals (total) and write this out separately for assessment.

In [20]:
# Wide timeseries of every variable ending in 'Carbon Removal|Total' for the World region.
total_removals = df.filter(
    region='World',
    variable='*Carbon Removal|Total',
).timeseries()

<IPython.core.display.Javascript object>

In [21]:
# Cumulative total removals from 2020 up to each scenario's net-zero CO2 year.
# `row.name[0:2]` is the leading pair of the timeseries row label and is used
# as the row key into `df.meta`.
df.set_meta(
    total_removals.apply(
        lambda row: pyam.cumulative(
            row,
            first_year=2020,
            last_year=df.meta.loc[
                row.name[0:2],
                'Year of netzero CO2 emissions (Harm-Infilled) table',
            ],
        ),
        axis=1,
    ),
    name='cumulative_removals_2020_netzero',
)

<IPython.core.display.Javascript object>

In [22]:
# Metadata subset for the global assessment: the scenario category, the
# cumulative removals and the two bin categories written by `process_data`.
df_world_analysis = df.meta.loc[
    :,
    [
        'Category',
        'cumulative_removals_2020_netzero',
        'category_pe_bio_netzero',
        'category_land_cover_netzero',
    ],
]

<IPython.core.display.Javascript object>

In [23]:
# Persist the global summary table for separate assessment.
df_world_analysis.to_csv(Path('../data/105_world.csv'))

<IPython.core.display.Javascript object>