# 501 Prepare data for global assessment

In this notebook, we prepare the data necessary for our global assessment.

In [7]:
import pyam
import pandas as pd
import numpy as np
import scmdata
from pathlib import Path
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt

Step 1: Read in the necessary data.

In [8]:
# Original novel-CDR pathways, read from the CSV into a pyam IamDataFrame.
original_cdr_path = Path('../data/100_novel_cdr.csv')
cdr_original = pyam.IamDataFrame(original_cdr_path)

pyam.core - INFO: Reading file ../data/100_novel_cdr.csv


In [9]:
# CDR pathways from the 405 step, read from the CSV into a pyam IamDataFrame.
climate_cdr_path = Path('../data/405_cdr_170324.csv')
cdr_climate = pyam.IamDataFrame(climate_cdr_path)

pyam.core - INFO: Reading file ../data/405_cdr_170324.csv


In [10]:
# Lookup table of metrics; the first two CSV columns become the row index.
lookup_path = Path('../data/401_lookup.csv')
metrics = pd.read_csv(lookup_path, index_col=[0, 1])

Step 2: Now, per ensemble member, we want to calculate the additional cumulative CDR between net zero CO2 and 2100.

In [11]:
# Accumulator for the per-pathway differences between adapted and original CDR.
additional_cdr_climate = list()

In [12]:
# Fill the original pathways in place for every year from 2015 through 2100
# (the upper bound of range() is exclusive).
interpolation_years = range(2015, 2101)
cdr_original.interpolate(time=interpolation_years, inplace=True)

In [13]:
# (Re)initialise the accumulator inside this cell so that re-running it never
# appends duplicate entries, and never operates on the IamDataFrame that this
# name is rebound to after concatenation.
additional_cdr_climate = []

for (model, scenario) in tqdm(cdr_original.meta.index):
    # Original CDR trajectory for this model-scenario pair
    cdr_orig = (
        cdr_original
        .filter(model=model, scenario=scenario)
        .timeseries()
    )
    # Adapted CDR trajectories for the same pair
    cdr_alt = (
        cdr_climate
        .filter(model=model, scenario=scenario)
        .timeseries()
    )
    # Subtract original from adapted.
    # NOTE(review): this relies on pandas aligning the two frames on their
    # index/columns; presumably `cdr_alt` carries extra per-ensemble index
    # levels that `cdr_orig` lacks -- confirm the alignment is as intended.
    cdr_diff = pyam.IamDataFrame(cdr_alt - cdr_orig)
    # `cdr_diff` is already an IamDataFrame; no need to wrap it a second time.
    additional_cdr_climate.append(cdr_diff)

  0%|          | 0/95 [00:00<?, ?it/s]

In [14]:
# Merge the per-pathway frames into one IamDataFrame. The name is rebound from
# the list to the merged object, so this cell cannot be re-run on its own.
per_pathway_frames = additional_cdr_climate
additional_cdr_climate = pyam.concat(per_pathway_frames)

In [15]:
# Relabel the variable in place so the differences are not mistaken for the
# absolute novel-CDR series.
variable_renames = {
    'Carbon Dioxide Removal|Novel': 'Carbon Dioxide Removal|Novel|Additional',
}
additional_cdr_climate.rename(variable=variable_renames, inplace=True)

Step 3: Convert all the necessary units from Mt CO2/yr to Gt CO2/yr.

In [16]:
# Additional CDR: Mt CO2/yr -> Gt CO2/yr, converted in place.
additional_cdr_climate.convert_unit(current='Mt CO2/yr', to='Gt CO2/yr', inplace=True)

In [17]:
# Original CDR: Mt CO2/yr -> Gt CO2/yr, converted in place.
cdr_original.convert_unit(current='Mt CO2/yr', to='Gt CO2/yr', inplace=True)

Step 4: Now, we want to calculate the cumulative additional CDR from net zero CO2 to 2100. Because we have already calculated the difference from the original model trajectory, and this difference is expected to be zero before the year of net zero CO2 (see the checks below), we can simply sum up across all values of each timeseries entry.

In [18]:
# Cumulative additional CDR per timeseries row over 2020-2100.
# The totals are kept as floats: formatting them with format(..., 'f') would
# produce 6-decimal strings (an object-dtype Series) and introduce a second
# rounding step before the later rounding to two decimals.
cumulative_cdr_additional = (
    additional_cdr_climate
    .timeseries()
    .apply(
        lambda x: pyam.timeseries.cumulative(
            x,
            first_year=2020,
            last_year=2100
        ),
        axis=1
    )
)

Step 5: Assign this as an additional column to the metrics dataframe. Then write this out for further analysis.

In [19]:
# Add 'run_id' as a further index level, keeping the existing index levels.
# The column is consumed by this call, so the cell cannot be run twice.
metrics = metrics.set_index(keys='run_id', append=True)

In [20]:
# Copy each run's cumulative additional CDR into the metrics table.
idx = pd.IndexSlice
for row_key in metrics.index:
    model_name, scenario_name, member = row_key
    # Rows of this model are skipped and therefore left unfilled.
    if model_name != 'GEM-E3_V2021':
        # Positional lookup: model and scenario are the first two index levels,
        # the run id is the seventh; the four levels in between are wildcards.
        matches = cumulative_cdr_additional.loc[
            idx[model_name, scenario_name, :, :, :, :, member]
        ]
        # Only the first matching entry is used.
        metrics.loc[row_key, 'additional_cdr_gtco2_final'] = matches.values[0]

In [21]:
# Cast the new column to float and round it to two decimals.
final_cdr = metrics['additional_cdr_gtco2_final']
metrics['additional_cdr_gtco2_final'] = final_cdr.astype('float').round(2)

In [22]:
# Write the extended metrics table to Excel; merge_cells=False repeats the
# index values on every row instead of merging them.
output_path = Path('../data/501_metrics_with_final_cdr.xlsx')
metrics.to_excel(output_path, merge_cells=False)

In [23]:
# Display the metrics table, which now includes 'additional_cdr_gtco2_final'.
metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,drawdown_co2,ZEC,change_non_co2,2015_warming,2100_warming,peak_warming,year_peak_warming,cum_emissions_to_peak_GtCO2,cum_emissions_after_peak_GtCO2,eTCREup,eTCREdown,cooling_to_1p5,eTCREdown_first_guess,additional_cdr_gtco2_first_guess,netzero|CO2,additional_cdr_gtco2_final
model,scenario,run_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
AIM/CGE 2.1,CD-LINKS_NPi2020_400,0,0.153213,-0.086132,0.024094,1.107815,1.391478,1.614685,2036,656.867099,-284.181724,0.000772,-0.000539,0.000000,-0.000539,-0.0,2046,0.00
AIM/CGE 2.1,CD-LINKS_NPi2020_400,1,-0.025027,-0.016988,0.054163,1.393747,2.494802,2.498626,2036,656.867099,-284.181724,0.001682,0.000088,-0.994802,-0.001682,591.0,2046,1203.96
AIM/CGE 2.1,CD-LINKS_NPi2020_400,2,0.164106,-0.065352,-0.009230,1.239155,1.542584,1.788063,2036,656.867099,-284.181724,0.000836,-0.000577,-0.042584,-0.000577,74.0,2046,73.90
AIM/CGE 2.1,CD-LINKS_NPi2020_400,3,0.122711,-0.119902,-0.081853,0.983549,1.031367,1.357680,2028,505.812623,-119.595330,0.000740,-0.001026,0.000000,-0.001026,-0.0,2046,0.00
AIM/CGE 2.1,CD-LINKS_NPi2020_400,4,0.143798,-0.097182,0.035314,1.194385,1.483014,1.695355,2037,668.530771,-297.241602,0.000749,-0.000484,0.000000,-0.000484,-0.0,2046,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WITCH-GLOBIOM 4.4,CD-LINKS_NPi2020_400,595,0.205725,-0.155350,-0.189144,1.088553,0.818536,1.368591,2060,667.382684,-398.041085,0.000420,-0.000517,0.000000,-0.000517,-0.0,2055,0.00
WITCH-GLOBIOM 4.4,CD-LINKS_NPi2020_400,596,0.245548,-0.010975,-0.053872,1.295575,1.659452,1.918303,2069,630.549343,-364.192470,0.000988,-0.000674,-0.159452,-0.000674,236.0,2055,238.62
WITCH-GLOBIOM 4.4,CD-LINKS_NPi2020_400,597,0.143254,-0.004426,-0.003011,1.224026,1.486971,1.641801,2069,630.549343,-364.192470,0.000663,-0.000393,0.000000,-0.000393,-0.0,2055,0.00
WITCH-GLOBIOM 4.4,CD-LINKS_NPi2020_400,598,0.167401,-0.042595,-0.142359,1.045557,0.993811,1.350887,2069,630.549343,-364.192470,0.000484,-0.000460,0.000000,-0.000460,-0.0,2055,0.00


# Checks!
Here, we do some checks to ensure we're getting sensible outcomes.
1. Make sure that the additional CDR is zero in the time step before the year of net zero CO2.

In [26]:
# Wide (timeseries) view of the additional CDR, reused by the checks that follow.
additional_cdr_climate_ts = additional_cdr_climate.timeseries()

In [29]:
# Check 1: the additional CDR must be zero in the year before net zero CO2.
for (mod, scen) in tqdm(additional_cdr_climate.meta.index):
    # NOTE(review): the number of ensemble members (600) is hard-coded here.
    for run_id in range(600):
        nz_year = metrics.loc[(mod, scen, run_id), 'netzero|CO2']
        value = (
            additional_cdr_climate
            .filter(
                model=mod,
                scenario=scen,
                year=nz_year - 1,
                ensemble_member=run_id
            )
            .timeseries()
            .values[0]
            # Extract a Python scalar explicitly: calling int() on a
            # one-element array is deprecated since NumPy 1.25.
            .item()
        )
        # NOTE(review): int() truncates towards zero, so any value strictly
        # between -1 and 1 passes -- confirm this tolerance is intended.
        assert int(value) == 0, (
            f'Non-zero additional CDR ({value}) in {nz_year - 1} '
            f'for {mod} / {scen} / run {run_id}'
        )

  0%|          | 0/95 [00:00<?, ?it/s]

2. Now, let us sum up the additional CDR of each timeseries and calculate the 90th percentile for each model-scenario combination.

In [39]:
# Sum every timeseries row over its year columns, take the 90th percentile
# within each model-scenario group, and summarise those percentiles.
summed_per_row = additional_cdr_climate_ts.sum(axis=1)
p90_by_pathway = summed_per_row.groupby(['model', 'scenario']).quantile(q=0.9)
p90_by_pathway.describe().astype('int')

count      95
mean      531
std       286
min         0
25%       268
50%       568
75%       762
max      1127
dtype: int64

**TODO:** The values quoted in the text need to be updated to match the summary statistics above.

In [42]:
# First 30 model-scenario groups: 90th percentile of the row sums, as integers.
row_totals = additional_cdr_climate_ts.sum(axis=1)
(
    row_totals
    .groupby(['model', 'scenario'])
    .quantile(q=0.9)
    .astype(int)
    .head(30)
)

model                  scenario                
AIM/CGE 2.1            CD-LINKS_NPi2020_400        408
AIM/CGE 2.2            EN_NPi2020_300f             456
                       EN_NPi2020_600              857
AIM/Hub-Global 2.0     1.5C                        395
C-ROADS-5.005          Ratchet-1.5-limCDR-noOS     220
                       Ratchet-1.5-noCDR           684
                       Ratchet-1.5-noCDR-noOS      460
COFFEE 1.1             EN_NPi2020_400              963
GCAM 4.2               SSP1-19                     604
GCAM 5.3               R_MAC_30_n0                 752
                       R_MAC_35_n8                 188
                       R_MAC_40_n8                 298
                       R_MAC_45_n8                 462
                       R_MAC_50_n8                 667
IMAGE 3.2              SSP1_SPA1_19I_D_LB          462
                       SSP1_SPA1_19I_LIRE_LB       335
                       SSP1_SPA1_19I_RE_LB         520
                 