# 402 First guess CDR pathways

In this notebook, we compile a dataset of the first guess CDR pathways based purely on the assessed metric eTCREdown.

In [1]:
import pyam
import pandas as pd

from pathlib import Path
import sys
import concurrent.futures
from tqdm.notebook import tqdm

sys.path.append('../scripts')

from cdr import construct_new_cdr_pathway

%load_ext autoreload
%autoreload 2

<IPython.core.display.Javascript object>

Step 1: Read in the dataset of novel CDR estimates.

In [2]:
# Load the compiled novel-CDR estimates from CSV into a pyam IamDataFrame.
novel_cdr_compiled = pyam.IamDataFrame(Path('../data/100_novel_cdr.csv'))

pyam - INFO: Running in a notebook, setting up a basic logging at level INFO
pyam.core - INFO: Reading file ../data/100_novel_cdr.csv


Step 2: Read in the metrics dataset and drop the scenarios that are not present in the novel CDR dataset.

In [3]:
# The first three CSV columns form the row MultiIndex; the pathway-construction
# cell further down unpacks each index entry as (model, scenario, ensemble_member).
metrics_first_guess = pd.read_csv(Path('../data/401_lookup.csv'), index_col=[0, 1, 2])

In [4]:
# Index entries of the metrics table (with the last index level moved out of
# the index) that do not appear in the CDR dataset's meta index.
# NOTE(review): presumably both sides are (model, scenario) pairs - confirm.
missing_scenarios = (
    metrics_first_guess
    .reset_index(level=-1)
    .index
    .difference(novel_cdr_compiled.meta.index)
)

In [5]:
# Remove every metrics row whose index matches one of the missing scenarios.
metrics_first_guess = metrics_first_guess.drop(index=missing_scenarios)

Step 3: Construct the new CDR pathways (first guess)

In [6]:
dfs = []


def process_cdr_pathway(model, scenario, ensemble_member):
    """Construct the first-guess CDR pathway for one index entry.

    Thin wrapper around ``construct_new_cdr_pathway`` so that a failure for a
    single combination does not abort the whole parallel run.

    Parameters
    ----------
    model, scenario, ensemble_member
        One entry of ``metrics_first_guess.index``.

    Returns
    -------
    The value returned by ``construct_new_cdr_pathway``, or ``None`` if that
    call raised an exception (the failing combination and the error are
    printed).
    """
    try:
        return construct_new_cdr_pathway(
            novel_cdr_compiled, metrics_first_guess, model, scenario, ensemble_member
        )
    except Exception as exc:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still propagate, and report the
        # ensemble member and the error so the failure can be reproduced.
        print(model, scenario, ensemble_member, repr(exc))
        return None


n_failed = 0

# Leaving the `with` block waits for all workers and shuts the pool down,
# so no explicit `executor.shutdown()` is needed afterwards.
with concurrent.futures.ProcessPoolExecutor() as executor:
    futures = [
        executor.submit(process_cdr_pathway, model, scenario, ensemble_member)
        for model, scenario, ensemble_member in metrics_first_guess.index
    ]

    # Results are collected in completion order, not submission order.
    for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
        pathway = future.result()
        if pathway is None:
            # Failed combinations are skipped so that `dfs` only holds real
            # pathways when it is concatenated later.
            n_failed += 1
        else:
            dfs.append(pathway)

if n_failed:
    print(f"{n_failed} of {len(futures)} pathways could not be constructed and were skipped")

  0%|          | 0/57000 [00:00<?, ?it/s]

In [7]:
# Merge the per-(model, scenario, ensemble member) results collected in `dfs`
# into a single object via pyam.concat.
df_compiled = pyam.concat(dfs)

In [8]:
# Persist the compiled first-guess pathways as CSV.
df_compiled.to_csv(Path('../data/402_first_guess.csv'))