In [None]:
import pandas as pd
import ixmp4
import pyam
import nomenclature

In [None]:
# Model and scenario names shared by every IamDataFrame built in this notebook
# and reused to look up the matching ixmp4 run.
iamc_args = {
    "model": "State of CDR (2024) - Research and Development",
    "scenario": "Observed Data",
}

In [None]:
# Read the publications workbook and drop the "Unnamed: 0" and "description"
# columns before handing the table to pyam.
publications_raw = pd.read_excel(
    "source/2_Research_Development/SoCDRv2_Scientific_Publications.xlsx"
).drop(columns=["Unnamed: 0", "description"])

# Build the IamDataFrame with the shared model/scenario and relabel the
# "USA + CA" region as "North America".
df_publications = pyam.IamDataFrame(publications_raw, **iamc_args).rename(
    region={"USA + CA": "North America"}
)

# Insert "Removal|" after every "Research|" occurrence in the variable names.
df_publications.rename(
    variable={
        name: name.replace("Research|", "Research|Removal|")
        for name in df_publications.variable
    },
    inplace=True,
)

In [None]:
# Sanity check on the publication data: within each (model, scenario, region)
# group, sum all rows per column and report the columns whose total falls in
# the closed interval [0.1, 0.99].
# NOTE(review): totals above 0.99 are never reported - confirm that
# over-counting (sum > 1) does not need to be caught here as well.
groupby = df_publications.timeseries().groupby(["model", "scenario", "region"])

for i, _data in groupby:
    failing = _data.sum().between(0.1, 0.99)
    if not failing.any():
        continue
    print(i)
    print(_data.columns[failing])

In [None]:
# Read the "variable" sheet of the chapter-2 template, without its
# "description" column.
data = pd.read_excel(
    "source/2_Research_Development/chapter-2-template_BP_v2.xlsx",
    sheet_name="variable",
).drop(columns="description")

# Keep only the variables starting with "Research|Patents".
df_patents = pyam.IamDataFrame(data, **iamc_args).filter(variable="Research|Patents*")

# Re-nest the patent variables: the category (Removal / Mitigation) moves in
# front of "Patents".
df_patents.rename(
    variable={
        name: name.replace(
            "Research|Patents|Carbon Dioxide Removal", "Research|Removal|Patents"
        ).replace(
            "Research|Patents|Climate Change Mitigation", "Research|Mitigation|Patents"
        )
        for name in df_patents.variable
    },
    inplace=True,
)

In [None]:
# Read the grants/funding workbook, without its "description" column.
data = pd.read_excel(
    "source/2_Research_Development/chapter-2_grants_formatted+funding_corrected2.xlsx"
).drop(columns="description")

df_grants_funding = pyam.IamDataFrame(data, **iamc_args)

# Insert "Removal|" after every "Research|" occurrence in the variable names.
df_grants_funding.rename(
    variable={
        name: name.replace("Research|", "Research|Removal|")
        for name in df_grants_funding.variable
    },
    inplace=True,
)

# Merge data

In [None]:
# Combine the patent, publication and grant/funding datasets into a single
# object that all following cells operate on.
df = pyam.concat(
    [
        df_patents,
        df_publications,
        df_grants_funding,
    ]
)

In [None]:
# Rewrite the suffix "[share per year]" as " [Share]" (note the leading space
# in the replacement) in every variable name.
mapping = {
    name: name.replace("[share per year]", " [Share]")
    for name in df.variable
}
df.rename(variable=mapping, inplace=True)

In [None]:
# Rewrite the suffix "[share per year and region]" as " [Share]" (note the
# leading space in the replacement) in every variable name.
mapping = {
    name: name.replace("[share per year and region]", " [Share]")
    for name in df.variable
}
df.rename(variable=mapping, inplace=True)

In [None]:
def _harmonize_variable(name):
    """Return `name` with source spellings of CDR methods replaced by harmonized labels.

    The (old, new) pairs are applied in order as plain, case-sensitive
    substring replacements (`str.replace`), so each pair acts on the output of
    the previous one.
    """
    replacements = (
        # NOTE(review): "AR" is matched anywhere in the name, not only as a
        # stand-alone component - confirm no other variable contains "AR".
        ("AR", "Afforestation and Reforestation"),
        ("BECCS", "Bioenergy with CCS"),
        ("DAC(CS)", "Direct Air Capture with CCS"),
        # Both spellings go straight to the final label, so no intermediate
        # "Restoration of Landscapes and Peats" name is left behind.
        ("Restoration of landscapes/peats", "Peatland and Wetland Restoration"),
        ("Restoration of Landscapes and Peats", "Peatland and Wetland Restoration"),
        ("Ocean alkalinity enhancement", "Ocean Alkalinity Enhancement"),
        # NOTE(review): not idempotent - a name that already reads
        # "Improved Forest Management" would gain a second "Improved".
        ("Forest Management", "Improved Forest Management"),
        ("Ocean fertilization & Artificial upwelling", "Ocean Fertilization"),
        ("Enhanced Weathering (land based)", "Enhanced Weathering"),
        (" Literature on CDR/NET", ""),
        ("General CDR", "General"),
    )
    for old, new in replacements:
        name = name.replace(old, new)
    return name


# Variables without any matching substring map to themselves.
mapping = {name: _harmonize_variable(name) for name in df.variable}
df.rename(variable=mapping, inplace=True)

In [None]:
# Switch any variable still labelled "Restoration of Landscapes and Peats" to
# "Peatland and Wetland Restoration"; all other names map to themselves.
mapping = {
    name: name.replace(
        "Restoration of Landscapes and Peats", "Peatland and Wetland Restoration"
    )
    for name in df.variable
}
df.rename(variable=mapping, inplace=True)

In [None]:
# Load the data structure definition from the "../definitions/" folder; it is
# used below to validate `df`.
dsd = nomenclature.DataStructureDefinition("../definitions/")

In [None]:
# Validate the merged, renamed data against the loaded definitions.
dsd.validate(df)

In [None]:
# Multiply the values of all "* [Share]" variables by 100 (presumably turning
# fractions into percentages - TODO confirm); only the values change, and the
# rescaled rows then replace the originals in `df`.
# NOTE(review): this cell reassigns `df`, so running it a second time scales
# the shares by 100 again.
share_pattern = "* [Share]"

shares_df = df.filter(variable=share_pattern)
shares_df._data = shares_df._data.mul(100)

non_shares_df = df.filter(variable=share_pattern, keep=False)
df = pyam.concat([non_shares_df, shares_df])

In [None]:
# Sanity check on the rescaled shares: within each (model, scenario, region)
# group, sum all "* [Share]" rows per column and report the columns whose
# total falls in the closed interval [0.1, 95].
# NOTE(review): totals above 95 are never reported - confirm that is intended.
share_timeseries = df.filter(variable="* [Share]").timeseries()
groupby = share_timeseries.groupby(["model", "scenario", "region"])

for i, _data in groupby:
    failing = _data.sum().between(0.1, 95)
    if not failing.any():
        continue
    print(i)
    print(_data.columns[failing])

In [None]:
# Working copy restricted to the "World" region and to variables whose name
# contains "Active Grants".
df_active_share = df.filter(
    variable="*Active Grants*",
    region="World",
)

In [None]:
# Move the existing "Research|Removal|Active Grants" rows to the placeholder
# name "foo" - presumably so the total can be recomputed by aggregation
# without clashing with the reported one; TODO confirm.
df_active_share.rename(
    variable={"Research|Removal|Active Grants": "foo"},
    inplace=True,
)

In [None]:
# Compute "Research|Removal|Active Grants" by aggregating its components and
# append the result to `df_active_share`.
df_active_share.aggregate(
    "Research|Removal|Active Grants",
    append=True,
)

In [None]:
# For each sub-variable of "Research|Removal|Active Grants", divide it by the
# total, multiply by 100 and store the result as "<variable> [Share]".
df_list = []

for v in df_active_share.filter(variable="Research|Removal|Active Grants|*").variable:
    _df = df_active_share.divide(
        v,
        "Research|Removal|Active Grants",
        f"{v} [Share]",
        ignore_units="%",
    )
    _df._data = _df._data.mul(100)
    df_list.append(_df)

# Add all derived share variables to the main dataset in one go.
active_grant_shares = pyam.concat(df_list)
df.append(active_grant_shares, inplace=True)

In [None]:
# Validate `df` again now that the derived active-grant share variables have
# been appended.
dsd.validate(df)

In [None]:
# For every region except "World", aggregate the components of
# "Research|Removal|Publications" and append the result to `df`.
# NOTE(review): this runs after the last `dsd.validate(df)` call, so the
# appended rows are not validated.
publications_by_region = df.filter(region="World", keep=False).aggregate(
    "Research|Removal|Publications"
)
df.append(publications_by_region, inplace=True)

In [None]:
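# Disabled: direct upload via pyam. The cells further down write `df` to the
# same "socdr-dev" platform through the ixmp4 API instead.
# df.to_ixmp4("socdr-dev")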

In [None]:
# Write the final dataset to an Excel workbook (path is relative to the
# working directory).
df.to_excel("socdr-chapter-2-data_v4.xlsx")

In [None]:
import ixmp4

In [None]:
# Open the ixmp4 platform named "socdr-dev".
platform = ixmp4.Platform("socdr-dev")

In [None]:
# Look up the run for the shared model/scenario pair (`iamc_args` expands to
# model=..., scenario=...).
run = platform.runs.get(**iamc_args)

In [None]:
# Remove whatever `run.iamc.tabulate()` returns - presumably every datapoint
# currently stored for this run - before the fresh data is added; TODO confirm.
run.iamc.remove(run.iamc.tabulate())

In [None]:
# Add the contents of `df.data` to the run's IAMC data.
run.iamc.add(df.data)

In [None]:
# Set the run's "Chapter" meta entry.
run.meta["Chapter"] = "Chapter 2"