In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt 
import matplotlib.patches as mpatches
import matplotlib as mpl

### Colors and groups

In [None]:
# Colour (hex) for each MRV topic.
# The keys double as the data-frame column names that are scanned when the MRV
# topics of a document are extracted, and their order determines the stacking
# and legend order of the MRV-topic bar charts.
mrv_topic_colors = {
    'Quantification': '#7da7b8',   # darker bluish-grey
    'Monitoring': '#8dc39f',        # stronger soft green
    'Quality': '#c76a85',           # berry red
    'Governance': '#b39bc8',        # stronger lavender
    'Reporting': '#e1a6b8',         # stronger soft pink
    'Verification': '#a8a8a8',      # medium grey
    'External impacts': '#d0c97b',   # stronger soft yellow
    'General MRV': '#e5b138',      # muted yellow
    
}

# Define the custom color scheme for each CDR group.
# The key order determines the stacking and legend order of the CDR-group bar charts.
cdr_group_colors = {
    "SCS": '#C1A28A' ,            # light brown for SCS
    "Forestry": '#8fbc8f',         # green for Forestry
    "Terrestrial CDR": '#df65b0',  # pink/magenta for Terrestrial CDR
 
    "Marine CDR": '#7eaed3',    # blue for Marine CDR
    "CCS": '#ff7f00',            # orange for CCS
    "General CDR/Other": '#e5e5e5', # light grey for General/Other
}


In [None]:
# Names of the data-frame columns that are scanned per document when the CDR
# methods are extracted (a method counts as covered when its column value is > 0).
cdr_methods = [
 'SCS',
 'A/R',
 'General forestry',
 'Agroforestry',
 'Forest management',
 'Biochar',
 'Restoration of landscapes and peats',
 'EW',
 'OAE',
 'OIF/AU',
 'DOC',
 'Algae ',  # NOTE: the trailing space is part of the name and matches the key in cdr_group_mapping; presumably it mirrors the spreadsheet header -- confirm before "fixing"
 'Blue carbon',
 'CCS',
 'BECCS',
 'DACCS',
 'CCUS',
 'General CDR',
 'Other'
] 

# Maps every CDR method name from cdr_methods to its broader CDR group.
# The group names are the keys of cdr_group_colors.
cdr_group_mapping = {
     # Marine CDR group
    "OIF/AU":"Marine CDR",
    "Blue carbon": "Marine CDR",
    "OAE": "Marine CDR",
    "DOC": "Marine CDR",
    "Algae ": "Marine CDR",  # key keeps the trailing space used in cdr_methods
    
    # Terrestrial CDR group
    "EW": "Terrestrial CDR",
    "Biochar": "Terrestrial CDR",
    "Restoration of landscapes and peats": "Terrestrial CDR",
    
    # CCS group
    "DACCS": "CCS",
    "BECCS": "CCS",
    "CCUS": "CCS",
    "CCS": "CCS",
    
    # General CDR/Other group
    "General CDR": "General CDR/Other",
    "Other": "General CDR/Other",

    # SCS
    "SCS": "SCS",

    # Forestry
    "General forestry": "Forestry",
    "A/R": "Forestry",
    "Agroforestry": "Forestry",
    "Forest management": "Forestry",
}

### Read and clean data

In [None]:
# Load the coded documents (older version of the file: MRVdata_0810 ?)
df = pd.read_excel("MRVdata_2210.xlsx")

# Discard rows whose Year is 2024 ...
df_filtered = df.loc[df['Year'].ne(2024)]
# ... and keep an independent copy of the documents marked for inclusion
df_included = df_filtered.loc[df_filtered['Inclusion/exclusion'].eq('Inclusion')].copy()

### Additional data

def extract_CDR_method_focus(row, columns=None):
    """Collect the CDR methods covered by one document and their focus values.

    Parameters
    ----------
    row : mapping or pandas.Series
        One document, indexable by column name.
    columns : iterable of str, optional
        Columns to inspect. Defaults to the module-level ``cdr_methods`` list,
        which keeps the one-argument call used with ``DataFrame.apply`` working.

    Returns
    -------
    list
        ``[methods, values]`` -- two parallel lists holding the names of all
        inspected columns whose value is greater than 0 and those values, in
        the order of ``columns``. Missing values (NaN) never compare greater
        than 0 and are therefore skipped.
    """
    if columns is None:
        columns = cdr_methods

    selected = [(column, row[column]) for column in columns if row[column] > 0]
    CDR_method = [column for column, _ in selected]
    CDR_focus = [value for _, value in selected]
    return [CDR_method, CDR_focus]
# Evaluate the [methods, values] pair once per document and split it into two columns
cdr_pairs = df_included.apply(extract_CDR_method_focus, axis=1)
df_included['CDR_focus'] = cdr_pairs.str[1]   # focus values of the covered methods
df_included['CDR_method'] = cdr_pairs.str[0]  # names of the covered methods


# add new columns for MRV topic and study focus to the dataset
def extract_MRV_topic_focus(row, columns=None):
    """Collect the MRV topics covered by one document and their focus values.

    Parameters
    ----------
    row : mapping or pandas.Series
        One document, indexable by column name.
    columns : iterable of str, optional
        Columns to inspect. Defaults to the keys of the module-level
        ``mrv_topic_colors`` dict, which keeps the one-argument call used with
        ``DataFrame.apply`` working.

    Returns
    -------
    list
        ``[topics, values]`` -- two parallel lists holding the names of all
        inspected columns whose value is greater than 0 and those values, in
        the order of ``columns``. Missing values (NaN) never compare greater
        than 0 and are therefore skipped.
    """
    if columns is None:
        columns = mrv_topic_colors.keys()

    selected = [(column, row[column]) for column in columns if row[column] > 0]
    MRV_topic = [column for column, _ in selected]
    MRV_focus = [value for _, value in selected]
    return [MRV_topic, MRV_focus]
# Evaluate the [topics, values] pair once per document and split it into two columns
mrv_pairs = df_included.apply(extract_MRV_topic_focus, axis=1)
df_included['MRV_focus'] = mrv_pairs.str[1]  # focus values of the covered topics
df_included['MRV_topic'] = mrv_pairs.str[0]  # names of the covered topics


# Lookup table country -> continent, built from the two columns of the spreadsheet
continent_data = pd.read_excel('continents.xlsx')
country_to_continent = dict(
    continent_data[['Country', 'Continent']].itertuples(index=False, name=None)
)

# Function to determine the continent based on the country
def get_continent(study_location, continent_column):
    """Return the continent/region label for one document.

    Both arguments are plain values (not column names); they are supplied per
    row when the function is applied to the data frame.

    Parameters
    ----------
    study_location : str or missing
        Country name, or the literal ``"Multiple countries"``.
    continent_column : str or missing
        Manually coded continent; only used for ``"Multiple countries"``.

    Returns
    -------
    str
        * ``"Not specified"`` when the location is missing,
        * the manually coded continent for ``"Multiple countries"``
          (``"Not specified"`` when that value is missing as well, so the
          document is not silently dropped later on),
        * otherwise the continent found in ``country_to_continent``, or
          ``"Not specified"`` for unknown countries.
    """
    if pd.isna(study_location):
        return "Not specified"

    # Normalise once so stray whitespace (or a non-string cell) can neither hide the
    # "Multiple countries" marker nor crash the lookup.
    location = str(study_location).strip()

    if location == "Multiple countries":
        # uses the manually defined continent for the category "Multiple countries"
        return "Not specified" if pd.isna(continent_column) else continent_column

    # Search for the country in the dictionary and return the continent or 'Not specified'
    return country_to_continent.get(location, "Not specified")

# Derive the continent of every included document from its two location columns
df_included['Study location_continent'] = [
    get_continent(location, manual_continent)
    for location, manual_continent in zip(
        df_included['Study location'],
        df_included['Study location: Continent'],
    )
]

### explode
# One row per (document, CDR method); the group of each method is attached as CDR_group
df_CDRexplode = df_included.explode(['CDR_method', 'CDR_focus']).assign(
    CDR_group=lambda exploded: exploded['CDR_method'].replace(cdr_group_mapping)
)

# One row per (document, MRV topic)
df_MRVexplode = df_included.explode(['MRV_topic', 'MRV_focus'])

# Left-to-right order of the locations on the x-axis
location_order = [
    'Africa',
    'Asia',
    'Europe',
    'North America',
    'South America',
    'Oceania',
    'USA',
    'China',
    'Global',
    'Not specified',
]

### Absolute numbers

In [None]:
# Turn the location column into an ordered categorical that follows location_order;
# values that are not listed in location_order become missing.
location_dtype = pd.CategoricalDtype(categories=location_order, ordered=True)
df_included['Study location_continent'] = (
    df_included['Study location_continent'].astype(location_dtype)
)

In [None]:
# Number of included documents per location.
# observed=False is passed explicitly: the grouping column is categorical, and
# locations without any document must stay in the table (with count 0) so that
# the bars line up with location_order. Relying on the implicit default raises a
# FutureWarning in recent pandas versions and drops the empty locations once the
# default changes.
loc_size = (
    df_included
    .groupby("Study location_continent", as_index=False, observed=False)
    .size()
    .rename(columns={"size": "total_size"})
)

In [None]:
# Absolute number of included documents per location.
fig, ax = plt.subplots()
width = 0.6  # bar width, reused by the share plots further down

# Plain strings so matplotlib treats the locations as a categorical axis
region_labels = loc_size["Study location_continent"].astype(str)

ax.bar(region_labels, loc_size["total_size"], width, color="grey")
ax.set_ylabel("Documents per region")
# Tick positions 0..n-1 are the positions matplotlib assigns to the n categories.
# (The previous version referenced the undefined names `xticks` and `countries`.)
_ = ax.set_xticks(range(len(region_labels)), region_labels, rotation=50, ha="right")

### Shares

In [None]:
# Share of every CDR group per location, in long format.
# Resulting columns of loc_meth: location, CDR_group, size, total_size, rel, col.
loc_col = "Study location_continent"

# Denominator: number of (document, CDR method) rows per location
locs_cdr_total = (
    df_CDRexplode
    .groupby([loc_col], as_index=False, observed=True)
    .size()
    .rename(columns={"size": "total_size"})
)

# Observed counts per (location, CDR group)
loc_meth_counts = (
    df_CDRexplode
    .groupby([loc_col, "CDR_group"], as_index=False, observed=True)
    .size()
)

# Complete location x group grid. Left-merging the counts onto it guarantees exactly
# one row per location for every group -- also for locations or groups that never
# occur in the data -- which is what the stacked bar plots rely on. (The previous
# version looped over the undefined name `locations` and lost the zero rows again in
# an inner merge.)
loc_meth = (
    pd.MultiIndex.from_product(
        [location_order, list(cdr_group_colors.keys())],
        names=[loc_col, "CDR_group"],
    )
    .to_frame(index=False)
    .merge(loc_meth_counts.astype({loc_col: object}), on=[loc_col, "CDR_group"], how="left")
    .merge(locs_cdr_total.astype({loc_col: object}), on=loc_col, how="left")
    .fillna({"size": 0, "total_size": 0})
    .astype({"size": int, "total_size": int})
)

# Relative share; locations without any row get share 0 instead of 0/0 = NaN
nonzero_total = loc_meth["total_size"].where(loc_meth["total_size"] > 0)
loc_meth["rel"] = (loc_meth["size"] / nonzero_total).fillna(0.0)

loc_meth["col"] = loc_meth["CDR_group"].map(cdr_group_colors)

loc_meth[loc_col] = pd.Categorical(
    loc_meth[loc_col],
    categories=location_order,
    ordered=True
)
# Stable sort keeps the group order within each location
loc_meth = loc_meth.sort_values(loc_col, kind="stable").reset_index(drop=True)

In [None]:
# List the CDR groups in stacking order.
# cdr_groups is defined here so that this cell also runs on a fresh kernel; it was
# previously only created in a later cell.
cdr_groups = list(cdr_group_colors.keys())
for c in cdr_groups:
    print(c)

In [None]:
# Stacked bars: share of each CDR group per location
fig, ax = plt.subplots()
cdr_groups = list(cdr_group_colors.keys())

# Running top edge of the stack, one entry per bar
stack_top = np.zeros(len(loc_meth.loc[loc_meth["CDR_group"]==cdr_groups[0]]))
for group in cdr_groups:
    group_rows = loc_meth.loc[loc_meth["CDR_group"] == group]
    ax.bar(
        location_order,
        group_rows["rel"],
        width,
        bottom=stack_top,
        label=group,
        color=group_rows["col"],
    )
    stack_top += np.array(group_rows["rel"])

# Legend entries in reverse stacking order, so the legend reads top-down like the bars
legend_group = [
    mpatches.Patch(color=cdr_group_colors[group], label=group)
    for group in reversed(cdr_groups)
]
leg_gr = plt.legend(handles=legend_group, bbox_to_anchor=(1.,1), fontsize=10, frameon=False, ncol=1)

ax.set_ylabel('Share of CDR method')

In [None]:
# Share of every MRV topic per location, in long format.
# Resulting columns of loc_mrv: location, MRV_topic, size, total_size, rel, col.
loc_col = "Study location_continent"

# Denominator: number of (document, MRV topic) rows per location
locs_mrv_total = (
    df_MRVexplode
    .groupby([loc_col], as_index=False, observed=True)
    .size()
    .rename(columns={"size": "total_size"})
)

# Observed counts per (location, MRV topic)
loc_mrv_counts = (
    df_MRVexplode
    .groupby([loc_col, "MRV_topic"], as_index=False, observed=True)
    .size()
)

# Complete location x topic grid. Left-merging the counts onto it guarantees exactly
# one row per location for every topic -- also for locations or topics that never
# occur in the data -- which is what the stacked bar plots rely on. (The previous
# version looped over the undefined name `locations` and lost the zero rows again in
# an inner merge.)
loc_mrv = (
    pd.MultiIndex.from_product(
        [location_order, list(mrv_topic_colors.keys())],
        names=[loc_col, "MRV_topic"],
    )
    .to_frame(index=False)
    .merge(loc_mrv_counts.astype({loc_col: object}), on=[loc_col, "MRV_topic"], how="left")
    .merge(locs_mrv_total.astype({loc_col: object}), on=loc_col, how="left")
    .fillna({"size": 0, "total_size": 0})
    .astype({"size": int, "total_size": int})
)

# Relative share; locations without any row get share 0 instead of 0/0 = NaN
nonzero_total = loc_mrv["total_size"].where(loc_mrv["total_size"] > 0)
loc_mrv["rel"] = (loc_mrv["size"] / nonzero_total).fillna(0.0)

loc_mrv["col"] = loc_mrv["MRV_topic"].map(mrv_topic_colors)

loc_mrv[loc_col] = pd.Categorical(
    loc_mrv[loc_col],
    categories=location_order,
    ordered=True
)
# Stable sort keeps the topic order within each location
loc_mrv = loc_mrv.sort_values(loc_col, kind="stable").reset_index(drop=True)

In [None]:
# Stacked bars: share of each MRV topic per location
fig, ax = plt.subplots()
mrv_topics = list(mrv_topic_colors.keys())

# Running top edge of the stack, one entry per bar
stack_top = np.zeros(len(loc_mrv.loc[loc_mrv["MRV_topic"]==mrv_topics[0]]))
for topic in mrv_topics:
    topic_rows = loc_mrv.loc[loc_mrv["MRV_topic"] == topic]
    ax.bar(
        location_order,
        topic_rows["rel"],
        width,
        bottom=stack_top,
        label=topic,
        color=topic_rows["col"],
    )
    stack_top += np.array(topic_rows["rel"])

# Legend entries in reverse stacking order, so the legend reads top-down like the bars
legend_topic = [
    mpatches.Patch(color=mrv_topic_colors[topic], label=topic)
    for topic in reversed(mrv_topics)
]
leg_gr = plt.legend(handles=legend_topic, bbox_to_anchor=(1.,1), fontsize=10, frameon=False, ncol=1)

ax.set_ylabel('Share of MRV topic')

In [None]:
def _stacked_share_bars(axis, shares, category_col, categories, bar_width):
    """Draw one stacked bar per location from a long-format share table.

    ``shares`` must provide the columns ``category_col``, ``rel`` (bar height)
    and ``col`` (bar colour) and, for every entry of ``categories``, one row per
    entry of ``location_order`` in that order.
    """
    bottoms = np.zeros(len(location_order))
    for category in categories:
        rows = shares.loc[shares[category_col] == category]
        axis.bar(location_order, rows["rel"].to_numpy(), bar_width,
                 bottom=bottoms, label=category, color=rows["col"].tolist())
        # new array instead of += so the heights already drawn are never touched
        bottoms = bottoms + rows["rel"].to_numpy()


# Figure 3: documents per region (top), CDR-group shares (middle), MRV-topic shares (bottom)
fig, ax = plt.subplots(3, 1, figsize=(6, 6), sharex=True)
width = 0.6
cdr_groups = list(cdr_group_colors.keys())
mrv_topics = list(mrv_topic_colors.keys())

##### absolute numbers
# Align the counts to location_order explicitly so that all three panels share
# the same categorical x-axis, even if a location has no document at all.
doc_counts = pd.Series(
    loc_size["total_size"].to_numpy(),
    index=loc_size["Study location_continent"].astype(str),
).reindex(location_order, fill_value=0)
ax[0].bar(location_order, doc_counts.to_numpy(), width, color="grey")
ax[0].set_ylabel("Documents per \nRegion")

##### CDR groups
_stacked_share_bars(ax[1], loc_meth, "CDR_group", cdr_groups, width)

# Legend in reverse stacking order so it reads top-down like the bars
legend_group = [mpatches.Patch(color=cdr_group_colors[c], label=c) for c in reversed(cdr_groups)]
leg_gr = ax[1].legend(handles=legend_group, bbox_to_anchor=(1.,1.2), fontsize=10, frameon=False, ncol=1)

ax[1].set_ylabel('Share of \nCDR method')
ax[1].set_yticks([0,0.5,1],[0,0.5,1])


##### MRV topics
_stacked_share_bars(ax[2], loc_mrv, "MRV_topic", mrv_topics, width)

legend_topic = [mpatches.Patch(color=mrv_topic_colors[c], label=c) for c in reversed(mrv_topics)]
leg_gr = ax[2].legend(handles=legend_topic, bbox_to_anchor=(1.,1.2), fontsize=10, frameon=False, ncol=1)

ax[2].set_ylabel('Share of \nMRV topic')
ax[2].set_yticks([0,0.5,1],[0,0.5,1])

# Tick positions 0..n-1 are the positions of the n categories on the shared x-axis.
# (The previous version referenced the undefined names `xticks` and `countries`.)
_ = ax[2].set_xticks(range(len(location_order)), location_order, rotation=50, ha="right")
fig.savefig('figures/fig3.png', bbox_inches="tight", facecolor='white', edgecolor='none')