In [1]:
import pandas as pd
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer, pipeline
import pickle
from sklearn.preprocessing import MultiLabelBinarizer

In [2]:
model_name = "HuggingFaceH4/zephyr-7b-beta"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4'
)

replacements_default = pickle.load(open("../data/prompts/replacements_default.pickle", "rb"))
generation_args_default = pickle.load(open("../data/prompts/generation_args_default.pickle", "rb"))

In [3]:
base_model = AutoModelForCausalLM.from_pretrained(model_name,
                                             device_map="auto",
                                             torch_dtype=torch.bfloat16,
                                             trust_remote_code=True,
                                             quantization_config=quantization_config,
                                             )

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True)

In [5]:
pipe = pipeline("text-generation", model=base_model, tokenizer=tokenizer, torch_dtype=torch.bfloat16, device_map="auto")

In [6]:
def set_adapter(pipe, base_model, trained_on):
    adapter_name = f"../trained_adapters/{model_name}/checkpoint-{trained_on}"
    model_with_adapter = PeftModel.from_pretrained(base_model, adapter_name)
    pipe.model = model_with_adapter
    return pipe


def generate_output(pipe, abstract, message, replacements=replacements_default, generation_args=generation_args_default):
    replacements['user']['Abstract_Text'] = abstract
    _user_prompt = message[1]['content'].format_map(replacements['user'])
    _system_prompt = message[0]['content'].format_map(replacements['system'])

    messages = [
        {
            "role": "system",
            "content": _system_prompt,
        },
        {
            "role": "user",
            "content": _user_prompt
        }
    ]

    prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    input_ids = pipe.tokenizer.encode(prompt, return_tensors="pt").to(pipe.device)

    outputs = pipe.model.generate(
        input_ids=input_ids,
        max_new_tokens=generation_args['max_new_tokens'],
        do_sample=generation_args['do_sample'],
        temperature=generation_args['temperature'],
        top_k=generation_args['top_k'],
        top_p=generation_args['top_p'],
        num_return_sequences=generation_args['num_return_sequences']
    )

    return pipe.tokenizer.decode(outputs[0], skip_special_tokens=True)


def get_assistant_output(full_output):
    return full_output.split(generation_args_default['sep_token'])[1]

In [7]:
import re


def extract_sdg_nr(text):
    match = re.search(r'(\d+)', text)
    if match:
        return int(match.group(1))
    else:
        return 0
    
# the allowed numbers are 0-17
def extract_all_sdg_nr_from(text):
    match = re.findall(r'(\d+)', text)
    if match:
        sdg_list = [int(sdg) for sdg in match if 17 >= int(sdg) >= 0]
        sdg_set = set(sdg_list)
        return list(sdg_set)
    else:
        return [0]

def check_expected_in_classification(list_of_lists, expected_sdgs):
    return [True if expected_sdg in sdg_list else False for expected_sdg, sdg_list in zip(expected_sdgs, list_of_lists)]

# Data

In [8]:
dataset_name = 'extraction_test.csv'
dataset = pd.read_csv(f"../data/{dataset_name}")
print(dataset.shape)
dataset.head()

(116, 13)


Unnamed: 0.1,Unnamed: 0,id,abstract,expected_sdg,core_themes,sdg_related_themes,similarity_search_relevance,merged_text,final_classification,most_relevant_sdg,all_relevant_sdgs,single,multi
0,0,oai:www.zora.uzh.ch:157456,Long-term Investment Choices for Quinoa Farmer...,2,\nThe abstract presents a Real Options Case St...,\nIn terms of the Sustainable Development Goal...,\nSDG 2: Zero Hunger\n\nThe abstract addresses...,\nThe abstract presents a Real Options Case St...,"\nBased on the insights, it can be determined ...",\nSDG 13 - Climate Action,"\nSDG 2, 8, 13, 15",2.0,"2, 8, 13, 15"
1,1,oai:www.zora.uzh.ch:167650,"Antimicrobial resistance, multilocus sequence ...",3,\nThe abstract presents a study on antimicrobi...,"\nBased on the provided abstract, some relevan...",\nSDG 3: Good Health and Well-being\n\nThe stu...,\nThe study's findings contribute to SDG 3 (Go...,\nBased on the abstract and the insights provi...,\nSDG 3 (Good Health and Well-being) is the mo...,"\nSDGs directly impacted: 3, 15\nRanking based...",3.0,"3, 15"
2,2,oai:www.zora.uzh.ch:95626,Lifting the veil of ignorance: An experiment o...,16,\nThis abstract explores the phenomenon of nor...,\nThis abstract does not directly relate to an...,\nSDG 12: Responsible Consumption and Producti...,"\nThe study ""Lifting the veil of ignorance: An...","\nThe abstract ""Lifting the veil of ignorance:...",\nSDG 15: Life on Land\n\nJustification: The s...,\nSDGs directly impacted by the scientific abs...,15.0,"15, 16, 12"
3,3,oai:www.zora.uzh.ch:151370,Integrating animal movement with habitat suita...,13,\nThe abstract presents a method for estimatin...,"\nThis abstract relates to several SDGs, parti...",\nSDG 15: Life on Land\n\nThe presented abstra...,\nThe presented abstract contributes to SDG 15...,\nThe abstract presented contributes significa...,"\n[output]""SDG 15""","\nSDG 15, SDG 11, SDG 13",15.0,"15, 11, 13"
4,4,oai:www.zora.uzh.ch:167344,Plant functional trait change across a warming...,13,\nThe abstract discusses the impact of warming...,\nThis abstract relates to several Sustainable...,\nSDG 13: Climate Action\nThis abstract direct...,\nThe abstract explores the effects of climate...,\nThe abstract's contributions towards the SDG...,\nSDG 13 - Climate Action,\nSDGs directly impacted by the scientific abs...,13.0,"13, 15"


In [9]:
generation_args_default['sep_token'] = '<|assistant|>'
generation_args_default['max_new_tokens'] = 32

# Single Extraction No Adapter

In [10]:
result_message_single_sdg = pickle.load(open("../data/prompts/single_sdg_finetuned.pickle", "rb"))

generation_args = generation_args_default.copy()
replacements = replacements_default.copy()

generation_args['temperature'] = 0.2
generation_args['top_k'] = 30
generation_args['top_p'] = 0.20
generation_args['max_new_tokens'] = 8

In [11]:
for index, row in dataset.iterrows():
    abstract = row['abstract']
    final_classification = row['final_classification']

    replacements['user']['DSRE_Response'] = final_classification

    most_relevant_sdg = get_assistant_output(generate_output(pipe, abstract, result_message_single_sdg, replacements=replacements, generation_args=generation_args))

    print(index)
    print("Abstract:")
    print(abstract)
    print("\n\n")
    print("Final Classification:")
    print(final_classification)
    print("\n\n")
    print("Most relevant SDG:")
    print(most_relevant_sdg)
    print("\n")
    print("Expected SDG:")
    print(row['single'])
    print("\n\n")
    print('-'*100)
    print("\n\n")

    dataset.at[index, 'most_relevant_sdg_a'] = most_relevant_sdg

0
Abstract:
Long-term Investment Choices for Quinoa Farmers in Puno, Peru: A Real Options Case Study
The goal of this article is to assess the optimal choices of a smallholder quinoa farmer in the Puno region of Peru, in terms of his decision if and when to undertake certain investments that are expected to increase quinoa yield and crop resistance to harsh weather conditions, such as frost. We focus on two irreversible options, namely quinoa variety management andWaruWaru. The former alternative considers the option of the farmer to switch from his business-as-usual quinoa variety to one that has different yield and frost resistance characteristics. The latter alternative refers to the implementation of an ancestral cultivation practice that is estimated to offer benefits in terms of yield increase and resistance to harsh climate conditions.
We rely on Real Options Analysis to assess the two types of investment opportunities for the farmer. This approach allows us to determine not on

In [12]:
single_sdg = dataset.apply(lambda row: extract_sdg_nr(row['most_relevant_sdg_a']), axis=1).tolist()
expected_sdg = dataset['single']

In [15]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_singlelabel_classification(y_true, y_pred):
    """
    Evaluates singlelabel classification performance in a multiclass setting.
    
    Parameters:
    y_true (list): True labels.
    y_pred (list): Predicted labels.
    
    Returns:
    dict: A dictionary containing accuracy, precision, recall, and F1 score.
    """
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    }


In [14]:
evaluate_singlelabel_classification(expected_sdg, single_sdg) 

{'accuracy': 0.853448275862069,
 'precision': 0.9168103448275862,
 'recall': 0.853448275862069,
 'f1_score': 0.8728345629286387}

# All Extraction No Adapter

In [10]:
result_message_all_sdg = pickle.load(open("../data/prompts/all_sdg_finetuned.pickle", "rb"))

generation_args = generation_args_default.copy()
replacements = replacements_default.copy()

generation_args['temperature'] = 0.2
generation_args['top_k'] = 30
generation_args['top_p'] = 0.20

In [11]:
for index, row in dataset.iterrows():
    abstract = row['abstract']
    final_classification = row['final_classification']

    replacements['user']['DSRE_Response'] = final_classification

    all_relevant_sdgs = get_assistant_output(generate_output(pipe, abstract, result_message_all_sdg, replacements=replacements, generation_args=generation_args))

    print(index)
    print("Abstract:")
    print(abstract)
    print("\n\n")
    print("Final Classification:")
    print(final_classification)
    print("\n\n")
    print("All relevant SDGs:")
    print(all_relevant_sdgs)
    print("\n")
    print("Expected SDG:")
    print(row['multi'])
    print("\n\n")
    print('-'*100)
    print("\n\n")

    dataset.at[index, 'all_relevant_sdgs'] = all_relevant_sdgs

0
Abstract:
Long-term Investment Choices for Quinoa Farmers in Puno, Peru: A Real Options Case Study
The goal of this article is to assess the optimal choices of a smallholder quinoa farmer in the Puno region of Peru, in terms of his decision if and when to undertake certain investments that are expected to increase quinoa yield and crop resistance to harsh weather conditions, such as frost. We focus on two irreversible options, namely quinoa variety management andWaruWaru. The former alternative considers the option of the farmer to switch from his business-as-usual quinoa variety to one that has different yield and frost resistance characteristics. The latter alternative refers to the implementation of an ancestral cultivation practice that is estimated to offer benefits in terms of yield increase and resistance to harsh climate conditions.
We rely on Real Options Analysis to assess the two types of investment opportunities for the farmer. This approach allows us to determine not on

In [12]:
all_sdg_extracted = dataset.apply(lambda row: extract_all_sdg_nr_from(row['all_relevant_sdgs']), axis=1).tolist()
print(all_sdg_extracted)
all_sdg_expected = dataset['multi'].tolist()
# convert the list of strings to a list of lists i.e. the string is a list of numbers '1,2,3' -> [1,2,3]
all_sdg_expected = [list(map(int, sdg_list.split(','))) for sdg_list in all_sdg_expected]
print(all_sdg_expected)
all_labels = list(range(18))

[[8, 2, 13, 15], [3, 15], [16, 12, 15], [11, 13, 15], [13, 15], [16, 3, 15], [16, 17, 10], [13, 14, 15], [16, 10, 3, 5], [16, 0, 12, 15], [17, 10, 3, 15], [16, 17, 12], [0, 13, 15], [12, 6, 15], [0, 13, 15], [16, 17, 3], [1, 3, 4, 9, 10], [1, 2, 11, 13, 15], [1, 2, 3, 4, 5, 8], [8, 16, 1], [8, 17, 13, 15], [16, 3, 15], [11, 12, 13, 14, 15], [13, 6, 15], [3, 13, 15], [16, 17, 13, 7], [3, 13, 15], [1, 12, 13, 15], [9, 15, 6, 7], [2, 12, 15], [12, 15, 6, 7], [11, 13, 6], [3, 4, 6, 8, 9, 11, 13, 15], [8, 0, 7], [9, 12, 13, 6], [8, 9, 16, 17], [8, 10], [16], [3, 12, 13, 15], [17, 13, 15], [1, 11, 15], [9, 13, 6], [15, 7], [8, 10, 13, 15], [8, 17, 10], [1, 2, 3, 4, 5, 6], [9, 4], [1, 13, 15], [1, 10, 13, 16, 17], [17, 10], [9, 10, 4], [6, 15], [8, 1, 10], [15, 7], [9, 4], [3, 11], [3, 15], [3, 15], [3, 15], [15], [15], [1, 3, 13, 14, 16, 17], [15, 6, 14], [1, 2, 3, 5, 6, 7, 11], [0, 10, 12, 15], [13, 6, 15], [16, 1, 3, 15], [1, 3, 8, 15, 16], [1, 4, 5, 8, 9], [8, 17], [17, 10, 4], [1, 3, 6, 

In [13]:
from sklearn.metrics import jaccard_score, hamming_loss


def evaluate_multilabel_classification(y_true, y_pred, all_labels):
    """
    Evaluates multilabel classification performance.
    
    Parameters:
    y_true (list of lists): True labels.
    y_pred (list of lists): Predicted labels.
    all_labels (list): All possible labels in the dataset.
    
    Returns:
    dict: A dictionary containing accuracy, precision, recall, and F1 score.
    """
    mlb = MultiLabelBinarizer(classes=all_labels)
    y_true_binary = mlb.fit_transform(y_true)
    y_pred_binary = mlb.transform(y_pred)

    accuracy = accuracy_score(y_true_binary, y_pred_binary)
    precision = precision_score(y_true_binary, y_pred_binary, average='samples', zero_division=0)
    recall = recall_score(y_true_binary, y_pred_binary, average='samples', zero_division=0)
    f1 = f1_score(y_true_binary, y_pred_binary, average='samples', zero_division=0)
    jaccard = jaccard_score(y_true_binary, y_pred_binary, average='samples', zero_division=0)
    hamming = hamming_loss(y_true_binary, y_pred_binary)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    }


In [16]:
evaluate_multilabel_classification(all_sdg_expected, all_sdg_extracted, all_labels)

{'accuracy': 0.49137931034482757,
 'precision': 0.8147783251231526,
 'recall': 0.9504618226600986,
 'f1_score': 0.8511422888464631}

# Single Extraction Adapter

In [17]:
result_message_single_sdg = pickle.load(open("../data/prompts/single_sdg_finetuned.pickle", "rb"))

generation_args = generation_args_default.copy()
replacements = replacements_default.copy()

generation_args['temperature'] = 0.2
generation_args['top_k'] = 30
generation_args['top_p'] = 0.20
generation_args['max_new_tokens'] = 7

In [18]:
trained_on = "extraction_single"
pipe = set_adapter(pipe, base_model, trained_on)

In [19]:
for index, row in dataset.iterrows():
    abstract = row['abstract']
    final_classification = row['final_classification']

    replacements['user']['DSRE_Response'] = final_classification

    most_relevant_sdg = get_assistant_output(
        generate_output(pipe, abstract, result_message_single_sdg, replacements=replacements,
                        generation_args=generation_args))

    print(index)
    print("Abstract:")
    print(abstract)
    print("\n\n")
    print("Final Classification:")
    print(final_classification)
    print("\n\n")
    print("Most relevant SDG:")
    print(most_relevant_sdg)
    print("\n")
    print("Expected SDG:")
    print(row['single'])
    print("\n\n")
    print('-' * 100)
    print("\n\n")

    dataset.at[index, 'most_relevant_sdg'] = most_relevant_sdg

0
Abstract:
Long-term Investment Choices for Quinoa Farmers in Puno, Peru: A Real Options Case Study
The goal of this article is to assess the optimal choices of a smallholder quinoa farmer in the Puno region of Peru, in terms of his decision if and when to undertake certain investments that are expected to increase quinoa yield and crop resistance to harsh weather conditions, such as frost. We focus on two irreversible options, namely quinoa variety management andWaruWaru. The former alternative considers the option of the farmer to switch from his business-as-usual quinoa variety to one that has different yield and frost resistance characteristics. The latter alternative refers to the implementation of an ancestral cultivation practice that is estimated to offer benefits in terms of yield increase and resistance to harsh climate conditions.
We rely on Real Options Analysis to assess the two types of investment opportunities for the farmer. This approach allows us to determine not on

In [20]:
single_sdg = dataset.apply(lambda row: extract_sdg_nr(row['most_relevant_sdg']), axis=1).tolist()
expected_sdg = dataset['single']

In [21]:
evaluate_singlelabel_classification(expected_sdg, single_sdg)

{'accuracy': 0.9482758620689655,
 'precision': 0.9286330049261085,
 'recall': 0.9482758620689655,
 'f1_score': 0.9364071659483284}

# Multi Extraction Adapter

In [22]:
result_message_all_sdg = pickle.load(open("../data/prompts/all_sdg_finetuned.pickle", "rb"))

generation_args = generation_args_default.copy()
replacements = replacements_default.copy()

generation_args['temperature'] = 0.2
generation_args['top_k'] = 30
generation_args['top_p'] = 0.20

In [23]:
trained_on = "extraction_multi"
pipe = set_adapter(pipe, base_model, trained_on)

In [24]:
for index, row in dataset.iterrows():
    abstract = row['abstract']
    final_classification = row['final_classification']

    replacements['user']['DSRE_Response'] = final_classification

    all_relevant_sdgs = get_assistant_output(generate_output(pipe, abstract, result_message_all_sdg, replacements=replacements, generation_args=generation_args))

    print(index)
    print("Abstract:")
    print(abstract)
    print("\n\n")
    print("Final Classification:")
    print(final_classification)
    print("\n\n")
    print("All relevant SDGs:")
    print(all_relevant_sdgs)
    print("\n")
    print("Expected SDG:")
    print(row['multi'])
    print("\n\n")
    print('-'*100)
    print("\n\n")

    dataset.at[index, 'all_relevant_sdgs'] = all_relevant_sdgs

0
Abstract:
Long-term Investment Choices for Quinoa Farmers in Puno, Peru: A Real Options Case Study
The goal of this article is to assess the optimal choices of a smallholder quinoa farmer in the Puno region of Peru, in terms of his decision if and when to undertake certain investments that are expected to increase quinoa yield and crop resistance to harsh weather conditions, such as frost. We focus on two irreversible options, namely quinoa variety management andWaruWaru. The former alternative considers the option of the farmer to switch from his business-as-usual quinoa variety to one that has different yield and frost resistance characteristics. The latter alternative refers to the implementation of an ancestral cultivation practice that is estimated to offer benefits in terms of yield increase and resistance to harsh climate conditions.
We rely on Real Options Analysis to assess the two types of investment opportunities for the farmer. This approach allows us to determine not on

In [25]:
all_sdg_extracted = dataset.apply(lambda row: extract_all_sdg_nr_from(row['all_relevant_sdgs']), axis=1).tolist()
print(all_sdg_extracted)
all_sdg_expected = dataset['multi'].tolist()
# convert the list of strings to a list of lists i.e. the string is a list of numbers '1,2,3' -> [1,2,3]
all_sdg_expected = [list(map(int, sdg_list.split(','))) for sdg_list in all_sdg_expected]
print(all_sdg_expected)
all_labels = list(range(18))

[[8, 2, 13, 15], [3, 15], [16, 12, 15], [0, 11, 13, 15], [13, 15], [16, 11, 3, 15], [16, 17, 10], [13, 14, 15], [16, 10, 3, 5], [16, 0, 12, 15], [3, 10, 12, 15, 17], [16, 17, 12], [0, 13, 14, 15], [0, 12, 6, 15], [0, 13, 15], [3, 6, 8, 9, 12, 13, 15, 16, 17], [9, 10, 3, 4], [2, 8, 9, 11, 13, 15], [1, 2, 3, 4, 5, 8], [8, 16, 1, 0], [8, 17, 13, 15], [2, 3, 4, 6, 8, 9, 13, 15, 16], [11, 12, 13, 14, 15], [0, 13, 6, 15], [3, 13, 15], [16, 17, 13, 7], [3, 13, 15], [12, 13, 15], [0, 6, 7, 9, 15], [0, 2, 12, 15], [12, 15, 6, 7], [11, 13, 6], [3, 4, 6, 8, 9, 11, 13, 15, 16], [8, 0, 7], [6, 9, 12, 13, 17], [8, 9, 16, 17], [8, 0, 10], [0, 8, 9, 14, 15, 16], [3, 12, 13, 15], [17, 13, 15], [11, 15], [9, 13, 6], [0, 15, 7], [0, 8, 10, 13, 15], [8, 17, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9, 15], [16, 9, 4], [0, 13, 15], [1, 10, 13, 16, 17], [0, 10, 13, 16, 17], [9, 10, 4], [6, 15], [8, 1, 10, 9], [0, 15, 7], [9, 4], [0, 3, 11, 12, 15], [3, 15], [3, 15], [0, 3, 15], [17, 15], [0, 15], [1, 3, 13, 14, 16, 17]

In [26]:
evaluate_multilabel_classification(all_sdg_expected, all_sdg_extracted, all_labels)

{'accuracy': 0.47413793103448276,
 'precision': 0.7714319923371648,
 'recall': 0.9991379310344828,
 'f1_score': 0.8420801899250175}