# Data

In [2]:
import copy

import pandas as pd

In [3]:
# Input location: the abstracts to classify are read from path + file_name.
path = "../data/"
file_name = "z_test.csv"

# Output directory for the checkpoint/result CSVs written after each stage.
# NOTE: the save/load cells build the name as f"{results_path}/{file_name}_result_{date}.csv",
# so the trailing slash here produces "//" in the path and the ".csv" of file_name
# ends up in the middle of the result file name.
results_path = '../results/DSRE/'

In [4]:
data_df = pd.read_csv(path + file_name)
print(data_df.shape)
data_df.head()

(114, 22)


Unnamed: 0,sdg,abstract,id,Aurora SDG 1,Aurora SDG 2,Aurora SDG 3,Aurora SDG 4,Aurora SDG 5,Aurora SDG 6,Aurora SDG 7,...,Aurora SDG 10,Aurora SDG 11,Aurora SDG 12,Aurora SDG 13,Aurora SDG 14,Aurora SDG 15,Aurora SDG 16,Aurora SDG 17,sdg_desc_short,sdg_desc_long
0,1,"Behavioural economics, experimentalism and the...",oai:www.zora.uzh.ch:121514,0.994933,0.131553,0.017696,0.002004,0.002599,0.001851,0.002009,...,0.288061,0.808725,0.003878,0.003744,0.002202,0.007218,0.003276,0.942661,No Poverty,Aims to end poverty in all its forms everywhere.
1,1,On the psychology of poverty\nPoverty remains ...,oai:www.zora.uzh.ch:99546,0.994616,0.026574,0.013165,0.008163,0.006769,0.002298,0.002273,...,0.196953,0.032675,0.005052,0.006424,0.002547,0.004685,0.41461,0.033908,No Poverty,Aims to end poverty in all its forms everywhere.
2,2,"Influence of temperature, humidity duration an...",oai:www.zora.uzh.ch:160626,0.004219,0.985724,0.321397,0.001793,0.001952,0.972292,0.014487,...,0.007813,0.214602,0.788679,0.933661,0.113531,0.360856,0.003073,0.008953,Zero Hunger,"Aims to end hunger, achieve food security and ..."
3,2,A global meta-analysis of yield stability in o...,oai:www.zora.uzh.ch:166251,0.004158,0.995758,0.029615,0.001911,0.002012,0.023792,0.002014,...,0.003968,0.068729,0.816045,0.018914,0.017451,0.98727,0.002955,0.009016,Zero Hunger,"Aims to end hunger, achieve food security and ..."
4,2,"Lemon technologies and adoption: measurement, ...",oai:www.zora.uzh.ch:137320,0.994911,0.995744,0.025641,0.341974,0.002015,0.017901,0.002458,...,0.021698,0.022601,0.367416,0.00365,0.004256,0.150547,0.003025,0.034098,Zero Hunger,"Aims to end hunger, achieve food security and ..."


In [5]:
# Normalise the gold-label column name to 'expected sdg' (the name the core-theme
# extraction loop reads). The input file may call it 'sdg' or 'gold_label'.
# Only the first match is renamed: renaming both would create two columns with the
# same label, and row['expected sdg'] would then return a Series instead of a scalar.
for label_column in ('sdg', 'gold_label'):
    if 'expected sdg' in data_df.columns:
        break
    if label_column in data_df.columns:
        data_df = data_df.rename(columns={label_column: 'expected sdg'})

In [6]:
# Empty results table: one row per abstract, one column per pipeline stage output.
generated_columns = [
    'core_themes',
    'sdg_related_themes',
    'similarity_search_relevance',
    'merged_text',
    'most_relevant_sdg',
    'all_relevant_sdgs',
]
# 'id' is deliberately left out of the dtype map, exactly as before.
column_dtypes = {'expected_sdg': 'int64', 'abstract': 'object'}
column_dtypes.update(dict.fromkeys(generated_columns, 'object'))

result_df = pd.DataFrame(columns=['id', 'abstract', 'expected_sdg'] + generated_columns).astype(column_dtypes)
result_df.head()

Unnamed: 0,id,abstract,expected_sdg,core_themes,sdg_related_themes,similarity_search_relevance,merged_text,most_relevant_sdg,all_relevant_sdgs


In [7]:
import time

# Run identifier embedded in every result file name.
# It is hard-coded to a fixed timestamp (presumably so the checkpoint CSVs of that run
# can be reloaded); the commented-out strftime call would generate a fresh id instead.
date = "20240515-154132"#time.strftime("%Y%m%d-%H%M%S")

In [8]:
print(date)

20240515-154132


In [9]:
# save results
result_df.to_csv(f"{results_path}/{file_name}_result_{date}.csv", index=False)

In [8]:
# load results
result_df = pd.read_csv(f"{results_path}/{file_name}_result_{date}.csv")

# Model

In [9]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig
from transformers import pipeline
import pickle

In [10]:
# Preprocessing
# Default prompt placeholders and generation settings shared by all stages.
# NOTE: unpickling can execute arbitrary code; only load these files from a trusted source.
# The context managers make sure the file handles are closed after loading.
with open("../data/prompts/replacements_default.pickle", "rb") as replacements_file:
    replacements_default = pickle.load(replacements_file)

with open("../data/prompts/generation_args_default.pickle", "rb") as generation_args_file:
    generation_args_default = pickle.load(generation_args_file)

In [11]:
# Hugging Face model id; used to load the base model and tokenizer and to locate the trained adapter.
base_model_name = "HuggingFaceH4/zephyr-7b-beta"
# Marker that get_assistant_output() splits the decoded text on to isolate the model's reply.
generation_args_default['sep_token'] = '<|assistant|>'

In [13]:
generation_args_default['max_new_tokens'] = 512

In [14]:
# model
# Load the base model with 4-bit NF4 weight quantization (bitsandbytes, no double
# quantization); bfloat16 is used as the compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type='nf4'
)
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint to run —
# confirm it is actually required for this model.
base_model = AutoModelForCausalLM.from_pretrained(base_model_name,
                                                  device_map="auto",
                                                  torch_dtype=torch.bfloat16,
                                                  trust_remote_code=True,
                                                  quantization_config=quantization_config,
                                                  low_cpu_mem_usage=True
                                                  )

# The pipeline object is only used as a container for model/tokenizer/device:
# generate() calls pipe.tokenizer and pipe.model.generate directly.
# NOTE(review): the model is already loaded when it is handed to pipeline(), so the
# torch_dtype/device_map arguments below are presumably redundant — confirm.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_fast=True, trust_remote_code=True)
pipe = pipeline("text-generation", model=base_model, tokenizer=tokenizer, torch_dtype=torch.bfloat16, device_map="auto")

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [15]:
def generate(pipe, abstract, message, replacements=None, generation_args=None):
    """Fill a two-message prompt template and run one generation with the pipeline's model.

    Args:
        pipe: Object exposing ``tokenizer``, ``model`` and ``device`` (the text-generation pipeline).
        abstract: Text substituted for the ``Abstract_Text`` placeholder of the user prompt.
        message: Sequence whose element 0 is the system template and element 1 the user
            template; each is a mapping with a ``'content'`` format string.
        replacements: Mapping with ``'user'`` and ``'system'`` sub-mappings of placeholder
            values. Defaults to the module-level ``replacements_default``.
        generation_args: Mapping with ``max_new_tokens``, ``do_sample``, ``temperature``,
            ``top_k``, ``top_p`` and ``num_return_sequences``. Defaults to the module-level
            ``generation_args_default``.

    Returns:
        The decoded first output sequence (prompt followed by the completion), with
        special tokens skipped.
    """
    # Resolve defaults at call time instead of binding shared mutable objects in the signature.
    if replacements is None:
        replacements = replacements_default
    if generation_args is None:
        generation_args = generation_args_default

    # Work on a copy so the caller's (or the shared default) placeholder mapping is not
    # modified as a side effect of generating for one abstract.
    user_fields = copy.copy(replacements['user'])
    user_fields['Abstract_Text'] = abstract

    _user_prompt = message[1]['content'].format_map(user_fields)
    _system_prompt = message[0]['content'].format_map(replacements['system'])

    messages = [
        {
            "role": "system",
            "content": _system_prompt,
        },
        {
            "role": "user",
            "content": _user_prompt
        }
    ]

    # Render the chat template as text (ending with the generation prompt), then tokenize it.
    prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    input_ids = pipe.tokenizer.encode(prompt, return_tensors="pt").to(pipe.device)

    outputs = pipe.model.generate(
        input_ids=input_ids,
        max_new_tokens=generation_args['max_new_tokens'],
        do_sample=generation_args['do_sample'],
        temperature=generation_args['temperature'],
        top_k=generation_args['top_k'],
        top_p=generation_args['top_p'],
        num_return_sequences=generation_args['num_return_sequences']
    )

    # Only the first returned sequence is decoded, regardless of num_return_sequences.
    return pipe.tokenizer.decode(outputs[0], skip_special_tokens=True)

In [16]:
def get_assistant_output(full_output):
    """Return the text that follows the first assistant marker in a decoded generation.

    The text is cut on ``generation_args_default['sep_token']`` and the second piece is
    returned, i.e. everything after the first marker and before a second one, if any.
    Raises ``IndexError`` when the marker does not occur in ``full_output``.
    """
    separator = generation_args_default['sep_token']
    segments = full_output.split(separator)
    return segments[1]

# Extract Core Themes

In [20]:
# Prompt template for the core-theme extraction stage (trusted local pickle).
# Opened in a context manager so the file handle is closed after loading.
with open("../data/prompts/core_themes_extraction.pickle", "rb") as prompt_file:
    core_themes_message = pickle.load(prompt_file)

In [21]:
# Stage 1: extract the core themes of every abstract and record the generation time.
times = []
# Rows are collected here and appended to result_df in one concat; concatenating inside
# the loop copies the whole frame on every iteration.
new_rows = []
result_columns = ['id', 'abstract', 'expected_sdg', 'core_themes', 'sdg_related_themes', 'similarity_search_relevance', 'merged_text', 'most_relevant_sdg', 'all_relevant_sdgs']

try:
    for index, row in data_df.iterrows():
        abstract = row['abstract']
        t_0 = time.time()
        core_themes_full = generate(pipe, abstract, core_themes_message)
        times.append(time.time() - t_0)
        core_themes = get_assistant_output(core_themes_full)

        print(index)
        print("Abstract:")
        print(abstract)
        print("\n\n")
        print("Core Themes:")
        print(core_themes)
        print("\n\n")
        print('-'*100)
        print("\n\n")

        # Later stages fill the remaining columns; they start out empty.
        new_rows.append([row['id'], abstract, row['expected sdg'], core_themes, None, None, None, None, None])
finally:
    # Runs even if the loop is interrupted, so rows generated so far are not lost.
    if new_rows:
        result_df = pd.concat([result_df, pd.DataFrame(new_rows, columns=result_columns)], ignore_index=True)

# Guard against an empty input frame (no timings -> no average).
if times:
    print(f"Average time: {sum(times) / len(times)}")

0
Abstract:
Behavioural economics, experimentalism and the marketization of development
Using market-based pro-poor development policy in the global South as an example, this paper engages with the rise of behaviourism and experimentalism as a challenge to the neoclassical orthodoxy and the more recent transformation into an influential policy script. After charting the rise of behavioural economics and discussing the key conceptual building blocks of the emerging behavioural mainstream in economics, the paper turns to the marketization of anti-poverty policy in the global South. Based on an analysis of policy documents, project reports and academic interventions, it is argued that the behavioural approach to poverty shifts the focus from the market to the market subject and engages in often thinly veiled attempts at behavioural engineering. This is achieved with the combined work of behavioural economic knowledge and socio-technical market devices.



Core Themes:

The scientific abst

In [22]:
result_df = result_df.reset_index(drop=True)
result_df.head()

Unnamed: 0,id,abstract,expected_sdg,core_themes,sdg_related_themes,similarity_search_relevance,merged_text,most_relevant_sdg,all_relevant_sdgs
0,oai:www.zora.uzh.ch:121514,"Behavioural economics, experimentalism and the...",1,\nThe scientific abstract discusses the integr...,,,,,
1,oai:www.zora.uzh.ch:99546,On the psychology of poverty\nPoverty remains ...,1,"\nThe scientific abstract titled ""On the psych...",,,,,
2,oai:www.zora.uzh.ch:160626,"Influence of temperature, humidity duration an...",2,\nThe scientific abstract explores the effects...,,,,,
3,oai:www.zora.uzh.ch:166251,A global meta-analysis of yield stability in o...,2,\nThe scientific abstract discusses the issue ...,,,,,
4,oai:www.zora.uzh.ch:137320,"Lemon technologies and adoption: measurement, ...",2,\nThe scientific abstract discusses the topics...,,,,,


In [25]:
# save results
result_df.to_csv(f"{results_path}/{file_name}_result_{date}.csv", index=False)

In [26]:
# load results
result_df = pd.read_csv(f"{results_path}/{file_name}_result_{date}.csv")

# SDG Related Themes

In [27]:
# Deep copy: replacements holds nested mappings ('user', 'system'). A shallow .copy()
# shares them, so the per-row writes to replacements['user'] in this stage would also
# modify replacements_default.
replacements = copy.deepcopy(replacements_default)
generation_args = generation_args_default.copy()

generation_args['max_new_tokens'] = 512

In [28]:
# Prompt template for the SDG-related-themes stage (trusted local pickle).
# Opened in a context manager so the file handle is closed after loading.
with open("../data/prompts/sdg_concepts_extraction_core_themes.pickle", "rb") as prompt_file:
    sdg_related_themes_message = pickle.load(prompt_file)

In [29]:
# Stage 2: derive SDG-related themes from each abstract and its core themes.
times = []

# A column that was empty when the checkpoint CSV was read comes back as float64 (all NaN).
# Writing strings into it is deprecated in recent pandas, so make it an object column first.
result_df['sdg_related_themes'] = result_df['sdg_related_themes'].astype('object')

for index, row in result_df.iterrows():
    abstract = row['abstract']
    core_themes = row['core_themes']
    replacements['user']['Core_Themes'] = core_themes
    t_0 = time.time()
    # Pass this stage's generation_args explicitly, like the other stages do, so the
    # settings configured for this stage are the ones actually used.
    sdg_related_themes = get_assistant_output(generate(pipe, abstract, sdg_related_themes_message, replacements=replacements, generation_args=generation_args))
    times.append(time.time() - t_0)

    print(index)
    print("Abstract:")
    print(abstract)
    print("\n\n")
    print("Core Themes:")
    print(core_themes)
    print("\n\n")
    print("SDG Related Themes:")
    print(sdg_related_themes)
    print("\n\n")
    print('-'*100)
    print("\n\n")

    result_df.at[index, 'sdg_related_themes'] = sdg_related_themes

# Guard against an empty frame (no timings -> no average).
if times:
    print(f"Average time: {sum(times) / len(times)}")

0
Abstract:
Behavioural economics, experimentalism and the marketization of development
Using market-based pro-poor development policy in the global South as an example, this paper engages with the rise of behaviourism and experimentalism as a challenge to the neoclassical orthodoxy and the more recent transformation into an influential policy script. After charting the rise of behavioural economics and discussing the key conceptual building blocks of the emerging behavioural mainstream in economics, the paper turns to the marketization of anti-poverty policy in the global South. Based on an analysis of policy documents, project reports and academic interventions, it is argued that the behavioural approach to poverty shifts the focus from the market to the market subject and engages in often thinly veiled attempts at behavioural engineering. This is achieved with the combined work of behavioural economic knowledge and socio-technical market devices.



Core Themes:

The scientific abst

This scientific abstract directly relates to the following SDGs:

SDG 1 - No Poverty: The abstract discusses the integration of behavioral economics and experimentalism into pro-poor development policies in the global South. This integrative approach has the potential to help alleviate poverty by providing new insights into the behavioral aspects of poverty and developing more effective policies to address it.

SDG 4 - Quality Education: The abstract mentions the analysis of academic interventions, which indicates that the behavioral approach to poverty is shifting the focus from the market to the individual as a subject of behavioral engineering. This highlights the importance of education in understanding and addressing poverty, as individuals with the right knowledge and skills are better equipped to improve their economic situation.

SDG 17 - Partnerships for the Goals: The abstract mentions the combined work of behavioral economic knowledge and socio-technical market devices in ac

1
Abstract:
On the psychology of poverty
Poverty remains one of the most pressing problems facing the world; the mechanisms through which poverty arises and perpetuates itself, however, are not well understood. Here, we examine the evidence for the hypothesis that poverty may have particular psychological consequences that can lead to economic behaviors that make it difficult to escape poverty. The evidence indicates that poverty causes stress and negative affective states which in turn may lead to short-sighted and risk-averse decision-making, possibly by limiting attention and favoring habitual behaviors at the expense of goal-directed ones. Together, these relationships may constitute a feedback loop that contributes to the perpetuation of poverty. We conclude by pointing toward specific gaps in our knowledge and outlining poverty alleviation programs that this mechanism suggests.



Core Themes:

The scientific abstract titled "On the psychology of poverty" explores the understudie

In [30]:
result_df.head()

Unnamed: 0,id,abstract,expected_sdg,core_themes,sdg_related_themes,similarity_search_relevance,merged_text,most_relevant_sdg,all_relevant_sdgs
0,oai:www.zora.uzh.ch:121514,"Behavioural economics, experimentalism and the...",1,\nThe scientific abstract discusses the integr...,\nThis scientific abstract directly relates to...,,,,
1,oai:www.zora.uzh.ch:99546,On the psychology of poverty\nPoverty remains ...,1,"\nThe scientific abstract titled ""On the psych...",\nThis scientific abstract has direct relevanc...,,,,
2,oai:www.zora.uzh.ch:160626,"Influence of temperature, humidity duration an...",2,\nThe scientific abstract explores the effects...,\nThis scientific abstract has direct relevanc...,,,,
3,oai:www.zora.uzh.ch:166251,A global meta-analysis of yield stability in o...,2,\nThe scientific abstract discusses the issue ...,\nThis scientific abstract directly relates to...,,,,
4,oai:www.zora.uzh.ch:137320,"Lemon technologies and adoption: measurement, ...",2,\nThe scientific abstract discusses the topics...,\nThis scientific abstract is directly relevan...,,,,


In [31]:
result_df.to_csv(f"{results_path}/{file_name}_result_{date}.csv", index=False)

In [22]:
result_df = pd.read_csv(f"{results_path}/{file_name}_result_{date}.csv")

# Similarity Search Relevance

In [15]:
# Deep copy so the per-row writes to replacements['user'] in this stage do not leak into
# replacements_default (a shallow .copy() shares the nested mappings).
replacements = copy.deepcopy(replacements_default)

In [16]:
# Prompt template for the similarity-search relevance stage (trusted local pickle).
# Opened in a context manager so the file handle is closed after loading.
with open("../data/prompts/retrieval_relevance_extraction_core_themes.pickle", "rb") as prompt_file:
    similarity_search_relevance_message = pickle.load(prompt_file)

In [17]:
# Labelled reference corpus handed to SDGAbstractsAnalyzer: two CSVs stacked row-wise.
gold_label_data_df = pd.read_csv('../data/z_train.csv')
osdg_gold_label_data_df = pd.read_csv('../data/o.csv')

# NOTE(review): concat keeps each file's own index, so index labels repeat in the combined
# frame — confirm SDGAbstractsAnalyzer does not rely on unique index labels.
gold_label_data_df = pd.concat([gold_label_data_df, osdg_gold_label_data_df])
print(gold_label_data_df.shape)

(26339, 24)


In [18]:
from src.DSRE import SDGAbstractsAnalyzer

In [19]:
generation_args = generation_args_default.copy()
generation_args['max_new_tokens'] = 512

In [20]:
abstract_analyzer = SDGAbstractsAnalyzer(gold_label_data_df)

Loaded gold embeddings from file


In [23]:
# Stage 3: judge relevance of the SDGs suggested by the nearest labelled abstracts / centroid.
times = []

# A column that was empty when the checkpoint CSV was read comes back as float64 (all NaN).
# Writing strings into it is deprecated in recent pandas, so make it an object column first.
result_df['similarity_search_relevance'] = result_df['similarity_search_relevance'].astype('object')

for index, row in result_df.iterrows():
    abstract = row['abstract']
    core_themes = row['core_themes']
    replacements['user']['Core_Themes'] = core_themes

    closest_abstracts, closest_sdg = abstract_analyzer.find_closest_abstracts_and_sdg(abstract)
    # Blank out the centroid SDG when the nearest abstracts already carry it
    # (presumably to avoid listing the same SDG twice in the prompt).
    if closest_sdg in closest_abstracts['sdg'].tolist():
        closest_sdg = ""
    replacements['user']['closest_sdg_centroid'] = closest_sdg
    replacements['user']['closest_abstract_sdgs'] = abstract_analyzer.closest_abstract_sdg_as_string(closest_abstracts)

    t_0 = time.time()
    similarity_search_relevance = get_assistant_output(generate(pipe, abstract, similarity_search_relevance_message, replacements=replacements, generation_args=generation_args))
    times.append(time.time() - t_0)

    print(index)
    print("Abstract:")
    print(abstract)
    print("\n\n")
    print("Core Themes:")
    print(core_themes)
    print("\n\n")
    print("Extracted SDGs:")
    print(closest_abstracts['sdg'].tolist() + [closest_sdg])
    print("\n\n")

    print(similarity_search_relevance)
    print("\n\n")
    print('-'*100)
    print("\n\n")

    result_df.at[index, 'similarity_search_relevance'] = similarity_search_relevance

# Guard against an empty frame (no timings -> no average).
if times:
    print(f"Average time: {sum(times) / len(times)}")

0
Abstract:
Behavioural economics, experimentalism and the marketization of development
Using market-based pro-poor development policy in the global South as an example, this paper engages with the rise of behaviourism and experimentalism as a challenge to the neoclassical orthodoxy and the more recent transformation into an influential policy script. After charting the rise of behavioural economics and discussing the key conceptual building blocks of the emerging behavioural mainstream in economics, the paper turns to the marketization of anti-poverty policy in the global South. Based on an analysis of policy documents, project reports and academic interventions, it is argued that the behavioural approach to poverty shifts the focus from the market to the market subject and engages in often thinly veiled attempts at behavioural engineering. This is achieved with the combined work of behavioural economic knowledge and socio-technical market devices.



Core Themes:

The scientific abst

This abstract directly contributes to the following SDGs:

1. No Poverty (SDG 1): The abstract explores the use of market-based pro-poor development policies in the global South, with a focus on behavioral economics and experimentalism. These approaches aim to alleviate poverty by encouraging pro-poor economic behavior among individuals through market-based interventions.

2. Zero Hunger (SDG 2): While not explicitly mentioned in the abstract, the use of market-based policies to alleviate poverty could also contribute to SDG 2 by reducing hunger and malnutrition among the poor population.

8. Decent Work and Economic Growth (SDG 8): The abstract discusses the rise of behavioral economics and experimentalism as a challenge to traditional neoclassical orthodoxy. This shift could contribute to SDG 8 by promoting innovative and effective economic policies that promote economic growth and decent work opportunities.

9. Industry, Innovation and Infrastructure (SDG 9): The use of socio-techni

1
Abstract:
On the psychology of poverty
Poverty remains one of the most pressing problems facing the world; the mechanisms through which poverty arises and perpetuates itself, however, are not well understood. Here, we examine the evidence for the hypothesis that poverty may have particular psychological consequences that can lead to economic behaviors that make it difficult to escape poverty. The evidence indicates that poverty causes stress and negative affective states which in turn may lead to short-sighted and risk-averse decision-making, possibly by limiting attention and favoring habitual behaviors at the expense of goal-directed ones. Together, these relationships may constitute a feedback loop that contributes to the perpetuation of poverty. We conclude by pointing toward specific gaps in our knowledge and outlining poverty alleviation programs that this mechanism suggests.



Core Themes:

The scientific abstract titled "On the psychology of poverty" explores the understudie

In [24]:
result_df.head()

Unnamed: 0,id,abstract,expected_sdg,core_themes,sdg_related_themes,similarity_search_relevance,merged_text,most_relevant_sdg,all_relevant_sdgs
0,oai:www.zora.uzh.ch:121514,"Behavioural economics, experimentalism and the...",1,\nThe scientific abstract discusses the integr...,\nThis scientific abstract directly relates to...,\nThis abstract directly contributes to the fo...,,,
1,oai:www.zora.uzh.ch:99546,On the psychology of poverty\nPoverty remains ...,1,"\nThe scientific abstract titled ""On the psych...",\nThis scientific abstract has direct relevanc...,\nThis scientific abstract contributes directl...,,,
2,oai:www.zora.uzh.ch:160626,"Influence of temperature, humidity duration an...",2,\nThe scientific abstract explores the effects...,\nThis scientific abstract has direct relevanc...,\nThe scientific abstract directly contributes...,,,
3,oai:www.zora.uzh.ch:166251,A global meta-analysis of yield stability in o...,2,\nThe scientific abstract discusses the issue ...,\nThis scientific abstract directly relates to...,\nThe scientific abstract directly contributes...,,,
4,oai:www.zora.uzh.ch:137320,"Lemon technologies and adoption: measurement, ...",2,\nThe scientific abstract discusses the topics...,\nThis scientific abstract is directly relevan...,\nThis scientific abstract contributes directl...,,,


In [25]:
result_df.to_csv(f"{results_path}/{file_name}_result_{date}.csv", index=False)

In [26]:
result_df = pd.read_csv(f"{results_path}/{file_name}_result_{date}.csv")

# Merge

In [27]:
# Deep copy so the per-row writes to replacements['user'] in this stage do not leak into
# replacements_default (a shallow .copy() shares the nested mappings).
replacements = copy.deepcopy(replacements_default)
generation_args = generation_args_default.copy()

In [28]:
# Prompt template for the merge stage (trusted local pickle).
# Opened in a context manager so the file handle is closed after loading.
with open("../data/prompts/second_merge_prompt_similarity.pickle", "rb") as prompt_file:
    merge_message = pickle.load(prompt_file)

In [29]:
generation_args['max_new_tokens'] = 768

In [30]:
# Stage 4: merge core themes, SDG-related themes and retrieval relevance into one text.
times = []

# A column that was empty when the checkpoint CSV was read comes back as float64 (all NaN).
# Writing strings into it is deprecated in recent pandas, so make it an object column first.
result_df['merged_text'] = result_df['merged_text'].astype('object')

for index, row in result_df.iterrows():
    abstract = row['abstract']
    core_themes = row['core_themes']
    sdg_related_themes = row['sdg_related_themes']
    similarity_search_relevance = row['similarity_search_relevance']

    replacements['user']['Core_Themes'] = core_themes
    replacements['user']['SDG_Concepts'] = sdg_related_themes
    replacements['user']['Retrieval'] = similarity_search_relevance

    t_0 = time.time()
    merged_text = get_assistant_output(generate(pipe, abstract, merge_message, replacements=replacements, generation_args=generation_args))
    times.append(time.time() - t_0)

    print(index)
    print("Abstract:")
    print(abstract)
    print("\n\n")
    print("Core Themes:")
    print(core_themes)
    print("\n\n")
    print("SDG Related Themes:")
    print(sdg_related_themes)
    print("\n\n")
    print("Similarity Search Relevance:")
    print(similarity_search_relevance)
    print("\n\n")
    print("Merged Text:")
    print(merged_text)
    print("\n\n")
    print('-'*100)
    print("\n\n")

    result_df.at[index, 'merged_text'] = merged_text

# Guard against an empty frame (no timings -> no average).
if times:
    print(f"Average time: {sum(times) / len(times)}")

0
Abstract:
Behavioural economics, experimentalism and the marketization of development
Using market-based pro-poor development policy in the global South as an example, this paper engages with the rise of behaviourism and experimentalism as a challenge to the neoclassical orthodoxy and the more recent transformation into an influential policy script. After charting the rise of behavioural economics and discussing the key conceptual building blocks of the emerging behavioural mainstream in economics, the paper turns to the marketization of anti-poverty policy in the global South. Based on an analysis of policy documents, project reports and academic interventions, it is argued that the behavioural approach to poverty shifts the focus from the market to the market subject and engages in often thinly veiled attempts at behavioural engineering. This is achieved with the combined work of behavioural economic knowledge and socio-technical market devices.



Core Themes:

The scientific abst

The scientific abstract explores the integration of behavioral economics and experimentalism into pro-poor development policies in the global South, as a challenge to traditional neoclassical orthodoxy. This approach has the potential to alleviate poverty (SDG 1) by providing new insights into the behavioral aspects of poverty and developing more effective policies. The analysis of academic interventions highlights the importance of education (SDG 4) in understanding and addressing poverty, as individuals with the right knowledge and skills are better equipped to improve their economic situation. Partnerships between academic researchers, policymakers, and market actors are necessary (SDG 17) to develop and implement effective policies to address poverty. While the use of market-based policies to alleviate poverty could also contribute to SDG 2 (Zero Hunger) and SDG 9 (Industry, Innovation, and Infrastructure), the abstract does not directly address how the behavioral approach to pover

1
Abstract:
On the psychology of poverty
Poverty remains one of the most pressing problems facing the world; the mechanisms through which poverty arises and perpetuates itself, however, are not well understood. Here, we examine the evidence for the hypothesis that poverty may have particular psychological consequences that can lead to economic behaviors that make it difficult to escape poverty. The evidence indicates that poverty causes stress and negative affective states which in turn may lead to short-sighted and risk-averse decision-making, possibly by limiting attention and favoring habitual behaviors at the expense of goal-directed ones. Together, these relationships may constitute a feedback loop that contributes to the perpetuation of poverty. We conclude by pointing toward specific gaps in our knowledge and outlining poverty alleviation programs that this mechanism suggests.



Core Themes:

The scientific abstract titled "On the psychology of poverty" explores the understudie

In [31]:
result_df.head()

Unnamed: 0,id,abstract,expected_sdg,core_themes,sdg_related_themes,similarity_search_relevance,merged_text,most_relevant_sdg,all_relevant_sdgs
0,oai:www.zora.uzh.ch:121514,"Behavioural economics, experimentalism and the...",1,\nThe scientific abstract discusses the integr...,\nThis scientific abstract directly relates to...,\nThis abstract directly contributes to the fo...,\nThe scientific abstract explores the integra...,,
1,oai:www.zora.uzh.ch:99546,On the psychology of poverty\nPoverty remains ...,1,"\nThe scientific abstract titled ""On the psych...",\nThis scientific abstract has direct relevanc...,\nThis scientific abstract contributes directl...,"\nThe scientific abstract ""On the psychology o...",,
2,oai:www.zora.uzh.ch:160626,"Influence of temperature, humidity duration an...",2,\nThe scientific abstract explores the effects...,\nThis scientific abstract has direct relevanc...,\nThe scientific abstract directly contributes...,\nThe scientific abstract explores the effects...,,
3,oai:www.zora.uzh.ch:166251,A global meta-analysis of yield stability in o...,2,\nThe scientific abstract discusses the issue ...,\nThis scientific abstract directly relates to...,\nThe scientific abstract directly contributes...,\nThis scientific abstract focuses on enhancin...,,
4,oai:www.zora.uzh.ch:137320,"Lemon technologies and adoption: measurement, ...",2,\nThe scientific abstract discusses the topics...,\nThis scientific abstract is directly relevan...,\nThis scientific abstract contributes directl...,\nThe scientific abstract explores the low ado...,,


In [32]:
result_df.to_csv(f"{results_path}/{file_name}_result_{date}.csv", index=False)

In [33]:
result_df = pd.read_csv(f"{results_path}/{file_name}_result_{date}.csv")

# Defect Analysis

In [34]:
# Deep copy so the per-row writes to replacements['user'] in this stage do not leak into
# replacements_default (a shallow .copy() shares the nested mappings).
replacements = copy.deepcopy(replacements_default)
generation_args = generation_args_default.copy()

In [35]:
# Prompt template for the defect-analysis stage (trusted local pickle).
# Opened in a context manager so the file handle is closed after loading.
with open("../data/prompts/defect_analysis_prompt.pickle", "rb") as prompt_file:
    defect_analysis_message = pickle.load(prompt_file)

In [36]:
generation_args['max_new_tokens'] = 512

In [37]:
# Stage 5: let the model critique the merged text of every abstract.
times = []

# Create the output column explicitly instead of relying on .at to add it, and keep it as
# an object column: if the cell is re-run after a CSV round-trip an empty column would be
# float64, and writing strings into that is deprecated in recent pandas.
if 'defect_analysis' not in result_df.columns:
    result_df['defect_analysis'] = None
result_df['defect_analysis'] = result_df['defect_analysis'].astype('object')

for index, row in result_df.iterrows():
    abstract = row['abstract']
    response_text = row['merged_text']

    replacements['user']['Response_Text'] = response_text

    t_0 = time.time()
    defect_analysis = get_assistant_output(generate(pipe, abstract, defect_analysis_message, replacements=replacements, generation_args=generation_args))
    times.append(time.time() - t_0)

    print(index)
    print("Abstract:")
    print(abstract)
    print("\n\n")
    print("Response Text:")
    print(response_text)
    print("\n\n")
    print("Defect analysis:")
    print(defect_analysis)
    print("\n\n")
    print('-'*100)
    print("\n\n")

    result_df.at[index, 'defect_analysis'] = defect_analysis

# Guard against an empty frame (no timings -> no average).
if times:
    print(f"Average time: {sum(times) / len(times)}")

0
Abstract:
Behavioural economics, experimentalism and the marketization of development
Using market-based pro-poor development policy in the global South as an example, this paper engages with the rise of behaviourism and experimentalism as a challenge to the neoclassical orthodoxy and the more recent transformation into an influential policy script. After charting the rise of behavioural economics and discussing the key conceptual building blocks of the emerging behavioural mainstream in economics, the paper turns to the marketization of anti-poverty policy in the global South. Based on an analysis of policy documents, project reports and academic interventions, it is argued that the behavioural approach to poverty shifts the focus from the market to the market subject and engages in often thinly veiled attempts at behavioural engineering. This is achieved with the combined work of behavioural economic knowledge and socio-technical market devices.



Response Text:

The scientific ab

In [38]:
result_df.to_csv(f"{results_path}/{file_name}_result_{date}.csv", index=False)

In [39]:
result_df = pd.read_csv(f"{results_path}/{file_name}_result_{date}.csv")


# Guided Optimization

In [40]:
# Deep copy so the per-row writes to replacements['user'] in this stage do not leak into
# replacements_default (a shallow .copy() shares the nested mappings).
replacements = copy.deepcopy(replacements_default)
generation_args = generation_args_default.copy()

In [41]:
# Prompt template for the guided-optimization stage (trusted local pickle).
# Opened in a context manager so the file handle is closed after loading.
with open("../data/prompts/guided_optimization_prompt.pickle", "rb") as prompt_file:
    guided_optimization_message = pickle.load(prompt_file)

In [42]:
generation_args['max_new_tokens'] = 512

In [43]:
# Stage 6: rewrite the merged text using the issues found by the defect analysis.
times = []

# Create the output column explicitly instead of relying on .at to add it, and keep it as
# an object column: if the cell is re-run after a CSV round-trip an empty column would be
# float64, and writing strings into that is deprecated in recent pandas.
if 'guided_optimization' not in result_df.columns:
    result_df['guided_optimization'] = None
result_df['guided_optimization'] = result_df['guided_optimization'].astype('object')

for index, row in result_df.iterrows():
    abstract = row['abstract']
    defective_response_text = row['merged_text']
    defect_analysis_text = row['defect_analysis']

    replacements['user']['Defective_Response_Text'] = defective_response_text
    replacements['user']['Identified_Issues_Text'] = defect_analysis_text

    t_0 = time.time()
    guided_optimization = get_assistant_output(generate(pipe, abstract, guided_optimization_message, replacements=replacements, generation_args=generation_args))
    times.append(time.time() - t_0)

    print(index)
    print("Abstract:")
    print(abstract)
    print("\n\n")
    print("Response Text:")
    print(defective_response_text)
    print("\n\n")
    print("Defect analysis:")
    print(defect_analysis_text)
    print("\n\n")
    print("Guided Optimization:")
    print(guided_optimization)
    print("\n\n")
    print('-'*100)
    print("\n\n")

    result_df.at[index, 'guided_optimization'] = guided_optimization

# Guard against an empty frame (no timings -> no average).
if times:
    print(f"Average time: {sum(times) / len(times)}")

0
Abstract:
Behavioural economics, experimentalism and the marketization of development
Using market-based pro-poor development policy in the global South as an example, this paper engages with the rise of behaviourism and experimentalism as a challenge to the neoclassical orthodoxy and the more recent transformation into an influential policy script. After charting the rise of behavioural economics and discussing the key conceptual building blocks of the emerging behavioural mainstream in economics, the paper turns to the marketization of anti-poverty policy in the global South. Based on an analysis of policy documents, project reports and academic interventions, it is argued that the behavioural approach to poverty shifts the focus from the market to the market subject and engages in often thinly veiled attempts at behavioural engineering. This is achieved with the combined work of behavioural economic knowledge and socio-technical market devices.



Response Text:

The scientific ab

In [44]:
result_df.to_csv(f"{results_path}/{file_name}_result_{date}.csv", index=False)

In [17]:
result_df = pd.read_csv(f"{results_path}/{file_name}_result_{date}.csv")

# Most important SDG Extraction

In [54]:
from peft import PeftModel

In [55]:
trained_on = "extraction_single"

In [56]:
def set_adapter(pipe, base_model, trained_on):
    """
    Load a trained PEFT adapter on top of `base_model` and install it in `pipe`.

    The adapter is read from
    "../trained_adapters/<base_model_name>/checkpoint-<trained_on>".
    Note that `base_model_name` is not a parameter: it is taken from the
    notebook's global namespace and must be defined before this is called.

    Parameters:
    pipe: Pipeline object whose `model` attribute is replaced.
    base_model: Model passed to `PeftModel.from_pretrained` as the base.
    trained_on (str): Suffix of the checkpoint directory to load.

    Returns:
    The same `pipe` object, now pointing at the adapter-wrapped model.
    """
    checkpoint_dir = f"../trained_adapters/{base_model_name}/checkpoint-{trained_on}"
    pipe.model = PeftModel.from_pretrained(base_model, checkpoint_dir)
    return pipe

In [57]:
# Swap the pipeline's model for the adapter selected by `trained_on` ("extraction_single" here).
pipe = set_adapter(pipe, base_model, trained_on)

In [60]:
import copy

# Prompt messages for the fine-tuned single-SDG extraction step.
# NOTE: pickle executes arbitrary code while loading — only unpickle prompt files you created yourself.
# The context manager closes the file handle, which a bare open() inside pickle.load() never did.
with open("../data/prompts/single_sdg_finetuned.pickle", "rb") as prompt_file:
    result_message_single_sdg = pickle.load(prompt_file)

# Only top-level keys of generation_args are overwritten below, so a shallow copy is enough here.
generation_args = generation_args_default.copy()
# The extraction loop writes to replacements['user'][...]. A shallow .copy() would share that
# nested mapping with replacements_default and leak per-row values into the defaults, so take
# a deep copy (assumes replacements_default is a plain nested dict — confirm).
replacements = copy.deepcopy(replacements_default)

# Narrow sampling settings for the extraction step.
generation_args['temperature'] = 0.2
generation_args['top_k'] = 30
generation_args['top_p'] = 0.20

# Presumably the answer is only a short SDG identifier, hence the tight token budget.
generation_args['max_new_tokens'] = 8

In [61]:
# Query the model once per row for the most relevant SDG and store the raw answer in
# result_df['most_relevant_sdg']; the generation time of every row is collected in `times`.
times = []

for index, row in result_df.iterrows():
    abstract = row['abstract']
    final_classification = row['guided_optimization']

    # Hand this row's guided-optimization text to generate() through the replacements mapping
    # (presumably substituted into the prompt — confirm against generate()).
    replacements['user']['DSRE_Response'] = final_classification

    # perf_counter() is a monotonic, high-resolution clock: unlike time.time() differences,
    # the measured duration cannot be skewed by system clock adjustments.
    t_0 = time.perf_counter()
    most_relevant_sdg = get_assistant_output(generate(pipe, abstract, result_message_single_sdg, replacements=replacements, generation_args=generation_args))
    times.append(time.perf_counter() - t_0)

    print(index)
    print("Abstract:")
    print(abstract)
    print("\n\n")
    print("Final Classification:")
    print(final_classification)
    print("\n\n")
    print("Most relevant SDG:")
    print(most_relevant_sdg)
    print("\n")
    print("Expected SDG:")
    print(row['expected_sdg'])
    print("\n\n")
    print('-'*100)
    print("\n\n")

    result_df.at[index, 'most_relevant_sdg'] = most_relevant_sdg

# An empty result_df leaves `times` empty; avoid the ZeroDivisionError in that case.
if times:
    print(f"Average time: {sum(times) / len(times)}")
else:
    print("Average time: n/a (no rows processed)")

0
Abstract:
Behavioural economics, experimentalism and the marketization of development
Using market-based pro-poor development policy in the global South as an example, this paper engages with the rise of behaviourism and experimentalism as a challenge to the neoclassical orthodoxy and the more recent transformation into an influential policy script. After charting the rise of behavioural economics and discussing the key conceptual building blocks of the emerging behavioural mainstream in economics, the paper turns to the marketization of anti-poverty policy in the global South. Based on an analysis of policy documents, project reports and academic interventions, it is argued that the behavioural approach to poverty shifts the focus from the market to the market subject and engages in often thinly veiled attempts at behavioural engineering. This is achieved with the combined work of behavioural economic knowledge and socio-technical market devices.



Final Classification:

Scientific

In [62]:
# Preview the first rows, now including the 'most_relevant_sdg' column filled by the loop above.
result_df.head()

Unnamed: 0,id,abstract,expected_sdg,core_themes,sdg_related_themes,similarity_search_relevance,merged_text,most_relevant_sdg,all_relevant_sdgs,defect_analysis,guided_optimization
0,oai:www.zora.uzh.ch:121514,"Behavioural economics, experimentalism and the...",1,\nThe scientific abstract discusses the integr...,\nThis scientific abstract directly relates to...,\nThis abstract directly contributes to the fo...,\nThe scientific abstract explores the integra...,\nSDG 1.0.0,"\n1, 4, 8","\nBased on the provisional classification, the...","\nScientific Abstract and Title:\n""Behavioural..."
1,oai:www.zora.uzh.ch:99546,On the psychology of poverty\nPoverty remains ...,1,"\nThe scientific abstract titled ""On the psych...",\nThis scientific abstract has direct relevanc...,\nThis scientific abstract contributes directl...,"\nThe scientific abstract ""On the psychology o...",\nSDG 1.0.0,"\n1, 4, 2, 3, 8, 6, 7, 10, 13, 15, 16",\nThe provisional classification suggests that...,\nRevised Classification:\n\nThis scientific a...
2,oai:www.zora.uzh.ch:160626,"Influence of temperature, humidity duration an...",2,\nThe scientific abstract explores the effects...,\nThis scientific abstract has direct relevanc...,\nThe scientific abstract directly contributes...,\nThe scientific abstract explores the effects...,\nSDG 2.0.0,"\n2, 3, 12, 13, 8, 14, 15 (in descending order...",\nThe provisional classification for this scie...,\nRevised Classification:\n\nThis scientific a...
3,oai:www.zora.uzh.ch:166251,A global meta-analysis of yield stability in o...,2,\nThe scientific abstract discusses the issue ...,\nThis scientific abstract directly relates to...,\nThe scientific abstract directly contributes...,\nThis scientific abstract focuses on enhancin...,\nSDG 2.0.0,"\n2, 12, 15, 16",\nThe provisional classification aligns well w...,"\nRevised Classification:\n""\nThis scientific ..."
4,oai:www.zora.uzh.ch:137320,"Lemon technologies and adoption: measurement, ...",2,\nThe scientific abstract discusses the topics...,\nThis scientific abstract is directly relevan...,\nThis scientific abstract contributes directl...,\nThe scientific abstract explores the low ado...,\nSDG 1.0.0,"\n1, 2, 15",\nThe provisional classification suggests that...,"\nRevised Classification:\n""\nThis research co..."


In [63]:
# Checkpoint the results (including 'most_relevant_sdg') to the same CSV file, without the row index.
result_df.to_csv(f"{results_path}/{file_name}_result_{date}.csv", index=False)

In [64]:
# Reload the checkpoint from disk so the next section can start from the saved CSV.
result_df = pd.read_csv(f"{results_path}/{file_name}_result_{date}.csv")

# All relevant SDGs Extraction

In [65]:
# Checkpoint suffix of the multi-SDG adapter; set_adapter() resolves it to ".../checkpoint-<trained_on>".
trained_on = "extraction_multi"

In [66]:
# Swap the pipeline's model for the adapter selected by `trained_on` ("extraction_multi" here).
# NOTE(review): the same base_model object was already passed to PeftModel.from_pretrained for the
# "extraction_single" adapter — confirm that loading a second adapter onto it behaves as intended.
pipe = set_adapter(pipe, base_model, trained_on)

In [67]:
import copy

# Prompt messages for the fine-tuned multi-SDG extraction step.
# NOTE: pickle executes arbitrary code while loading — only unpickle prompt files you created yourself.
# The context manager closes the file handle, which a bare open() inside pickle.load() never did.
with open("../data/prompts/all_sdg_finetuned.pickle", "rb") as prompt_file:
    result_message_all_sdg = pickle.load(prompt_file)

# Only top-level keys of generation_args are overwritten below, so a shallow copy is enough here.
generation_args = generation_args_default.copy()
# Re-initialise `replacements` as the single-SDG section does, instead of relying on whatever
# an earlier cell left behind. Deep copy, because the loop writes to replacements['user'][...]
# (assumes replacements_default is a plain nested dict — confirm).
replacements = copy.deepcopy(replacements_default)

# Narrow sampling settings for the extraction step; max_new_tokens keeps its default here.
generation_args['temperature'] = 0.2
generation_args['top_k'] = 30
generation_args['top_p'] = 0.20

In [68]:
# Query the model once per row for all relevant SDGs and store the raw answer in
# result_df['all_relevant_sdgs']; the generation time of every row is collected in `times`.
times = []

for index, row in result_df.iterrows():
    abstract = row['abstract']
    final_classification = row['guided_optimization']

    # Hand this row's guided-optimization text to generate() through the replacements mapping
    # (presumably substituted into the prompt — confirm against generate()).
    replacements['user']['DSRE_Response'] = final_classification

    # perf_counter() is a monotonic, high-resolution clock: unlike time.time() differences,
    # the measured duration cannot be skewed by system clock adjustments.
    t_0 = time.perf_counter()
    all_relevant_sdgs = get_assistant_output(generate(pipe, abstract, result_message_all_sdg, replacements=replacements, generation_args=generation_args))
    times.append(time.perf_counter() - t_0)

    print(index)
    print("Abstract:")
    print(abstract)
    print("\n\n")
    print("Final Classification:")
    print(final_classification)
    print("\n\n")
    print("All relevant SDGs:")
    print(all_relevant_sdgs)
    print("\n")
    print("Expected SDG:")
    print(row['expected_sdg'])
    print("\n\n")
    print('-'*100)
    print("\n\n")

    result_df.at[index, 'all_relevant_sdgs'] = all_relevant_sdgs

# An empty result_df leaves `times` empty; avoid the ZeroDivisionError in that case.
if times:
    print(f"Average time: {sum(times) / len(times)}")
else:
    print("Average time: n/a (no rows processed)")

0
Abstract:
Behavioural economics, experimentalism and the marketization of development
Using market-based pro-poor development policy in the global South as an example, this paper engages with the rise of behaviourism and experimentalism as a challenge to the neoclassical orthodoxy and the more recent transformation into an influential policy script. After charting the rise of behavioural economics and discussing the key conceptual building blocks of the emerging behavioural mainstream in economics, the paper turns to the marketization of anti-poverty policy in the global South. Based on an analysis of policy documents, project reports and academic interventions, it is argued that the behavioural approach to poverty shifts the focus from the market to the market subject and engages in often thinly veiled attempts at behavioural engineering. This is achieved with the combined work of behavioural economic knowledge and socio-technical market devices.



Final Classification:

Scientific

In [69]:
# Preview the first rows, now including the 'all_relevant_sdgs' column filled by the loop above.
result_df.head()

Unnamed: 0,id,abstract,expected_sdg,core_themes,sdg_related_themes,similarity_search_relevance,merged_text,most_relevant_sdg,all_relevant_sdgs,defect_analysis,guided_optimization
0,oai:www.zora.uzh.ch:121514,"Behavioural economics, experimentalism and the...",1,\nThe scientific abstract discusses the integr...,\nThis scientific abstract directly relates to...,\nThis abstract directly contributes to the fo...,\nThe scientific abstract explores the integra...,\nSDG 1.0.0,"\n1, 4, 8","\nBased on the provisional classification, the...","\nScientific Abstract and Title:\n""Behavioural..."
1,oai:www.zora.uzh.ch:99546,On the psychology of poverty\nPoverty remains ...,1,"\nThe scientific abstract titled ""On the psych...",\nThis scientific abstract has direct relevanc...,\nThis scientific abstract contributes directl...,"\nThe scientific abstract ""On the psychology o...",\nSDG 1.0.0,"\n1, 4, 2, 3, 8, 6, 7, 10, 13, 15, 16",\nThe provisional classification suggests that...,\nRevised Classification:\n\nThis scientific a...
2,oai:www.zora.uzh.ch:160626,"Influence of temperature, humidity duration an...",2,\nThe scientific abstract explores the effects...,\nThis scientific abstract has direct relevanc...,\nThe scientific abstract directly contributes...,\nThe scientific abstract explores the effects...,\nSDG 2.0.0,"\n2, 3, 12, 13, 8, 14, 15 (in descending order...",\nThe provisional classification for this scie...,\nRevised Classification:\n\nThis scientific a...
3,oai:www.zora.uzh.ch:166251,A global meta-analysis of yield stability in o...,2,\nThe scientific abstract discusses the issue ...,\nThis scientific abstract directly relates to...,\nThe scientific abstract directly contributes...,\nThis scientific abstract focuses on enhancin...,\nSDG 2.0.0,"\n2, 12, 15, 16",\nThe provisional classification aligns well w...,"\nRevised Classification:\n""\nThis scientific ..."
4,oai:www.zora.uzh.ch:137320,"Lemon technologies and adoption: measurement, ...",2,\nThe scientific abstract discusses the topics...,\nThis scientific abstract is directly relevan...,\nThis scientific abstract contributes directl...,\nThe scientific abstract explores the low ado...,\nSDG 1.0.0,"\n1, 2, 15",\nThe provisional classification suggests that...,"\nRevised Classification:\n""\nThis research co..."


In [70]:
# Checkpoint the results (including 'all_relevant_sdgs') to the same CSV file, without the row index.
result_df.to_csv(f"{results_path}/{file_name}_result_{date}.csv", index=False)

In [71]:
# Reload the checkpoint from disk; the printing and evaluation cells below work on this frame.
result_df = pd.read_csv(f"{results_path}/{file_name}_result_{date}.csv")

# Print all results

In [72]:
# Dump every column of every row as "<column>:" followed by its value, with a
# 100-dash separator line after each row.
for _, record in result_df.iterrows():
    for column, value in record.items():
        # sep="\n" reproduces three consecutive print() calls in a single call.
        print(f"{column}:", value, "\n\n", sep="\n")
    print('-'*100)

id:
oai:www.zora.uzh.ch:121514



abstract:
Behavioural economics, experimentalism and the marketization of development
Using market-based pro-poor development policy in the global South as an example, this paper engages with the rise of behaviourism and experimentalism as a challenge to the neoclassical orthodoxy and the more recent transformation into an influential policy script. After charting the rise of behavioural economics and discussing the key conceptual building blocks of the emerging behavioural mainstream in economics, the paper turns to the marketization of anti-poverty policy in the global South. Based on an analysis of policy documents, project reports and academic interventions, it is argued that the behavioural approach to poverty shifts the focus from the market to the market subject and engages in often thinly veiled attempts at behavioural engineering. This is achieved with the combined work of behavioural economic knowledge and socio-technical market devices.



e

# Accuracy

In [73]:
import re


def extract_sdg_nr(text):
    """
    Extract the first SDG number mentioned in a model answer.

    A number is a standalone 0-17, optionally followed by ".0"
    (so "SDG 1.0.0" yields 1 and "13" yields 13).

    Parameters:
    text (str or any): Raw answer text. Non-string values (e.g. NaN read back
        from a CSV, or a numeric cell) are converted with str() first.

    Returns:
    int: The first number found, or 0 when the text contains none.
    """
    # Cells that were empty when the results CSV was written come back from pd.read_csv
    # as NaN (a float); re.search would raise TypeError on anything that is not a string.
    if not isinstance(text, str):
        text = str(text)

    match = re.search(r'\b(1[0-7]|[0-9])(\.0)?\b', text)
    if match is None:
        return 0
    # Group 1 is the integer part; the optional ".0" suffix lives in group 2.
    return int(match.group(1))

def extract_all_sdg_nr_from(text):
    """
    Extract every distinct SDG number mentioned in a model answer.

    A number is a standalone 0-17, optionally followed by ".0".

    Parameters:
    text (str or any): Raw answer text. Non-string values (e.g. NaN read back
        from a CSV) are converted with str() first.

    Returns:
    list of int: The distinct numbers found in ascending order, or [0] when
        the text contains none.
    """
    # Cells that were empty when the results CSV was written come back from pd.read_csv
    # as NaN (a float); re.findall would raise TypeError on anything that is not a string.
    if not isinstance(text, str):
        text = str(text)

    # With two groups in the pattern, findall yields (number, optional ".0") tuples.
    found = {int(number) for number, _ in re.findall(r'\b(1[0-7]|[0-9])(\.0)?\b', text)}
    if not found:
        return [0]
    # Sort so the result does not depend on set iteration order.
    return sorted(found)

def check_expected_in_classification(list_of_lists, expected_sdgs):
    """
    Flag, row by row, whether the expected SDG is among the predicted SDGs.

    Parameters:
    list_of_lists (iterable of lists): Predicted SDG numbers per row.
    expected_sdgs (iterable): Expected SDG number per row.

    Returns:
    list of bool: One entry per paired row; pairing stops at the shorter input.
    """
    hits = []
    for expected, predicted in zip(expected_sdgs, list_of_lists):
        hits.append(expected in predicted)
    return hits

In [74]:
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, hamming_loss, jaccard_score, precision_score, recall_score, f1_score

def calc_main_label_metrics(y_true, y_pred):
    """
    Calculate metrics for the main multi-class label.

    Precision, recall and F1 are macro-averaged over the labels; a label
    with no samples to score contributes 0 instead of raising a warning.

    Parameters:
    y_true (list or array): Actual main labels.
    y_pred (list or array): Predicted main labels.

    Returns:
    dict: A dictionary with accuracy, precision, recall, and F1-score.
    """
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average='macro', zero_division=0),
        'recall': recall_score(y_true, y_pred, average='macro', zero_division=0),
        # zero_division=0 matches precision/recall above: same value as the default,
        # but without an UndefinedMetricWarning for labels that are never hit.
        'f1_score': f1_score(y_true, y_pred, average='macro', zero_division=0)
    }
    return metrics

def calc_secondary_labels_metrics(y_true, y_pred, class_count=18):
    """
    Calculate Hamming loss, Jaccard similarity, precision, recall, and F1-score for multi-label data.

    Each entry of y_true is a single expected label; it is turned into a
    one-element label set and compared against the predicted label set of the same row.

    Parameters:
    y_true (Series, array or list): One actual label per row (not a list of lists).
    y_pred (list of lists): Predicted multi-labels, one list per row.
    class_count (int): Number of classes; labels are taken to be 0 .. class_count - 1.

    Returns:
    dict: A dictionary with Hamming loss, Jaccard similarity, precision, recall, and F1-score.
    """
    mlb = MultiLabelBinarizer(classes=range(class_count))
    # Series/arrays are converted through .tolist() (plain Python scalars); plain lists and
    # other iterables, which have no .tolist(), are accepted as they are.
    single_labels = y_true.tolist() if hasattr(y_true, "tolist") else list(y_true)
    # Wrap every expected label in its own list so it can be binarized like the predictions.
    y_true_sets = [[label] for label in single_labels]
    y_true_binary = mlb.fit_transform(y_true_sets)
    y_pred_binary = mlb.transform(y_pred)

    metrics = {
        'hamming_loss': hamming_loss(y_true_binary, y_pred_binary),
        'jaccard_similarity': jaccard_score(y_true_binary, y_pred_binary, average='samples'),
        'precision': precision_score(y_true_binary, y_pred_binary, average='micro', zero_division=0),
        'recall': recall_score(y_true_binary, y_pred_binary, average='micro', zero_division=0),
        # zero_division=0 matches precision/recall above: same value as the default,
        # but without an UndefinedMetricWarning when there is nothing to score.
        'f1_score': f1_score(y_true_binary, y_pred_binary, average='micro', zero_division=0)
    }
    return metrics

In [75]:
# Parse the raw model answers into numeric SDG labels.
# Series.map applies the parser to one column directly; the previous row-wise
# DataFrame.apply(axis=1) built a full row object per call and, on an empty frame,
# returned a DataFrame (which has no .tolist()).
single_sdg = result_df['most_relevant_sdg'].map(extract_sdg_nr).tolist()
all_sdg = result_df['all_relevant_sdgs'].map(extract_all_sdg_nr_from).tolist()
# Ground-truth labels stay a Series.
expected_sdg = result_df['expected_sdg']

In [76]:
# Single-label evaluation: the expected SDG against the parsed "most relevant SDG" prediction.
main_label_metrics = calc_main_label_metrics(expected_sdg, single_sdg)
print(main_label_metrics)

{'accuracy': 0.5263157894736842, 'precision': 0.5401826643176124, 'recall': 0.47098115723435413, 'f1_score': 0.4380018674136321}


In [77]:
# Multi-label evaluation: the single expected SDG per row against the full list of parsed SDGs.
secondary_label_metrics = calc_secondary_labels_metrics(expected_sdg, all_sdg)
print(secondary_label_metrics)

{'hamming_loss': 0.17348927875243664, 'jaccard_similarity': 0.2667911888306625, 'precision': 0.225, 'recall': 0.868421052631579, 'f1_score': 0.3574007220216607}


# Save

In [78]:
# Write the final results frame to the same CSV path used by the earlier checkpoints (row index omitted).
result_df.to_csv(f"{results_path}/{file_name}_result_{date}.csv", index=False)