# TFM: Análisis de Políticas de Sostenibilidad mediante técnicas de Argumentación Computacional

## Detección de Argumentos con Gemma 1B

- ollama serve
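- ollama run gemma3:1b  (nota: el código usa `model_name = "gemma3:4b"`; el modelo descargado en Ollama debe coincidir con `model_name`)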

In [1]:
#%pip install langchain pymupdf openai openpyxl --quiet



In [2]:
from typing import List
from pydantic import BaseModel, Field, ValidationError
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_core.exceptions import OutputParserException
import requests
import json
import re
from openai import OpenAI
import openai
import httpx
import pandas as pd
import numpy as np
import os
import openpyxl

# Directory holding the pre-processed section files (one JSON file per section).
# Built with os.path.join so it resolves on any OS; the trailing "" keeps the
# final separator, so on Windows the value is identical to the previous literal.
process_text_path = os.path.join("..", "Data", "Processed Files (sections)", "")

# Ollama model tag sent with every generation request.
model_name="gemma3:4b"

## Input text processing

In [3]:
# 1. Define your Pydantic schema for output
class ArgumentResponse(BaseModel):
    # Schema the model's JSON reply is parsed into: a single required list of
    # argument strings.
    # NOTE(review): documented with comments instead of a docstring on purpose --
    # presumably a class docstring would be included in the generated schema and
    # change the format instructions sent to the model; confirm before adding one.
    arguments: List[str] = Field(..., description="List of arguments extracted directly from the text.")

# 2. Set up the output parser: it supplies the format instructions embedded in
#    the prompt and parses the model's raw reply into an ArgumentResponse.
pydantic_parser = PydanticOutputParser(pydantic_object=ArgumentResponse)

# 3. Extend text with first sentence from the next page
def extend_pages_with_next_sentence(pages):
    """Return copies of ``pages`` extended with the next page's opening sentence.

    A page can end in the middle of a sentence; appending the text of the
    following page up to its first sentence-ending period lets that sentence
    be read in full.

    Args:
        pages: Sequence of dicts, each with a ``"page"`` and a ``"text"`` key.

    Returns:
        A new list of ``{"page": ..., "text": ...}`` dicts. The input dicts
        are not modified. The last page, and any page whose successor has no
        sentence-ending period, keeps its text unchanged.
    """
    def get_first_sentence(text):
        # DOTALL lets the match run across line breaks, so a sentence that is
        # wrapped over several lines is returned whole rather than only its
        # final line. The lookahead requires whitespace or end-of-text after
        # the period, so a number such as "1.5" does not end the sentence.
        match = re.search(r'.+?\.(?=\s|$)', text.strip(), re.DOTALL)
        return match.group(0).strip() if match else ""

    extended_pages = []
    for i, page in enumerate(pages):
        current_text = page["text"]
        if i + 1 < len(pages):
            next_sentence = get_first_sentence(pages[i + 1]["text"])
            # Skip the join when nothing was found to avoid a dangling space.
            if next_sentence:
                current_text += " " + next_sentence
        extended_pages.append({
            "page": page["page"],
            "text": current_text
        })
    return extended_pages

# 4. Build the prompt and call the LLM to extract arguments
def extract_arguments_json(text, topic, model_name) -> ArgumentResponse:
    """Ask a local Ollama model for verbatim arguments about ``topic`` in ``text``.

    Builds the extraction prompt (including the parser's format instructions),
    posts it to the Ollama ``/api/generate`` endpoint on localhost and parses
    the reply into an ``ArgumentResponse``.

    Args:
        text: Text to analyse.
        topic: Description of the goal the arguments must relate to.
        model_name: Ollama model tag to run.

    Returns:
        The parsed ``ArgumentResponse``; an empty one (``arguments=[]``) when
        the model's reply cannot be parsed.

    Raises:
        Exception: If Ollama answers with a non-200 status code.
    """
    format_instructions = pydantic_parser.get_format_instructions()

    prompt = PromptTemplate(
        template=(
            "Task: Text Span Identification for Arguments related to Sustainable Development Goal: {topic}\n\n"
            "Role: You are an expert in logical reasoning, sustainability reporting, and argument analysis. "
            "Your job is to identify and extract **verbatim arguments** about {topic} from long-form sustainability texts.\n\n"
            "Instructions:\n"
            "1. Carefully read the entire input text.\n"
            "2. Identify all sentences or phrases that:\n"
            "   - Clearly support or argue **for or against** the topic \"{topic}\"\n"
            "   - Contain policy recommendations, problem statements, evaluations, or actions related only to {topic} ignore arguments related to other SGDs\n"
            "3. For each relevant argument, extract the **exact span of text** without paraphrasing.\n"
            "4. Ensure each argument:\n"
            "   - Stands as a full statement\n"
            "   - Matches the original text exactly\n"
            "   - Includes only the necessary context\n\n"
            "Output Rules:\n"
            "- Use **only the exact text** from the original\n"
            "- Do **not** add or reword anything\n"
            "- Return only valid JSON\n"
            "- No markdown (```), no extra explanation\n\n"
            "Text:\n\"\"\"\n{text}\n\"\"\"\n\n"
            "Respond ONLY with a JSON object like this:\n\n"
            "{format_instructions}"
        ),
        input_variables=["text", "topic"],
        # Passed as a partial variable so the braces inside the JSON schema
        # are not interpreted as template placeholders.
        partial_variables={"format_instructions": format_instructions}
    )

    final_prompt = prompt.format_prompt(text=text, topic=topic).to_string()

    payload = {
        "model": model_name,
        "prompt": final_prompt,
        # Per the Ollama API reference, sampling parameters such as
        # temperature are read from "options"; a top-level "temperature"
        # field is not part of the request and has no effect.
        "options": {"temperature": 0},
        "stream": False
    }

    response = requests.post("http://localhost:11434/api/generate", json=payload)
    if response.status_code != 200:
        raise Exception(f"Ollama error: {response.text}")

    raw_output = response.json()["response"]
    print("Model Output:", raw_output)

    try:
        return pydantic_parser.parse(raw_output)
    except OutputParserException as err:
        # Best effort: an unparseable reply yields no arguments for this text
        # instead of aborting the whole run.
        print("Parse failed:", err)
        return ArgumentResponse(arguments=[])

# 5. Wrapper function for pipeline
def extract_arguments_from_text(text, topic, model_name) -> List[str]:
    """Return only the list of argument strings extracted from ``text``.

    Thin convenience wrapper over ``extract_arguments_json`` that unwraps the
    ``arguments`` field of its result.
    """
    return extract_arguments_json(text, topic, model_name).arguments

# 6. Main document-level processor
def process_document(pages, model_name, topic=""):
    """Extract arguments page by page and return one record per page.

    Each page is first extended with the opening sentence of the following
    page, then passed to the model. Progress and the extracted arguments are
    printed as the pages are processed.

    Args:
        pages: Sequence of dicts with ``"page"`` and ``"text"`` keys.
        model_name: Model tag forwarded to the extraction call.
        topic: Topic description forwarded to the extraction call.

    Returns:
        A list of dicts with the keys ``"page"``, ``"text"`` (the extended
        text) and ``"arguments"``.
    """
    results = []
    for entry in extend_pages_with_next_sentence(pages):
        page_number = entry["page"]
        page_text = entry["text"]
        print(f"\n--- Processing Page {page_number} ---")

        found = extract_arguments_from_text(page_text, topic, model_name)

        print("Extracted Arguments:")
        for position, argument in enumerate(found, start=1):
            print(f"{position}. {argument}")

        results.append({"page": page_number, "text": page_text, "arguments": found})
    return results


# 7. File I/O
def save_to_json(processed, output_path):
    """Write ``processed`` to ``output_path`` as UTF-8 JSON.

    The output is indented by two spaces and non-ASCII characters are written
    as-is rather than escaped.
    """
    with open(output_path, mode="w", encoding="utf-8") as handle:
        handle.write(json.dumps(processed, indent=2, ensure_ascii=False))

def process_directory(input_dir, output_dir, prefix, model_name, topic="", sgd_number=None):
    """Run argument extraction over every matching JSON file in ``input_dir``.

    A file is processed when its name starts with ``prefix`` and ends with
    ``.json``. Files are visited in sorted name order so the merged result is
    reproducible between runs.

    Args:
        input_dir: Directory containing the section files.
        output_dir: Directory that is created if missing. Nothing is written
            into it here; the caller saves the returned records.
        prefix: File-name prefix selecting which files to process.
        model_name: Model tag forwarded to ``process_document``.
        topic: Topic description forwarded to ``process_document``.
        sgd_number: Accepted for call compatibility; not used by this function.

    Returns:
        A flat list with the per-page records of all processed files, each
        tagged with a ``"section"`` key holding the file name without its
        ``.json`` extension.
    """
    os.makedirs(output_dir, exist_ok=True)
    all_results = []

    # os.listdir() returns entries in arbitrary order; sort for stable output.
    for filename in sorted(os.listdir(input_dir)):
        if not (filename.startswith(prefix) and filename.endswith(".json")):
            continue

        filepath = os.path.join(input_dir, filename)
        with open(filepath, "r", encoding="utf-8") as f:
            pages = json.load(f)

        # Drop only the trailing extension, not every ".json" in the name.
        section_name = filename[:-len(".json")]
        for item in process_document(pages, model_name, topic):
            item["section"] = section_name  # Add section identifier
            all_results.append(item)

    return all_results



## SGD 1: Poverty

In [None]:
# SGD 1 run: extract poverty-related arguments from every section file whose
# name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 1 (Poverty): End poverty in all its forms everywhere"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "1"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Text to analyze:
 vi   
Executive Summary
At the midpoint of the 2030 Agenda, all of the SDGs are seriously off track. From 2015 to 2019, the world 
made some progress on the SDGs, although this was already vastly insufficient to achieve the goals. Since the 
outbreak of the pandemic in 2020 and other simultaneous crises, SDG progress has stalled globally. In most high-
income countries (HICs), automatic stabilizers, emergency expenditure, and recovery plans mitigated the impacts 
of these multiple crises on socioeconomic outcomes. Only limited progress is being made on the environmental 
and biodiversity goals, including SDG 12 (Responsible Consumption and Production), SDG 13 (Climate Action), 
SDG 14 (Life Below Water), and SDG 15 (Life on Land), even in countries that are largely to blame for the climate and 
biodiversity crises. The disruptions caused by these multiple crises has aggravated fiscal-space issues in low-income 
countries (LICs) and in lower-

## SGD 2: Hunger

In [5]:
# SGD 2 run: extract hunger-related arguments from every section file whose
# name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 2 (Hunger): End hunger, achieve food security and improved nutrition and promote sustainable agriculture"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "2"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.",
    "The Stimulus’ urgent objective is to address the chronic shortfall of international SDG financing confronting the LICs and LMICs, and to ramp up financing flows by at least US$500 billion by 2025.",
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises",
    "Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions.",
    "Al

## SGD 3: Health

In [6]:
# SGD 3 run: extract health-related arguments from every section file whose
# name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 3 (Health): Ensure healthy lives and promote well-being for all at all ages"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "3"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises;",
    "Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions.",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight.",
    "Reform current institutional frameworks and develop new mechanisms to improve the quality and speed of deployment of international cooperation, and monitor progress in an open and timely manner."
  ]
}


## SGD 4: Education

In [7]:
# SGD 4 run: extract education-related arguments from every section file whose
# name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 4 (Education): Ensure inclusive and equitable quality education"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "4"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises;",
    "Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions.",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight.",
    "Reform current institutional frameworks and develop new mechanisms to improve the quality and speed of deployment of international cooperation, and monitor progress in an open and timely manner."
  ]
}


## SGD 5: Gender

In [8]:
# SGD 5 run: extract gender-related arguments from every section file whose
# name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 5 (Gender): Achieve gender equality and empower all women and girls"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "5"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises;",
    "Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions.",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight.",
    "Reform current institutional frameworks and develop new mechanisms to improve the quality and speed of deployment of international cooperation, and monitor progress in an open and timely manner."
  ]
}


## SGD 6: Water and sanitation

In [9]:
# SGD 6 run: extract water-and-sanitation arguments from every section file
# whose name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 6 (Water and sanitation): Ensure availability and sustainable management of water and sanitation for all"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "6"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)




--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises",
    "Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions.",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight.",
    "Reform current institutional frameworks and develop new mechanisms to improve the quality and speed of deployment of international cooperation, and monitor progress in an open and timely manner."
  ]
}
`

## SGD 7: Clean Energy

In [4]:
# SGD 7 run: extract clean-energy arguments from every section file whose
# name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 7 (Clean Energy): Ensure access to affordable, reliable, sustainable and modern energy for all"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "7"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises",
    "Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions.",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight.",
    "Reform current institutional frameworks and develop new mechanisms to improve the quality and speed of deployment of international cooperation, and monitor progress in an open and timely manner."
  ]
}
`

## SGD 8: Decent Work, Economic Growth

In [None]:
# SGD 8 run: extract decent-work / economic-growth arguments from every section
# file whose name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 8 (decent work, economic growth): Promote sustained, inclusive and sustainable economic growth, full and productive employment and decent work for all"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "8"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "It is critical that UN Member States adopt and implement the SDG Stimulus and support a comprehensive reform of the global financial architecture.",
    "To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.",
    "The Stimulus’ urgent objective is to address the chronic shortfall of international SDG financing confronting the LICs and LMICs, and to ramp up financing flows by at least US$500 billion by 2025.",
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises

## SGD 9: Infrastructure, industrialization, innovation

In [None]:
# SGD 9 run: extract infrastructure / industrialization / innovation arguments
# from every section file whose name starts with `prefix`, then save the merged
# records to one JSON file.
# The topic text is inserted into the model prompt, so the spelling of
# "industrialization" is corrected here.
topic = "SGD 9 (Infrastructure, industrialization, innovation): Build resilient infrastructure, promote inclusive and sustainable industrialization and foster innovation"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "9"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)

## SGD 10: Inequality

In [None]:
# SGD 10 run: extract inequality-related arguments from every section file
# whose name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 10 (Inequality): Reduce inequality within and among countries"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "10"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)

## SGD 11: Sustainable cities

In [None]:
# SGD 11 run: extract sustainable-cities arguments from every section file
# whose name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 11 (Sustainable Cities, Sustainable Communities): Make cities and human settlements inclusive, safe, resilient and sustainable"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "11"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)

## SGD 12: Responsible Consumption, Responsible Production

In [None]:
# SGD 12 run: extract responsible consumption / production arguments from every
# section file whose name starts with `prefix`, then save the merged records to
# one JSON file.
topic = "SGD 12 (Responsible Consumption, Responsible Production): Ensure sustainable consumption and production patterns"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "12"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)

## SGD 13: Climate change

In [None]:
# SGD 13 run: extract climate-change arguments from every section file whose
# name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 13 (Climate change): Take urgent action to combat climate change and its impacts"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "13"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)

## SGD 14: Life below water

In [None]:
# SGD 14 run: extract life-below-water arguments from every section file whose
# name starts with `prefix`, then save the merged records to one JSON file.
# The topic text is inserted into the model prompt, so the spelling of
# "below" is corrected here.
topic = "SGD 14 (Life below Water): Conserve and sustainably use the oceans, seas and marine resources for sustainable development"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "14"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)

## SGD 15: Life on land

In [None]:
# SGD 15 run: extract life-on-land arguments from every section file whose
# name starts with `prefix`, then save the merged records to one JSON file.
topic = "SGD 15 (Life on land): Protect, restore and promote sustainable use of terrestrial ecosystems, sustainably manage forests, combat desertification, and halt and reverse land degradation and halt biodiversity loss"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "15"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)

## SGD 16: Peace, Justice, Strong Institutions

In [None]:
# SGD 16 run: extract peace / justice / strong-institutions arguments from every
# section file whose name starts with `prefix`, then save the merged records to
# one JSON file.
topic = "SGD 16 (Peace, Justice, Strong Institutions): Promote peaceful and inclusive societies for sustainable development, provide access to justice for all and build effective, accountable and inclusive institutions at all levels"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "16"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)

## SGD 17: Partnerships, sustainable development

In [None]:
# SGD 17 run: extract partnership-related arguments from every section file
# whose name starts with `prefix`, then save the merged records to one JSON file.
# The topic text is inserted into the model prompt; a space is added after the
# colon so it has the same "<label>: <goal>" shape as the other runs.
topic = "SGD 17 (Partnerships, sustainable development): Strengthen the means of implementation and revitalize the Global Partnership for Sustainable Development"
prefix = 'GLOBAL_SGD2023_'
# Built with os.path.join so the path resolves on any OS; the trailing ""
# keeps the final separator, so the value is unchanged on Windows.
output_dir = os.path.join("..", "Data", "Extracted Arguments No Keywords (all text)", "")
sgd_number = "17"

resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    sgd_number=sgd_number,
)

# ':' in the model tag is replaced before it goes into the file name
# (presumably because ':' is not allowed in Windows file names).
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)