# TFM: Análisis de Políticas de Sostenibilidad mediante técnicas de Argumentacion Computacional

## Detección de Argumentos con Qwen 3B

- ollama serve
- ollama run qwen2.5:3b

In [None]:
#%pip install langchain pymupdf openai openpyxl --quiet

In [1]:
from typing import List
from pydantic import BaseModel, Field, ValidationError
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_core.exceptions import OutputParserException
import requests
import json
import re
from openai import OpenAI
import openai
import httpx
import pandas as pd
import numpy as np
import os
import openpyxl

process_text_path = "..\\Data\\Processed Files (sections)\\"
output_dir = "..\\Data\\Extracted Arguments Keywords (all text)\\"

prefix = 'GLOBAL_SGD2023_'
model_name="qwen2.5:3b"

## Input text processing

In [2]:
# 1. Define your Pydantic schema for output
class ArgumentResponse(BaseModel):
    arguments: List[str] = Field(..., description="List of arguments extracted directly from the text.")

# 2. Setup output parser
pydantic_parser = PydanticOutputParser(pydantic_object=ArgumentResponse)

# 3. Extend text with first sentence from the next page
def extend_pages_with_next_sentence(pages):
    def get_first_sentence(text):
        match = re.search(r'(.+?\.)', text.strip())
        return match.group(1).strip() if match else ""

    extended_pages = []
    for i, page in enumerate(pages):
        current_text = page["text"]
        if i + 1 < len(pages):
            next_sentence = get_first_sentence(pages[i + 1]["text"])
            current_text += " " + next_sentence
        extended_pages.append({
            "page": page["page"],
            "text": current_text
        })
    return extended_pages

# 4. Build the prompt and call the LLM to extract arguments
def extract_arguments_json(text, topic, keywords, model_name) -> ArgumentResponse:
    format_instructions = pydantic_parser.get_format_instructions()

    # Keywords for filtering arguments
    positive_keywords = (keywords or {}).get('in_favor', [])
    negative_keywords = (keywords or {}).get('against', [])

    # Normalize & join for readability
    def to_str(xs):
        return ", ".join(sorted({s.strip().lower() for s in xs if isinstance(s, str) and s.strip()}))
    pos_kw_str = to_str(positive_keywords)
    neg_kw_str = to_str(negative_keywords)

    prompt = PromptTemplate(
        template=(
            "Task: Text Span Identification for Arguments related ONLY to Sustainable Development Goal: {topic}\n"

            "Role: You are an expert in logical reasoning, sustainability reporting, and argument analysis. \n"
            "Your job is to identify and extract verbatim arguments about {topic} from long-form sustainability texts.\n\n"

            "Instructions:\n"
            "1. Carefully read the entire input text.\n"
            "2. Identify ONLY those sentences or phrases that:\n"
            "   - Clearly support or argue for or against the topic {topic}\n"
            "   - Contain keyword from the relevant lists below\n"
            "   - Are exclusively about {topic} (EXCLUDE if they mention or refer to other SDGs or unrelated sustainability topics)\n\n"
            "3. Keywords for filtering:\n"
            "   - In favor: {pos_kw_str}\n"
            "   - Against: {neg_kw_str}\n"
            "4. Each extracted argument must:\n"
            "   - Relate exclusively to the specified SDG ({topic})\n"
            "   - Stand as a full statement\n"
            "   - Be copied exactly from the original (no paraphrasing)\n"
            "   - Include only the necessary context for understanding\n"
            "5. If no qualifying arguments are found, return an empty array.\n\n"

            "Output Rules:\n"
            "   - Use only the exact text from the original\n"
            "   - No additional commentary or explanation\n"
            "   - Return only valid JSON\n"
            "   - No markdown formatting\n\n"

            "Text:\n\"\"\"\n{text}\n\"\"\"\n\n"

            "Respond ONLY with a JSON object like this:\n\n"
            "{format_instructions}"
        ),
        input_variables=["text", "topic"],
        partial_variables={
            "format_instructions": format_instructions,
            "pos_kw_str": pos_kw_str,
            "neg_kw_str": neg_kw_str,
        },
    )

    final_prompt = prompt.format_prompt(text=text, topic=topic).to_string()

    payload = {
        "model": model_name,
        "prompt": final_prompt,
        "temperature": 0,
        "stream": False
    }

    response = requests.post("http://localhost:11434/api/generate", json=payload)
    if response.status_code != 200:
        raise Exception(f"Ollama error: {response.text}")

    raw_output = response.json()["response"]
    print("Model Output:", raw_output)

    try:
        return pydantic_parser.parse(raw_output)
    except OutputParserException as err:
        print("Parse failed:", err)
        return ArgumentResponse(arguments=[])

# 5. Wrapper function for pipeline
def extract_arguments_from_text(text, topic, keywords, model_name) -> List[str]:
    result = extract_arguments_json(text, topic, keywords, model_name)
    return result.arguments

# 6. Main document-level processor
def process_document(pages, model_name, topic="", keywords=None):
    extended_pages = extend_pages_with_next_sentence(pages)
    processed = []
    for page in extended_pages:
        print(f"\n--- Processing Page {page['page']} ---")
        #print("Text to analyze:\n", page["text"])
        
        arguments = extract_arguments_from_text(page["text"], topic, keywords, model_name)
        
        print("Extracted Arguments:")
        for i, arg in enumerate(arguments, 1):
            print(f"{i}. {arg}")

        processed.append({
            "page": page["page"],
            "text": page["text"],
            "arguments": arguments
        })
    return processed


# 7. File I/O
def save_to_json(processed, output_path):
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(processed, f, indent=2, ensure_ascii=False)

def process_directory(input_dir, output_dir, prefix, model_name, topic="", keywords=None):
    os.makedirs(output_dir, exist_ok=True)
    all_results = []

    for filename in os.listdir(input_dir):
        if filename.endswith(".json") and filename.startswith(prefix):
            filepath = os.path.join(input_dir, filename)
            with open(filepath, "r", encoding="utf-8") as f:
                pages = json.load(f)

            section_name = filename.replace(".json", "")
            processed = process_document(pages, model_name, topic, keywords)

            for item in processed:
                item["section"] = section_name  # Add section identifier
                all_results.append(item)
                
    return all_results

## SGD 1: Poverty

In [3]:
topic = "SGD 1 (Poverty): End poverty in all its forms everywhere"
sgd_number = "1"

keywords_g1 = {
    "in_favor": [
        "poverty reduction", "poverty alleviation", "social protection", "economic empowerment",
        "wealth creation", "opportunity", "prosperity", "development aid", "microfinance",
        "basic income", "empowerment", "upliftment", "sufficiency", "inclusion", "equity"
    ],
    "against": [
        "poverty", "pennilessness", "distress", "necessity", "hardship", "insolvency",
        "privation", "penury", "destitution", "hand-to-mouth existence", "beggary",
        "indigence", "pauperism", "necessitousness", "extreme poverty", "wealth inequality",
        "exploitation", "lack of opportunity", "exclusion", "vulnerability",
        "deprivation", "marginalization"
    ]
}


resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g1)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": [
    "microfinance, microfinance"
  ]
}
Extracted Arguments:
1. microfinance, microfinance

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": [
    "Increased funding from the multilateral development banks (MDBs) and public development banks (PDBs) to low- and middle-income countries, linked to investments in the SDGs;"
  ]
}

Note: There are no arguments provided that exclusively support or argue for ending poverty in all its forms everywhere using only the specified keywords. Therefore, the JSON output reflects an empty list as per the instructions.
Extracted Arguments:
1. Increased funding from the multilateral development banks (MDBs) and public development banks (PDBs) to low- and middle-income countries, linked to investments in the SDGs;

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Pro

## SGD 2: Hunger

In [4]:
topic = "SGD 2 (Hunger): End hunger, achieve food security and improved nutrition and promote sustainable agriculture"
sgd_number = "2"
keywords_g2 = {
    "in_favor": [
        "food security", "food", "nutrition", "zero hunger", "nourishment",
        "food sovereignty", "food aid", "school feeding programs",
        "access to food", "healthy diets"
    ],
    "against": [
        "hunger", "undernutrition", "malnutrition", "starvation", "famine",
        "undernourishment", "food insecurity", "food waste", "crop failure",
        "land grabbing", "price volatility", "nutrient deficiency"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g2)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 16 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 17 ---
Model Output: {
  "arguments": [
    "The crises facing our oceans are multidimensional and complex, including over-fishing, destructive technologies like ocean trawling, destruction of coastal wetland ecosystems, mass pollution of estuaries (causing eutrophication), acidification of the oceans due to CO2 increase, pollution of the high seas (including plastic waste and microplastics in marine food chains), slowdown of ocean circulation due to climate change, 

## SGD 3: Health

In [5]:
topic = "SGD 3 (Health): Ensure healthy lives and promote well-being for all at all ages"
sgd_number = "3"
keywords_g3 = {
    "in_favor": [
        "wellbeing", "welfare", "health", "benefit", "advantage", "comfort",
        "happiness", "prosperity", "universal health coverage", "healthcare access",
        "disease prevention", "mental health", "healthy lifestyles", "vaccination",
        "maternal health", "child health", "sanitation", "public health", "interest"
    ],
    "against": [
        "disease", "illness", "epidemic", "pandemic", "mortality", "morbidity",
        "health inequality", "stress", "poor sanitation", "addiction",
        "unhealthy habits", "mental illness", "anxiety"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g3)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 16 ---
Model Output: {"arguments": []}
Extracted Arguments:

--- Processing Page 17 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 18 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 19 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 20 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 21 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 22 ---
Model Output: {
  "arguments": [
    "Ensur

## SGD 4: Education

In [6]:
topic = "SGD 4 (Education): Ensure inclusive and equitable quality education"
sgd_number = "4"
keywords_g4 = {
    "in_favor": [
        "quality", "inclusive", "equitable", "lifelong learning", "teaching",
        "schooling", "training", "development", "coaching", "instruction",
        "tutoring", "tuition", "skills development", "literacy", "numeracy",
        "universal access", "scholarships", 'data literacy'
    ],
    "against": [
        "lack of education", "illiteracy", "school dropout", "dropout",
        "educational inequality", "poor quality teaching", "indoctrination",
        "lack of access", "resource scarcity", "digital divide", "skills gap"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g4)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 16 ---
Model Output: []
Parse failed: Failed to parse ArgumentResponse from completion []. Got: 1 validation error for ArgumentResponse
  Input should be a valid dictionary or instance of ArgumentResponse [type=model_type, input_value=[], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/model_type
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 
Extracted Arguments:

--- Processing Page 17 ---
Model Output: {
  "arguments": [
    "The UN General Assembly’

## SGD 5: Gender

In [7]:
topic = "SGD 5 (Gender): Achieve gender equality and empower all women and girls"
sgd_number = "5"
keywords_g5 = {
    "in_favor": [
        "gender equality", "women empowerment", "feminism", "women’s movement",
        "suffragette", "suffragist", "feminist", "emancipated", "equal rights",
        "equal opportunity", "women leadership", "girls education", "reproductive rights"
    ],
    "against": [
        "gender inequality", "sexism", "sexist", "discrimination", "gender violence",
        "misogyny", "patriarchy", "wage gap", "glass ceiling", "female genital mutilation",
        "child marriage", "lack of representation", "stereotypes", "glass ceiling"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g5)


merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 16 ---
Model Output: [
    "Gender equality and empowerment are essential for a sustainable future, as evidenced by the need to address issues such as child marriage, female genital mutilation, and gender violence."
]
Parse failed: Failed to parse ArgumentResponse from completion ["Gender equality and empowerment are essential for a sustainable future, as evidenced by the need to address issues such as child marriage, female genital mutilation, and gender violence."]. Got: 1 validation error for ArgumentResponse
  Input should be a valid dictionary o

## SGD 6: Water and sanitation

In [8]:
topic = "SGD 6 (Water and sanitation): Ensure availability and sustainable management of water and sanitation for all"
sgd_number = "6"
keywords_g6 = {
    "in_favor": [
        "clean water", "sanitation", "hygiene", "cleanliness", "sewerage",
        "drinking water", "water access", "water management", "water efficiency",
        "wastewater treatment", "water quality"
    ],
    "against": [
        "water scarcity", "water pollution", "lack of sanitation", "open defecation",
        "waterborne diseases", "drought", "unsustainable water use", "contaminated water"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g6)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 16 ---
Model Output: {
  "arguments": [
    "An estimated 1.8 billion people depend on drinking water contaminated by human waste."
  ]
}
Extracted Arguments:
1. An estimated 1.8 billion people depend on drinking water contaminated by human waste.

--- Processing Page 17 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 18 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 19 ---
Model Output: [
    "The rise of China has led to great tension between it and the United States."
]
Parse fai

## SGD 7: Clean Energy

In [9]:
topic = "SGD 7 (Clean Energy): Ensure access to affordable, reliable, sustainable and modern energy for all"
sgd_number = "7"
keywords_g7 = {
    "in_favor": [
        "clean energy", "green energy", "renewable energy", "sustainable energy",
        "modern energy", "energy access", "energy efficiency", "solar power",
        "wind power", "geothermal energy", "hydropower", "energy transition", "energy matrix"
    ],
    "against": [
        "fossil fuels", "energy poverty", "energy inefficiency", "pollution",
        "carbon emissions", "unsustainable energy", "reliance on non-renewables"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g7)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": [
    "The transition by 2050 of energy systems to net-zero emissions;"
  ]
}
Extracted Arguments:
1. The transition by 2050 of energy systems to net-zero emissions;

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 16 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 17 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 18 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 19 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 20 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Process

## SGD 8: Decent Work, Economic Growth

In [10]:
topic = "SGD 8 (decent work, economic growth): Promote sustained, inclusive and sustainable economic growth, full and productive employment and decent work for all"
sgd_number = "8"
keywords_g8 = {
    "in_favor": [
        "decent work", "full employment", "fair wages", "workers rights",
        "job creation", "entrepreneurship", "financial inclusion", "financial",
        "business", "trade", "industrial", "commercial", "mercantile", "spillover"
    ],
    "against": [
        "unemployment", "underemployment", "precarious work", "exploitation",
        "child labor", "forced labor", "unsafe working conditions", "stagnation",
        "recession", "inequality", "informal economy", "low wages", "job insecurity",
        "informal jobs"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g8)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": [
    "All countries, poorer and richer alike, should use the half-way momentum to self-critically review and revise their national SDG strategies and commit to strengthening multilateralism."
  ]
}

Note: There were no sentences or phrases that explicitly support or argue for SGD 8 (decent work, economic growth): Promote sustained, inclusive and sustainable economic growth, full and productive employment and decent work for all based on the provided text.
Extracted Arguments:
1. All countries, poorer and richer alike, should use the half-way momentum to self-critically review and revise their national SDG strategies and commit to strengthening multilateralism.

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Pa

## SGD 9: Infrastructure, industrilization, innovation

In [11]:
topic = "SGD 9 (Infrastructure, industrilization, innovation): Build resilient infrastructure, promote inclusive and sustainable industrialization and foster innovation"
sgd_number = "9"
keywords_g9 = {
    "in_favor": [
        "infrastructure", "industrialization", "innovation", "technological innovations",
        "research and development", "technology transfer", "connectivity", "internet access",
        "manufacturing", "scientific research", "digitalization", "modernization",
        "technological advances", "digital inclusion", "digital literacy", "technological investment" 
    ],
    "against": [
        "lack of infrastructure", "inadequate infrastructure", "industrial pollution",
        "unsustainable industry", "digital divide", "lack of innovation", "technological gap",
        "brain drain", "resource depletion", "unmaintained", "obsolescence", "decay", "cybersecurity threaths",
        "cybersecurity attacks"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g9)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 16 ---
Model Output: [
    "The current loss of species rate is 1,000–10,000 times more than the natural extinction rate. A combination of land-use change (e.g., dramatic increases of tropical deforestation), global warming, and pollution are driving more and more species, including entire families and orders of species, towards mass extinction."
]
Parse failed: Failed to parse ArgumentResponse from completion ["The current loss of species rate is 1,000\u201310,000 times more than the natural extinction rate. A combination of land-use change (e.g., d

## SGD 10: Inequality

In [12]:
topic = "SGD 10 (Inequality): Reduce inequality within and among countries"
sgd_number = "10"
keywords_g10 = {
    "in_favor": [
        "equality", "equity", "inclusion", "equal opportunity", "fairness",
        "social justice", "progressive taxation", "non-discrimination"
    ],
    "against": [
        "inequality", "disparity", "discrimination", "exclusion", "apartheid",
        "linguistic imperialism", "favouritism", "bias", "partiality", "injustice",
        "imbalance", "nepotism", "marginalization", "wealth concentration",
        "poverty gap", "social stratification", "prejudice"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g10)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": [
    "To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments."
  ]
}

Note: After carefully reviewing the provided text, no verbatim arguments exclusively related to SGD 10 (Reduce inequality within and among countries) could be found.
Extracted Arguments:
1. To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 16 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 17 ---
Model Output: [
    "Pre-pandemic

## SGD 11: Sustainable cities

In [13]:
topic = "SGD 11 (Sustainable Cities, Sustainable Communities): Make cities and human settlements inclusive, safe, resilient and sustainable"
sgd_number = "11"
keywords_g11 = {
    "in_favor": [
        "sustainable cities", "sustainable communities", "smart cities", "urban planning",
        "affordable housing", "public transport", "green spaces", "community",
        "preservation", "society", "people", "public", "association", "population",
        "residents", "commonwealth", "general public", "spatial justice", "accessibility"
    ],
    "against": [
        "slums", "urban sprawl", "air pollution", "noise pollution", "traffic",
        "lack of housing", "urban poverty", "crime", "segregation", "gentrification",
        "unsafe", "insecure", "urban degradation", "housing crisis", "urban decay",
        "deteriorated urban areas", "disadvantaged communities"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g11)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": [
    "Sustainable cities: urban infrastructure and services to ensure productive, safe, inclusive, and healthful cities for a world that will be around 70 percent urbanized in 2050;"
  ]
}
Extracted Arguments:
1. Sustainable cities: urban infrastructure and services to ensure productive, safe, inclusive, and healthful cities for a world that will be around 70 percent urbanized in 2050;

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 16 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 17 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 18 ---
Model Output: {

## SGD 12: Responsible Consumption, Responsible Production

In [14]:
topic = "SGD 12 (Responsible Consumption, Responsible Production): Ensure sustainable consumption and production patterns"
sgd_number = "12"
keywords_g12 = {
    "in_favor": [
        "sustainable consumption", "sustainable production", "second use", "second hand",
        "circular economy", "recicle", "recycling", "reuse", "sustainable sourcing",
        "eco-design", "corporate social responsibility", "sustainable tourism",
        "manufacture", "manufacturing", "construction"
    ],
    "against": [
        "overconsumption", "waste", "using up", "expenditure", "exhaustion", "depletion",
        "dissipation", "pollution", "planned obsolescence", "fast fashion", "food waste",
        "unsustainable production", "resource inefficiency", "long-tail economy"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g12)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": [
    "All countries, poorer and richer alike, should use the half-way momentum to self-critically review and revise their national SDG strategies and commit to strengthening multilateralism."
  ]
}

Note: After carefully reviewing the provided text, no verbatim argument supporting or opposing SGD 12 (Responsible Consumption and Production) was found. Therefore, the output is an empty list as per instructions.
Extracted Arguments:
1. All countries, poorer and richer alike, should use the half-way momentum to self-critically review and revise their national SDG strategies and commit to strengthening multilateralism.

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
E

## SGD 13: Climate change

In [15]:
topic = "SGD 13 (Climate change): Take urgent action to combat climate change and its impacts"
sgd_number = "13"
keywords_g13 = {
    "in_favor": [
        "climate action", "mitigation", "adaptation", "resilience", "carbon neutrality",
        "decarbonization", "energy transition", "emissions reduction",
        "Paris Agreement", "climate policy"
    ],
    "against": [
        "climate change", "global warming", "greenhouse gas emissions", "CO2 emissions",
        "fossil fuels", "deforestation", "climate inaction", "climate denial",
        "extreme weather events", "sea-level rise", "environmental degradation"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g13)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": [
    "Only limited progress is being made on the environmental and biodiversity goals, including SDG 12 (Responsible Consumption and Production), SDG 13 (Climate Action), SDG 14 (Life Below Water), and SDG 15 (Life on Land)"
  ]
}

Note: There are no arguments in favor of or against climate change that directly support the action to combat climate change and its impacts from the given text.
Extracted Arguments:
1. Only limited progress is being made on the environmental and biodiversity goals, including SDG 12 (Responsible Consumption and Production), SDG 13 (Climate Action), SDG 14 (Life Below Water), and SDG 15 (Life on Land)

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 15 ---
Model Output: {
  "argu

## SGD 14: Life bellow water

In [16]:
topic = "SGD 14 (Life bellow Water): Conserve and sustainably use the oceans, seas and marine resources for sustainable development"
sgd_number = "14"
keywords_g14 = {
    "in_favor": [
        "ocean conservation", "marine conservation", "sustainable fishing",
        "marine protected areas", "ocean biodiversity", "ocean ecosystems", "biology",
        "marine biology", "ecosystem restoration"
    ],
    "against": [
        "overfishing", "marine pollution", "plastic pollution", "microplastics",
        "ocean acidification", "coral bleaching", "habitat destruction", "illegal fishing",
        "destructive fishing practices", "biodiversity loss", "eutrophication"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g14)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 16 ---
Model Output: {
  "arguments": [
    "SDG Index world average: pre-pandemic trend and trend needed to achieve the SDGs by 2030"
  ]
}
Extracted Arguments:
1. SDG Index world average: pre-pandemic trend and trend needed to achieve the SDGs by 2030

--- Processing Page 17 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 18 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 19 ---
Model Output: {}
```json
{
  "arguments": []
}
```
Parse failed: Failed to parse ArgumentResponse from c

## SGD 15: Life on land

In [17]:
topic = "SGD 15 (Life on land): Protect, restore and promote sustainable use of terrestrial ecosystems, sustainably manage forests, combat desertification, and halt and reverse land degradation and halt biodiversity loss"
sgd_number = "15"
keywords_g15 = {
    "in_favor": [
        "land ecosystem", "agriculture", "ecosystem restoration", "forest",
        "stop desertification", "reverse land degradation", "conservation",
        "sustainable agriculture", "afforestation", "reforestation",
        "wildlife protection", "wildlife"
    ],
    "against": [
        "deforestation", "desertification", "land degradation", "biodiversity loss",
        "habitat loss", "poaching", "illegal wildlife trade", "invasive species",
        "soil erosion", "unsustainable agriculture", "soil pollution"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g15)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": [
    "combat desertification, and halt and reverse land degradation"
  ]
}
Extracted Arguments:
1. combat desertification, and halt and reverse land degradation

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 15 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 16 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 17 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 18 ---
Model Output: {}
```json
{
  "arguments": []
}
```
Parse failed: Failed to parse ArgumentResponse from completion {}. Got: 1 validation error for ArgumentResponse
arguments
  Field required [type=missing, input_value={}, input_type=dict]
    F

## SGD 16: Peace, Justice, Strong Institutions

In [18]:
topic = "SGD 16 (Peace, Justice, Strong Institutions): Promote peaceful and inclusive societies for sustainable development, provide access to justice for all and build effective, accountable and inclusive institutions at all levels"
sgd_number = "16"
keywords_g16 = {
    "in_favor": [
        "peace", "justice", "access to justice", "strong institutions", "healthy institutions",
        "accountability", "anti-corruption", "transparency", "governance", "human rights",
        "conflict resolution", "truce", "ceasefire", "treaty", "armistice", "pacification",
        "fairness","integrity",
        "honesty", "decency", "impartiality", "justness", "rightfulness",
        "strong leadership", "good leadership", "institutionalization", "government effort", 
        "public investments", "science-based policy"

    ],

    "against": [
        "conflict", "violence", "war", "insecurity", "injustice", "corruption", "bribery",
        "weak institutions", "lack of accountability", "impunity", "human rights violations",
        "discrimination", "crime", "illicit financial flows", "organized crime", "terrorism",
         "weak leadership", "autoritarism", "dictator"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g16)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": [
    "All countries, poorer and richer alike, should use the half-way momentum to self-critically review and revise their national SDG strategies and commit to strengthening multilateralism."
  ]
}

Note: There were no sentences or phrases in the provided text that clearly support or argue for SGD 16 (Peace, Justice, Strong Institutions): Promote peaceful and inclusive societies for sustainable development, provide access to justice for all and build effective, accountable and inclusive institutions at all levels.
Extracted Arguments:
1. All countries, poorer and richer alike, should use the half-way momentum to self-critically review and revise their national SDG strategies and commit to strengthening multilateralism.

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 14 ---
Model Output: {

## SGD 17: Partnerships, sustainable development

In [19]:
topic = "SGD 17 (Partnerships, sustainable development):Strengthen the means of implementation and revitalize the Global Partnership for Sustainable Development"
sgd_number = "17"
keywords_g17 = {
    "in_favor": [
        "global partnership", "cooperation", "association", "alliance", "sharing",
        "union", "connection", "participation", "copartnership", "technology transfer",
        "capacity building", "international cooperation", 'positive spillover', 
        "transboundary", "coordination"
    ],
    "against": [
        "lack of cooperation", "isolationism", "protectionism", "insufficient funding",
        "debt", "policy incoherence", "data gaps", "weak monitoring", "non-participation",
        "aid dependency", "technological gatekeeping", "negative spillover"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g17)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": [
    "The Stimulus’ urgent objective is to address the chronic shortfall of international SDG financing confronting the LICs and LMICs, and to ramp up financing flows by at least US$500 billion by 2025."
  ]
}

Note: The provided text does not contain any explicit arguments related to SGD 17 (Partnerships, sustainable development): Strengthen the means of implementation and revitalize the Global Partnership for Sustainable Development. Therefore, no arguments were extracted.
Extracted Arguments:
1. The Stimulus’ urgent objective is to address the chronic shortfall of international SDG financing confronting the LICs and LMICs, and to ramp up financing flows by at least US$500 billion by 2025.

--- Processing Page 8 ---
Model Output: {
  "arguments": []
}
Extracted Arguments:

--- Processing Page 13 ---
Model Output: {
  "arguments": [
    "SDSN recommends an SDG Stimulus plan to close the massive financing gap faced by many deve

## SGD 0: Overarching terms

In [None]:
topic = "SGD Overarching terms: Sustainable Development Goal, SDG, Agenda 2030, leave no one behind, Voluntary National Review, SDG transformations, "
sgd_number = "0"
keywords_g0 = {
    "in_favor": [
        "Sustainability", "Sustainable Development Goal", "SDG", "Agenda 2030", "global goals", 
        "development", "progress", "implementation", "monitoring", "accountability", "inclusive", "leave no one behind", 
        "Voluntary National Review", "VNR", "SDG transformations"
    ],
    "against": [
          "Unsustainability", "inaction", "regression", "lack of funding", "greenwashing", "exploitation", 
          "environmental degradation", "SDG needs", "regression", 
          "multidimensional vulnerability", "stagnation"
    ]
}

resultado = process_directory(input_dir = process_text_path, 
                  output_dir = output_dir, 
                  prefix = prefix,
                  model_name = model_name,
                  topic = topic,
                  keywords = keywords_g0)

merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: {
  "arguments": [
    "At their core, the SDGs are an investment agenda: it is critical that UN Member States adopt and implement the SDG Stimulus and support a comprehensive reform of the global financial architecture."
  ]
}
Extracted Arguments:
1. At their core, the SDGs are an investment agenda: it is critical that UN Member States adopt and implement the SDG Stimulus and support a comprehensive reform of the global financial architecture.

--- Processing Page 8 ---
Model Output: {
  "arguments": [
    "Investing in statistical capacity, science, and data literacy are important priorities for achieving the SDGs."
  ]
}
Extracted Arguments:
1. Investing in statistical capacity, science, and data literacy are important priorities for achieving the SDGs.

--- Processing Page 13 ---
Model Output: {
  "arguments": [
    "At the global level, averaging across countries, not a single SDG is currently projected to be met by 2030, with the poorest c

In [None]:
!git add .
!git commit -m "sgd keywords qwen update"
!git push origin main  # or 'master' or your branch name



[main 21d563a] sgd 16, 17 0
 2 files changed, 35119 insertions(+), 1710 deletions(-)


error: src refspec # does not match any
error: src refspec or does not match any
error: src refspec 'master' does not match any
error: src refspec or does not match any
error: src refspec your does not match any
error: src refspec branch does not match any
error: src refspec name does not match any
error: failed to push some refs to 'https://github.com/camipalo/TFM.git'
