# TFM: Análisis de Políticas de Sostenibilidad mediante técnicas de Argumentación Computacional

## Detección de Argumentos con Gemma 3 (4B)

- ollama serve
- ollama run gemma3:4b

In [1]:
#%pip install langchain pymupdf openai openpyxl --quiet

In [1]:
from typing import List
from pydantic import BaseModel, Field, ValidationError
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_core.exceptions import OutputParserException
import requests
import json
import re
from openai import OpenAI
import openai
import httpx
import pandas as pd
import numpy as np
import os
import openpyxl

# Input/output folders, built with os.path.join so the notebook also runs on
# non-Windows hosts. The trailing "" keeps a trailing path separator, which
# yields exactly the previous backslash-terminated strings on Windows.
process_text_path = os.path.join("..", "Data", "Processed Files (sections)", "")
output_dir = os.path.join("..", "Data", "Extracted Arguments Keywords (all text)", "")

# File-name prefix used to select input files and to name the output files.
prefix = 'GLOBAL_SGD2023_'
# Model tag placed in the "model" field of every Ollama request.
model_name = "gemma3:4b"

## Input text processing

In [2]:
# 1. Define your Pydantic schema for output
# Response model with a single required field: the list of argument strings.
# NOTE(review): documented with `#` comments rather than a class docstring,
# since a docstring would presumably be added to the generated schema and
# therefore change the format instructions sent to the model - confirm.
class ArgumentResponse(BaseModel):
    # `...` marks the field as required (no default value).
    arguments: List[str] = Field(..., description="List of arguments extracted directly from the text.")

# 2. Setup output parser
# Shared parser instance: extract_arguments_json uses it both to obtain the
# format instructions embedded in the prompt and to parse the raw model reply
# into an ArgumentResponse.
pydantic_parser = PydanticOutputParser(pydantic_object=ArgumentResponse)

# 3. Extend text with first sentence from the next page
def extend_pages_with_next_sentence(pages):
    """Append the first sentence of the following page to each page's text.

    This gives a sentence that is cut by a page break its continuation.

    Args:
        pages: Sequence of dicts, each with a "page" and a "text" key.

    Returns:
        A new list of ``{"page": ..., "text": ...}`` dicts in the same order.
        The input dicts are not modified. The last page, and any page whose
        successor contains no period, keeps its text unchanged.
    """
    def get_first_sentence(text):
        # DOTALL lets the match cross line breaks, so a first sentence that
        # wraps over several lines is returned from its start instead of
        # only the fragment on the line that happens to contain the period.
        match = re.search(r'(.+?\.)', text.strip(), flags=re.DOTALL)
        return match.group(1).strip() if match else ""

    extended_pages = []
    for i, page in enumerate(pages):
        current_text = page["text"]
        if i + 1 < len(pages):
            next_sentence = get_first_sentence(pages[i + 1]["text"])
            # Skip the join when nothing was found, so the page text does not
            # gain a dangling trailing space.
            if next_sentence:
                current_text += " " + next_sentence
        extended_pages.append({
            "page": page["page"],
            "text": current_text
        })
    return extended_pages

# 4. Build the prompt and call the LLM to extract arguments
def extract_arguments_json(text, topic, keywords, model_name) -> ArgumentResponse:
    """Ask the local Ollama server for verbatim arguments about ``topic``.

    Args:
        text: Text to analyse; inserted verbatim into the prompt.
        topic: Goal description inserted into the prompt.
        keywords: Optional dict with "in_favor" and/or "against" lists of
            keyword strings; ``None`` or missing keys mean no keywords.
        model_name: Ollama model tag sent in the request.

    Returns:
        The parsed ``ArgumentResponse``. If the reply cannot be parsed, the
        error is printed and a response with an empty list is returned.

    Raises:
        RuntimeError: If the server answers with a non-200 status code.
    """
    format_instructions = pydantic_parser.get_format_instructions()

    # Keywords for filtering arguments
    positive_keywords = (keywords or {}).get('in_favor', [])
    negative_keywords = (keywords or {}).get('against', [])

    # Normalize & join for readability: lower-cased, de-duplicated, sorted;
    # non-string and blank entries are dropped.
    def to_str(xs):
        return ", ".join(sorted({s.strip().lower() for s in xs if isinstance(s, str) and s.strip()}))
    pos_kw_str = to_str(positive_keywords)
    neg_kw_str = to_str(negative_keywords)

    prompt = PromptTemplate(
        template=(
            "Task: Text Span Identification for Arguments related ONLY to Sustainable Development Goal: {topic}\n"

            "Role: You are an expert in logical reasoning, sustainability reporting, and argument analysis. \n"
            "Your job is to identify and extract verbatim arguments about {topic} from long-form sustainability texts.\n\n"

            "Instructions:\n"
            "1. Carefully read the entire input text.\n"
            "2. Identify ONLY those sentences or phrases that:\n"
            "   - Clearly support or argue for or against the topic {topic}\n"
            "   - Contain keyword from the relevant lists below\n"
            "   - Are exclusively about {topic} (EXCLUDE if they mention or refer to other SDGs or unrelated sustainability topics)\n\n"
            "3. Keywords for filtering:\n"
            "   - In favor: {pos_kw_str}\n"
            "   - Against: {neg_kw_str}\n"
            "4. Each extracted argument must:\n"
            "   - Relate exclusively to the specified SDG ({topic})\n"
            "   - Stand as a full statement\n"
            "   - Be copied exactly from the original (no paraphrasing)\n"
            "   - Include only the necessary context for understanding\n"
            "5. If no qualifying arguments are found, return an empty array.\n\n"

            "Output Rules:\n"
            "   - Use only the exact text from the original\n"
            "   - No additional commentary or explanation\n"
            "   - Return only valid JSON\n"
            "   - No markdown formatting\n\n"

            "Text:\n\"\"\"\n{text}\n\"\"\"\n\n"

            "Respond ONLY with a JSON object like this:\n\n"
            "{format_instructions}"
        ),
        input_variables=["text", "topic"],
        partial_variables={
            "format_instructions": format_instructions,
            "pos_kw_str": pos_kw_str,
            "neg_kw_str": neg_kw_str,
        },
    )

    final_prompt = prompt.format_prompt(text=text, topic=topic).to_string()

    payload = {
        "model": model_name,
        "prompt": final_prompt,
        # Ollama takes sampling parameters inside the nested "options"
        # object; a top-level "temperature" key is not applied, so the
        # intended deterministic decoding was silently lost.
        "options": {"temperature": 0},
        "stream": False
    }

    response = requests.post("http://localhost:11434/api/generate", json=payload)
    if response.status_code != 200:
        # RuntimeError is still caught by callers that catch Exception.
        raise RuntimeError(f"Ollama error: {response.text}")

    raw_output = response.json()["response"]
    print("Model Output:", raw_output)

    try:
        return pydantic_parser.parse(raw_output)
    except OutputParserException as err:
        # Best effort: one unparsable reply must not abort the whole run.
        print("Parse failed:", err)
        return ArgumentResponse(arguments=[])

# 5. Wrapper function for pipeline
def extract_arguments_from_text(text, topic, keywords, model_name) -> List[str]:
    """Return only the list of argument strings found in ``text``.

    Delegates to ``extract_arguments_json`` with the same arguments and
    unwraps the ``arguments`` attribute of its result.
    """
    return extract_arguments_json(text, topic, keywords, model_name).arguments

# 6. Main document-level processor
def process_document(pages, model_name, topic="", keywords=None):
    """Extract arguments page by page and collect the results.

    Each page is first extended via ``extend_pages_with_next_sentence``, then
    passed to ``extract_arguments_from_text``. Progress and the extracted
    arguments are printed as the pages are processed.

    Args:
        pages: Sequence of dicts with "page" and "text" keys.
        model_name: Model tag forwarded to the extraction call.
        topic: Topic description forwarded to the extraction call.
        keywords: Optional keyword dict forwarded to the extraction call.

    Returns:
        A list of dicts with the keys "page", "text" (the extended text) and
        "arguments", one per input page and in the same order.
    """
    processed = []
    for page in extend_pages_with_next_sentence(pages):
        print(f"\n--- Processing Page {page['page']} ---")

        # The extraction call prints the raw model output itself, so it must
        # run between the page banner and the argument listing.
        arguments = extract_arguments_from_text(
            page["text"], topic, keywords, model_name
        )

        print("Extracted Arguments:")
        for i, arg in enumerate(arguments, 1):
            print(f"{i}. {arg}")

        processed.append(
            dict(page=page["page"], text=page["text"], arguments=arguments)
        )
    return processed


# 7. File I/O
def save_to_json(processed, output_path):
    """Write ``processed`` to ``output_path`` as indented UTF-8 JSON.

    Non-ASCII characters are written as-is rather than as ``\\u`` escapes.
    An existing file at ``output_path`` is overwritten.
    """
    with open(output_path, "w", encoding="utf-8") as handle:
        handle.write(json.dumps(processed, indent=2, ensure_ascii=False))

def process_directory(input_dir, output_dir, prefix, model_name, topic="", keywords=None):
    """Run ``process_document`` on every matching JSON file in ``input_dir``.

    A file matches when its name starts with ``prefix`` and ends with
    ".json". Files are visited in sorted name order so the result order is
    reproducible between runs.

    Args:
        input_dir: Folder containing the per-section JSON files.
        output_dir: Folder that is created if missing; nothing is written to
            it by this function.
        prefix: Required file-name prefix.
        model_name: Model tag forwarded to ``process_document``.
        topic: Topic description forwarded to ``process_document``.
        keywords: Optional keyword dict forwarded to ``process_document``.

    Returns:
        A flat list with the per-page result dicts of all files, each
        extended with a "section" key holding the file name without its
        extension.
    """
    os.makedirs(output_dir, exist_ok=True)
    all_results = []

    # os.listdir returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(input_dir)):
        if not (filename.startswith(prefix) and filename.endswith(".json")):
            continue

        filepath = os.path.join(input_dir, filename)
        with open(filepath, "r", encoding="utf-8") as f:
            pages = json.load(f)

        # Strip only the final extension; str.replace would also remove a
        # ".json" occurring in the middle of the name.
        section_name = os.path.splitext(filename)[0]
        processed = process_document(pages, model_name, topic, keywords)

        for item in processed:
            item["section"] = section_name  # Add section identifier
            all_results.append(item)

    return all_results



## SDG 1: Poverty

In [4]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal).
topic = "SDG 1 (Poverty): End poverty in all its forms everywhere"
# Goal number; only used in the output file name below.
sgd_number = "1"

# Keywords passed to the prompt as "In favor" / "Against" filters.
keywords_g1 = {
    "in_favor": [
        "poverty reduction", "poverty alleviation", "social protection", "economic empowerment",
        "wealth creation", "opportunity", "prosperity", "development aid", "microfinance",
        "basic income", "empowerment", "upliftment", "sufficiency", "inclusion", "equity"
    ],
    "against": [
        "poverty", "pennilessness", "distress", "necessity", "hardship", "insolvency",
        "privation", "penury", "destitution", "hand-to-mouth existence", "beggary",
        "indigence", "pauperism", "necessitousness", "extreme poverty", "wealth inequality",
        "exploitation", "lack of opportunity", "exclusion", "vulnerability",
        "deprivation", "marginalization"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g1,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{"arguments": ["To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.", "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.", "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.", "Revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises;"]}
```
Extracted Arguments:
1. To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.
2. Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.
3. Revise the credit rating system and debt sustainability m

## SDG 2: Hunger

In [5]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal).
topic = "SDG 2 (Hunger): End hunger, achieve food security and improved nutrition and promote sustainable agriculture"
# Goal number; only used in the output file name below.
sgd_number = "2"

# Keywords passed to the prompt as "In favor" / "Against" filters.
keywords_g2 = {
    "in_favor": [
        "food security", "food", "nutrition", "zero hunger", "nourishment",
        "food sovereignty", "food aid", "school feeding programs",
        "access to food", "healthy diets"
    ],
    "against": [
        "hunger", "undernutrition", "malnutrition", "starvation", "famine",
        "undernourishment", "food insecurity", "food waste", "crop failure",
        "land grabbing", "price volatility", "nutrient deficiency"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g2,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "The world must both alter its current investment patterns and increase the overall volume of investments.",
    "It is critical that UN Member States adopt and implement the SDG Stimulus and support a comprehensive reform of the global financial architecture.",
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises"
  ]
}
```
Extracted Arguments:
1. The world must both alter its current investment patterns and increase the overall volume of investments.
2. It is critical that UN Member States adopt and implement the SDG Stimulus and s

## SDG 3: Health

In [6]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal).
topic = "SDG 3 (Health): Ensure healthy lives and promote well-being for all at all ages"
# Goal number; only used in the output file name below.
sgd_number = "3"

# Keywords passed to the prompt as "In favor" / "Against" filters.
keywords_g3 = {
    "in_favor": [
        "wellbeing", "welfare", "health", "benefit", "advantage", "comfort",
        "happiness", "prosperity", "universal health coverage", "healthcare access",
        "disease prevention", "mental health", "healthy lifestyles", "vaccination",
        "maternal health", "child health", "sanitation", "public health", "interest"
    ],
    "against": [
        "disease", "illness", "epidemic", "pandemic", "mortality", "morbidity",
        "health inequality", "stress", "poor sanitation", "addiction",
        "unhealthy habits", "mental illness", "anxiety"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g3,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.",
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions."
  ]
}
```
Extracted Arguments:
1. To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.
2. Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.
3. Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions.

--- Processing Page 8 ---
Model O

## SDG 4: Education

In [7]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal).
topic = "SDG 4 (Education): Ensure inclusive and equitable quality education"
# Goal number; only used in the output file name below.
sgd_number = "4"

# Keywords passed to the prompt as "In favor" / "Against" filters.
keywords_g4 = {
    "in_favor": [
        "quality", "inclusive", "equitable", "lifelong learning", "teaching",
        "schooling", "training", "development", "coaching", "instruction",
        "tutoring", "tuition", "skills development", "literacy", "numeracy",
        "universal access", "scholarships", "data literacy"
    ],
    "against": [
        "lack of education", "illiteracy", "school dropout", "dropout",
        "educational inequality", "poor quality teaching", "indoctrination",
        "lack of access", "resource scarcity", "digital divide", "skills gap"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g4,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.",
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight."
  ]
}
```
Extracted Arguments:
1. To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.
2. Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.
3. Revise the credit rating system and debt sustainability met

## SDG 5: Gender

In [8]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal).
topic = "SDG 5 (Gender): Achieve gender equality and empower all women and girls"
# Goal number; only used in the output file name below.
sgd_number = "5"

# Keywords passed to the prompt as "In favor" / "Against" filters.
# "glass ceiling" is listed once; it previously appeared twice.
keywords_g5 = {
    "in_favor": [
        "gender equality", "women empowerment", "feminism", "women’s movement",
        "suffragette", "suffragist", "feminist", "emancipated", "equal rights",
        "equal opportunity", "women leadership", "girls education", "reproductive rights"
    ],
    "against": [
        "gender inequality", "sexism", "sexist", "discrimination", "gender violence",
        "misogyny", "patriarchy", "wage gap", "glass ceiling", "female genital mutilation",
        "child marriage", "lack of representation", "stereotypes"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g5,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "The SDGs are still achievable.",
    "It is critical that UN Member States adopt and implement the SDG Stimulus and support a comprehensive reform of the global financial architecture.",
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight."
  ]
}
```
Extracted Arguments:
1. The SDGs are still achievable.
2. It is critical that UN Member States adopt and implement the SDG Stimulus and support a comprehensive reform of the global financial architecture.
3. Greatly increase funding to national and subnational governments and private businesses, especially in L

## SDG 6: Water and sanitation

In [9]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal).
topic = "SDG 6 (Water and sanitation): Ensure availability and sustainable management of water and sanitation for all"
# Goal number; only used in the output file name below.
sgd_number = "6"

# Keywords passed to the prompt as "In favor" / "Against" filters.
keywords_g6 = {
    "in_favor": [
        "clean water", "sanitation", "hygiene", "cleanliness", "sewerage",
        "drinking water", "water access", "water management", "water efficiency",
        "wastewater treatment", "water quality"
    ],
    "against": [
        "water scarcity", "water pollution", "lack of sanitation", "open defecation",
        "waterborne diseases", "drought", "unsustainable water use", "contaminated water"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g6,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises"
  ]
}
```
Extracted Arguments:
1. greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.
2. revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.
3. revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises

--- Processing Page 8 ---

## SDG 7: Clean Energy

In [10]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal).
topic = "SDG 7 (Clean Energy): Ensure access to affordable, reliable, sustainable and modern energy for all"
# Goal number; only used in the output file name below.
sgd_number = "7"

# Keywords passed to the prompt as "In favor" / "Against" filters.
keywords_g7 = {
    "in_favor": [
        "clean energy", "green energy", "renewable energy", "sustainable energy",
        "modern energy", "energy access", "energy efficiency", "solar power",
        "wind power", "geothermal energy", "hydropower", "energy transition", "energy matrix"
    ],
    "against": [
        "fossil fuels", "energy poverty", "energy inefficiency", "pollution",
        "carbon emissions", "unsustainable energy", "reliance on non-renewables"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g7,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.",
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions."
  ]
}
```
Extracted Arguments:
1. To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.
2. Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.
3. Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions.

--- Processing Page 8 ---
Model O

## SDG 8: Decent Work, Economic Growth

In [11]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal).
topic = "SDG 8 (decent work, economic growth): Promote sustained, inclusive and sustainable economic growth, full and productive employment and decent work for all"
# Goal number; only used in the output file name below.
sgd_number = "8"

# Keywords passed to the prompt as "In favor" / "Against" filters.
keywords_g8 = {
    "in_favor": [
        "decent work", "full employment", "fair wages", "workers rights",
        "job creation", "entrepreneurship", "financial inclusion", "financial",
        "business", "trade", "industrial", "commercial", "mercantile", "spillover"
    ],
    "against": [
        "unemployment", "underemployment", "precarious work", "exploitation",
        "child labor", "forced labor", "unsafe working conditions", "stagnation",
        "recession", "inequality", "informal economy", "low wages", "job insecurity",
        "informal jobs"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g8,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "it is critical that UN Member States adopt and implement the SDG Stimulus and support a comprehensive reform of the global financial architecture",
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments",
    "revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight"
  ]
}
```
Extracted Arguments:
1. it is critical that UN Member States adopt and implement the SDG Stimulus and support a comprehensive reform of the global financial architecture
2. Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments
3. revise the credit ratin

## SDG 9: Infrastructure, industrialization, innovation

In [12]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal) and the correct spelling of
# "industrialization".
topic = "SDG 9 (Infrastructure, industrialization, innovation): Build resilient infrastructure, promote inclusive and sustainable industrialization and foster innovation"
# Goal number; only used in the output file name below.
sgd_number = "9"

# Keywords passed to the prompt as "In favor" / "Against" filters.
# "cybersecurity threats" is spelled correctly so it can match the source text.
keywords_g9 = {
    "in_favor": [
        "infrastructure", "industrialization", "innovation", "technological innovations",
        "research and development", "technology transfer", "connectivity", "internet access",
        "manufacturing", "scientific research", "digitalization", "modernization",
        "technological advances", "digital inclusion", "digital literacy", "technological investment"
    ],
    "against": [
        "lack of infrastructure", "inadequate infrastructure", "industrial pollution",
        "unsustainable industry", "digital divide", "lack of innovation", "technological gap",
        "brain drain", "resource depletion", "unmaintained", "obsolescence", "decay", "cybersecurity threats",
        "cybersecurity attacks"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g9,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "revise liquidity structures for LICs and LMICs, especially regarding sovereign debts, to forestall self-fulfilling banking and balance-of-payments crises",
    "Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions.",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight."
  ]
}
```
Extracted Arguments:
1. increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.
2. revise the credit rating system 

## SDG 10: Inequality

In [13]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal).
topic = "SDG 10 (Inequality): Reduce inequality within and among countries"
# Goal number; only used in the output file name below.
sgd_number = "10"

# Keywords passed to the prompt as "In favor" / "Against" filters.
keywords_g10 = {
    "in_favor": [
        "equality", "equity", "inclusion", "equal opportunity", "fairness",
        "social justice", "progressive taxation", "non-discrimination"
    ],
    "against": [
        "inequality", "disparity", "discrimination", "exclusion", "apartheid",
        "linguistic imperialism", "favouritism", "bias", "partiality", "injustice",
        "imbalance", "nepotism", "marginalization", "wealth concentration",
        "poverty gap", "social stratification", "prejudice"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g10,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "Despite this alarming development, the SDGs are still achievable.",
    "To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.",
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions.",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight."
  ]
}
```
Extracted Arguments:
1. Despite this alarming development, the SDGs are still achievable.
2. To achieve the SDGs the world must both alter its current inv

## SDG 11: Sustainable cities

In [14]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal).
topic = "SDG 11 (Sustainable Cities, Sustainable Communities): Make cities and human settlements inclusive, safe, resilient and sustainable"
# Goal number; only used in the output file name below.
sgd_number = "11"

# Keywords passed to the prompt as "In favor" / "Against" filters.
keywords_g11 = {
    "in_favor": [
        "sustainable cities", "sustainable communities", "smart cities", "urban planning",
        "affordable housing", "public transport", "green spaces", "community",
        "preservation", "society", "people", "public", "association", "population",
        "residents", "commonwealth", "general public", "spatial justice", "accessibility"
    ],
    "against": [
        "slums", "urban sprawl", "air pollution", "noise pollution", "traffic",
        "lack of housing", "urban poverty", "crime", "segregation", "gentrification",
        "unsafe", "insecure", "urban degradation", "housing crisis", "urban decay",
        "deteriorated urban areas", "disadvantaged communities"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g11,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments",
    "revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development",
    "align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight",
    "create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions"
  ]
}
```
Extracted Arguments:
1. greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments
2. revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development
3. align private business investment flows with the SDGs, through impr

## SDG 12: Responsible Consumption, Responsible Production

In [15]:
# The topic text is inserted verbatim into the prompt, so it uses the correct
# acronym "SDG" (Sustainable Development Goal).
topic = "SDG 12 (Responsible Consumption, Responsible Production): Ensure sustainable consumption and production patterns"
# Goal number; only used in the output file name below.
sgd_number = "12"

# Keywords passed to the prompt as "In favor" / "Against" filters.
# "recycle" is spelled correctly so it can match the source text.
keywords_g12 = {
    "in_favor": [
        "sustainable consumption", "sustainable production", "second use", "second hand",
        "circular economy", "recycle", "recycling", "reuse", "sustainable sourcing",
        "eco-design", "corporate social responsibility", "sustainable tourism",
        "manufacture", "manufacturing", "construction"
    ],
    "against": [
        "overconsumption", "waste", "using up", "expenditure", "exhaustion", "depletion",
        "dissipation", "pollution", "planned obsolescence", "fast fashion", "food waste",
        "unsustainable production", "resource inefficiency", "long-tail economy"
    ]
}

# Extract arguments from every matching section file for this goal.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g12,
)

# The "ArgsSGD" file-name pattern is kept unchanged so output names stay the
# same; ':' in the model tag is replaced with '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "SDG 12 (Responsible Consumption and Production)",
    "SDG 12 (Responsible Consumption and Production)",
    "SDG 12 (Responsible Consumption and Production)",
    "SDG 12 (Responsible Consumption and Production)",
    "SDG 12 (Responsible Consumption and Production)",
    "SDG 12 (Responsible Consumption and Production)"
  ]
}
```
Extracted Arguments:
1. SDG 12 (Responsible Consumption and Production)
2. SDG 12 (Responsible Consumption and Production)
3. SDG 12 (Responsible Consumption and Production)
4. SDG 12 (Responsible Consumption and Production)
5. SDG 12 (Responsible Consumption and Production)
6. SDG 12 (Responsible Consumption and Production)

--- Processing Page 8 ---
Model Output: ```json
{"arguments": ["investing in statistical capacity, science, and data literacy are important priorities for achieving the SDGs", "embracing and aligning with the SDGs on sustainable development"]}
```
Extracted Argumen

## SDG 13: Climate change

In [16]:
# Goal 13 run: extract arguments on climate action.
sgd_number = "13"
topic = "SGD 13 (Climate change): Take urgent action to combat climate change and its impacts"

# Keywords handed to process_directory, grouped under "in_favor" / "against".
keywords_g13 = dict(
    in_favor=[
        "climate action",
        "mitigation",
        "adaptation",
        "resilience",
        "carbon neutrality",
        "decarbonization",
        "energy transition",
        "emissions reduction",
        "Paris Agreement",
        "climate policy",
    ],
    against=[
        "climate change",
        "global warming",
        "greenhouse gas emissions",
        "CO2 emissions",
        "fossil fuels",
        "deforestation",
        "climate inaction",
        "climate denial",
        "extreme weather events",
        "sea-level rise",
        "environmental degradation",
    ],
)

# Run the extraction over the files in process_text_path for this topic.
resultado = process_directory(
    keywords=keywords_g13,
    topic=topic,
    model_name=model_name,
    prefix=prefix,
    output_dir=output_dir,
    input_dir=process_text_path,
)

# Build the output file name; ':' in the model tag becomes '-'.
merged_output_path = os.path.join(
    output_dir,
    prefix + "_ArgsSGD" + sgd_number + "_" + "-".join(model_name.split(":")) + ".json",
)
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "SDG 13 (Climate Action), SDG 14 (Life Below Water), and SDG 15 (Life on Land)",
    "SDG 13 (Climate Action)"
  ]
}
```
Extracted Arguments:
1. SDG 13 (Climate Action), SDG 14 (Life Below Water), and SDG 15 (Life on Land)
2. SDG 13 (Climate Action)

--- Processing Page 8 ---
Model Output: ```json
{"arguments": ["investing in statistical capacity, science, and data literacy are important priorities for achieving the SDGs.", "We are proud of our efforts since 2012 in these four areas.", "All UN Member States and UN agencies can count on the continued efforts and energies of the SDSN around the world to support all governments, businesses, and civil society in embracing and aligning with the SDGs"]}
```
Extracted Arguments:
1. investing in statistical capacity, science, and data literacy are important priorities for achieving the SDGs.
2. We are proud of our efforts since 2012 in these four areas.
3. All UN Member St

## SDG 14: Life below water

In [17]:
# Goal 14 run: extract arguments on conservation of oceans and marine resources.
# The topic text is passed to process_directory; "bellow" was a misspelling of
# "below" and is corrected so the model receives the goal's actual name.
# NOTE(review): "SGD" looks like a transposition of "SDG"; kept because
# `prefix` and the output file names use the same spelling -- confirm.
topic = "SGD 14 (Life below Water): Conserve and sustainably use the oceans, seas and marine resources for sustainable development"
sgd_number = "14"

# Keywords handed to process_directory, grouped under "in_favor" / "against".
keywords_g14 = {
    "in_favor": [
        "ocean conservation", "marine conservation", "sustainable fishing",
        "marine protected areas", "ocean biodiversity", "ocean ecosystems", "biology",
        "marine biology", "ecosystem restoration",
    ],
    "against": [
        "overfishing", "marine pollution", "plastic pollution", "microplastics",
        "ocean acidification", "coral bleaching", "habitat destruction", "illegal fishing",
        "destructive fishing practices", "biodiversity loss", "eutrophication",
    ],
}

# Run the extraction over the files in process_text_path for this topic.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g14,
)

# Save the returned result as one JSON file; ':' in the model tag becomes '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "SDG 14 (Life Below Water), even in countries that are largely to blame for the climate and biodiversity crises.",
    "SDG 14 (Life Below Water)",
    "SDG 14 (Life Below Water)"
  ]
}
```
Extracted Arguments:
1. SDG 14 (Life Below Water), even in countries that are largely to blame for the climate and biodiversity crises.
2. SDG 14 (Life Below Water)
3. SDG 14 (Life Below Water)

--- Processing Page 8 ---
Model Output: ```json
{"arguments": ["investing in statistical capacity, science, and data literacy are important priorities for achieving the SDGs.", "We are proud of our efforts since 2012 in these four areas.", "All UN Member States and UN agencies can count on the continued efforts and energies of the SDSN around the world to support all governments, businesses, and civil society in embracing and aligning with the SDGs on sustainable development."]
}
```
Extracted Arguments:
1. investing in statistical capac

## SDG 15: Life on land

In [18]:
# Goal 15 run: extract arguments on terrestrial ecosystems and biodiversity.
sgd_number = "15"
topic = "SGD 15 (Life on land): Protect, restore and promote sustainable use of terrestrial ecosystems, sustainably manage forests, combat desertification, and halt and reverse land degradation and halt biodiversity loss"

# Keywords handed to process_directory, grouped under "in_favor" / "against".
keywords_g15 = dict(
    in_favor=[
        "land ecosystem",
        "agriculture",
        "ecosystem restoration",
        "forest",
        "stop desertification",
        "reverse land degradation",
        "conservation",
        "sustainable agriculture",
        "afforestation",
        "reforestation",
        "wildlife protection",
        "wildlife",
    ],
    against=[
        "deforestation",
        "desertification",
        "land degradation",
        "biodiversity loss",
        "habitat loss",
        "poaching",
        "illegal wildlife trade",
        "invasive species",
        "soil erosion",
        "unsustainable agriculture",
        "soil pollution",
    ],
)

# Run the extraction over the files in process_text_path for this topic.
resultado = process_directory(
    keywords=keywords_g15,
    topic=topic,
    model_name=model_name,
    prefix=prefix,
    output_dir=output_dir,
    input_dir=process_text_path,
)

# Build the output file name; ':' in the model tag becomes '-'.
merged_output_path = os.path.join(
    output_dir,
    prefix + "_ArgsSGD" + sgd_number + "_" + "-".join(model_name.split(":")) + ".json",
)
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "even in countries that are largely to blame for the climate and biodiversity crises",
    "SDG 15 (Life on Land)",
    "SDG 15 (Life on Land)",
    "to carry out needed SDG investments",
    "SDG 15 (Life on Land)"
  ]
}
```
Extracted Arguments:
1. even in countries that are largely to blame for the climate and biodiversity crises
2. SDG 15 (Life on Land)
3. SDG 15 (Life on Land)
4. to carry out needed SDG investments
5. SDG 15 (Life on Land)

--- Processing Page 8 ---
Model Output: ```json
{"arguments": ["investing in statistical capacity, science, and data literacy are important priorities for achieving the SDGs", "combat desertification", "halt and reverse land degradation"]}
```
Extracted Arguments:
1. investing in statistical capacity, science, and data literacy are important priorities for achieving the SDGs
2. combat desertification
3. halt and reverse land degradation

--- Processing Page 13 ---
Model Outp

## SDG 16: Peace, Justice, Strong Institutions

In [3]:
# Goal 16 run: extract arguments on peace, justice and strong institutions.
topic = "SGD 16 (Peace, Justice, Strong Institutions): Promote peaceful and inclusive societies for sustainable development, provide access to justice for all and build effective, accountable and inclusive institutions at all levels"
sgd_number = "16"

# Keywords handed to process_directory, grouped under "in_favor" / "against".
# "autoritarism" was not an English word and could never match the reports;
# it is now "authoritarianism".
keywords_g16 = {
    "in_favor": [
        "peace", "justice", "access to justice", "strong institutions", "healthy institutions",
        "accountability", "anti-corruption", "transparency", "governance", "human rights",
        "conflict resolution", "truce", "ceasefire", "treaty", "armistice", "pacification",
        "fairness", "integrity",
        "honesty", "decency", "impartiality", "justness", "rightfulness",
        "strong leadership", "good leadership", "institutionalization", "government effort",
        "public investments", "science-based policy",
    ],
    "against": [
        "conflict", "violence", "war", "insecurity", "injustice", "corruption", "bribery",
        "weak institutions", "lack of accountability", "impunity", "human rights violations",
        "discrimination", "crime", "illicit financial flows", "organized crime", "terrorism",
        "weak leadership", "authoritarianism", "dictator",
    ],
}

# Run the extraction over the files in process_text_path for this topic.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g16,
)

# Save the returned result as one JSON file; ':' in the model tag becomes '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.",
    "Create ambitious, internationally-agreed upon criteria for sustainable finance that are mandatory for all public financial institutions.",
    "Reform current institutional frameworks and develop new mechanisms to improve the quality and speed of deployment of international cooperation, and monitor progress in an open and timely manner.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight."
  ]
}
```
Extracted Arguments:
1. To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.
2. Create ambitious, internationally-a

## SDG 17: Partnerships, sustainable development

In [4]:
# Goal 17 run: extract arguments on partnerships and means of implementation.
# A space was missing after the colon ("):Strengthen"); it is added so this
# topic has the same "<label>: <description>" shape as the other goals.
topic = "SGD 17 (Partnerships, sustainable development): Strengthen the means of implementation and revitalize the Global Partnership for Sustainable Development"
sgd_number = "17"

# Keywords handed to process_directory, grouped under "in_favor" / "against".
keywords_g17 = {
    "in_favor": [
        "global partnership", "cooperation", "association", "alliance", "sharing",
        "union", "connection", "participation", "copartnership", "technology transfer",
        "capacity building", "international cooperation", "positive spillover",
        "transboundary", "coordination",
    ],
    "against": [
        "lack of cooperation", "isolationism", "protectionism", "insufficient funding",
        "debt", "policy incoherence", "data gaps", "weak monitoring", "non-participation",
        "aid dependency", "technological gatekeeping", "negative spillover",
    ],
}

# Run the extraction over the files in process_text_path for this topic.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g17,
)

# Save the returned result as one JSON file; ':' in the model tag becomes '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments.",
    "It is critical that UN Member States adopt and implement the SDG Stimulus and support a comprehensive reform of the global financial architecture.",
    "Greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments.",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development.",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight.",
    "Reform current institutional frameworks and develop new mechanisms to improve the quality and speed of deployment of international cooperation, and monitor progress in an open and timely manner."
  ]
}
```
Extracted Argum

## SDG 0: Overarching terms

In [6]:
# "Goal 0" run: extract arguments about the SDG framework as a whole rather
# than one specific goal.
# The dangling ", " at the end of the topic string was removed.
# NOTE(review): the leading "SGD" looks like a transposition of "SDG"; kept
# because `prefix` and the output file names use the same spelling -- confirm.
topic = "SGD Overarching terms: Sustainable Development Goal, SDG, Agenda 2030, leave no one behind, Voluntary National Review, SDG transformations"
sgd_number = "0"

# Keywords handed to process_directory, grouped under "in_favor" / "against".
# "regression" was listed twice under "against"; the duplicate was dropped.
keywords_g0 = {
    "in_favor": [
        "Sustainability", "Sustainable Development Goal", "SDG", "Agenda 2030", "global goals",
        "development", "progress", "implementation", "monitoring", "accountability", "inclusive", "leave no one behind",
        "Voluntary National Review", "VNR", "SDG transformations",
    ],
    "against": [
        "Unsustainability", "inaction", "regression", "lack of funding", "greenwashing", "exploitation",
        "environmental degradation", "SDG needs",
        "multidimensional vulnerability", "stagnation",
    ],
}

# Run the extraction over the files in process_text_path for this topic.
resultado = process_directory(
    input_dir=process_text_path,
    output_dir=output_dir,
    prefix=prefix,
    model_name=model_name,
    topic=topic,
    keywords=keywords_g0,
)

# Save the returned result as one JSON file; ':' in the model tag becomes '-'.
merged_output_path = os.path.join(output_dir, f"{prefix}_ArgsSGD{sgd_number}_{model_name.replace(':', '-')}.json")
save_to_json(resultado, merged_output_path)


--- Processing Page 7 ---
Model Output: ```json
{
  "arguments": [
    "the SDGs are seriously off track",
    "the SDGs are still achievable",
    "it is critical that UN Member States adopt and implement the SDG Stimulus",
    "To achieve the SDGs the world must both alter its current investment patterns and increase the overall volume of investments",
    "greatly increase funding to national and subnational governments and private businesses, especially in LICs and LMICs, to carry out needed SDG investments",
    "Revise the credit rating system and debt sustainability metrics to facilitate long-term sustainable development",
    "Align private business investment flows with the SDGs, through improved national planning, regulation, reporting, and oversight",
    "Reform current institutional frameworks and develop new mechanisms to improve the quality and speed of deployment of international cooperation, and monitor progress in an open and timely manner"
  ]
}
```
Extracted Argume

In [7]:
!git add .
!git commit -m "sgd keywords gemma SGD 16-0 update"
!git push origin main  # or 'master' or your branch name



[main f949985] sgd keywords gemma SGD 16-0 update
 1 file changed, 8943 insertions(+), 1262 deletions(-)


error: src refspec # does not match any
error: src refspec or does not match any
error: src refspec 'master' does not match any
error: src refspec or does not match any
error: src refspec your does not match any
error: src refspec branch does not match any
error: src refspec name does not match any
error: failed to push some refs to 'https://github.com/camipalo/TFM.git'
