<h3 align="center">Status : <span class="badge"><b>En cours</b></span></h3>

<h1 align="center">RL GRPO</h1>

---

<h1 align="center">Training a small Unimarc reasoner with RL</h1>

# Requirements

In [1]:
import torch
print("PyTorch:")
print("PyTorch version is:" + torch.__version__)
print("PyTorch is working with CUDA" if torch.cuda.is_available() else "Error! It is not working correctly")
print("The GPU model is: "+ torch.cuda.get_device_name(0))

PyTorch:
PyTorch version is:2.6.0+cu124
PyTorch is working with CUDA
The GPU model is: Tesla T4


In [3]:
!pip install -U --quiet datasets transformers trl huggingface_hub accelerate peft --use-deprecated=legacy-resolver
#!pip install flash-attn

In [None]:
from huggingface_hub import login, whoami

login(
  token="..." # with write permissions
)
whoami()

# Load model

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
    #attn_implementation="flash_attention_2"
)
model.config.sliding_window = None

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


In [6]:
device = "cuda" # for GPU usage or "cpu" for CPU usage
model = model.to(device)
model.eval()

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=896, out_features=896, bias=True)
          (k_proj): Linear(in_features=896, out_features=128, bias=True)
          (v_proj): Linear(in_features=896, out_features=128, bias=True)
          (o_proj): Linear(in_features=896, out_features=896, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
          (up_proj): Linear(in_features=896, out_features=4864, bias=False)
          (down_proj): Linear(in_features=4864, out_features=896, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((896,), eps=1e-06)
    (rotary_emb): Qwen2RotaryEmbe

In [7]:
# !Important
tokenizer.padding_side = "left"

## Inspect tokenizer

In [7]:
len(tokenizer)

151665

In [16]:
print(f"**EOS**\nEOS token: {tokenizer.eos_token}\n- EOS token id: {tokenizer.eos_token_id}\n\n**PAD**\nPAD token: {tokenizer.pad_token}\n- PAD token id: {tokenizer.pad_token_id}")

**EOS**
EOS token: <|im_end|>
- EOS token id: 151645

**PAD**
PAD token: <|endoftext|>
- PAD token id: 151643


In [18]:
tokenizer("the sky is blue", return_tensors="pt").to(device)

{'input_ids': tensor([[ 1782, 12884,   374,  6303]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1]], device='cuda:0')}

In [19]:
input_ids = tokenizer("the sky is blue", return_tensors="pt").to(device).input_ids[0]

In [20]:
tokenizer.decode(input_ids)

'the sky is blue'

In [21]:
if hasattr(tokenizer, "chat_template"):
    print("Current chat template:", tokenizer.chat_template)

Current chat template: {%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0]['role'] == 'system' %}
        {{- messages[0]['content'] }}
    {%- else %}
        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
    {%- endif %}
    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0]['role'] == 'system' %}
        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
    {%- else %}
        {{- '<|im_start|>system\nYou are Qwe

## Inference

### Without applying chat templating

In [37]:
prompt = "<|im_start|>system: You are a helpful assistant<|im_end|><|im_start|>user: complete this sentence 'the sky is blue and '<|im_end|><|im_start|>assistant: "
inputs = tokenizer(prompt, return_tensors="pt").to(device)

In [38]:
outputs = model.generate(**inputs,
                         max_new_tokens = 20,
                         use_cache = True,)

In [39]:
print(tokenizer.decode(outputs[0], skip_special_tokens=False))

<|im_start|>system: You are a helpful assistant<|im_end|><|im_start|>user: complete this sentence 'the sky is blue and '<|im_end|><|im_start|>assistant:  the sky is blue and the clouds are white.<|im_end|>


### With applying chat templating

In [10]:
SYSTEM_PROMPT_0 = """
You are an expert in Unimarc/XML bibliographic records.
Respond in the following format:
<reasoning>
...
</reasoning>
<answer>
...
</answer>
"""

SYSTEM_PROMPT_0_SMALL = """
You are an expert in Unimarc/XML bibliographic records.
Respond in the following format:
### Resaoning:...
### Answer: ...
"""

# source: https://arxiv.org/pdf/2503.19470v1
SYSTEM_PROMPT_1 = """
You are a helpful assistant that can solve the given question step by step. 
Given a question, you need to first think about the reasoning process in
the mind and then provide the answer. The reasoning process and
answer are enclosed within <think> </think> and <answer> </answer> tags respectively. 
For example, <think> This is the reasoning process. </think>
<answer> The final answer is \boxed{answer here}
</answer>. In the last part of the answer, the final exact answer is enclosed within \boxed{}
with xml format.
"""

SYSTEM_PROMPT_2 = (
    "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant "
    "first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning "
    "process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., "
    "<think> reasoning process here </think><answer> answer here </answer>"
)

SYSTEM_PROMPT_2_SMALL = """
You are a helpful assistant expert in Unimarc/XML bibliographic records. 
Given a question, you need to first think about the reasoning process in the mind and then provide the answer. 
The reasoning process and answer are separated in distincts paragraphs respectively starting with `### Reasoning:` and `### Answer:`.
For example, ### Reasoning: This is the reasoning process. \n ### Answer: The final exact answer is \boxed{answer here}. 
In the last part of the answer, the final exact answer is enclosed within \boxed{} with XML format.
"""


user_prompt = """
Generate a valid Unimarc/XML record from these unstructured informations:
Title: Electric vehicle tribology
Subtitle: Challenges and opportunities for a sustainable transportation future
Author: Leonardo I. Farfan-Cabrera, Ali Erdemir
Publisher: Elsevier
Year: 2024
ISBN: 978-0-443-14074-7
Language: English
Collection/Series: Elsevier Series on Tribology and Surface Engineering
Edition: Not specified
Material description: 1 vol. (XI-313 p.), couv. ill. en coul., 23 cm
Abstract/Notes: "Electric vehicle tribology, challenges and opportunities for a sustainable transportation future" provides practical, comprehensive guidance on a new and increasingly important area of tribology. Building skills from fundamentals to solution design, this book demonstrates the unique tribological techniques essential to the efficient electrification of transport systems. Led by professors with a combined three decades in industry and academia, and collecting insights from experts around the world, this book begins with the essential knowledge regarding both electric vehicles and tribology. After outlining the unique tribological needs of EVs, the book then breaks down the components and hardware required. It provides detailed protocols and methods for the testing and improvement of lubricants and materials as well as a dedicated section on modern lubrication specific to EVs. Throughout, it considers the critical question of sustainable tribology and the long-term sustainable options for lubrication and materials for electric vehicles.
Source of the abstract/notes: 4e de couverture
Table of contents: Not specified
Keywords: Tribologie (technologie), Tribologie (Technologie)
"""

In [11]:
messages = [
  {"role": "system", "content": SYSTEM_PROMPT_2_SMALL},
  {"role": "user", "content": user_prompt},
]

In [12]:
inputs = tokenizer.apply_chat_template(
    messages,
    return_dict=True,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to(device)

In [13]:
outputs = model.generate(
    **inputs,
    max_new_tokens = 2048,
    use_cache = True,
)

In [None]:
#print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [14]:
generated_ids = [
   output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs.input_ids, outputs)
]

In [15]:
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])

### Reasoning:

To generate an Unimarc/XML record, we need to follow these steps:

1. **Identify the Title:** The title provided is "Electric vehicle tribology".
2. **Identify the Subtitle:** The subtitle is "Challenges and opportunities for a sustainable transportation future".
3. **Identify the Author:** The author's name is listed as Leonardo I. Farfan-Cabrera, Ali Erdemir.
4. **Identify the Publisher:** The publisher is Elsevier.
5. **Identify the Year:** The year is given as 2024.
6. **Identify the ISBN:** The ISBN is not explicitly mentioned but can be inferred from the structure of the record.
7. **Identify the Language:** The language is English.

### Answer:

```xml
<?xml version="1.0"?>
<record xmlns="http://unimarc.org/xml/ns/unimarc">
    <author>
        <name>Leonardo I. Farfan-Cabrera, Ali Erdemir</name>
    </author>
    <publisher>Elsevier</publisher>
    <year>2024</year>
    <isbn>978-0-443-14074-7</isbn>
    <language>English</language>
    <collection>
        <ser

# Load data

In [8]:
from datasets import load_dataset, Dataset
import pandas as pd

In [9]:
dataset = load_dataset("Geraldine/Unimarc-rcr061522302-1.5k", split="train")
dataset

Generating train split: 100%|██████████| 1510/1510 [00:00<00:00, 4830.95 examples/s]


Dataset({
    features: ['ppn', 'question', 'answer'],
    num_rows: 1510
})

In [10]:
system_message = """You are a helpful assistant expert in Unimarc/XML bibliographic records."""

def create_conversation(row):
  return {
    "messages": [
      {"role": "system", "content": system_message},
      {"role": "user", "content": row["question"]},
      {"role": "assistant", "content": row["answer"]}
    ]
  }

dataset = dataset.map(create_conversation, remove_columns=dataset.features, batched=False)
dataset

Map: 100%|██████████| 1510/1510 [00:00<00:00, 5823.41 examples/s]


Dataset({
    features: ['messages'],
    num_rows: 1510
})

In [11]:
dataset[0]

{'messages': [{'content': 'You are a helpful assistant expert in Unimarc/XML bibliographic records.',
   'role': 'system'},
  {'content': 'Title: Electric vehicle tribology\nSubtitle: Challenges and opportunities for a sustainable transportation future\nAuthor: Leonardo I. Farfan-Cabrera, Ali Erdemir\nPublisher: Elsevier\nYear: 2024\nISBN: 978-0-443-14074-7\nLanguage: English\nCollection/Series: Elsevier Series on Tribology and Surface Engineering\nEdition: Not specified\nMaterial description: 1 vol. (XI-313 p.), couv. ill. en coul., 23 cm\nAbstract/Notes: "Electric vehicle tribology, challenges and opportunities for a sustainable transportation future" provides practical, comprehensive guidance on a new and increasingly important area of tribology. Building skills from fundamentals to solution design, this book demonstrates the unique tribological techniques essential to the efficient electrification of transport systems. Led by professors with a combined three decades in industry and

# Lora SFT fine-tuning

In [12]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from trl import SFTTrainer
from peft import LoraConfig

In [None]:
for name, module in model.named_modules():
    print(name)

In [None]:
help(SFTTrainer)

In [13]:
# ---- 3. Define LoRA config ----
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"] # may vary based on model, check with `model.named_modules()` in previous cell
)

# ---- 4. Training args ----
training_args = TrainingArguments(
    output_dir="./sft_lora_output",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    logging_steps=10,
    save_strategy="epoch",
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    report_to="none"
)

# ---- 5. Launch training ----
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    args=training_args
)

Converting train dataset to ChatML: 100%|██████████| 1510/1510 [00:00<00:00, 8874.83 examples/s]
Applying chat template to train dataset: 100%|██████████| 1510/1510 [00:00<00:00, 4337.00 examples/s]
Tokenizing train dataset: 100%|██████████| 1510/1510 [00:08<00:00, 170.76 examples/s]
Truncating train dataset: 100%|██████████| 1510/1510 [00:00<00:00, 13133.86 examples/s]
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [14]:
trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.32 GiB. GPU 0 has a total capacity of 14.57 GiB of which 296.75 MiB is free. Process 3848786 has 14.28 GiB memory in use. Of the allocated memory 11.58 GiB is allocated by PyTorch, and 2.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

# Reward functions

## utils functions

In [29]:
import xml.etree.ElementTree as ET
import re

def extract_hash_answer(text: str) -> str | None:
    if "### Answer:" not in text:
        return None
    first_split = text.split("### Answer:")[1].strip()
    if "### Explanation:" not in first_split:
        return first_split
    else:
        return first_split.split("### Explanation:")[0].strip()

def extract_xml(text: str) -> str | None:
    # Use regular expression to find the XML part enclosed in ```xml...```
    xml_match = re.search(r'```xml(.*?)```', response, re.DOTALL)
    if xml_match:
        return xml_match.group(1).strip()
    else:
        return None

def extract_field_values(xml_str):
    root = ET.fromstring(xml_str)
    fields = {}
    for df in root.findall(".//datafield"):
        tag = df.get("tag")
        subfields = [sf.text for sf in df.findall("subfield")]
        fields[tag] = " ".join(subfields)
    return fields

In [24]:
# Example avec previous example
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

In [25]:
print(extract_hash_answer(text=response))

```xml
<?xml version="1.0"?>
<record xmlns="http://unimarc.org/xml/ns/unimarc">
    <author>
        <name>Leonardo I. Farfan-Cabrera, Ali Erdemir</name>
    </author>
    <publisher>Elsevier</publisher>
    <year>2024</year>
    <isbn>978-0-443-14074-7</isbn>
    <language>English</language>
    <collection>
        <series>Elsevier Series on Tribology and Surface Engineering</series>
    </collection>
    <edition>
        Not specified
    </edition>
    <material>
        <description>1 vol. (XII-313 p.), couv. ill. en coul., 23 cm</description>
    </material>
    <abstract>
        "Electric vehicle tribology, challenges and opportunities for a sustainable transportation future"
        provides practical, comprehensive guidance on a new and increasingly important area of tribology. Building skills from fundamentals to solution design, this book demonstrates the unique tribological techniques essential to the efficient electrification of transport systems."
    </abstract>
    <k

In [26]:
print(extract_xml(text=response))

<?xml version="1.0"?>
<record xmlns="http://unimarc.org/xml/ns/unimarc">
    <author>
        <name>Leonardo I. Farfan-Cabrera, Ali Erdemir</name>
    </author>
    <publisher>Elsevier</publisher>
    <year>2024</year>
    <isbn>978-0-443-14074-7</isbn>
    <language>English</language>
    <collection>
        <series>Elsevier Series on Tribology and Surface Engineering</series>
    </collection>
    <edition>
        Not specified
    </edition>
    <material>
        <description>1 vol. (XII-313 p.), couv. ill. en coul., 23 cm</description>
    </material>
    <abstract>
        "Electric vehicle tribology, challenges and opportunities for a sustainable transportation future"
        provides practical, comprehensive guidance on a new and increasingly important area of tribology. Building skills from fundamentals to solution design, this book demonstrates the unique tribological techniques essential to the efficient electrification of transport systems."
    </abstract>
    <keywords

## Format-based reward

In [27]:
def format_reward(xml_output: str) -> float:
    try:
        root = ET.fromstring(xml_output)
    except ET.ParseError:
        return 0.0  # Not even valid XML

    required_tags = ["leader", "controlfield", "datafield"]
    has_required_tags = all(root.find(f".//{tag}") is not None for tag in required_tags)

    if not has_required_tags:
        return 0.3  # Barely structured but not fully compliant

    # Optionally: integrate XSD/Schematron validation here
    return 1.0  # Well-formed and conforms to expected structure

In [28]:
xml_answer = extract_xml(text=response)
print(format_reward(xml_output=xml_answer))

0.3


## Accuracy-based reward

In [30]:
xml_answer = extract_xml(text=response)
xml_target = """<record><controlfield tag="005">20240711151255.000</controlfield><leader> nam0 22 450 </leader><controlfield tag="001">279354177</controlfield><controlfield tag="003">http://www.sudoc.fr/279354177</controlfield><datafield tag="035" ind1=" " ind2=" "><subfield code="a">(OCoLC)1449675800</subfield></datafield><datafield tag="010" ind1=" " ind2=" "><subfield code="a">978-0-443-14074-7</subfield><subfield code="b">br.</subfield></datafield><datafield tag="073" ind1=" " ind2="1"><subfield code="a">9780443140747</subfield></datafield><datafield tag="100" ind1=" " ind2=" "><subfield code="a">20240711d2024 ||||0frey50 ba</subfield></datafield><datafield tag="101" ind1="0" ind2=" "><subfield code="a">eng</subfield><subfield code="2">639-2</subfield></datafield><datafield tag="105" ind1=" " ind2=" "><subfield code="a">y ||||001yy</subfield></datafield><datafield tag="106" ind1=" " ind2=" "><subfield code="a">r</subfield></datafield><datafield tag="181" ind1=" " ind2=" "><subfield code="6">z01</subfield><subfield code="c">txt</subfield><subfield code="2">rdacontent</subfield></datafield><datafield tag="181" ind1=" " ind2="1"><subfield code="6">z01</subfield><subfield code="a">i#</subfield><subfield code="b">xxxe##</subfield></datafield><datafield tag="182" ind1=" " ind2=" "><subfield code="6">z01</subfield><subfield code="c">c</subfield><subfield code="2">rdamedia</subfield></datafield><datafield tag="182" ind1=" " ind2="1"><subfield code="6">z01</subfield><subfield code="a">b</subfield></datafield><datafield tag="183" ind1=" " ind2="1"><subfield code="6">z01</subfield><subfield code="a">nga</subfield><subfield code="2">RDAfrCarrier</subfield></datafield><datafield tag="102" ind1=" " ind2=" "><subfield code="a">NL</subfield><subfield code="a">GB</subfield><subfield code="a">US</subfield></datafield><datafield tag="200" ind1="0" ind2=" "><subfield code="a">Electric vehicle tribology</subfield><subfield code="e">challenges and opportunities for a sustainable transportation future</subfield><subfield code="f">edited by Leonardo I. Farfan-Cabrera, ... Ali Erdemir, ...</subfield></datafield><datafield tag="701" ind1=" " ind2="1"><subfield code="3">279354606</subfield><subfield code="a">Farfan-Cabrera</subfield><subfield code="b">Leonardo Israel</subfield><subfield code="4">340</subfield></datafield><datafield tag="701" ind1=" " ind2="1"><subfield code="3">11490314X</subfield><subfield code="a">Erdemir</subfield><subfield code="b">Ali</subfield><subfield code="4">340</subfield></datafield><datafield tag="801" ind1=" " ind2="3"><subfield code="a">FR</subfield><subfield code="b">Abes</subfield><subfield code="c">20240711</subfield><subfield code="g">AFNOR</subfield></datafield><datafield tag="214" ind1=" " ind2="0"><subfield code="a">Amsterdam</subfield><subfield code="c">Elsevier</subfield><subfield code="d">2024</subfield></datafield><datafield tag="215" ind1=" " ind2=" "><subfield code="a">1 vol. (XI-313 p.)</subfield><subfield code="c">couv. ill. en coul.</subfield><subfield code="d">23 cm</subfield></datafield><datafield tag="225" ind1="0" ind2=" "><subfield code="a">Elsevier Series on Tribology and Surface Engineering</subfield></datafield><datafield tag="606" ind1=" " ind2=" "><subfield code="3">027720055</subfield><subfield code="a">Tribologie (technologie)</subfield><subfield code="2">rameau</subfield></datafield><datafield tag="606" ind1=" " ind2=" "><subfield code="a">Tribologie (Technologie)</subfield><subfield code="2">lc</subfield></datafield><datafield tag="330" ind1=" " ind2=" "><subfield code="a">"Electric vehicle tribology, challenges and opportunities for a sustainable transportation future" provides practical, comprehensive guidance on a new and increasingly important aera of tribology. Building skills from fundamentals to solution design, this book demonstrates the unique tribological techniques essential to the efficient electrification of transportt systems. Led by professors with a combined three decades in industry and academia, and collecting insights from experts around the world, this book begins with the essential knowledge regarding both electric vehicles and tribology. After outlining the unique tribological needs of EVs, the book then breaks down the components and hardware required. It provides detailed protocols and methods for the testing and improvement of lubricants and materials as well as a dedicated section on modern lubrication specific to EVs. Throughout, it considers the critical question of sustainable tribology and the long-term sustainable options for lubrication and materials for electric vehicles.</subfield><subfield code="2">4e de couverture</subfield></datafield></record>"""

In [34]:
print(extract_field_values(xml_answer))

{}


In [33]:
print(extract_field_values(xml_target))

{'035': '(OCoLC)1449675800', '010': '978-0-443-14074-7 br.', '073': '9780443140747', '100': '20240711d2024 ||||0frey50 ba', '101': 'eng 639-2', '105': 'y ||||001yy', '106': 'r', '181': 'z01 i# xxxe##', '182': 'z01 b', '183': 'z01 nga RDAfrCarrier', '102': 'NL GB US', '200': 'Electric vehicle tribology challenges and opportunities for a sustainable transportation future edited by Leonardo I. Farfan-Cabrera, ... Ali Erdemir, ...', '701': '11490314X Erdemir Ali 340', '801': 'FR Abes 20240711 AFNOR', '214': 'Amsterdam Elsevier 2024', '215': '1 vol. (XI-313 p.) couv. ill. en coul. 23 cm', '225': 'Elsevier Series on Tribology and Surface Engineering', '606': 'Tribologie (Technologie) lc', '330': '"Electric vehicle tribology, challenges and opportunities for a sustainable transportation future" provides practical, comprehensive guidance on a new and increasingly important aera of tribology. Building skills from fundamentals to solution design, this book demonstrates the unique tribological te

In [31]:
def accuracy_reward(generated_xml: str, target_xml: str) -> float:
    try:
        gen_fields = extract_field_values(generated_xml)
        tgt_fields = extract_field_values(target_xml)
    except ET.ParseError:
        return 0.0

    shared_keys = set(gen_fields) & set(tgt_fields)
    if not shared_keys:
        return 0.0

    total_sim = 0
    for key in shared_keys:
        sim = SequenceMatcher(None, gen_fields[key], tgt_fields[key]).ratio()
        total_sim += sim

    return total_sim / len(shared_keys)

In [32]:
print(accuracy_reward(generated_xml=xml_answer, target_xml=xml_target))

0.0
