In [4]:
import transformers
from transformers import AutoTokenizer
import torch
import os
print(f"number of GPUs: torch.cuda.device_count()")
print(torch.__version__)

number of GPUs: torch.cuda.device_count()
2.6.0+rocm6.1


In [5]:
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct" 

if  torch.cuda.is_available():
    device = "cuda"
else:
    raise ValueError("No se reconoció GPU.")

pipeline = transformers.pipeline(
	"text-generation", 
	model=model_id,
	model_kwargs={"torch_dtype": torch.bfloat16},
	device=device
)

# Tokenizer necesario para contar tokens
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda


In [None]:
# Directorios de entrada y salida
input_dir = "test/"
output_dir = "out/"
os.makedirs(output_dir, exist_ok=True)

# Instrucciones para el modelo
prompt = [
    {"role": "system", 
     "content": 
        """ 
        Quiero que identifiques entidades nombradas que requieren ser anonimizadas en el informe clínico que copio entre comillas al final de esta instrucción. Quiero que me des el resultado en formato .xml in-line, donde las entidades sean identificadas por etiquetas en el mismo texto. Quiero que etiquetes con los criterios MEDDOCAN. A continuación, te muestro un ejemplo que contiene:
        - El texto original del informe en formato plano (.txt)
        - La representación estructurada del mismo en XML con etiquetas semánticas detalladas y posiciones de texto (atributos start, end, text, TYPE, etc.).
        Tu tarea será generar un XML con las mismas reglas de estructura y etiquetado a partir de cada texto clínico. Instrucciones:
        - Conserva el formato exacto del XML del ejemplo.
        - Cada etiqueta tiene que tener el tipo de entidad (`TYPE`) del inventario de MEDDOCAN. Los tipos de entidad que puedes usar son los siguientes: 
            NOMBRE_SUJETO_ASISTENCIA
            EDAD_SUJETO_ASISTENCIA
            SEXO_SUJETO_ASISTENCIA
            FAMILIARES_SUJETO_ASISTENCIA
            NOMBRE_PERSONAL_SANITARIO
            FECHAS
            PROFESION
            HOSPITAL
            CENTRO_SALUD
            INSTITUCION
            CALLE
            TERRITORIO
            PAIS
            NUMERO_TELEFONO
            NUMERO_FAX
            CORREO_ELECTRONICO
            ID_SUJETO_ASISTENCIA
            ID_CONTACTO_ASISTENCIAL
            ID_ASEGURAMIENTO
            ID_TITULACION_PERSONAL_SANITARIO
            ID_EMPLEO_PERSONAL_SANITARIO
            IDENTIF_VEHICULOS_NRSERIE_PLACAS
            IDENTIF_DISPOSITIVOS_NRSERIE
            DIREC_PROT_INTERNET
            URL_WEB
            IDENTIF_BIOMETRICOS
            OTRO_NUMERO_IDENTIF
            OTROS_SUJETO_ASISTENCIA
          - y un campo de comentario (`comment`) vacío
        Cuando te dé un nuevo texto, responde solo con el XML, sin explicaciones adicionales.
    
        Ejemplo - Informe en formato .txt: 
        Datos del paciente.
        Nombre: María Soledad Moreno Roca
        DNI: 23556552K
        Fecha de nacimiento: 09/01/1941
        Género: Mujer
        Domicilio: Calle de Almagro 80
        Ciudad: Denia, Valencia, Comunidad Valenciana
        Código postal: 46571
        Email: mariasoledad_roca@ucm.es
        Teléfono fijo: +34 960 66 89 48
        Teléfono móvil: +34 660 57 14 97
        NHC: 2409425
        NASS: 468043486571
        Condición de riesgo: Científico de Investigación
        
        Datos asistenciales.
        Médico: Dr. Juan Ramón Benito Vicente. NC 097900390. Investigador Clínico en Epidemiología. Instituto de Investigación Biomédica en Red de Enfermedades Infecciosas (CIBERINFEC). Avenida Monforte de Lemos 3-5. 28029. Madrid. España.
        Fecha de ingreso: 05/06/1996
        Centro de salud: Centro de Salud Carabanchel
        
        Informe clínico del paciente:
        Paciente sobreviviente de violencia de 55 años de edad, acompañado de su madre. 
        
        Ejemplo - Informe en formato .xml: lo que debes generar
        <?xml version='1.0' encoding='UTF-8'?>
        <MEDDOCAN>
          <TEXT>
        Ejemplo - Informe en formato .txt: 
        Datos del paciente.
        Nombre:  <TAG TYPE="NOMBRE_SUJETO_ASISTENCIA">María Soledad</TAG> <TAG TYPE="NOMBRE_SUJETO_ASISTENCIA">Moreno Roca</TAG>
        DNI: <TAG TYPE="ID_SUJETO_ASISTENCIA">23556552K</TAG>
        Fecha de nacimiento: <TAG TYPE="FECHAS">09/01/1941</TAG>
        Género: <TAG TYPE="SEXO_SUJETO_ASISTENCIA">Mujer</TAG>
        Domicilio: <TAG TYPE="CALLE">Calle de Almagro 80</TAG>.
        Ciudad: <TAG TYPE="TERRITORIO">Denia</TAG>, <TAG TYPE="TERRITORIO">Valencia</TAG>, <TAG TYPE="TERRITORIO">Comunidad Valenciana</TAG>
        Código postal: <TAG TYPE="TERRITORIO">46571</TAG>
        Email: <TAG TYPE="CORREO_ELECTRONICO">mariasoledad_roca@ucm.es</TAG>
        Teléfono fijo: <TAG TYPE="NUMERO_TELEFONO">+34 960 66 89 48</TAG>
        Teléfono móvil: <TAG TYPE="NUMERO_TELEFONO">+34 660 57 14 97</TAG>
        NHC: <TAG TYPE="ID_SUJETO_ASISTENCIA">2409425</TAG>
        NASS: <TAG TYPE="ID_ASEGURAMIENTO">468043486571</TAG>
        Condición de riesgo: <TAG TYPE="PROFESION">Científico de Investigación</TAG>
        
        Datos asistenciales.
        Médico: Dr. <TAG TYPE="NOMBRE_PERSONAL_SANITARIO">Juan Ramón Benito Vicente</TAG>. NC <TAG TYPE="ID_TITULACION_PERSONAL_SANITARIO">097900390</TAG>. <TAG TYPE="ID_EMPLEO_PERSONAL_SANITARIO">Investigador Clínico en Epidemiología</TAG>. <TAG TYPE="INSTITUCION">Instituto de Investigación Biomédica en Red de Enfermedades Infecciosas (CIBERINFEC)</TAG>. <TAG TYPE="CALLE">Avenida Monforte de Lemos 3-5</TAG>. <TAG TYPE="TERRITORIO">28029</TAG>. <TAG TYPE="TERRITORIO">Madrid</TAG>. <TAG TYPE="PAIS">España</TAG>.
        Fecha de ingreso: <TAG TYPE="FECHAS">05/06/1996</TAG>
        Centro de salud: <TAG TYPE="CENTRO_SALUD">Centro de Salud Carabanchel</TAG>
        
        Informe clínico del paciente:
        Paciente <TAG TYPE="OTROS_SUJETO_ASISTENCIA">sobreviviente de violencia</TAG> de <TAG TYPE="EDAD_SUJETO_ASISTENCIA">55 años</TAG> de edad, acompañado de su <TAG TYPE="FAMILIARES_SUJETO_ASISTENCIA">madre</TAG>.
            </TEXT>
        </MEDDOCAN>
    
        Recordá que en ningún caso debes incluir advertencias, explicaciones ni descripciones sobre la tarea, sobre la instrucción que te he dado o sobre cuestiones de funcionamiento del modelo de lenguaje.
        """},
     ]

# Configuración de tokens
MAX_CONTEXT_TOKENS = 8192
MAX_GENERATION_TOKENS = 4000
MAX_INPUT_TOKENS = MAX_CONTEXT_TOKENS - MAX_GENERATION_TOKENS



# Procesar cada archivo .txt
for filename in os.listdir(input_dir):
    if filename.endswith(".txt"):
        filepath = os.path.join(input_dir, filename)
        with open(filepath, "r", encoding="utf-8") as f:
            texto = f.read()

        # Crear mensaje estilo chat
        prompt_text = prompt[0]["content"]
        messages = [
            {"role": "system", "content": prompt_text},
            {"role": "user", "content": texto}
        ]

        # Calcular tokens de entrada
        full_prompt = prompt_text + texto
        total_tokens = len(tokenizer.encode(full_prompt))
        print(f"{filename}: Tokens de entrada: {total_tokens}")

        # Truncar el prompt si se pasa del límite permitido
        if total_tokens > MAX_INPUT_TOKENS:
            print(f"Truncando prompt: {filename}")
            # Calcular los tokens disponibles para el prompt
            max_tokens_prompt = MAX_INPUT_TOKENS - len(tokenizer.encode(texto))
            
            # Truncar el prompt para ajustarlo al límite de tokens
            prompt_tokens = tokenizer.encode(prompt[0]["content"])
            truncated_prompt_tokens = prompt_tokens[:max_tokens_prompt]
            
            # Decodificar los tokens truncados y actualizar el prompt
            truncated_prompt = tokenizer.decode(truncated_prompt_tokens, skip_special_tokens=True)
            messages[0]["content"] = truncated_prompt

        # Generar texto
        output = pipeline(messages, max_new_tokens=MAX_GENERATION_TOKENS)

        # Extraer solo el contenido generado por el modelo
        respuesta = output[0]["generated_text"][2]["content"]

        # Guardar en .xml
        output_filename = os.path.splitext(filename)[0] + ".xml"
        output_path = os.path.join(output_dir, output_filename)
        with open(output_path, "w", encoding="utf-8") as out_f:
            out_f.write(respuesta)

        print(f"Procesado: {filename} → {output_filename}")

print("Proceso completado.")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


839003070.txt: Tokens de entrada: 1928


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 839003070.txt → 839003070.xml
791237162.txt: Tokens de entrada: 1979


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 791237162.txt → 791237162.xml
964191583.txt: Tokens de entrada: 1936


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 964191583.txt → 964191583.xml
975577618.txt: Tokens de entrada: 1956


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 975577618.txt → 975577618.xml
760419145.txt: Tokens de entrada: 1958


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 760419145.txt → 760419145.xml
834506919.txt: Tokens de entrada: 1982


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 834506919.txt → 834506919.xml
818236375.txt: Tokens de entrada: 1975


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 818236375.txt → 818236375.xml
988700329.txt: Tokens de entrada: 1907


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 988700329.txt → 988700329.xml
981198630.txt: Tokens de entrada: 1945


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 981198630.txt → 981198630.xml
944404412.txt: Tokens de entrada: 2016


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 944404412.txt → 944404412.xml
862537906.txt: Tokens de entrada: 1909


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 862537906.txt → 862537906.xml
932402402.txt: Tokens de entrada: 1922


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 932402402.txt → 932402402.xml
910138354.txt: Tokens de entrada: 2029


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 910138354.txt → 910138354.xml
856347094.txt: Tokens de entrada: 1965


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 856347094.txt → 856347094.xml
945232534.txt: Tokens de entrada: 2015


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 945232534.txt → 945232534.xml
821542730.txt: Tokens de entrada: 2004


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 821542730.txt → 821542730.xml
818067560.txt: Tokens de entrada: 1972


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 818067560.txt → 818067560.xml
973866801.txt: Tokens de entrada: 1997


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 973866801.txt → 973866801.xml
769642025.txt: Tokens de entrada: 1992


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 769642025.txt → 769642025.xml
827278311.txt: Tokens de entrada: 1918


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 827278311.txt → 827278311.xml
852054481.txt: Tokens de entrada: 1931


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 852054481.txt → 852054481.xml
804953670.txt: Tokens de entrada: 1940


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 804953670.txt → 804953670.xml
949789988.txt: Tokens de entrada: 1937


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 949789988.txt → 949789988.xml
810385877.txt: Tokens de entrada: 1928


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 810385877.txt → 810385877.xml
876682700.txt: Tokens de entrada: 1919


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 876682700.txt → 876682700.xml
989036296.txt: Tokens de entrada: 1964


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 989036296.txt → 989036296.xml
765659453.txt: Tokens de entrada: 1967


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 765659453.txt → 765659453.xml
948537830.txt: Tokens de entrada: 1938


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 948537830.txt → 948537830.xml
818261480.txt: Tokens de entrada: 1966


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 818261480.txt → 818261480.xml
934277412.txt: Tokens de entrada: 1925


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 934277412.txt → 934277412.xml
978387247.txt: Tokens de entrada: 1970


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 978387247.txt → 978387247.xml
822091008.txt: Tokens de entrada: 1970


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 822091008.txt → 822091008.xml
976493341.txt: Tokens de entrada: 1928


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 976493341.txt → 976493341.xml
983548640.txt: Tokens de entrada: 1924


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 983548640.txt → 983548640.xml
907210617.txt: Tokens de entrada: 1942


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 907210617.txt → 907210617.xml
938582350.txt: Tokens de entrada: 1993


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 938582350.txt → 938582350.xml
936813726.txt: Tokens de entrada: 1949


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 936813726.txt → 936813726.xml
828999873.txt: Tokens de entrada: 1952


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 828999873.txt → 828999873.xml
959942196.txt: Tokens de entrada: 1962


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 959942196.txt → 959942196.xml
927909325.txt: Tokens de entrada: 1987


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 927909325.txt → 927909325.xml
768892512.txt: Tokens de entrada: 1949


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 768892512.txt → 768892512.xml
799874626.txt: Tokens de entrada: 1928


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 799874626.txt → 799874626.xml
806673179.txt: Tokens de entrada: 1934


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 806673179.txt → 806673179.xml
932525644.txt: Tokens de entrada: 1995


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 932525644.txt → 932525644.xml
901492517.txt: Tokens de entrada: 1950


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 901492517.txt → 901492517.xml
958411362.txt: Tokens de entrada: 1895


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 958411362.txt → 958411362.xml
798290653.txt: Tokens de entrada: 1969


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 798290653.txt → 798290653.xml
827326900.txt: Tokens de entrada: 1988


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 827326900.txt → 827326900.xml
933048349.txt: Tokens de entrada: 1913


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 933048349.txt → 933048349.xml
835933968.txt: Tokens de entrada: 2007


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 835933968.txt → 835933968.xml
826007578.txt: Tokens de entrada: 1930


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 826007578.txt → 826007578.xml
986986756.txt: Tokens de entrada: 1919


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 986986756.txt → 986986756.xml
802598618.txt: Tokens de entrada: 1963


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 802598618.txt → 802598618.xml
920310564.txt: Tokens de entrada: 2003


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 920310564.txt → 920310564.xml
970733994.txt: Tokens de entrada: 1963


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 970733994.txt → 970733994.xml
792396058.txt: Tokens de entrada: 1949


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 792396058.txt → 792396058.xml
874430747.txt: Tokens de entrada: 1948


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 874430747.txt → 874430747.xml
940248294.txt: Tokens de entrada: 1967


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 940248294.txt → 940248294.xml
941069412.txt: Tokens de entrada: 1941


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 941069412.txt → 941069412.xml
956320769.txt: Tokens de entrada: 2049


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 956320769.txt → 956320769.xml
875489792.txt: Tokens de entrada: 1982


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 875489792.txt → 875489792.xml
768530341.txt: Tokens de entrada: 2005


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 768530341.txt → 768530341.xml
924985962.txt: Tokens de entrada: 1962


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 924985962.txt → 924985962.xml
839535214.txt: Tokens de entrada: 1934


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 839535214.txt → 839535214.xml
951194742.txt: Tokens de entrada: 1978


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 951194742.txt → 951194742.xml
754993080.txt: Tokens de entrada: 1907


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 754993080.txt → 754993080.xml
781034562.txt: Tokens de entrada: 1908


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 781034562.txt → 781034562.xml
796846863.txt: Tokens de entrada: 1958


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 796846863.txt → 796846863.xml
820442023.txt: Tokens de entrada: 1947


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 820442023.txt → 820442023.xml
978580236.txt: Tokens de entrada: 1971


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 978580236.txt → 978580236.xml
975828051.txt: Tokens de entrada: 1919


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 975828051.txt → 975828051.xml
973409304.txt: Tokens de entrada: 1989


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 973409304.txt → 973409304.xml
865440609.txt: Tokens de entrada: 1971


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 865440609.txt → 865440609.xml
990575060.txt: Tokens de entrada: 1958


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 990575060.txt → 990575060.xml
875812911.txt: Tokens de entrada: 1985


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 875812911.txt → 875812911.xml
793571467.txt: Tokens de entrada: 1983


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 793571467.txt → 793571467.xml
878300141.txt: Tokens de entrada: 2036


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 878300141.txt → 878300141.xml
943028597.txt: Tokens de entrada: 1938


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 943028597.txt → 943028597.xml
839579642.txt: Tokens de entrada: 1996


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 839579642.txt → 839579642.xml
783699979.txt: Tokens de entrada: 1966


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 783699979.txt → 783699979.xml
843193708.txt: Tokens de entrada: 1986


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 843193708.txt → 843193708.xml
857127028.txt: Tokens de entrada: 1979


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 857127028.txt → 857127028.xml
991670909.txt: Tokens de entrada: 1911


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 991670909.txt → 991670909.xml
755993387.txt: Tokens de entrada: 1984


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 755993387.txt → 755993387.xml
964614634.txt: Tokens de entrada: 1989


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 964614634.txt → 964614634.xml
860817379.txt: Tokens de entrada: 1938


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 860817379.txt → 860817379.xml
918485962.txt: Tokens de entrada: 1945


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 918485962.txt → 918485962.xml
903277227.txt: Tokens de entrada: 1969


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 903277227.txt → 903277227.xml
925033386.txt: Tokens de entrada: 1914


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 925033386.txt → 925033386.xml
786686645.txt: Tokens de entrada: 1944


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 786686645.txt → 786686645.xml
893862396.txt: Tokens de entrada: 2022


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 893862396.txt → 893862396.xml
817688441.txt: Tokens de entrada: 1960


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 817688441.txt → 817688441.xml
997024020.txt: Tokens de entrada: 2007


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 997024020.txt → 997024020.xml
775838425.txt: Tokens de entrada: 1943


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 775838425.txt → 775838425.xml
901550052.txt: Tokens de entrada: 1956


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 901550052.txt → 901550052.xml
929157278.txt: Tokens de entrada: 1951


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 929157278.txt → 929157278.xml
858831674.txt: Tokens de entrada: 1925


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 858831674.txt → 858831674.xml
900212897.txt: Tokens de entrada: 1973


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 900212897.txt → 900212897.xml
893189352.txt: Tokens de entrada: 1989


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 893189352.txt → 893189352.xml
860703747.txt: Tokens de entrada: 1952


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 860703747.txt → 860703747.xml
941878909.txt: Tokens de entrada: 1947


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 941878909.txt → 941878909.xml
823840648.txt: Tokens de entrada: 1927


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 823840648.txt → 823840648.xml
815684553.txt: Tokens de entrada: 1930


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 815684553.txt → 815684553.xml
929817590.txt: Tokens de entrada: 1941


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 929817590.txt → 929817590.xml
799030721.txt: Tokens de entrada: 1960


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 799030721.txt → 799030721.xml
826430104.txt: Tokens de entrada: 1957


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 826430104.txt → 826430104.xml
761218358.txt: Tokens de entrada: 1915


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 761218358.txt → 761218358.xml
843054476.txt: Tokens de entrada: 1983


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 843054476.txt → 843054476.xml
839007788.txt: Tokens de entrada: 1931


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 839007788.txt → 839007788.xml
999706351.txt: Tokens de entrada: 1948


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 999706351.txt → 999706351.xml
934369810.txt: Tokens de entrada: 1917


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 934369810.txt → 934369810.xml
943916419.txt: Tokens de entrada: 1963


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 943916419.txt → 943916419.xml
783507628.txt: Tokens de entrada: 2000
Procesado: 783507628.txt → 783507628.xml


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


958755095.txt: Tokens de entrada: 1953


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 958755095.txt → 958755095.xml
779264601.txt: Tokens de entrada: 1944
Procesado: 779264601.txt → 779264601.xml


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


886849671.txt: Tokens de entrada: 1957


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 886849671.txt → 886849671.xml
982570255.txt: Tokens de entrada: 1957


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 982570255.txt → 982570255.xml
854340264.txt: Tokens de entrada: 1989


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 854340264.txt → 854340264.xml
915623376.txt: Tokens de entrada: 1935


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 915623376.txt → 915623376.xml
832082119.txt: Tokens de entrada: 1936


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 832082119.txt → 832082119.xml
988951928.txt: Tokens de entrada: 1924


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 988951928.txt → 988951928.xml
861585164.txt: Tokens de entrada: 1954


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 861585164.txt → 861585164.xml
865201860.txt: Tokens de entrada: 1999


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 865201860.txt → 865201860.xml
995068172.txt: Tokens de entrada: 1965


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 995068172.txt → 995068172.xml
985579508.txt: Tokens de entrada: 1985


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 985579508.txt → 985579508.xml
934901961.txt: Tokens de entrada: 1943


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 934901961.txt → 934901961.xml
940636729.txt: Tokens de entrada: 1924


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 940636729.txt → 940636729.xml
904999396.txt: Tokens de entrada: 1959


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 904999396.txt → 904999396.xml
790954525.txt: Tokens de entrada: 1948


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 790954525.txt → 790954525.xml
882166142.txt: Tokens de entrada: 1966


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 882166142.txt → 882166142.xml
921900526.txt: Tokens de entrada: 1924


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 921900526.txt → 921900526.xml
785575886.txt: Tokens de entrada: 1973


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 785575886.txt → 785575886.xml
881946431.txt: Tokens de entrada: 1971


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 881946431.txt → 881946431.xml
862513537.txt: Tokens de entrada: 1960


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 862513537.txt → 862513537.xml
944811219.txt: Tokens de entrada: 2020


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 944811219.txt → 944811219.xml
955183501.txt: Tokens de entrada: 2003


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 955183501.txt → 955183501.xml
841445747.txt: Tokens de entrada: 1967


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 841445747.txt → 841445747.xml
765844591.txt: Tokens de entrada: 1944


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 765844591.txt → 765844591.xml
761291078.txt: Tokens de entrada: 2033


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 761291078.txt → 761291078.xml
775420089.txt: Tokens de entrada: 1999


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 775420089.txt → 775420089.xml
862648189.txt: Tokens de entrada: 1948


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 862648189.txt → 862648189.xml
851639758.txt: Tokens de entrada: 1956


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 851639758.txt → 851639758.xml
917555538.txt: Tokens de entrada: 1996


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 917555538.txt → 917555538.xml
992658259.txt: Tokens de entrada: 1983


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 992658259.txt → 992658259.xml
994427603.txt: Tokens de entrada: 1941


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 994427603.txt → 994427603.xml
884437438.txt: Tokens de entrada: 1908


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 884437438.txt → 884437438.xml
949223650.txt: Tokens de entrada: 1926


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 949223650.txt → 949223650.xml
916946631.txt: Tokens de entrada: 1990


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 916946631.txt → 916946631.xml
831357094.txt: Tokens de entrada: 1969


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 831357094.txt → 831357094.xml
921611101.txt: Tokens de entrada: 1999


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 921611101.txt → 921611101.xml
912232568.txt: Tokens de entrada: 1936


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 912232568.txt → 912232568.xml
807021208.txt: Tokens de entrada: 2031


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 807021208.txt → 807021208.xml
753264677.txt: Tokens de entrada: 1992


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 753264677.txt → 753264677.xml
860313762.txt: Tokens de entrada: 1968


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 860313762.txt → 860313762.xml
781424959.txt: Tokens de entrada: 1939


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 781424959.txt → 781424959.xml
836507849.txt: Tokens de entrada: 1917


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 836507849.txt → 836507849.xml
841382782.txt: Tokens de entrada: 1967


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 841382782.txt → 841382782.xml
767230408.txt: Tokens de entrada: 1984


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 767230408.txt → 767230408.xml
958483618.txt: Tokens de entrada: 1986


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 958483618.txt → 958483618.xml
983746151.txt: Tokens de entrada: 1977


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 983746151.txt → 983746151.xml
854562982.txt: Tokens de entrada: 1983


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 854562982.txt → 854562982.xml
826080391.txt: Tokens de entrada: 1948


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 826080391.txt → 826080391.xml
980741835.txt: Tokens de entrada: 1980


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 980741835.txt → 980741835.xml
930706405.txt: Tokens de entrada: 1985


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 930706405.txt → 930706405.xml
810668724.txt: Tokens de entrada: 1923


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 810668724.txt → 810668724.xml
881831541.txt: Tokens de entrada: 1939


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 881831541.txt → 881831541.xml
853658541.txt: Tokens de entrada: 1901


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 853658541.txt → 853658541.xml
974586397.txt: Tokens de entrada: 1939


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 974586397.txt → 974586397.xml
991431109.txt: Tokens de entrada: 1946


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 991431109.txt → 991431109.xml
790602246.txt: Tokens de entrada: 1924


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 790602246.txt → 790602246.xml
789408657.txt: Tokens de entrada: 1990


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 789408657.txt → 789408657.xml
953217345.txt: Tokens de entrada: 2007


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 953217345.txt → 953217345.xml
776849520.txt: Tokens de entrada: 1968


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 776849520.txt → 776849520.xml
786745479.txt: Tokens de entrada: 1989


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 786745479.txt → 786745479.xml
814314381.txt: Tokens de entrada: 1992


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 814314381.txt → 814314381.xml
929183134.txt: Tokens de entrada: 1957


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 929183134.txt → 929183134.xml
841606847.txt: Tokens de entrada: 1951


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 841606847.txt → 841606847.xml
901626164.txt: Tokens de entrada: 1919


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 901626164.txt → 901626164.xml
842115767.txt: Tokens de entrada: 1914


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 842115767.txt → 842115767.xml
911408584.txt: Tokens de entrada: 1959


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 911408584.txt → 911408584.xml
844402705.txt: Tokens de entrada: 1942


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 844402705.txt → 844402705.xml
914377314.txt: Tokens de entrada: 1944


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 914377314.txt → 914377314.xml
763281746.txt: Tokens de entrada: 1924


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 763281746.txt → 763281746.xml
760451327.txt: Tokens de entrada: 1925


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 760451327.txt → 760451327.xml
929675066.txt: Tokens de entrada: 1933


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 929675066.txt → 929675066.xml
794026394.txt: Tokens de entrada: 1966


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 794026394.txt → 794026394.xml
853121870.txt: Tokens de entrada: 1960


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 853121870.txt → 853121870.xml
765766190.txt: Tokens de entrada: 1920


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 765766190.txt → 765766190.xml
894083026.txt: Tokens de entrada: 1971


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 894083026.txt → 894083026.xml
971581900.txt: Tokens de entrada: 2014


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 971581900.txt → 971581900.xml
932103950.txt: Tokens de entrada: 1982


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 932103950.txt → 932103950.xml
889070739.txt: Tokens de entrada: 1921


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 889070739.txt → 889070739.xml
862010312.txt: Tokens de entrada: 1978


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 862010312.txt → 862010312.xml
955704690.txt: Tokens de entrada: 1978


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 955704690.txt → 955704690.xml
905848513.txt: Tokens de entrada: 1947


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 905848513.txt → 905848513.xml
773787806.txt: Tokens de entrada: 1941


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 773787806.txt → 773787806.xml
948267996.txt: Tokens de entrada: 1960


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 948267996.txt → 948267996.xml
773071198.txt: Tokens de entrada: 1974


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 773071198.txt → 773071198.xml
951321514.txt: Tokens de entrada: 1979


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 951321514.txt → 951321514.xml
946927743.txt: Tokens de entrada: 1933
Procesado: 946927743.txt → 946927743.xml


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


956791476.txt: Tokens de entrada: 1942


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 956791476.txt → 956791476.xml
926553517.txt: Tokens de entrada: 2005


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 926553517.txt → 926553517.xml
841824272.txt: Tokens de entrada: 1933


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 841824272.txt → 841824272.xml
880598659.txt: Tokens de entrada: 1972


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 880598659.txt → 880598659.xml
788498329.txt: Tokens de entrada: 1958


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 788498329.txt → 788498329.xml
774018498.txt: Tokens de entrada: 2037


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 774018498.txt → 774018498.xml
867264708.txt: Tokens de entrada: 1995


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 867264708.txt → 867264708.xml
882940617.txt: Tokens de entrada: 1938


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 882940617.txt → 882940617.xml
834475453.txt: Tokens de entrada: 2001


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 834475453.txt → 834475453.xml
810195405.txt: Tokens de entrada: 1984


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 810195405.txt → 810195405.xml
900051902.txt: Tokens de entrada: 1980


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 900051902.txt → 900051902.xml
877744736.txt: Tokens de entrada: 1951


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 877744736.txt → 877744736.xml
956418510.txt: Tokens de entrada: 1984


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 956418510.txt → 956418510.xml
922489141.txt: Tokens de entrada: 1989


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 922489141.txt → 922489141.xml
811599528.txt: Tokens de entrada: 1947


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 811599528.txt → 811599528.xml
975183453.txt: Tokens de entrada: 1921


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 975183453.txt → 975183453.xml
897254449.txt: Tokens de entrada: 1995


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 897254449.txt → 897254449.xml
926409119.txt: Tokens de entrada: 1950


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 926409119.txt → 926409119.xml
782959227.txt: Tokens de entrada: 1926


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 782959227.txt → 782959227.xml
787493182.txt: Tokens de entrada: 1964


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 787493182.txt → 787493182.xml
906070281.txt: Tokens de entrada: 1935


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 906070281.txt → 906070281.xml
920707969.txt: Tokens de entrada: 2029


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 920707969.txt → 920707969.xml
892466668.txt: Tokens de entrada: 1983


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 892466668.txt → 892466668.xml
828235990.txt: Tokens de entrada: 1946


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 828235990.txt → 828235990.xml
909885983.txt: Tokens de entrada: 1976


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 909885983.txt → 909885983.xml
864937401.txt: Tokens de entrada: 1969


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 864937401.txt → 864937401.xml
946340084.txt: Tokens de entrada: 1972


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 946340084.txt → 946340084.xml
849796182.txt: Tokens de entrada: 2006


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 849796182.txt → 849796182.xml
951204123.txt: Tokens de entrada: 1997


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 951204123.txt → 951204123.xml
922411487.txt: Tokens de entrada: 2053


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 922411487.txt → 922411487.xml
774011819.txt: Tokens de entrada: 1982


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 774011819.txt → 774011819.xml
991559876.txt: Tokens de entrada: 1963


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 991559876.txt → 991559876.xml
990720341.txt: Tokens de entrada: 1933


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 990720341.txt → 990720341.xml
885254936.txt: Tokens de entrada: 1974


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 885254936.txt → 885254936.xml
756754091.txt: Tokens de entrada: 1971


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 756754091.txt → 756754091.xml
916606884.txt: Tokens de entrada: 1925


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 916606884.txt → 916606884.xml
932528658.txt: Tokens de entrada: 1976


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 932528658.txt → 932528658.xml
881762397.txt: Tokens de entrada: 1951


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 881762397.txt → 881762397.xml
993577188.txt: Tokens de entrada: 1957


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 993577188.txt → 993577188.xml
880424816.txt: Tokens de entrada: 1934


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 880424816.txt → 880424816.xml
824271687.txt: Tokens de entrada: 1976


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 824271687.txt → 824271687.xml
900899875.txt: Tokens de entrada: 1981


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 900899875.txt → 900899875.xml
887826232.txt: Tokens de entrada: 1948


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 887826232.txt → 887826232.xml
762721037.txt: Tokens de entrada: 1975


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 762721037.txt → 762721037.xml
822812148.txt: Tokens de entrada: 1951


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 822812148.txt → 822812148.xml
780612522.txt: Tokens de entrada: 1945


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 780612522.txt → 780612522.xml
781838481.txt: Tokens de entrada: 1951


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 781838481.txt → 781838481.xml
993479328.txt: Tokens de entrada: 1976


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 993479328.txt → 993479328.xml
869873706.txt: Tokens de entrada: 1959


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Procesado: 869873706.txt → 869873706.xml
874833638.txt: Tokens de entrada: 1987
Procesado: 874833638.txt → 874833638.xml
Proceso completado.
