In [5]:
import os
from transformers import AutoTokenizer, AutoModel, BitsAndBytesConfig, AutoModelForCausalLM
import torch
from tqdm.auto import tqdm
import gc
from rdflib import Graph
import re

In [6]:
print('hue')

hue


In [7]:
model_id = 'meta-llama/Meta-Llama-3-8B-Instruct'
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [8]:
sample1_path = '/users/melodi/gsantoss/cmatcher/rag/sample1.txt'
sample2_path = '/users/melodi/gsantoss/cmatcher/rag/sample2.txt'

def gen_prompt(r1, r2, query, include_sample1=False, include_sample2=False):
    sample_prompt = 'And examples of complex alignment between different ontologies:'
    with open(sample1_path, 'r') as f:
        sample1 = f.read()

    with open(sample2_path, 'r') as f:
        sample2 = f.read()

    instruction = "Write a file in EDOAL format containing the complex alignment between the input ontologies <ontology1> and <ontology2>. You don't need to explain yourself. Just give as response the resulting alignment file without saying anything else."

    if query is not None:
        instruction = f'Considering that the input ontologies were filtered to include only the entities related to the query:\n\n{query}\n\n{instruction}'

    if not include_sample1:
        sample1 = ''

    if not include_sample2:
        sample2 = ''

    if not include_sample1 and not include_sample2:
        sample_prompt = ''

    return f'''Given the two ontologies below:
<ontology1>
{r1}    
</ontology1>    
<ontology2>
{r2}
</ontology2>
{sample_prompt}
{sample1}
{sample2}
{instruction}'''




g1 = Graph().parse('/projets/melodi/gsantoss/data/complex/conference/ont/cmt.owl')
g2 = Graph().parse('/projets/melodi/gsantoss/data/complex/conference/ont/conference.owl')

r1 = g1.serialize(format='ttl')
r2 = g2.serialize(format='ttl')

query = 'SELECT ?x WHERE { ?x a <http://cmt#Conference> }'

prompt = gen_prompt(r1, r2, query, include_sample1=True, include_sample2=True)
print(len(tokenizer.tokenize(prompt)))

11633


In [9]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)


model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map='auto',
    quantization_config=quantization_config,
    low_cpu_mem_usage=True,
)



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [10]:
messages = [
    # {"role": "system", "content": "You are an Ontology Alignment expert. You are able to align two ontologies by creating a file in EDOAL format containing the result alignments. You are able to produce complex alignments that are those involving multiple entities and relationships in a n:m cardinality. The user will provide you with two ontologies and you respond with the EDOAL file containing the alignments. You don't need to explain yourself. Just give as response the resulting file without saying anything."},
    {"role": "user", "content": prompt},
]

input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

print(input_ids.shape)

torch.Size([1, 11643])


In [11]:
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

with torch.no_grad():
    outputs = model.generate(
        input_ids,
        max_new_tokens=2 * 1024,
        eos_token_id=terminators,
        do_sample=False,
        temperature=None,
        top_p=None,

    )
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


OutOfMemoryError: CUDA out of memory. Tried to allocate 5.56 GiB. GPU 

In [4]:

for p, d, fs in os.walk('/projets/melodi/gsantoss/complex-llm/generated-prompts'):
    for f in fs:
        if 'cmt#conference' not in f or 'q-s1-s2' not in f:
            continue
        with open(os.path.join(p, f), 'r') as file:
            prompt = file.read()
            tokens = len(tokenizer.tokenize(prompt))
            print(f'{f}: {tokens}')

prompt#cmt#conference#c-paper#nq-s1-s2.txt: 4083
prompt#cmt#conference#c-person#q-s1-s2.txt: 4055
prompt#cmt#conference#c-preference#q-s1-s2.txt: 3435
prompt#cmt#conference#c-document#nq-s1-s2.txt: 3938
prompt#cmt#conference#c-programcommittee#q-s1-s2.txt: 3926
prompt#cmt#conference#c-bid#q-s1-s2.txt: 4014
prompt#cmt#conference#c-decision#q-s1-s2.txt: 3991
prompt#cmt#conference#c-decision#nq-s1-s2.txt: 3956
prompt#cmt#conference#c-administrator#q-s1-s2.txt: 3952
prompt#cmt#conference#c-bid#nq-s1-s2.txt: 3979
prompt#cmt#conference#c-preference#nq-s1-s2.txt: 3400
prompt#cmt#conference#c-document#q-s1-s2.txt: 3973
prompt#cmt#conference#c-administrator#nq-s1-s2.txt: 3917
prompt#cmt#conference#c-programcommittee#nq-s1-s2.txt: 3889
prompt#cmt#conference#c-conference#nq-s1-s2.txt: 4755
prompt#cmt#conference#c-paper#q-s1-s2.txt: 4118
prompt#cmt#conference#c-conference#q-s1-s2.txt: 4790
prompt#cmt#conference#c-person#nq-s1-s2.txt: 4020
