In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, QuantoConfig
import torch
from rdflib import Graph
import time
import psutil

In [3]:
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# model_id = "google/gemma-2b-it"
# model_id = "microsoft/Phi-3-mini-128k-instruct"
# model_id = "unsloth/llama-3-8b-bnb-4bit"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# quantization_config = QuantoConfig(weights="int4")

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map='auto',
    quantization_config=quantization_config,
    low_cpu_mem_usage=True
    
)
print(model)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): Ll

In [4]:
print(model.model.config)

LlamaConfig {
  "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"
  

In [5]:
# base_ont = '/projets/melodi/gsantoss/data/oaei/tracks/populated/data_100'
# 
# with open(f'{base_ont}/cmt_100.ttl') as f:
#     o1 = f.read()
# 
# with open(f'{base_ont}/conference_100.ttl') as f:
#     o2 = f.read()

base_ont = '/projets/melodi/gsantoss/data/oaei/tracks/complex/geolink'

o1 = Graph().parse(f'{base_ont}/rdfgmo.rdf').serialize(format='ttl')
o2 = Graph().parse(f'{base_ont}/rdfgbo.rdf').serialize(format='ttl')

txt = f'''
Given the two ontologies bellow:

<ontology1>
{o1}    
</ontology1>    
<ontology2>
{o2}
</ontology2>

And one example of alignment between two different ontologies:

<ontology1>
@prefix lib: <http://example.org/library#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .

lib:Book1 a lib:Book ;
    dcterms:title "The Catcher in the Rye" ;
    dcterms:creator lib:Author1 ;
    lib:hasGenre "Fiction" .

lib:Author1 a lib:Author ;
    foaf:name "J.D. Salinger" ;
    foaf:birthDate "1919-01-01" .
</ontology1>
<ontology2>
@prefix pub: <http://example.org/publishing#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .

pub:Book1 a pub:Book ;
    dcterms:title "To Kill a Mockingbird" ;
    dcterms:creator pub:Author1 ;
    pub:publicationYear "1960" .

pub:Author1 a pub:Author ;
    foaf:name "Harper Lee" ;
    pub:hasNationality "American" .
</ontology2>
<alignment>
<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns="http://knowledgeweb.semanticweb.org/heterogeneity/alignment"
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
         xmlns:align="http://knowledgeweb.semanticweb.org/heterogeneity/alignment#"
         xmlns:edoal="http://ns.inria.org/edoal/1.0/#">

  <Alignment>
    <xml>yes</xml>
    <level>2EDOAL</level>
    <type>**</type>
    
    <onto1>
      <Ontology rdf:about="http://example.org/library#"/>
    </onto1>
    <onto2>
      <Ontology rdf:about="http://example.org/publishing#"/>
    </onto2>

    <map>
      <Cell>
        <entity1 rdf:resource="http://example.org/library#Book"/>
        <entity2 rdf:resource="http://example.org/publishing#Book"/>
        <relation>=</relation>
        <measure>1.0</measure>
      </Cell>
    </map>
    <map>
      <Cell>
        <entity1 rdf:resource="http://example.org/library#Author"/>
        <entity2 rdf:resource="http://example.org/publishing#Author"/>
        <relation>=</relation>
        <measure>1.0</measure>
      </Cell>
    </map>
  </Alignment>
</rdf:RDF>
</alignment>

Write a file in EDOAL format containing the complex alignment between the ontology1 and ontology2. You don't need to explain yourself. Just give as response the resulting file without saying anything. Here is one example bellow:
'''

In [6]:
sample_prompt = '''apple: fruit
orange: fruit
zucchini: vegetable
tomato:

Complete this list'''

messages = [
    # {"role": "system", "content": "You are an Ontology Alignment expert. You are able to align two ontologies by creating a file in EDOAL format containing the result alignments. You are able to produce complex alignments that are those involving multiple entities and relationships in a n:m cardinality. The user will provide you with two ontologies and you respond with the EDOAL file containing the alignments. You don't need to explain yourself. Just give as response the resulting file without saying anything."},
    {"role": "user", "content": txt},
]

input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

print(input_ids.shape)



torch.Size([1, 27391])


In [7]:
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = model.generate(
    input_ids,
    max_new_tokens=1024,
    eos_token_id=terminators,
    do_sample=False,
    temperature=None,
    top_p=None,
    
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


OutOfMemoryError: CUDA out of memory. Tried to allocate 44.72 GiB. GPU 