In [1]:
# Install into the running kernel's environment. The pinned versions are the ones
# the captured install log below shows being downloaded; nltk's version is not
# visible in that log, so it is left unpinned.
%pip install rdflib==7.0.0 transformers==4.45.1 torch==2.4.1 sentence_transformers==3.1.1 nltk

Collecting rdflib
  Downloading rdflib-7.0.0-py3-none-any.whl.metadata (11 kB)
Collecting transformers
  Downloading transformers-4.45.1-py3-none-any.whl.metadata (44 kB)
     ---------------------------------------- 0.0/44.4 kB ? eta -:--:--
     ---------------------------------------- 0.0/44.4 kB ? eta -:--:--
     --------- ------------------------------ 10.2/44.4 kB ? eta -:--:--
     ----------------- -------------------- 20.5/44.4 kB 165.2 kB/s eta 0:00:01
     ----------------------------------- -- 41.0/44.4 kB 219.4 kB/s eta 0:00:01
     -------------------------------------- 44.4/44.4 kB 218.8 kB/s eta 0:00:00
Collecting torch
  Downloading torch-2.4.1-cp39-cp39-win_amd64.whl.metadata (27 kB)
Collecting sentence_transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Collecting isodate<0.7.0,>=0.6.0 (from rdflib)
  Downloading isodate-0.6.1-py2.py3-none-any.whl.metadata (9.6 kB)
Collecting pyparsing<4,>=2.1.0 (from rdflib)
  Downloading pypars

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pyldavis 3.4.1 requires funcy, which is not installed.
pyldavis 3.4.1 requires gensim, which is not installed.
pyldavis 3.4.1 requires numexpr, which is not installed.
pyldavis 3.4.1 requires pandas>=2.0.0, which is not installed.
requests-oauthlib 1.3.1 requires oauthlib>=3.0.0, which is not installed.
tensorboard 2.11.2 requires markdown>=2.6.8, which is not installed.
tensorboard 2.11.2 requires protobuf<4,>=3.9.2, which is not installed.
tensorboard 2.11.2 requires tensorboard-data-server<0.7.0,>=0.6.0, which is not installed.
tensorboard 2.11.2 requires tensorboard-plugin-wit>=1.6.0, which is not installed.
tensorboard 2.11.2 requires werkzeug>=1.0.1, which is not installed.
tensorflow 2.5.0 requires flatbuffers~=1.12.0, which is not installed.
tensorflow 2.5.0 requires keras-nightly~=2.5.0.dev, which is not 

In [None]:
import rdflib
from rdflib import Graph
import json
import os
import nltk
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer, util
import torch

# Download the NLTK 'punkt' resource when this cell runs.
# NOTE(review): none of the visible cells call into nltk afterwards - confirm
# this download is still needed.
nltk.download('punkt')

: 

In [None]:
# Load RDF Data
def load_rdf_data(file_path, rdf_format='turtle'):
    """Parse an RDF document into a freshly created ``Graph``.

    Args:
        file_path: Location of the RDF document; passed unchanged to
            ``Graph.parse``.
        rdf_format: Serialization name forwarded to ``Graph.parse`` as
            ``format``. Defaults to ``'turtle'``, the value that used to be
            hard-coded, so existing callers are unaffected.

    Returns:
        The ``Graph`` instance that the document was parsed into.
    """
    g = Graph()
    g.parse(file_path, format=rdf_format)
    return g

# Extract Triples with IDs
def extract_triples_with_ids(graph):
    """Number every triple yielded by ``graph``, counting from 1.

    Args:
        graph: Any iterable of triples.

    Returns:
        A list of ``(id, triple)`` tuples in the order ``graph`` yields them.
    """
    # NOTE(review): the ids are purely positional, and run_evaluation looks up
    # reference sentences by these ids - confirm the iteration order of the
    # graph is stable between runs.
    return list(enumerate(graph, start=1))

# Generate Prompts
def generate_prompts(triple):
    """Build the verbalisation prompt for a single RDF triple.

    Args:
        triple: A three-item sequence ``(subject, predicate, object)``.

    Returns:
        A string holding the instruction line followed by the three terms,
        each on its own labelled line.

    Raises:
        ValueError: If ``triple`` does not unpack into exactly three items.
    """
    subject, predicate, obj = triple
    # Format each term on its own. Interpolating the tuple as a whole would put
    # the tuple's Python repr (parentheses, quotes, class names) into the text
    # the language model sees.
    return (
        "Express the following RDF triple as a natural language sentence:\n"
        f"Subject: {subject}\nPredicate: {predicate}\nObject: {obj}"
    )

In [None]:
# Load Models
def load_models():
    models = {
        'Vicuna-13B': {
            'tokenizer': AutoTokenizer.from_pretrained('TheBloke/vicuna-13B-1.1-HF', use_fast=False),
            'model': AutoModelForCausalLM.from_pretrained('TheBloke/vicuna-13B-1.1-HF')
        },
        'WizardLM': {
            'tokenizer': AutoTokenizer.from_pretrained('WizardLM/WizardLM-13B-V1.0', use_fast=False),
            'model': AutoModelForCausalLM.from_pretrained('WizardLM/WizardLM-13B-V1.0')
        },
        'Alpaca': {
            'tokenizer': AutoTokenizer.from_pretrained('chavinlo/alpaca-native', use_fast=False),
            'model': AutoModelForCausalLM.from_pretrained('chavinlo/alpaca-native')
        },
        'Orca': {
            'tokenizer': AutoTokenizer.from_pretrained('psmathur/orca_mini_13b', use_fast=False),
            'model': AutoModelForCausalLM.from_pretrained('psmathur/orca_mini_13b')
        },
        'LongOrca': {
            'tokenizer': AutoTokenizer.from_pretrained('Jean-Baptiste/long_orca', use_fast=False),
            'model': AutoModelForCausalLM.from_pretrained('Jean-Baptiste/long_orca')
        },
        'SOLAR-10B': {
            'tokenizer': AutoTokenizer.from_pretrained('nlpcloud/solar-10b', use_fast=False),
            'model': AutoModelForCausalLM.from_pretrained('nlpcloud/solar-10b')
        },
    }
    return models

In [None]:
# Load Reference Sentences
def load_reference_sentences(file_path):
    """Read the reference sentences from a JSON file.

    Args:
        file_path: Path of the JSON document to read.

    Returns:
        Whatever ``json.load`` decodes from the file.
    """
    # Pin the encoding: without it ``open`` falls back to the platform's
    # locale encoding, which can mis-decode non-ASCII sentences.
    with open(file_path, 'r', encoding='utf-8') as file:
        references = json.load(file)
    return references

# Compute Similarity
def compute_similarity(output, reference, model):
    """Score how close ``output`` is to ``reference`` in embedding space.

    Args:
        output: Text to score.
        reference: Text to compare against.
        model: Object whose ``encode(text, convert_to_tensor=True)`` returns
            an embedding tensor.

    Returns:
        The single value of ``util.pytorch_cos_sim`` applied to the two
        embeddings, extracted with ``.item()``.
    """
    # Encode the output first and the reference second, as before.
    output_vec, reference_vec = (
        model.encode(text, convert_to_tensor=True) for text in (output, reference)
    )
    return util.pytorch_cos_sim(output_vec, reference_vec).item()

# Run Evaluation
def run_evaluation(models, triples_with_ids, references, threshold=0.75, max_new_tokens=50):
    """Generate a sentence for every triple with every model and score it.

    Args:
        models: Mapping of model name to a dict with ``'tokenizer'`` and
            ``'model'`` entries.
        triples_with_ids: Re-iterable collection of ``(id, triple)`` pairs; it
            is walked once per model.
        references: Mapping keyed by the triple id as a string. A missing id
            falls back to an empty reference sentence.
        threshold: Minimum similarity for an output to count as correct.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.

    Returns:
        Dict mapping each model name to a list of per-triple dicts with the
        keys ``id``, ``prompt``, ``output``, ``reference``, ``similarity`` and
        ``is_correct``.

    Side effects:
        Prints one progress line per model. Each model is moved to the
        selected device for its evaluation; when that device is CUDA it is
        moved back to the CPU afterwards so models do not pile up on the GPU.
    """
    # Sentence Transformer Model for Similarity
    sim_model = SentenceTransformer('all-MiniLM-L6-v2')
    # The device does not depend on the model, so pick it once.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    results = {}
    for model_name, components in models.items():
        tokenizer = components['tokenizer']
        model = components['model']
        model.to(device)
        print(f"Evaluating with model: {model_name}")
        model_results = []
        try:
            for idx, triple in triples_with_ids:
                prompt = generate_prompts(triple)
                generated_sentence = _generate_sentence(
                    model, tokenizer, prompt, device, max_new_tokens
                )

                # An id without a reference is scored against the empty string.
                reference_sentence = references.get(str(idx), "")
                similarity = compute_similarity(generated_sentence, reference_sentence, sim_model)

                model_results.append({
                    'id': idx,
                    'prompt': prompt,
                    'output': generated_sentence,
                    'reference': reference_sentence,
                    'similarity': similarity,
                    'is_correct': similarity >= threshold
                })
        finally:
            # Free the accelerator for the next model, even if generation failed.
            if device.type == 'cuda':
                model.to('cpu')
        results[model_name] = model_results
    return results


def _generate_sentence(model, tokenizer, prompt, device, max_new_tokens):
    """Run ``model.generate`` on ``prompt`` and return only the newly generated text."""
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    prompt_length = inputs['input_ids'].shape[1]

    with torch.no_grad():
        # early_stopping is omitted: no beam search is configured here.
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
        )

    # Slice by token position, not by len(prompt) characters: the decoded text
    # is not guaranteed to reproduce the prompt character for character.
    new_token_ids = outputs[0][prompt_length:]
    return tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

# Compute Metrics
def compute_metrics(results):
    """Summarise per-model evaluation records into counts and a hit ratio.

    Args:
        results: Mapping of model name to a sized collection of records, each
            exposing an ``'is_correct'`` entry.

    Returns:
        Dict mapping each model name to ``total``, ``correct``, ``accuracy``
        and ``precision``. Both ratios are the same number (correct / total),
        and are ``0`` when a model has no records.
    """
    metrics = {}
    for model_name, records in results.items():
        n_total = len(records)
        n_correct = len([record for record in records if record['is_correct']])

        # Guard the division for models that produced no records.
        if n_total > 0:
            ratio = n_correct / n_total
        else:
            ratio = 0

        metrics[model_name] = dict(
            total=n_total,
            correct=n_correct,
            accuracy=ratio,  # every example carries equal weight
            precision=ratio,
        )
    return metrics

In [None]:
# Main Function
def main(rdf_file='rdf_data.ttl', reference_file='reference_sentences.json'):
    """Run the whole pipeline and print per-example results and metrics.

    Args:
        rdf_file: Path of the RDF document to load. Defaults to the file name
            that used to be hard-coded.
        reference_file: Path of the JSON file with the reference sentences.
            Defaults to the file name that used to be hard-coded.

    Returns:
        None. Everything is reported through ``print``.
    """
    # Load data
    graph = load_rdf_data(rdf_file)
    triples_with_ids = extract_triples_with_ids(graph)
    references = load_reference_sentences(reference_file)
    models = load_models()

    # Run evaluation
    evaluation_results = run_evaluation(models, triples_with_ids, references)
    metrics = compute_metrics(evaluation_results)

    separator = "-" * 50

    # Print Results
    for model_name, outputs in evaluation_results.items():
        print(f"\nResults for model: {model_name}")
        for result in outputs:
            print(f"ID: {result['id']}")
            print(f"Prompt:\n{result['prompt']}\n")
            print(f"Model Output:\n{result['output']}\n")
            print(f"Reference Sentence:\n{result['reference']}\n")
            print(f"Similarity Score: {result['similarity']:.4f}")
            print(f"Correct: {result['is_correct']}")
            print(separator)

    # Print Metrics
    print("\nEvaluation Metrics:")
    for model_name, metric in metrics.items():
        print(f"Model: {model_name}")
        print(f"Total Examples: {metric['total']}")
        print(f"Correct Predictions: {metric['correct']}")
        print(f"Accuracy: {metric['accuracy']:.2f}")
        print(f"Precision: {metric['precision']:.2f}")
        print(separator)

# Start the evaluation only when this code is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()