In [1]:
pip install rdflib transformers torch nltk sentence_transformers




In [1]:
import rdflib
from rdflib import Graph
import json
import os
import nltk
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForMaskedLM
from sentence_transformers import SentenceTransformer, util
import torch

nltk.download('punkt')

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\31615\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
# Load RDF Data
def load_rdf_data(file_path, rdf_format='turtle'):
    """Parse an RDF document into a freshly created rdflib ``Graph``.

    Args:
        file_path: Location of the RDF document; passed unchanged as the
            first argument of ``Graph.parse``.
        rdf_format: Serialization name forwarded to ``Graph.parse`` as its
            ``format`` argument. Defaults to ``'turtle'``, the value that
            used to be hard-coded, so existing callers behave as before.

    Returns:
        The ``Graph`` instance that the document was parsed into.

    Any error raised by ``Graph.parse`` propagates to the caller.
    """
    g = Graph()
    g.parse(file_path, format=rdf_format)
    return g

# Extract Triples with IDs
def extract_triples_with_ids(graph):
    """Pair every triple yielded by ``graph`` with a 1-based sequence number.

    Args:
        graph: Any iterable of triples (iterated exactly once).

    Returns:
        A list of ``(id, triple)`` tuples where ``id`` starts at 1 and follows
        the order in which ``graph`` yields its items.

    NOTE(review): the ids are only as stable as the iteration order of
    ``graph``; confirm that this order matches whatever numbering the
    reference sentences are keyed by.
    """
    return list(enumerate(graph, start=1))

# Generate Prompts
def generate_prompts(triple, model_type='causal', mask_token='[MASK]'):
    """Build the text prompt used to verbalize one RDF triple.

    Args:
        triple: A 3-item sequence ``(subject, predicate, object)``. It is
            interpolated into the prompt as a whole, i.e. using the
            sequence's own string form.
        model_type: ``'causal'`` for an instruction-style prompt, ``'masked'``
            for a fill-in-the-blank template. Any other value yields an
            empty prompt.
        mask_token: Placeholder written three times into the masked template.
            Defaults to ``'[MASK]'`` (the previously hard-coded value); pass
            the tokenizer's own ``mask_token`` for models that use a
            different placeholder.

    Returns:
        The prompt string, or ``""`` for an unrecognized ``model_type``.

    Raises:
        ValueError: If ``triple`` does not unpack into exactly three items.
    """
    # Unpacking is kept purely as an arity check; the individual terms are
    # not used because the prompt embeds the triple as a whole.
    _subject, _predicate, _obj = triple

    if model_type == 'causal':
        return f"Express the following RDF triple as a natural language sentence:\n{triple}"
    if model_type == 'masked':
        # Three blanks separated by single spaces, matching the original template.
        masks = ' '.join([mask_token] * 3)
        return f"The RDF triple {triple} means that {masks}."
    return ""

In [3]:
# Load Models
def load_models(model_names=None):
    """Load tokenizers and models for the evaluated checkpoints.

    Args:
        model_names: Optional iterable of display names (keys of the returned
            dict) restricting which checkpoints are loaded. ``None`` (the
            default) loads all five, exactly as before. Loading a subset
            avoids holding every checkpoint in memory at once.

    Returns:
        A dict mapping display name to
        ``{'tokenizer': ..., 'model': ..., 'type': 'causal' | 'masked'}``,
        in the fixed order of the checkpoint table below.

    Raises:
        ValueError: If ``model_names`` contains a name that is not in the
            checkpoint table.
    """
    # (display name, checkpoint id passed to from_pretrained, model type)
    checkpoints = (
        ('GPT-Neo-1.3B', 'EleutherAI/gpt-neo-1.3B', 'causal'),
        ('GPT-Neo-2.7B', 'EleutherAI/gpt-neo-2.7B', 'causal'),
        ('GPT-J-6B', 'EleutherAI/gpt-j-6B', 'causal'),
        ('BERT-base-uncased', 'bert-base-uncased', 'masked'),
        ('RoBERTa-base', 'roberta-base', 'masked'),
    )
    model_classes = {
        'causal': AutoModelForCausalLM,
        'masked': AutoModelForMaskedLM,
    }

    if model_names is None:
        selected = None
    else:
        selected = set(model_names)
        unknown = selected - {name for name, _, _ in checkpoints}
        if unknown:
            raise ValueError(f"Unknown model name(s): {sorted(unknown)}")

    models = {}
    for name, checkpoint, model_type in checkpoints:
        if selected is not None and name not in selected:
            continue
        # Tokenizer first, then model: same load order as the original literal.
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        model = model_classes[model_type].from_pretrained(checkpoint)
        models[name] = {
            'tokenizer': tokenizer,
            'model': model,
            'type': model_type,
        }
    return models

In [4]:
# Load Reference Sentences
def load_reference_sentences(file_path):
    """Read the reference sentences from a JSON file.

    Args:
        file_path: Path of the JSON document to read.

    Returns:
        Whatever object the JSON document decodes to.

    The file is opened explicitly as UTF-8 so that non-ASCII characters decode
    the same way on every platform instead of depending on the locale's
    default encoding.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Compute Similarity
def compute_similarity(output, reference, model):
    """Score how close ``output`` is to ``reference`` in embedding space.

    Both texts are encoded with ``model.encode(..., convert_to_tensor=True)``
    (output first, then reference) and compared with
    ``util.pytorch_cos_sim``.

    Args:
        output: Generated text.
        reference: Text to compare against.
        model: Object exposing ``encode(text, convert_to_tensor=True)``.

    Returns:
        The result of calling ``.item()`` on the similarity value.
    """
    output_vec, reference_vec = [
        model.encode(text, convert_to_tensor=True)
        for text in (output, reference)
    ]
    return util.pytorch_cos_sim(output_vec, reference_vec).item()

# Run Evaluation
def _generate_causal_sentence(model, tokenizer, prompt, device):
    """Continue ``prompt`` with a causal LM and return only the newly generated text."""
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    prompt_length = inputs['input_ids'].shape[1]
    with torch.no_grad():
        # `early_stopping` was dropped: it only affects beam search, and this
        # call uses the default single-beam decoding.
        outputs = model.generate(
            **inputs,
            max_length=prompt_length + 50,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
        )
    # Cut the prompt off by token count rather than by character count: the
    # decoded text is not guaranteed to reproduce the prompt string exactly.
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


def _fill_masked_sentence(model, tokenizer, prompt, device):
    """Fill each mask slot of ``prompt`` with the model's top prediction.

    Returns ``(prompt_sent_to_model, filled_sentence)``.
    """
    mask_token = tokenizer.mask_token
    # The prompt template contains the literal "[MASK]"; swap in this
    # tokenizer's own mask token so tokenizers that use a different one
    # (e.g. "<mask>") still see mask positions.
    prompt = prompt.replace('[MASK]', mask_token)
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    with torch.no_grad():
        token_logits = model(**inputs).logits
    mask_positions = (inputs['input_ids'][0] == tokenizer.mask_token_id).nonzero(as_tuple=True)[0]
    predicted_ids = token_logits[0, mask_positions].argmax(dim=-1).tolist()
    sentence = prompt
    for token_id in predicted_ids:
        # Replace one slot at a time, left to right, so every mask receives
        # its own prediction instead of the whole joined list.
        sentence = sentence.replace(mask_token, tokenizer.decode([token_id]).strip(), 1)
    return prompt, sentence


def run_evaluation(models, triples_with_ids, references, threshold=0.75):
    """Verbalize every triple with every model and score it against a reference.

    Args:
        models: Dict mapping model name to a dict with keys ``'tokenizer'``,
            ``'model'`` and ``'type'`` (``'causal'`` or ``'masked'``).
        triples_with_ids: Iterable of ``(id, triple)`` pairs. It is iterated
            once per model, so it must be re-iterable (e.g. a list).
        references: Mapping from ``str(id)`` to the reference sentence; a
            missing id falls back to ``""``.
        threshold: Minimum similarity for an output to count as correct.
            Defaults to 0.75, the previously hard-coded value.

    Returns:
        Dict mapping model name to a list of result dicts with keys ``'id'``,
        ``'prompt'`` (the prompt actually sent to the model), ``'output'``,
        ``'reference'``, ``'similarity'`` and ``'is_correct'``.

    Side effects:
        Prints one progress line per model. Each model is moved to the
        selected device for its evaluation; when that device is a GPU the
        model is moved back to the CPU afterwards so the models do not pile
        up in GPU memory.
    """
    results = {}
    # Sentence Transformer model used for the similarity score.
    sim_model = SentenceTransformer('all-MiniLM-L6-v2')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for model_name, components in models.items():
        tokenizer = components['tokenizer']
        model = components['model']
        model_type = components['type']
        model.to(device)
        print(f"Evaluating with model: {model_name}")

        model_results = []
        # `triple_id` is never rebound inside the loop body; the original
        # reused `idx` for the mask positions, which corrupted both the
        # reference lookup and the reported id for masked models.
        for triple_id, triple in triples_with_ids:
            prompt = generate_prompts(triple, model_type=model_type)
            if model_type == 'causal':
                generated_sentence = _generate_causal_sentence(model, tokenizer, prompt, device)
            elif model_type == 'masked':
                prompt, generated_sentence = _fill_masked_sentence(model, tokenizer, prompt, device)
            else:
                generated_sentence = ""

            reference_sentence = references.get(str(triple_id), "")
            similarity = compute_similarity(generated_sentence, reference_sentence, sim_model)
            model_results.append({
                'id': triple_id,
                'prompt': prompt,
                'output': generated_sentence,
                'reference': reference_sentence,
                'similarity': similarity,
                'is_correct': similarity >= threshold
            })
        results[model_name] = model_results

        if device.type == 'cuda':
            # Release GPU memory before the next model is moved over.
            model.to('cpu')
            torch.cuda.empty_cache()
    return results

# Compute Metrics
def compute_metrics(results):
    """Summarize per-model correctness counts.

    Args:
        results: Dict mapping model name to a list of result dicts, each
            carrying an ``'is_correct'`` entry.

    Returns:
        Dict mapping model name to ``{'total', 'correct', 'accuracy',
        'precision'}``. ``'accuracy'`` and ``'precision'`` hold the same
        value, ``correct / total``, or ``0`` when a model has no results.
    """
    def summarize(entries):
        n_total = len(entries)
        n_correct = sum(bool(entry['is_correct']) for entry in entries)
        # Every example carries equal weight, so a single ratio serves as
        # both reported figures.
        ratio = n_correct / n_total if n_total else 0
        return {
            'total': n_total,
            'correct': n_correct,
            'accuracy': ratio,
            'precision': ratio
        }

    return {name: summarize(entries) for name, entries in results.items()}

In [13]:
# Main Function
def main(rdf_file='rdf_data.ttl', reference_file='reference_sentences.json'):
    """Run the full evaluation and print per-example results and metrics.

    Args:
        rdf_file: Path handed to ``load_rdf_data``. Defaults to the
            previously hard-coded ``'rdf_data.ttl'``.
        reference_file: Path handed to ``load_reference_sentences``. Defaults
            to the previously hard-coded ``'reference_sentences.json'``.

    Returns:
        None; all results are written to standard output.
    """
    # Load data (the cheap inputs come first, the models last).
    graph = load_rdf_data(rdf_file)
    triples_with_ids = extract_triples_with_ids(graph)
    references = load_reference_sentences(reference_file)
    models = load_models()

    # Run evaluation
    evaluation_results = run_evaluation(models, triples_with_ids, references)
    metrics = compute_metrics(evaluation_results)

    # Print Results
    separator = "-" * 50
    for model_name, outputs in evaluation_results.items():
        print(f"\nResults for model: {model_name}")
        for result in outputs:
            print(f"ID: {result['id']}")
            print(f"Prompt:\n{result['prompt']}\n")
            print(f"Model Output:\n{result['output']}\n")
            print(f"Reference Sentence:\n{result['reference']}\n")
            print(f"Similarity Score: {result['similarity']:.4f}")
            print(f"Correct: {result['is_correct']}")
            print(separator)

    # Print Metrics
    print("\nEvaluation Metrics:")
    for model_name, metric in metrics.items():
        print(f"Model: {model_name}")
        print(f"Total Examples: {metric['total']}")
        print(f"Correct Predictions: {metric['correct']}")
        print(f"Accuracy: {metric['accuracy']:.2f}")
        print(f"Precision: {metric['precision']:.2f}")
        print(separator)

# Entry point: run the evaluation only when this code executes as the main
# module, not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


KeyboardInterrupt: 