In [None]:
import os
from dataclasses import asdict
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from typing import List, Dict, Any, Tuple
import json
from kgg.models import Document, ProcessedDocument
from kgg.nodes.entity_extraction import GLiNEREntitiesGenerator
import openai

In [None]:
class GLiNERLLMHybridGenerator:
    def __init__(self, openai_api_key: str, model: str = "gpt-3.5-turbo"):
        """
        Set up the GLiNER entity extractor and the LLM configuration.

        Args:
            openai_api_key: Key assigned to the module-level
                ``openai.api_key`` attribute (not stored on the instance).
            model: Name of the LLM model; kept on ``self.model``.
        """
        # Entity extractor created with its default constructor arguments.
        entity_generator = GLiNEREntitiesGenerator()
        self.gliner = entity_generator
        self.model = model
        # The key lives on the ``openai`` module itself, so every user of that
        # module in this interpreter sees the value set here.
        openai.api_key = openai_api_key
        
    def _extract_entities(self, document: Document) -> List[Dict[str, Any]]:
        """Run the GLiNER generator on ``document`` and return its entities.

        The generator is invoked with the document and a ``None`` schema; the
        ``entities`` attribute of whatever it returns is handed back unchanged.
        """
        request = {"document": document, "schema": None}
        result = self.gliner.invoke(request)
        return result.entities
    
    def _generate_relations_prompt(self, text: str, entities: List[Dict[str, Any]]) -> str:
        """Generate prompt for LLM to extract relations"""
        prompt = f"""Given the following text and extracted entities, identify meaningful relationships between the entities.
        Format the output as a list of JSON objects with 'subject', 'relation', and 'object' fields.
        
        Text: {text}