In [11]:
import os

# Directory appended to PATH so the Graphviz executables can be found.
GRAPHVIZ_BIN_DIR = 'C:/Program Files/Graphviz/bin/'


def ensure_on_path(directory):
    """Append ``directory`` to the PATH environment variable at most once.

    Re-running a notebook cell must not keep growing PATH, so the directory is
    only appended when it does not already occur in the current value.  A plain
    substring check is used on purpose: splitting on ``os.pathsep`` would cut a
    Windows drive path such as ``C:/...`` apart on platforms where the
    separator is ``:``.

    Args:
        directory: Directory to make available on PATH.
    """
    current_path = os.environ.get("PATH", "")
    if directory not in current_path:
        os.environ["PATH"] = current_path + os.pathsep + directory


# LangChain tracing settings, provided through environment variables.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "Graph_structure_research"
ensure_on_path(GRAPHVIZ_BIN_DIR)

In [12]:
%pwd

'c:\\Users\\Maza\\Desktop\\Pinecone_pipeline'

In [3]:
# NOTE: a relative chdir is not idempotent - every re-run of this cell moves the
# working directory one more level up. Run it at most once per kernel session
# and check the directory printed below before continuing.
os.chdir("../")
%pwd

'c:\\Users\\Maza\\Desktop\\Pinecone_pipeline'

In [13]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class GraphStructureConfig:
    """Immutable bundle of settings for the graph-structure stage.

    Instances are built by ``ConfigurationManager.get_graph_structure_config``
    and consumed by ``GraphStructure``.  The dataclass performs no validation
    or conversion: every field holds exactly the value it was given.
    """
    # Working directory of the stage; ConfigurationManager passes it to
    # create_directories before building this object.
    root_dir: Path
    # Filename handed to Digraph.render when the graph is drawn.
    graph_structure_file: Path
    # Text file whose content is appended to the task prompt sent to the LLM.
    # The field name (spelled without the "t") is used by callers and by the
    # configuration lookup, so it must stay as is.
    sructure_file: Path
    # Destination passed to save_json for the parsed graph model.
    graph_json_model:Path
    # Parsed content of the models YAML file.
    models: dict
    # Prompt section for this stage; GraphStructure reads .system, .task,
    # .system_debugger and .task_debugger from it via attribute access, so the
    # value has to support that (presumably the box-like object returned by
    # read_yaml - confirm).
    graph_prompts: dict

In [14]:

def read_txt(file_path, encoding="utf-8"):
    """Return the full content of a text file as a string.

    The encoding is stated explicitly so the result does not depend on the
    platform's locale default (for example cp1252 on Windows), which would
    garble UTF-8 files containing non-ASCII characters.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8.

    Returns:
        The decoded file content.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid in ``encoding``.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read()

In [15]:
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_groq import ChatGroq
from vector_db_pipeline.constants import *
from vector_db_pipeline.utils.common import read_yaml, save_json, create_directories
from vector_db_pipeline import logger
from dotenv import load_dotenv
from graphviz import Digraph
import json
load_dotenv()

True

In [27]:
class ConfigurationManager:
    """Loads the project's YAML files and derives per-stage configuration objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        models_filepath = MODELS_FILE_PATH,
        prompt_template = PROMPT_FILE_PATH):
        """Read every YAML file and keep the parsed result on the instance.

        Each argument is the path of one YAML file; the parsed content is
        stored under ``config``, ``schema``, ``params``, ``models`` and
        ``prompt_template`` respectively.
        """
        # Files are read in this fixed order, one attribute per file.
        yaml_sources = (
            ("config", config_filepath),
            ("schema", schema_filepath),
            ("params", params_filepath),
            ("models", models_filepath),
            ("prompt_template", prompt_template),
        )
        for attribute_name, yaml_path in yaml_sources:
            setattr(self, attribute_name, read_yaml(yaml_path))

    def get_graph_structure_config(self) -> GraphStructureConfig:
        """Build the configuration of the graph-structure stage.

        The stage's root directory is created first; the remaining values are
        copied unchanged from the loaded YAML content.

        Returns:
            A ``GraphStructureConfig`` filled from the ``graph_structure``
            section of the main config, the loaded models and the
            ``generate_graph_structure`` prompt section.
        """
        stage_section = self.config.graph_structure
        stage_prompts = self.prompt_template.generate_graph_structure

        create_directories([stage_section.root_dir])

        return GraphStructureConfig(
            root_dir=stage_section.root_dir,
            graph_structure_file=stage_section.graph_structure_file,
            sructure_file=stage_section.sructure_file,
            graph_json_model=stage_section.graph_json_model,
            models=self.models,
            graph_prompts=stage_prompts,
        )

In [32]:
class GraphStructure:
    """Generates a graph model of an application with an LLM and draws it.

    ``generate_graph_structure`` asks the chat model for a JSON description of
    entities and relationships and saves it; ``generate_graph_viz_and_render``
    turns such a description into a Graphviz drawing.
    """

    # URL prefixes removed from schema.org terms before they appear in labels.
    _SCHEMA_ORG_PREFIXES = ("https://schema.org/", "http://schema.org/")
    # Fixed offset applied to terms without a known prefix; it equals
    # len("http://schema.org/") and keeps the previous slicing behaviour.
    _LEGACY_PREFIX_LENGTH = 18

    def __init__(self, config: "GraphStructureConfig"):
        """Store the stage configuration used by both public methods."""
        self.config = config

    def _build_chain(self, system_prompt, model_name):
        """Return a prompt-to-model chain for the given system prompt and Groq model."""
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_prompt),
            MessagesPlaceholder(variable_name="messages"),
        ])
        llm = ChatGroq(temperature=0, model_name=model_name)
        return prompt | llm

    @classmethod
    def _schema_suffix(cls, term):
        """Return a schema.org term without its URL prefix (http or https)."""
        for prefix in cls._SCHEMA_ORG_PREFIXES:
            if term.startswith(prefix):
                return term[len(prefix):]
        return term[cls._LEGACY_PREFIX_LENGTH:]

    @staticmethod
    def _parse_model(results):
        """Turn ``results`` into a Python object without executing any code.

        Accepts an already parsed dict, a JSON document, or the ``str()`` form
        of a dict.  Raises ``ValueError``/``SyntaxError`` for anything else.
        """
        if isinstance(results, dict):
            return results
        # Imported locally so this block does not depend on a file-level import.
        import ast
        try:
            return json.loads(results)
        except (ValueError, TypeError):
            # str(dict) uses single quotes and True/False/None, which is not
            # JSON; literal_eval only accepts literals, unlike eval().
            return ast.literal_eval(results)

    def generate_graph_structure(self):
        """Ask the LLM for the graph model, parse it and save it as JSON.

        The content of ``config.sructure_file`` is appended to the task prompt
        and sent to the model named by ``config.models.Llama3``.  If the answer
        is not valid JSON, the answer and the parse error are sent once to a
        second "debugger" prompt and that reply is parsed instead.

        Returns:
            The parsed graph model (also written to ``config.graph_json_model``).

        Raises:
            ValueError: If the reply of the debugger prompt is not valid JSON
                either (``json.JSONDecodeError`` is a ``ValueError``).
        """
        prompts = self.config.graph_prompts
        # Use the models already loaded into the config instead of re-reading
        # the YAML file, so a custom models file is honoured as well.
        model = self.config.models.Llama3
        logger.info(f"Working with model: {model}")
        model_generate = self._build_chain(prompts.system, model)

        app_structure = read_txt(self.config.sructure_file)
        logger.info("App structure loaded")
        request = HumanMessage(content=prompts.task + app_structure)

        logger.info(f"Invoking {model}")
        results = model_generate.invoke({"messages": [request]})
        logger.info(f"{model} finished task")

        try:
            results_dict = json.loads(results.content)
        except (ValueError, TypeError) as e:
            logger.warning(f"Error while formatting structure: {e}")

            # One repair attempt: hand the broken answer and the parser's
            # complaint to the debugger prompt.
            debugger_chain = self._build_chain(prompts.system_debugger, model)
            task_description = prompts.task_debugger.format(
                error=str(e), results=results.content
            )
            request = HumanMessage(content=task_description)

            results = debugger_chain.invoke({"messages": [request]})
            results_dict = json.loads(results.content)
            logger.info(f"Error {e} fixed")

        save_json(Path(self.config.graph_json_model), results_dict)
        return results_dict

    def generate_graph_viz_and_render(self, results):
        """Draw the graph model with Graphviz and render it as PNG.

        Args:
            results: The graph model as a dict, a JSON string, or the
                ``str()`` form of a dict.  It must provide ``entities`` (each
                with ``name``, ``schema_org_term`` and ``attributes``) and
                ``relationships`` (each with ``from``, ``to``, ``name`` and
                ``schema_org_term``).

        This is a best-effort step: any failure is logged and swallowed, and
        the method always returns ``None``.
        """
        try:
            filename = self.config.graph_structure_file
            # The input originates from LLM output, so it is parsed as data
            # and never evaluated as code.
            model = self._parse_model(results)

            graph = Digraph(engine='neato', node_attr={'shape': 'Mrecord'}, format="png")

            for entity in model['entities']:
                attribute_cells = "|".join(
                    attribute['name'] + "\n(sch:"
                    + self._schema_suffix(attribute['schema_org_term']) + ") "
                    for attribute in entity['attributes']
                )
                graph.node(
                    entity['name'].replace(" ", "_") + "_node",
                    "{ Entity: " + entity['name']
                    + "\n(sch:" + self._schema_suffix(entity['schema_org_term']) + ") "
                    + "|" + attribute_cells + "}",
                )

            for relation in model['relationships']:
                graph.edge(
                    relation['from'].replace(" ", "_") + "_node",
                    relation['to'].replace(" ", "_") + "_node",
                    label=relation['name'] + "\n(sch:"
                    + self._schema_suffix(relation['schema_org_term']) + ") ",
                    len='6.00',
                )

            graph.render(filename)
            logger.info(f"Graph model saved in  {filename}")
            logger.info(f"Graph image saved in  {filename}.png")
        except Exception as e:
            logger.error(f"Error while generating graph: {e}")


In [33]:
# Load all YAML settings and derive the configuration of the graph-structure stage.
config = ConfigurationManager()
graph_structure_config = config.get_graph_structure_config()
graph_structure = GraphStructure(config=graph_structure_config)
# Ask the LLM for the graph model; the method also saves the parsed dict as JSON.
generate_graph_structure = graph_structure.generate_graph_structure()
# The dict is handed over in its string form, which the renderer parses back.
graph_structure.generate_graph_viz_and_render(str(generate_graph_structure))



[2024-04-25 20:45:24,137: INFO: common: yaml file: config\config.yaml loaded successfully:]
[2024-04-25 20:45:24,137: INFO: common: yaml file: schema.yaml loaded successfully:]
[2024-04-25 20:45:24,143: INFO: common: yaml file: params.yaml loaded successfully:]
[2024-04-25 20:45:24,145: INFO: common: yaml file: models.yaml loaded successfully:]
[2024-04-25 20:45:24,145: INFO: common: yaml file: prompt_template.yaml loaded successfully:]
[2024-04-25 20:45:24,145: INFO: common: Directory already exists: artifacts/graph_structure:]
[2024-04-25 20:45:24,145: INFO: common: yaml file: models.yaml loaded successfully:]
[2024-04-25 20:45:24,145: INFO: 2119797541: Working with model: llama3-70b-8192:]
[2024-04-25 20:45:24,627: INFO: 2119797541: App structure loaded:]
[2024-04-25 20:45:24,642: INFO: 2119797541: Invoking llama3-70b-8192:]
[2024-04-25 20:45:36,309: INFO: _client: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK":]
[2024-04-25 20:45:36,325: INFO: 