In [1]:
import os
# LangChain-related environment settings for this session. Judging by the
# variable names, they switch tracing on and name the project the runs are
# filed under -- TODO confirm against the LangChain/LangSmith docs.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "Code_structure_research",
    }
)

In [2]:
# Move one level up from the directory the kernel started in.
# A bare relative chdir would be resolved against the *current* directory, so
# re-running this cell would climb one more level every time. Remembering the
# start directory on first execution makes the cell safe to re-run.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [3]:
# Show the current working directory to confirm the chdir above took effect.
%pwd

'c:\\Users\\Maza\\Desktop\\Pinecone_pipeline'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class CodeStructureConfig:
    """
    Immutable bundle of settings consumed by ``CodeStructure``.

    Instances are built by ``ConfigurationManager.get_code_structure_config``,
    which copies the values straight from the loaded YAML sections.

    NOTE(review): the path fields are annotated as ``Path`` but the manager
    passes the raw config values through without converting them, so at
    runtime they are presumably plain strings -- confirm against ``read_yaml``.
    """
    # Directory that ConfigurationManager creates via create_directories.
    root_dir: Path
    # Destination handed to save_json for the discovered directory structure.
    load_struct_dir: Path
    # Destination handed to set_to_txt for the combined ignore set.
    load_ignored_dir: Path
    # .gitignore file that the ignore patterns are read from.
    gitignore_path: Path
    # Root of the tree that build_directory_structure explores.
    code_dir: Path
    # File the LLM-formatted structure is written to. The spelling "sructure"
    # matches the key read from the config, so it must stay as is.
    sructure_file: Path
    # Contents of the models YAML; CodeStructure reads its ``Llama3`` attribute,
    # so this is presumably an attribute-accessible mapping -- TODO confirm.
    models: dict
    # Prompt text used as the single "human" message of the chat prompt.
    structure_prompt: str
    # Extra names to skip, merged into the names parsed from .gitignore.
    files_to_ignore: set
    

In [5]:
from vector_db_pipeline.constants import *
from vector_db_pipeline.utils.common import read_yaml, save_json, create_directories, set_to_txt
from vector_db_pipeline import logger
from dotenv import load_dotenv
load_dotenv()
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
# 

In [6]:
class ConfigurationManager:
    """
    Loads the project's YAML files and builds typed config objects from them.

    Attributes:
        config, schema, params, models, prompt_template, files_to_ignore:
            The parsed contents of the corresponding YAML files, exactly as
            returned by ``read_yaml``.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        models_filepath = MODELS_FILE_PATH,
        prompt_template = PROMPT_FILE_PATH,
        files_to_ignore = IGNORE_FILE_PATH):
        """
        Read every YAML file once and keep the parsed result on the instance.

        Args:
            config_filepath: Path of the main configuration YAML.
            schema_filepath: Path of the schema YAML.
            params_filepath: Path of the parameters YAML.
            models_filepath: Path of the models YAML.
            prompt_template: Path of the prompt-template YAML.
            files_to_ignore: Path of the YAML listing files to ignore.
        """
        # (attribute name, file to read) -- processed top to bottom so the
        # files are loaded in the same order as the parameters are declared.
        yaml_sources = (
            ("config", config_filepath),
            ("schema", schema_filepath),
            ("params", params_filepath),
            ("models", models_filepath),
            ("prompt_template", prompt_template),
            ("files_to_ignore", files_to_ignore),
        )
        for attribute_name, yaml_path in yaml_sources:
            setattr(self, attribute_name, read_yaml(yaml_path))

    def get_code_structure_config(self) -> CodeStructureConfig:
        """
        Assemble the settings needed by ``CodeStructure``.

        The ``code_structure`` section of the main config supplies the paths,
        the ``generate_file_structure`` entry of the prompt file supplies the
        prompt text, and ``root_dir`` is passed to ``create_directories``
        before the config object is built.

        Returns:
            CodeStructureConfig: The populated configuration object.
        """
        section = self.config.code_structure
        prompt_section = self.prompt_template.generate_file_structure

        create_directories([section.root_dir])

        return CodeStructureConfig(
            root_dir=section.root_dir,
            load_struct_dir=section.load_struct_dir,
            load_ignored_dir=section.load_ignored_dir,
            gitignore_path=section.gitignore_path,
            code_dir=section.code_dir,
            sructure_file=section.sructure_file,
            models=self.models,
            structure_prompt=prompt_section.description,
            files_to_ignore=self.files_to_ignore.IGNORE_FILES,
        )
    

In [7]:
"""
A class for managing code structure and formatting.

Attributes:
    config (CodeStructureConfig): Configuration object for the CodeStructure class.

Functions:
    get_ignored_subdirs_from_gitignore: Reads ignored directories and extensions from a .gitignore file.
    explore_directory: Explores directories and files, excluding ignored ones.
    build_directory_structure: Builds the directory structure recursively.
    get_formated_strcuture: Formats the directory structure using an AI model.
"""

class CodeStructure:
    """
    Discovers a project's directory layout and optionally reformats it with an LLM.

    Typical use: call ``get_ignored_subdirs_from_gitignore`` to collect the
    names and suffixes to skip, then ``build_directory_structure`` to walk
    ``config.code_dir`` and save the result, then (optionally)
    ``get_formated_strcuture`` to send that result through a chat model.

    Attributes:
        config (CodeStructureConfig): Paths, model names and prompt text used by the methods.
        ignored_subdirs (set): Exact file/directory names skipped while exploring.
        ignored_extensions (set): Name suffixes skipped while exploring.
    """

    def __init__(self, config: "CodeStructureConfig"):
        """
        Initializes the CodeStructure object with the given configuration.

        Args:
            config (CodeStructureConfig): Configuration object for the CodeStructure class.
        """
        self.config = config
        # Start with empty ignore sets so exploring works even when
        # get_ignored_subdirs_from_gitignore was never called or failed early.
        self.ignored_subdirs = set()
        self.ignored_extensions = set()

    @staticmethod
    def _parse_gitignore_lines(lines):
        """
        Split .gitignore-style lines into exact names and name suffixes.

        Only a small subset of the gitignore syntax is understood:
        ``*suffix`` / ``*suffix/`` become a suffix, everything else becomes an
        exact name with any trailing ``*`` and ``/`` removed.

        Args:
            lines (Iterable[str]): Raw lines, with or without trailing newlines.

        Returns:
            tuple[list, list]: ``(names, suffixes)`` in order of appearance.
        """
        names = []
        suffixes = []
        for raw_line in lines:
            line = raw_line.strip()
            # Skip comments and empty lines
            if not line or line.startswith("#"):
                continue
            if line.startswith("*"):
                suffix = line[1:-1] if line.endswith("/") else line[1:]
                # A bare "*" would give an empty suffix, and every name ends
                # with "", so keeping it would hide the whole tree.
                if suffix:
                    suffixes.append(suffix)
            else:
                # "build/" -> "build", "logs/*" -> "logs", "tmp*" -> "tmp"
                name = line.rstrip("*").rstrip("/")
                if name:
                    names.append(name)
        return names, suffixes

    def get_ignored_subdirs_from_gitignore(self):
        """
        Reads ignored directories and extensions from a .gitignore file.

        Populates ``ignored_subdirs`` (names from the file plus
        ``config.files_to_ignore``) and ``ignored_extensions``, then hands the
        combined set to ``set_to_txt`` with ``config.load_ignored_dir`` as the
        destination. A missing or unreadable .gitignore is logged and treated
        as empty, so the configured ``files_to_ignore`` still apply.

        Returns:
            None
        """
        gitignore_path = self.config.gitignore_path
        ignored_files_path = self.config.load_ignored_dir

        names, suffixes = [], []
        try:
            with open(gitignore_path, "r", encoding="utf-8") as file:
                names, suffixes = self._parse_gitignore_lines(file)
            logger.info(f"Ignored files obtained from: {gitignore_path}")
        except FileNotFoundError:
            logger.warning(f"Warning: {gitignore_path} not found.")
        except Exception as e:
            logger.error(f"Error while reading {gitignore_path}: {e}")

        self.ignored_subdirs = set(names)
        self.ignored_subdirs.update(self.config.files_to_ignore or ())
        self.ignored_extensions = set(suffixes)

        try:
            all_ignored_files = self.ignored_subdirs.union(self.ignored_extensions)
            set_to_txt(Path(ignored_files_path), all_ignored_files)
            logger.info(f"Files to ignore in code structure loaded to  : {ignored_files_path}")
        except Exception as e:
            logger.error(f"Error while loading ignored files to {ignored_files_path}: {e}")

    def explore_directory(self, directory):
        """
        Explores directories and files, excluding ignored ones.

        Entries are listed in sorted order so the result is reproducible
        across runs and platforms.

        Args:
            directory (str): Path to the directory to explore.

        Returns:
            dict: ``{'Directories': [...], 'Files': [...]}`` holding the entry
            names (not full paths) directly inside ``directory``.
        """
        directories = []
        files = []
        # Empty suffixes are dropped because every name ends with "".
        ignored_suffixes = tuple(suffix for suffix in self.ignored_extensions if suffix)

        for item in sorted(os.listdir(directory)):
            if item in self.ignored_subdirs or item.endswith(ignored_suffixes):
                continue
            if os.path.isdir(os.path.join(directory, item)):
                directories.append(item)
            else:
                files.append(item)

        return {'Directories': directories, 'Files': files}

    def _collect_structure(self, root):
        """
        Walk ``root`` depth-first and describe every non-ignored directory.

        Args:
            root (str | os.PathLike): Directory to start from.

        Returns:
            dict: Maps each visited directory path (``root`` itself, then
            ``root`` joined with the sub-path) to its ``explore_directory`` result.
        """
        structure = {}
        pending = [os.fspath(root)]
        while pending:
            current = pending.pop()
            entry = self.explore_directory(current)
            structure[current] = entry
            # ``current`` already starts with the root, so children are built
            # from it directly. Pushing in reverse keeps parent-first,
            # left-to-right visiting order.
            pending.extend(
                os.path.join(current, subdir) for subdir in reversed(entry['Directories'])
            )
        return structure

    def build_directory_structure(self):
        """
        Builds the directory structure recursively.

        Walks ``config.code_dir``, passes the result to ``save_json`` with
        ``config.load_struct_dir`` as the destination, and returns it.

        Returns:
            dict: A dictionary representing the directory structure.
        """
        self.root_directory = self.config.code_dir
        dir_structure_file = self.config.load_struct_dir

        directory_structure = self._collect_structure(self.root_directory)

        save_json(Path(dir_structure_file), directory_structure)
        logger.info(f"Directory structure loaded to {dir_structure_file}")
        return directory_structure

    def get_formated_strcuture(self, directory_structure):
        """
        Formats the directory structure using an AI model.

        Builds a one-message chat prompt from ``config.structure_prompt``,
        invokes it with ``directory_structure`` bound to the ``JSON_FILE``
        variable, and writes the model's reply to ``config.sructure_file``.
        Any failure is logged instead of raised.

        Args:
            directory_structure (dict): The directory structure to format.

        Returns:
            None
        """
        try:
            formated_structure_file = self.config.sructure_file
            model = self.config.models.Llama3
            logger.info(f"Working with model: {model}")

            chat = ChatGroq(temperature=0, model_name=model)
            prompt = ChatPromptTemplate.from_messages([("human", self.config.structure_prompt)])
            chain = prompt | chat
            formatted_structure = chain.invoke({"JSON_FILE": directory_structure})

            # Explicit UTF-8: model output may contain characters that the
            # platform default encoding cannot represent.
            with open(formated_structure_file, "w", encoding="utf-8") as f:
                f.write(formatted_structure.content)
            logger.info(f"Formated file structure loaded to : {formated_structure_file}")
        except Exception as e:
            logger.error(f"Error while formating structure: {e}")



In [8]:
# Run the pipeline: load the YAML configuration, build the CodeStructure
# helper from it, collect the ignore rules, then walk the code directory.
config = ConfigurationManager()
code_structure_config = config.get_code_structure_config()
get_code_structure = CodeStructure(config=code_structure_config)
# Must run before build_directory_structure: it sets the ignore sets that
# explore_directory reads.
get_code_structure.get_ignored_subdirs_from_gitignore()
directory_structure = get_code_structure.build_directory_structure()
# Optional LLM formatting step, left commented out; uncommenting it sends
# directory_structure to the ChatGroq model named in the config.
# get_code_structure.get_formated_strcuture(directory_structure)

[2024-05-03 10:14:05,145: INFO: common: yaml file: config\config.yaml loaded successfully:]
[2024-05-03 10:14:05,148: INFO: common: yaml file: schema.yaml loaded successfully:]
[2024-05-03 10:14:05,152: INFO: common: yaml file: params.yaml loaded successfully:]
[2024-05-03 10:14:05,154: INFO: common: yaml file: models.yaml loaded successfully:]
[2024-05-03 10:14:05,159: INFO: common: yaml file: prompt_template.yaml loaded successfully:]
[2024-05-03 10:14:05,162: INFO: common: yaml file: exhalation_ignore.yaml loaded successfully:]
[2024-05-03 10:14:05,163: INFO: common: Directory already exists: artifacts/app_schema:]
[2024-05-03 10:14:05,165: INFO: 2912658414: Ignored files obtained from: .gitignore:]
[2024-05-03 10:14:05,167: INFO: 2912658414: Files to ignore in code structure loaded to  : artifacts/app_schema/ignored_files.json:]
[2024-05-03 10:14:05,175: INFO: common: json file saved at: artifacts\app_schema\schema.json:]
[2024-05-03 10:14:05,176: INFO: 2912658414: Directory struct