docs: graphs and helpers docstrings

VinciGit00 · May 1, 2024 · 0631985 · 0631985
1 parent 18c20eb
commit 0631985
Show file tree

Hide file tree

Showing 14 changed files with 305 additions and 81 deletions.
diff --git a/scrapegraphai/builders/graph_builder.py b/scrapegraphai/builders/graph_builder.py
@@ -1,5 +1,5 @@
 """ 
-Module for making the graph building
+GraphBuilder Module
 """
 
 from langchain_core.prompts import ChatPromptTemplate

diff --git a/scrapegraphai/graphs/__init__.py b/scrapegraphai/graphs/__init__.py
@@ -1,6 +1,7 @@
 """ 
 __init__.py file for graphs folder
 """
+
 from .base_graph import BaseGraph
 from .smart_scraper_graph import SmartScraperGraph
 from .speech_graph import SpeechGraph

diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py
@@ -1,6 +1,7 @@
 """
-Module having abstract class for creating all the graphs
+AbstractGraph Module
 """
+
 from abc import ABC, abstractmethod
 from typing import Optional
 from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq
@@ -9,13 +10,34 @@
 
 class AbstractGraph(ABC):
     """
-    Abstract class representing a generic graph-based tool.
+    Scaffolding class for creating a graph representation and executing it.
+
+    Attributes:
+        prompt (str): The prompt for the graph.
+        source (str): The source of the graph.
+        config (dict): Configuration parameters for the graph.
+        llm_model: An instance of a language model client, configured for generating answers.
+        embedder_model: An instance of an embedding model client, configured for generating embeddings.
+        verbose (bool): A flag indicating whether to show print statements during execution.
+        headless (bool): A flag indicating whether to run the graph in headless mode.
+
+    Args:
+        prompt (str): The prompt for the graph.
+        config (dict): Configuration parameters for the graph.
+        source (str, optional): The source of the graph.
+
+    Example:
+        >>> class MyGraph(AbstractGraph):
+        ...     def _create_graph(self):
+        ...         # Implementation of graph creation here
+        ...         return graph
+        ...
+        >>> my_graph = MyGraph("Example Graph", {"llm": {"model": "gpt-3.5-turbo"}}, "example_source")
+        >>> result = my_graph.run()
     """
 
     def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
-        """
-        Initializes the AbstractGraph with a prompt, file source, and configuration.
-        """
+
         self.prompt = prompt
         self.source = source
         self.config = config
@@ -32,10 +54,20 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
         self.final_state = None
         self.execution_info = None
 
-    def _create_llm(self, llm_config: dict):
+    def _create_llm(self, llm_config: dict) -> object:
         """
-        Creates an instance of the language model (OpenAI or Gemini) based on configuration.
+        Create a large language model instance based on the configuration provided.
+
+        Args:
+            llm_config (dict): Configuration parameters for the language model.
+
+        Returns:
+            object: An instance of the language model client.
+
+        Raises:
+            KeyError: If the model is not supported.
         """
+
         llm_defaults = {
             "temperature": 0,
             "streaming": False
@@ -104,16 +136,27 @@ def _create_llm(self, llm_config: dict):
 
     def get_state(self, key=None) -> dict:
         """""
-        Obtain the current state
+        Get the final state of the graph.
+
+        Args:
+            key (str, optional): The key of the final state to retrieve.
+
+        Returns:
+            dict: The final state of the graph, or only the value stored under key if one is given.
         """
+
         if key is not None:
             return self.final_state[key]
         return self.final_state
 
     def get_execution_info(self):
         """
         Returns the execution information of the graph.
+
+        Returns:
+            list: The execution information of the graph, one entry per executed node.
         """
+
         return self.execution_info
 
     @abstractmethod

diff --git a/scrapegraphai/graphs/base_graph.py b/scrapegraphai/graphs/base_graph.py
@@ -1,6 +1,7 @@
 """
-Module for creating the base graphs
- """
+BaseGraph Module
+"""
+
 import time
 import warnings
 from langchain_community.callbacks import get_openai_callback
@@ -16,21 +17,33 @@ class BaseGraph:
                       key-value pair corresponds to the from-node and to-node relationship.
         entry_point (str): The name of the entry point node from which the graph execution begins.
 
-    Methods:
-        execute(initial_state): Executes the graph's nodes starting from the entry point and
-                                traverses the graph based on the provided initial state.
-
     Args:
         nodes (iterable): An iterable of node instances that will be part of the graph.
         edges (iterable): An iterable of tuples where each tuple represents a directed edge
                           in the graph, defined by a pair of nodes (from_node, to_node).
         entry_point (BaseNode): The node instance that represents the entry point of the graph.
+
+    Raises:
+        Warning: If the entry point node is not the first node in the list.
+
+    Example:
+        >>> BaseGraph(
+        ...    nodes=[
+        ...        fetch_node,
+        ...        parse_node,
+        ...        rag_node,
+        ...        generate_answer_node,
+        ...    ],
+        ...    edges=[
+        ...        (fetch_node, parse_node),
+        ...        (parse_node, rag_node),
+        ...        (rag_node, generate_answer_node)
+        ...    ],
+        ...    entry_point=fetch_node
+        ... )
     """
 
     def __init__(self, nodes: list, edges: list, entry_point: str):
-        """
-        Initializes the graph with nodes, edges, and the entry point.
-        """
 
         self.nodes = nodes
         self.edges = self._create_edges({e for e in edges})
@@ -51,6 +64,7 @@ def _create_edges(self, edges: list) -> dict:
         Returns:
             dict: A dictionary of edges with the from-node as keys and to-node as values.
         """
+
         edge_dict = {}
         for from_node, to_node in edges:
             edge_dict[from_node.node_name] = to_node.node_name
@@ -66,8 +80,10 @@ def execute(self, initial_state: dict) -> Tuple[dict, list]:
             initial_state (dict): The initial state to pass to the entry point node.
 
         Returns:
-            dict: The state after execution has completed, which may have been altered by the nodes.
+            Tuple[dict, list]: A tuple containing the final state of the execution and a list
+                               of execution information for each node.
         """
+
         current_node_name = self.nodes[0]
         state = initial_state
 

diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py
@@ -1,6 +1,7 @@
 """
-Module for creating the smart scraper
+JSONScraperGraph Module
 """
+
 from .base_graph import BaseGraph
 from ..nodes import (
     FetchNode,
@@ -13,22 +14,44 @@
 
 class JSONScraperGraph(AbstractGraph):
     """
-    SmartScraper is a comprehensive web scraping tool that automates the process of extracting
-    information from web pages using a natural language model to interpret and answer prompts.
+    JSONScraperGraph defines a scraping pipeline for JSON files.
+
+    Attributes:
+        prompt (str): The prompt for the graph.
+        source (str): The source of the graph.
+        config (dict): Configuration parameters for the graph.
+        llm_model: An instance of a language model client, configured for generating answers.
+        embedder_model: An instance of an embedding model client, configured for generating embeddings.
+        verbose (bool): A flag indicating whether to show print statements during execution.
+        headless (bool): A flag indicating whether to run the graph in headless mode.
+
+    Args:
+        prompt (str): The prompt for the graph.
+        source (str): The source of the graph.
+        config (dict): Configuration parameters for the graph.
+
+    Example:
+        >>> json_scraper = JSONScraperGraph(
+        ...     "List me all the attractions in Chioggia.",
+        ...     "data/chioggia.json",
+        ...     {"llm": {"model": "gpt-3.5-turbo"}}
+        ... )
+        >>> result = json_scraper.run()
     """
 
     def __init__(self, prompt: str, source: str, config: dict):
-        """
-        Initializes the JsonScraperGraph with a prompt, source, and configuration.
-        """
         super().__init__(prompt, config, source)
 
         self.input_key = "json" if source.endswith("json") else "json_dir"
 
-    def _create_graph(self):
+    def _create_graph(self) -> BaseGraph:
         """
         Creates the graph of nodes representing the workflow for web scraping.
+        
+        Returns:
+            BaseGraph: A graph instance representing the web scraping workflow.
         """
+
         fetch_node = FetchNode(
             input="json_dir",
             output=["doc"],
@@ -81,7 +104,11 @@ def _create_graph(self):
     def run(self) -> str:
         """
         Executes the web scraping process and returns the answer to the prompt.
+
+        Returns:
+            str: The answer to the prompt.
         """
+
         inputs = {"user_prompt": self.prompt, self.input_key: self.source}
         self.final_state, self.execution_info = self.graph.execute(inputs)
 

diff --git a/scrapegraphai/graphs/script_creator_graph.py b/scrapegraphai/graphs/script_creator_graph.py
@@ -1,6 +1,7 @@
 """
-Module for creating the smart scraper
+ScriptCreatorGraph Module
 """
+
 from .base_graph import BaseGraph
 from ..nodes import (
     FetchNode,
@@ -13,24 +14,47 @@
 
 class ScriptCreatorGraph(AbstractGraph):
     """
-    SmartScraper is a comprehensive web scraping tool that automates the process of extracting
-    information from web pages using a natural language model to interpret and answer prompts.
+    ScriptCreatorGraph defines a scraping pipeline for generating web scraping scripts.
+
+    Attributes:
+        prompt (str): The prompt for the graph.
+        source (str): The source of the graph.
+        config (dict): Configuration parameters for the graph.
+        llm_model: An instance of a language model client, configured for generating answers.
+        embedder_model: An instance of an embedding model client, configured for generating embeddings.
+        verbose (bool): A flag indicating whether to show print statements during execution.
+        headless (bool): A flag indicating whether to run the graph in headless mode.
+        model_token (int): The token limit for the language model.
+        library (str): The library used for web scraping.
+
+    Args:
+        prompt (str): The prompt for the graph.
+        source (str): The source of the graph.
+        config (dict): Configuration parameters for the graph.
+
+    Example:
+        >>> script_creator = ScriptCreatorGraph(
+        ...     "List me all the attractions in Chioggia.",
+        ...     "https://en.wikipedia.org/wiki/Chioggia",
+        ...     {"llm": {"model": "gpt-3.5-turbo"}}
+        ... )
+        >>> result = script_creator.run()
     """
 
     def __init__(self, prompt: str, source: str, config: dict):
-        """
-        Initializes the ScriptCreatorGraph with a prompt, source, and configuration.
-        """
-        self.library = config['library']
-
         super().__init__(prompt, config, source)
 
         self.input_key = "url" if source.startswith("http") else "local_dir"
+        self.library = config['library']
 
-    def _create_graph(self):
+    def _create_graph(self) -> BaseGraph:
         """
         Creates the graph of nodes representing the workflow for web scraping.
+
+        Returns:
+            BaseGraph: A graph instance representing the web scraping workflow.
         """
+
         fetch_node = FetchNode(
             input="url | local_dir",
             output=["doc"],
@@ -76,7 +100,11 @@ def _create_graph(self):
     def run(self) -> str:
         """
         Executes the web scraping process and returns the answer to the prompt.
+
+        Returns:
+            str: The answer to the prompt.
         """
+
         inputs = {"user_prompt": self.prompt, self.input_key: self.source}
         self.final_state, self.execution_info = self.graph.execute(inputs)
 

diff --git a/scrapegraphai/graphs/search_graph.py b/scrapegraphai/graphs/search_graph.py
@@ -1,6 +1,7 @@
 """ 
-Module for making the search on the intenet
+SearchGraph Module
 """
+
 from .base_graph import BaseGraph
 from ..nodes import (
     SearchInternetNode,
@@ -14,13 +15,37 @@
 
 class SearchGraph(AbstractGraph):
     """ 
-    Module for searching info on the internet
+    SearchGraph is a scraping pipeline that searches the internet for answers to a given prompt.
+    It only requires a user prompt to search the internet and generate an answer.
+
+    Attributes:
+        prompt (str): The user prompt to search the internet.
+        llm_model: An instance of a language model client, configured for generating answers.
+        embedder_model: An instance of an embedding model client, configured for generating embeddings.
+        headless (bool): A flag to run the browser in headless mode.
+        verbose (bool): A flag to display the execution information.
+        model_token (int): The token limit for the language model.
+
+    Args:
+        prompt (str): The user prompt to search the internet.
+        config (dict): Configuration parameters for the graph.
+
+    Example:
+        >>> search_graph = SearchGraph(
+        ...     "What is Chioggia famous for?",
+        ...     {"llm": {"model": "gpt-3.5-turbo"}}
+        ... )
+        >>> result = search_graph.run()
     """
 
-    def _create_graph(self):
+    def _create_graph(self) -> BaseGraph:
         """
         Creates the graph of nodes representing the workflow for web scraping and searching.
+
+        Returns:
+            BaseGraph: A graph instance representing the web scraping and searching workflow.
         """
+
         search_internet_node = SearchInternetNode(
             input="user_prompt",
             output=["url"],
@@ -83,7 +108,11 @@ def _create_graph(self):
     def run(self) -> str:
         """
         Executes the web scraping and searching process.
+        
+        Returns:
+            str: The answer to the prompt.
         """
+
         inputs = {"user_prompt": self.prompt}
         self.final_state, self.execution_info = self.graph.execute(inputs)