In [2]:
# Input setup: file names of the three documents passed to the extractor below.

loan_app_name = "loan_application_form.docx"
# NOTE(review): `cr_name` points at an "industrial_license" file, the same stem
# as `il_name`. Presumably this should be the commercial-registration document
# (later cells read data["commercial_registration"]) — confirm the file name.
cr_name = "industrial_license.pdf"
il_name = "industrial_license.png"

In [3]:
from scripts.document_ext import Data_extractor

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Run the project extractor on the three input files. Later cells read the keys
# "loan_app_tables", "industrial_licence", "commercial_registration" and
# "market_data" from the returned `data` mapping.
data = Data_extractor().get(loan_app_name, cr_name, il_name)

test-1


convert /home/ubuntu/loan_analysis_app/data/loan_application_form.docx -> /home/ubuntu/loan_analysis_app/data/loan_application_form.pdf using filter : writer_pdf_Export
Overwriting: /home/ubuntu/loan_analysis_app/data/loan_application_form.pdf
D1
D2
D3
D4


In [None]:
import os
# If you're using the Python REPL tool, note it has moved to `langchain_experimental`
from langchain_experimental.tools import PythonREPLTool
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
import os
from dotenv import load_dotenv
# Load variables via python-dotenv, then read the OpenAI key from the process
# environment (None when the variable is not set).
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")
# -------------------------------------------------------------------
# 1. System instructions (the multi-step prompt).
# -------------------------------------------------------------------
# System prompt for the agent: locate Table 2.8, extract the yearly figures per
# product, then compute year-over-year sales growth as JSON.
# NOTE(review): the prompt says the model is provided with 'Market Data', but
# the human template in this cell only supplies the tables — confirm whether
# market data is meant to be passed as well.
system_instructions = """\
You are a smart business analysis tool provided with 'Tables of loan application' and 'Market Data'.
You have access to a math tool for calculations when necessary.

Follow these steps:
1. Identify Table '2.8 Expected sales volume* (for each targeted market) during the first five years of project life' from 'Tables of loan application'.
2. Extract the yearly data for all the products contained in the table in a well structured JSON format.
3. Calculate the sales growth rate for each product (in percentage) for each year and return it as output in JSON for each product annually.

No additional text or formatting is allowed beyond these JSON objects. The data for analysis is given as under;
"""

# -------------------------------------------------------------------
# 2. Human template
# -------------------------------------------------------------------
# User-turn template; the `{tables}` placeholder is filled via str.format().
human_template = "'Tables of loan application':\n{tables}\n"

# -------------------------------------------------------------------
# 3. LLM (OpenAI Chat)
# -------------------------------------------------------------------
# Chat model used by the agent: gpt-3.5-turbo at temperature 0, with the key
# read from the environment passed explicitly.
llm = ChatOpenAI(openai_api_key=openai_api_key, model_name="gpt-3.5-turbo", temperature=0.0)

# The single tool exposed to the agent is a Python REPL (for calculations).
python_repl_tool = PythonREPLTool()
tools = [python_repl_tool]

# Build the chat-conversational ReAct agent; verbose=True prints the chain trace.
agent = initialize_agent(
    llm=llm,
    tools=tools,
    verbose=True,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
)

# -------------------------------------------------------------------
# 6. A function to run the agent using a single string input.
# -------------------------------------------------------------------
def analyze_growth_discrepancies(tables: str, market_data: str = "") -> "str | dict | list":
    """
    Ask the agent to extract Table 2.8 and compute per-product growth rates.

    The module-level ``system_instructions`` and ``human_template`` are joined
    into one prompt string and sent to the module-level ``agent`` with an empty
    chat history.

    Args:
        tables: Text of the 'Tables of loan application' to analyse.
        market_data: Accepted for backward compatibility only. It is NOT placed
            in the prompt (the human template has no slot for it), so it is now
            optional and may be omitted.

    Returns:
        Whatever ``agent.run`` returns for the final answer. This is not
        guaranteed to be a string: the recorded notebook run shows a dict when
        the agent's final answer is a JSON object.
    """
    # System instructions first, a blank separator line, then the filled
    # human template carrying the tables.
    final_prompt = (
        f"{system_instructions}\n"
        + human_template.format(tables=tables)
    )

    # The chat-conversational agent expects both "input" and "chat_history";
    # every call starts from an empty history.
    response = agent.run({"input": final_prompt, "chat_history": []})
    return response

# -------------------------------------------------------------------
# 7. Example usage
# -------------------------------------------------------------------
# Example run; the recorded output below this cell shows the guarded body
# executing when the cell is run in the notebook.
if __name__ == "__main__":
    # Inputs come from the `data` mapping produced by the extraction cell.
    example_tables = data['loan_app_tables']
    # Passed along to match the function signature; analyze_growth_discrepancies
    # does not place this value in the prompt it builds.
    example_market_data = data["market_data"]

    # Kept in a notebook-level name so a later cell can display it.
    final_json_output = analyze_growth_discrepancies(example_tables, example_market_data)
    print(final_json_output)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": {
        "Paper Cups": {
            "Year 2024": 500,
            "Year 2025": 800,
            "Year 2026": 800,
            "Year 2027": 900,
            "Year 2028": 900
        },
        "Paper Bowels": {
            "Year 2024": 100,
            "Year 2025": 200,
            "Year 2026": 50,
            "Year 2027": 500,
            "Year 2028": 600
        }
    }
}
```[0m

[1m> Finished chain.[0m
{'Paper Cups': {'Year 2024': 500, 'Year 2025': 800, 'Year 2026': 800, 'Year 2027': 900, 'Year 2028': 900}, 'Paper Bowels': {'Year 2024': 100, 'Year 2025': 200, 'Year 2026': 50, 'Year 2027': 500, 'Year 2028': 600}}


In [23]:
# Display the value returned by analyze_growth_discrepancies in the example run.
final_json_output

{'Paper Cups': {'Year 2024': 500,
  'Year 2025': 800,
  'Year 2026': 800,
  'Year 2027': 900,
  'Year 2028': 900},
 'Paper Bowels': {'Year 2024': 100,
  'Year 2025': 200,
  'Year 2026': 50,
  'Year 2027': 500,
  'Year 2028': 600}}

In [21]:
import os
import json
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_experimental.tools import PythonREPLTool
from langchain.agents import initialize_agent, AgentType

class ai_tool:
    """
    Build task-specific prompts from loan-application data and run them.

    Most requests go straight to the chat model; task "6" is routed to an agent
    equipped with a Python REPL tool so it can do calculations.
    """

    # Keys that identify a per-task ("step 1") analysis request.
    _STEP_1_KEYS = frozenset(
        {"tables", "prompt", "industrial_licence", "commercial_registration", "market_data"}
    )
    # Keys that identify a compilation ("step 2") request.
    _STEP_2_KEYS = frozenset({"compile_prompt", "input_payload"})

    # Table sections forwarded for tasks "4" and "6".
    _CORE_SECTIONS = ("1. PROJECT DATA", "2. MARKETING INFORMATION", "3. TECHNICAL INFORMATION")
    # Table sections forwarded for task "5".
    _MARKETING_SECTIONS = ("2. MARKETING INFORMATION",)

    # Prompt templates. They are written as flat strings so that source-code
    # indentation is not sent to the model as part of the prompt.
    _TEMPLATE_TASK_2 = (
        "{prompt}\n\n"
        "**Tables of loan application**:\n{tables}\n\n"
        "**Commercial Registration**:\n{commercial_registration}\n\n"
        "**Industrial License**:\n{industrial_licence}"
    )
    _TEMPLATE_LOAN_TABLES = "{prompt}\n\n**Tables of loan application**:\n{tables}"
    _TEMPLATE_MARKET = "{prompt}\n\n**Tables**:\n{tables}\n\n**Market Data**:\n{market_data}"
    _TEMPLATE_TABLES = "{prompt}\n\n**Tables**:\n{tables}"
    _TEMPLATE_COMPILE = "{prompt}\n\n**Input Payload**:\n{input_payload}"

    def __init__(self, model="gpt-4o", temperature=0):
        """
        Validate the OpenAI API key, then create the chat model and math agent.

        Args:
            model: Model name passed to ``ChatOpenAI``.
            temperature: Sampling temperature passed to ``ChatOpenAI``.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is not set.
        """
        self._load_and_validate_api_key()
        self.llm = ChatOpenAI(model=model, temperature=temperature)
        # NOTE(review): if the agent trips on malformed model output, the
        # executor option to look at is `handle_parsing_errors=True` (the
        # previously commented-out `handle_parse_errors` is not that name).
        self.math_agent = initialize_agent(
            tools=[PythonREPLTool()],
            llm=self.llm,
            agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
            verbose=True
        )

    @staticmethod
    def _load_and_validate_api_key():
        """
        Load environment variables via python-dotenv and require OPENAI_API_KEY.

        Raises:
            ValueError: If the key is missing or empty.
        """
        load_dotenv()
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OpenAI API key not found. Please set it in your .env file.")
        os.environ["OPENAI_API_KEY"] = api_key

    @classmethod
    def _serialise_tables(cls, tables, task_number):
        """Return the tables text for a task: non-dicts pass through, dicts are
        narrowed to the task's sections and JSON-encoded."""
        if not isinstance(tables, dict):
            return tables
        if task_number == "2":
            # Task 2 sends only the contents of the project-data section.
            return json.dumps(tables["1. PROJECT DATA"])
        if task_number in ("4", "6"):
            wanted = cls._CORE_SECTIONS
        elif task_number == "5":
            wanted = cls._MARKETING_SECTIONS
        else:
            # Any other task receives every section.
            return json.dumps(tables)
        return json.dumps({name: body for name, body in tables.items() if name in wanted})

    @classmethod
    def _build_request(cls, inputs: dict):
        """Map ``inputs`` to ``(template, llm_input, use_math_agent)`` or raise
        ValueError when neither request shape matches."""
        present = inputs.keys()

        if cls._STEP_1_KEYS.issubset(present):
            task_number = inputs["prompt"]["task_number"]
            llm_input = {
                "prompt": inputs["prompt"]["prompt"],
                "tables": cls._serialise_tables(inputs["tables"], task_number),
            }
            if task_number == "2":
                llm_input["commercial_registration"] = "\n\n" + inputs["commercial_registration"]
                llm_input["industrial_licence"] = "\n\n" + inputs["industrial_licence"]
                return cls._TEMPLATE_TASK_2, llm_input, False
            if task_number in ("4", "5"):
                return cls._TEMPLATE_LOAN_TABLES, llm_input, False
            if task_number == "6":
                llm_input["market_data"] = "\n\n" + inputs["market_data"]
                # Task 6 is the only one sent to the REPL-equipped agent.
                return cls._TEMPLATE_MARKET, llm_input, True
            return cls._TEMPLATE_TABLES, llm_input, False

        if cls._STEP_2_KEYS.issubset(present):
            llm_input = {
                "prompt": inputs["compile_prompt"],
                "input_payload": "\n\n".join(inputs["input_payload"]),
            }
            return cls._TEMPLATE_COMPILE, llm_input, False

        raise ValueError(
            "Input dictionary must contain either all of "
            f"{sorted(cls._STEP_1_KEYS)} or all of {sorted(cls._STEP_2_KEYS)}"
        )

    def analyse(self, inputs: dict):
        """
        Run an LLM analysis for one request.

        Two request shapes are accepted (step 1 takes precedence if both match):

        * Step 1 — keys ``tables``, ``prompt``, ``industrial_licence``,
          ``commercial_registration`` and ``market_data``. ``prompt`` is a
          mapping with ``"task_number"`` and ``"prompt"``; ``tables`` is either
          a string or a dict of sections that is narrowed per task and
          JSON-encoded.
        * Step 2 — keys ``compile_prompt`` (instruction text) and
          ``input_payload`` (iterable of strings, joined with blank lines).

        Args:
            inputs: Request dictionary in one of the shapes above.

        Returns:
            The chat model's ``content`` for most requests. For task "6" the
            value returned by the math agent's ``run``, which may already be a
            parsed structure rather than a string.

        Raises:
            ValueError: If ``inputs`` matches neither request shape.
        """
        template, llm_input, use_math_agent = self._build_request(inputs)

        prompt = PromptTemplate(
            input_variables=list(llm_input),
            template=template
        )

        # The routing flag comes from _build_request, so step-2 requests (which
        # carry no "prompt" key) no longer hit a KeyError here.
        if use_math_agent:
            prompt_formatted = prompt.format(**llm_input)
            return self.math_agent.run({"input": prompt_formatted, "chat_history": []})

        chain = prompt | self.llm
        result = chain.invoke(input=llm_input)
        return result.content

In [22]:
# Build the analysis helper and pull the extracted pieces out of `data`.
ai_agent = ai_tool()
l_appt = data["loan_app_tables"]
il = data["industrial_licence"]
cr = data["commercial_registration"]
md = data["market_data"]

print("Data Extraction Completed!")

# NOTE(review): `prompts` is not defined in any cell shown here, so this cell
# depends on state created elsewhere. The name `input` also shadows Python's
# built-in input() for the rest of the kernel session.
input = prompts
all_results = []
# Only the prompt at index 5 is run; the enumerate() index is not used.
for index, prompt in enumerate([input[5]]):
    inputs = {
    "prompt": prompt,
    "tables": l_appt,
    "industrial_licence": il,
    "commercial_registration": cr,
    # Serialised to text because analyse() concatenates market data to a string.
    "market_data": json.dumps(md, indent=4)
    }

    analysis_result = ai_agent.analyse(inputs)
    all_results.append(analysis_result)


Data Extraction Completed!


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": [
        {
            "Perspective": "Market",
            "Request": "Please provide the basis and reasoning for your projected 0% growth rate for Paper Bowels in 2026 as mentioned in Table 2.8, while the market shows a 4% growth rate."
        },
        {
            "Perspective": "Market",
            "Request": "Please revise your sales estimates for Paper Bowels in 2026 to include an appropriate growth percentage in Table 2.8, as the market shows a 4% growth rate."
        }
    ]
}
```[0m

[1m> Finished chain.[0m


In [23]:
# Display the first entry collected in `all_results` by the analysis loop.
all_results[0]

[{'Perspective': 'Market',
  'Request': 'Please provide the basis and reasoning for your projected 0% growth rate for Paper Bowels in 2026 as mentioned in Table 2.8, while the market shows a 4% growth rate.'},
 {'Perspective': 'Market',
  'Request': 'Please revise your sales estimates for Paper Bowels in 2026 to include an appropriate growth percentage in Table 2.8, as the market shows a 4% growth rate.'}]