In [1]:
pip install langchain openai azure-ai-formrecognizer pdfplumber tqdm

Collecting azure-ai-formrecognizer
  Using cached azure_ai_formrecognizer-3.3.3-py3-none-any.whl.metadata (64 kB)
Collecting pdfplumber
  Downloading pdfplumber-0.11.4-py3-none-any.whl.metadata (41 kB)
Collecting msrest>=0.6.21 (from azure-ai-formrecognizer)
  Using cached msrest-0.7.1-py3-none-any.whl.metadata (21 kB)
Collecting azure-common>=1.1 (from azure-ai-formrecognizer)
  Using cached azure_common-1.1.28-py2.py3-none-any.whl.metadata (5.0 kB)
Collecting pdfminer.six==20231228 (from pdfplumber)
  Downloading pdfminer.six-20231228-py3-none-any.whl.metadata (4.2 kB)
Collecting Pillow>=9.1 (from pdfplumber)
  Downloading pillow-11.0.0-cp311-cp311-win_amd64.whl.metadata (9.3 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.0-py3-none-win_amd64.whl.metadata (48 kB)
Collecting cryptography>=36.0.0 (from pdfminer.six==20231228->pdfplumber)
  Downloading cryptography-43.0.3-cp39-abi3-win_amd64.whl.metadata (5.4 kB)
Collecting isodate>=0.6.0 (from msrest>=0

In [11]:
import os
import pdfplumber
from langchain.llms import AzureOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

In [12]:
# 1. Set Azure OpenAI environment variables
os.environ["AZURE_OPENAI_API_VERSION"] = "2023-07-01-preview"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://dskumar.openai.azure.com/"
os.environ["AZURE_OPENAI_API_KEY"] = "62855d6dd08945819bf83aee0c104127"
os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"] = "DskumarDeployment"
os.environ["OPENAI_TYPE"] = "Azure"
os.environ["LLM_MODEL"] = "gpt-35-turbo-16k"
os.environ["LLM_EMBEDDING_MODEL"] = "dskumar-text-embedding-ada-002"

In [13]:
# 2. Define the PDF extraction function
def extract_text_from_pdf(pdf_path):
    full_text = ""
    try:
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                full_text += page.extract_text()
        return full_text
    except Exception as e:
        print(f"Error extracting PDF content: {e}")
        return None

In [14]:
# 3. Initialize the Azure OpenAI LLM
llm = AzureOpenAI(
    deployment_name=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
    model_name=os.environ["LLM_MODEL"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_base=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    temperature=0.5
)

                api_base was transferred to model_kwargs.
                Please confirm that api_base is what you intended.
  exec(code_obj, self.user_global_ns, self.user_ns)


In [15]:
# 4. Define the prompts
prompt1 = PromptTemplate(
    input_variables=["content"],
    template="Summarize the following document: {content}"
)

prompt2 = PromptTemplate(
    input_variables=["content"],
    template="List all the steps mentioned in the document in sequential order: {content}"
)

prompt3 = PromptTemplate(
    input_variables=["steps", "technology"],
    template="For the following steps: {steps}, suggest feasible solutions using the given technology: {technology}."
)

prompt4 = PromptTemplate(
    input_variables=["steps", "developer_level"],
    template="Estimate the time required to complete the following steps: {steps}. Assume the developer is {developer_level} level."
)

prompt5 = PromptTemplate(
    input_variables=["steps", "time_estimate"],
    template="Given the steps: {steps} and estimated time: {time_estimate}, provide the estimated cost for the automation."
)


In [16]:
# 5. Define the function to run the analysis
def run_automation_analysis(content, technology, developer_level):
    # Chain 1: Summarize the content
    chain1 = LLMChain(llm=llm, prompt=prompt1)
    summary = chain1.run(content=content)

    # Chain 2: List the steps
    chain2 = LLMChain(llm=llm, prompt=prompt2)
    steps = chain2.run(content=content)

    # Chain 3: Feasible solutions
    chain3 = LLMChain(llm=llm, prompt=prompt3)
    feasible_solution = chain3.run(steps=steps, technology=technology)

    # Chain 4: Time estimation
    chain4 = LLMChain(llm=llm, prompt=prompt4)
    time_estimate = chain4.run(steps=steps, developer_level=developer_level)

    # Chain 5: Cost estimation
    chain5 = LLMChain(llm=llm, prompt=prompt5)
    cost_estimate = chain5.run(steps=steps, time_estimate=time_estimate)

    # Final output
    return {
        "summary": summary,
        "steps": steps,
        "feasible_solution": feasible_solution,
        "time_estimate": time_estimate,
        "cost_estimate": cost_estimate
    }


In [18]:
# 6. Example usage
pdf_path = r"C:\Users\817840\OneDrive - Cognizant\Documents\GitHub\ML-AI\Codes\Projects&POC\PGE - Self service chatbot\MRBR followups _Sweden_ PDD_V1.pdf"
content = extract_text_from_pdf(pdf_path)

if content:
    result = run_automation_analysis(
        content=content,
        technology="UiPath",
        developer_level="intermediate"
    )

    # Print the results
    for key, value in result.items():
        print(f"{key.upper()}:\n{value}\n")
else:
    print("Failed to extract content from the PDF.")

  chain1 = LLMChain(llm=llm, prompt=prompt1)
  summary = chain1.run(content=content)


TypeError: Completions.create() got an unexpected keyword argument 'api_base'