In [4]:
import modelbit
import nest_asyncio
import tempfile
from llama_parse import LlamaParse  # Ensure this is the correct module import
import os
import base64

# Authenticate this notebook session with Modelbit; `mb` is used later to
# deploy the parsing function.
mb = modelbit.login()

# Patch asyncio so event loops can be nested. Notebooks already run an event
# loop, and the parser client drives async code from synchronous calls.
nest_asyncio.apply()

# Read the Llama Cloud API key from the environment only. A secret must never
# be embedded in source as a fallback value: it leaks through version control,
# notebook exports and shared files. Any key that was previously hard-coded
# here should be treated as compromised and rotated.
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
if not LLAMA_CLOUD_API_KEY:
    raise RuntimeError(
        "LLAMA_CLOUD_API_KEY environment variable is not set; "
        "export it before running this cell."
    )

# Column names that the parser is asked to extract from each document.
# Order matters: it is the order in which the names appear in the prompt.
fields = [
    "Contract Number",
    "Amount",
    "Award Date",
    "Tender Title",
    "Eval Completion Date",
    "Notification Of Award Date",
    "Sign Date",
    "Start Date",
    "End Date",
    "Agpo Certificate Number",
    "Awarded Agpo Group Id",
    "Created By",
    "Terminated",
    "Financial Year",
    "Quarter",
    "Tender Ref.",
    "PE Name",
    "Supplier Name",
    "No. of B.O.I",
    "Created At",
]

# Natural-language prompt handed to LlamaParse. It is assembled line by line
# so every newline (and the one trailing space) is explicit; the resulting
# string starts and ends with a newline.
parsing_instructions = (
    "\n"
    "Extract a structured table with the following fields:\n"
    + ", ".join(fields) + ".\n"
    "\n"
    "Each field should be parsed as an individual column. \n"
    "Format date fields to YYYY-MM-DD where possible.\n"
    'For "Amount", extract numeric values only.\n'
    "Return the parsed data as a structured table or JSON.\n"
)

# Descriptive deployment metadata for the parser model.
# NOTE(review): nothing in the visible code reads this dict (the deploy call
# receives only the function) - confirm whether it is still needed.
deploy_settings = dict(
    env="production",
    model_name="parser_model",
    model_description=(
        "A parser model for extracting information from PDF files "
        "and converting to Markdown"
    ),
    # Requested resources; units are not stated anywhere in this file.
    compute=dict(cpu=2, gpu=1, memory=8),
    batch_size=32,
    input_data_format="pdf",
    output_data_format="markdown",
    model_versioning=True,
)

# Function to process PDF content with LlamaParse
def llama(file_contents):
    """Parse a PDF with LlamaParse and return its content as Markdown text.

    Args:
        file_contents: The PDF document, either base64-encoded (``str`` or
            bytes-like) or as raw PDF bytes that begin with the ``%PDF``
            header.

    Returns:
        The text of every parsed document joined with newlines; the string
        ``"No text content found in document."`` when the parser produced no
        text; or ``None`` when parsing raised an error (the error is printed).

    Raises:
        binascii.Error: If ``file_contents`` is not raw PDF data and is not
            valid base64.
    """
    # Accept raw PDF bytes as well as base64. "%" is not part of the base64
    # alphabet, so a payload starting with b"%PDF" can never be valid base64
    # and the two input forms cannot be confused.
    if isinstance(file_contents, (bytes, bytearray)) and file_contents.startswith(b"%PDF"):
        pdf_bytes = bytes(file_contents)
    else:
        pdf_bytes = base64.b64decode(file_contents)

    # The parser is given a file path, so stage the bytes in a temporary
    # directory that is removed automatically on exit.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_path = os.path.join(tmp_dir, "uploaded_file.pdf")
        with open(file_path, "wb") as f:
            f.write(pdf_bytes)

        try:
            parser = LlamaParse(
                api_key=LLAMA_CLOUD_API_KEY,
                result_type="markdown",
                # The LlamaParse keyword is `parsing_instruction` (singular).
                # The plural spelling used previously is not a LlamaParse
                # option, so the instructions never reached the parser.
                parsing_instruction=parsing_instructions,
            )

            result = parser.load_data(file_path)

            if isinstance(result, list):
                # One entry per parsed document; keep only those with text.
                combined_text = "\n".join(
                    item.text for item in result if hasattr(item, "text")
                )
                return combined_text or "No text content found in document."

            # Single-object result: return its text if it carries any.
            if hasattr(result, "text"):
                return result.text
            return "No text content found in document."

        except Exception as e:
            # Best-effort boundary: report the failure and signal it with
            # None so callers can fall back gracefully.
            print(f"Error processing file with LlamaParse: {e}")
            return None

# Example usage: parse one local PDF, save the Markdown, then deploy `llama`.
try:
    # Read the sample PDF as raw bytes.
    with open("/home/clencyc/Dev/Llama_Parse/Package_1_Lot_2_Kulamawe_Modogashe_Consultancy_Services_Beneficial.pdf", "rb") as file:
        file_contents = file.read()

    # llama() base64-decodes its argument, so encode the raw bytes first.
    # Passing the raw PDF straight through would be decoded into garbage.
    encoded_contents = base64.b64encode(file_contents).decode("ascii")
    document_with_instruction = llama(encoded_contents)

    # llama() returns None on error and a fixed placeholder sentence when the
    # document yielded no text; neither counts as a successful parse.
    if (
        document_with_instruction
        and document_with_instruction != "No text content found in document."
    ):
        # Save the output; explicit UTF-8 because parsed Markdown may contain
        # characters outside the platform's default encoding.
        output_path = "/tmp/output.md"
        with open(output_path, "w", encoding="utf-8") as output_file:
            output_file.write(document_with_instruction)
        print(f"Document processed successfully. Output saved to {output_path}")
    else:
        print("Document processing failed or no data found.")

    # Deploy the model using Modelbit (runs whether or not the sample parsed).
    mb.deploy(llama)
    print("Model deployed successfully with Modelbit")

except Exception as e:
    # Top-level boundary of the notebook cell: report and stop.
    print(f"Error during file processing or deployment: {e}")
 

Started parsing the file under job_id 19b9343b-f1f5-4184-b9f1-3d264b000b18
Document processed successfully. Output saved to /tmp/output.md


Model deployed successfully with Modelbit
