In [2]:
import os, re, json, base64
from pathlib import Path
import fitz  # PyMuPDF
from typing import List, Optional, Dict, Any

from mistralai import Mistral

def _create_mistral_client() -> Mistral:
    api_key = os.getenv("MISTRAL_API_KEY")
    if not api_key:
        raise EnvironmentError("Defina MISTRAL_API_KEY en el entorno")
    return Mistral(api_key=api_key, timeout_ms=300000)

client = _create_mistral_client()

import base64

def encode_pdf(pdf_path):
    """Encode the pdf to base64."""
    try:
        with open(pdf_path, "rb") as pdf_file:
            return base64.b64encode(pdf_file.read()).decode('utf-8')
    except FileNotFoundError:
        print(f"Error: The file {pdf_path} was not found.")
        return None
    except Exception as e:  # Added general exception handling
        print(f"Error: {e}")
        return None


from pydantic import BaseModel, Field
from enum import Enum
from mistralai.extra import response_format_from_pydantic_model

class ImageType(str, Enum):
    GRAPH = "graph"
    TEXT = "text"
    TABLE = "table"
    IMAGE = "image"

class Image(BaseModel):
    image_type: ImageType = Field(..., description="The type of the image. Must be one of 'graph', 'text', 'table' or 'image'.")
    description: str = Field(..., description="A description of the image.")

class Document(BaseModel):
    language: str = Field(..., description="The language of the document in ISO 639-1 code format (e.g., 'en', 'fr').")
    summary: str = Field(..., description="A summary of the document.")
    authors: list[str] = Field(..., description="A list of authors who contributed to the document.")

from src.config.models.set_10 import Set10ExtractionModel

def process_ocr_mistral(pdf_path, extraction_model):
    # Getting the base64 string
    base64_pdf = encode_pdf(pdf_path)

    # Call the OCR API
    pdf_response = client.ocr.process(
        model="mistral-ocr-latest",
        pages=list(range(8)), # Document Annotations has a limit of 8 pages, we recommend spliting your documents when using it; bbox annotations does not have the same limit
        document={
            "type": "document_url",
            "document_url": f"data:application/pdf;base64,{base64_pdf}"
        },
        document_annotation_format=response_format_from_pydantic_model(extraction_model),
        include_image_base64=False # We are not interested on retrieving the bbox images in this example, only their annotations
    )

    # Convert response to JSON format
    return pdf_response


In [5]:
# 1) Rasteriza
pdf_path_10 = "C:/Users/Ivan/Documents/VALORACION/ESTABILIDAD_FASE_MOVIL/DATA_CROMATOGRAFICA_ESTABILIDAD_FASE_MOVIL/ESTABILIDAD FM T1 V1.pdf"
pdf_path_8 = "C:/Users/Ivan/Documents/VALORACION/ESTABILIDAD_SOLUCIONES/REPORTE_ESTABILIDAD_SOLUCIONES/REPORTE_LIMS_ESTABILIDAD_SOLUCIONES.pdf"
pdf_path_7 = "C:/Users/Ivan/Documents/VALORACION/PRECISION_INTERMEDIA/REPORTE_PRECISION_INTERMEDIA/REPORTE_LIMS_PRECISION_INTERMEDIA.pdf"

from src.config.models.set_10 import Set10ExtractionModel
from src.config.models.set_8 import Set8ExtractionModel
from src.config.models.set_7 import Set7ExtractionModel

ocr_result = process_ocr_mistral(pdf_path_7, Set7ExtractionModel)


print(json.loads(ocr_result.document_annotations))


AttributeError: 'OCRResponse' object has no attribute 'document_annotations'

In [1]:

import json
from src.tools.linealidad_tool import LinearidadTool
import os

params = {
    "concentracion": [0.011979992, 0.011979992, 0.011979992, 0.017969988, 0.017969988, 0.017969988, 0.023959984, 0.023959984, 0.023959984, 0.02994998, 0.02994998, 0.02994998, 0.035939976, 0.035939976, 0.035939976],
    "area_pico": [7.384313887, 7.389636488, 7.405480641, 11.11360372, 11.1339833, 11.15028289, 14.75781586, 14.76604324, 14.76306989, 18.44529736, 18.4664459, 18.47933245, 22.15107203, 22.12680985, 22.14509776],
    "devolver_base64": False,
    "nombre_analito": "Hidrocodona"
}

result = LinearidadTool()._run(**params)
print(result)


{
  "regresion_png_path": "c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_regresion_0ebcfdb2-41ad-4c2d-903c-e2a6b7a25663.png",
  "residuales_png_path": "c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_residuales_263cf5e9-1238-4cd6-bd52-2a10150f4bb2.png"
}


In [2]:

from concurrent.futures import ThreadPoolExecutor
from src.tools.linealidad_tool import LinearidadTool

input1 = dict(
    concentracion=[0.390520944,0.390520944,0.390520944,0.585781416,0.585781416,0.585781416,0.781041888,0.781041888,0.781041888,0.97630236,0.97630236,0.97630236,1.171562832,1.171562832,1.171562832],
    area_pico=[28.34025136,28.33240449,28.3559417,42.12384439,42.1629014,42.14551766,55.61232423,55.59917092,55.6129532,69.10763656,69.12511413,69.15910298,84.21662857,84.22366482,84.14363356],
    devolver_base64=False,
    nombre_analito="Acetaminofen"
)

input2 = dict(
    concentracion=[0.011979992,0.011979992,0.011979992,0.017969988,0.017969988,0.017969988,0.023959984,0.023959984,0.023959984,0.02994998,0.02994998,0.02994998,0.035939976,0.035939976,0.035939976],
    area_pico=[7.384313887,7.389636488,7.405480641,11.11360372,11.1339833,11.15028289,14.75781586,14.76604324,14.76306989,18.44529736,18.4664459,18.47933245,22.15107203,22.12680985,22.14509776],
    devolver_base64=False,
    nombre_analito="Hidrocodona"
)

tool = LinearidadTool()

def call(params):
    return tool._run(**params)

with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [executor.submit(call, input1), executor.submit(call, input2)]

for f in futures:
    print(f.result())


{
  "regresion_png_path": "c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_regresion_dc42a3ef-18b9-4e14-af6a-67c088ca78e7.png",
  "residuales_png_path": "c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_residuales_ed87be9a-489c-4ecc-84e0-df518b7496fa.png"
}
{
  "regresion_png_path": "c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_regresion_54e54f6a-df93-44ff-a0a6-fbf1b9085761.png",
  "residuales_png_path": "c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_residuales_08854a59-f3ba-441e-a953-82faf8558072.png"
}


In [3]:

from concurrent.futures import ThreadPoolExecutor
from src.tools.linealidad_tool import LinearidadTool
import json

input1 = dict(
    concentracion=[0.390520944,0.390520944,0.390520944,0.585781416,0.585781416,0.585781416,0.781041888,0.781041888,0.781041888,0.97630236,0.97630236,0.97630236,1.171562832,1.171562832,1.171562832],
    area_pico=[28.34025136,28.33240449,28.3559417,42.12384439,42.1629014,42.14551766,55.61232423,55.59917092,55.6129532,69.10763656,69.12511413,69.15910298,84.21662857,84.22366482,84.14363356],
    devolver_base64=False,
    nombre_analito="Acetaminofen"
)

input2 = dict(
    concentracion=[0.011979992,0.011979992,0.011979992,0.017969988,0.017969988,0.017969988,0.023959984,0.023959984,0.023959984,0.02994998,0.02994998,0.02994998,0.035939976,0.035939976,0.035939976],
    area_pico=[7.384313887,7.389636488,7.405480641,11.11360372,11.1339833,11.15028289,14.75781586,14.76604324,14.76306989,18.44529736,18.4664459,18.47933245,22.15107203,22.12680985,22.14509776],
    devolver_base64=False,
    nombre_analito="Hidrocodona"
)

def call(params):
    tool = LinearidadTool()
    return json.loads(tool._run(**params))

with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [executor.submit(call, input1), executor.submit(call, input2)]

for idx, f in enumerate(futures, 1):
    print(f"Resultado {idx}:", f.result())


Resultado 1: {'regresion_png_path': 'c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_regresion_7069c3b2-19b9-4021-862c-db6494bddaf8.png', 'residuales_png_path': 'c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_residuales_e20428d8-c023-4fde-9f52-5a604e662481.png'}
Resultado 2: {'regresion_png_path': 'c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_regresion_f3cd5807-b8ae-4b7b-a887-d16653406c93.png', 'residuales_png_path': 'c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_residuales_14943874-7be2-4b2b-bfc5-33f749c4e504.png'}


In [5]:

from src.tools.linealidad_tool import LinearidadTool
import json

inputs = [
    dict(
        concentracion=[0.390520944,0.390520944,0.390520944,0.585781416,0.585781416,0.585781416,0.781041888,0.781041888,0.781041888,0.97630236,0.97630236,0.97630236,1.171562832,1.171562832,1.171562832],
        area_pico=[28.34025136,28.33240449,28.3559417,42.12384439,42.1629014,42.14551766,55.61232423,55.59917092,55.6129532,69.10763656,69.12511413,69.15910298,84.21662857,84.22366482,84.14363356],
        devolver_base64=False,
        nombre_analito="Acetaminofen"
    ),
    dict(
        concentracion=[0.011979992,0.011979992,0.011979992,0.017969988,0.017969988,0.017969988,0.023959984,0.023959984,0.023959984,0.02994998,0.02994998,0.02994998,0.035939976,0.035939976,0.035939976],
        area_pico=[7.384313887,7.389636488,7.405480641,11.11360372,11.1339833,11.15028289,14.75781586,14.76604324,14.76306989,18.44529736,18.4664459,18.47933245,22.15107203,22.12680985,22.14509776],
        devolver_base64=False,
        nombre_analito="Hidrocodona"
    )
]

tool = LinearidadTool()
for idx, params in enumerate(inputs, 1):
    result = json.loads(tool._run(**params))
    print(f"Resultado {idx}: {result}")


Resultado 1: {'regresion_png_path': 'c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_regresion_2be1ec93-5311-415a-81d5-4cd9bd508e3b.png', 'residuales_png_path': 'c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_residuales_352be113-cb50-43fb-9889-d7e1091c4230.png'}
Resultado 2: {'regresion_png_path': 'c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_regresion_744bda45-c889-4957-a73c-d3ddccdfba7f.png', 'residuales_png_path': 'c:\\Users\\Ivan\\Documents\\IR Consulting\\Valida\\src\\images\\linealidad_residuales_e0157a70-12ce-40fc-911f-f12b22612191.png'}
