In [1]:
!pip install rapidocr_onnxruntime



# Converter (text to JSON) using an LLM

In [2]:
from langchain_ollama.llms import OllamaLLM
from langchain_core.prompts import PromptTemplate
import json

def _extract_json_object(response):
    """Parse the outermost ``{...}`` span found in ``response``.

    Text before the first ``{`` and after the last ``}`` (explanations,
    markdown code fences, ...) is discarded before parsing.

    Raises:
    - ValueError: if no ``{...}`` span exists or the span is not valid JSON
      (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    cleaned = response.strip()
    start_idx = cleaned.find('{')
    end_idx = cleaned.rfind('}')

    # rfind() returns -1 when '}' is missing, so this single comparison also
    # covers a closing brace that appears before the first opening brace.
    if start_idx == -1 or end_idx < start_idx:
        raise ValueError("No valid JSON object found in response")

    return json.loads(cleaned[start_idx:end_idx + 1])


def convert_text_to_json_with_llm(input_text, model="llama3.1:latest"):
    """
    Converts any input text into a structured JSON format.

    Parameters:
    - input_text (str): Any text input to be converted to JSON
    - model (str): Name of the Ollama model to query
      (defaults to "llama3.1:latest")

    Returns:
    - str: A JSON string (2-space indent, non-ASCII characters preserved)
      containing the structured data

    Raises:
    - ValueError: If the LLM response contains no parseable JSON object
    """
    # Create a generic prompt template for JSON conversion
    template = """
    Convert the following text into a proper JSON format.
    Extract all relevant information as key-value pairs.
    Return ONLY a valid JSON object, nothing else.
    
    Text to convert:
    {text}
    
    Rules:
    1. Structure all information into logical key-value pairs
    2. Group related information together
    3. Use arrays for multiple related items
    4. Maintain proper JSON syntax
    5. Return only the JSON object with no additional text or explanation
    """

    prompt = PromptTemplate(
        template=template,
        input_variables=["text"]
    )

    # Initialize the LLM
    llm = OllamaLLM(model=model)

    # Generate the formatted prompt
    formatted_prompt = prompt.format(text=input_text)

    # Get response from LLM; invoke() replaces the deprecated direct call
    # `llm(formatted_prompt)`.
    response = llm.invoke(formatted_prompt)

    try:
        parsed_json = _extract_json_object(response)
    except ValueError as e:
        # Keep the original error text but chain the cause so the traceback
        # still shows what actually went wrong while parsing.
        raise ValueError(f"Failed to parse LLM output into JSON: {str(e)}") from e

    # Return formatted JSON string
    return json.dumps(parsed_json, indent=2, ensure_ascii=False)


## Rapid OCR

In [8]:
from rapidocr_onnxruntime import RapidOCR

# Build the OCR engine once, then point it at the sample image.
engine = RapidOCR()
img_path = "1.png"

# Calling the engine returns two values: the recognition result and a second
# value stored as `elapse`.
result_RapidOCR, elapse = engine(img_path)

# Show both values, one per line.
print(result_RapidOCR, elapse, sep="\n")

[[[[427.0, 32.0], [551.0, 31.0], [551.0, 47.0], [427.0, 48.0]], 'Fortis Hospitals Limited', 0.928359697262446], [[[64.0, 47.0], [170.0, 51.0], [169.0, 88.0], [63.0, 84.0]], 'Fortis', 0.9813376267751058], [[[426.0, 45.0], [564.0, 40.0], [564.0, 59.0], [427.0, 64.0]], '154/9.Baeagof3', 0.7397212577717645], [[[428.0, 55.0], [547.0, 55.0], [547.0, 71.0], [428.0, 71.0]], 'Opp.M-B.Bengaluru-560076', 0.878893873343865], [[[429.0, 67.0], [558.0, 67.0], [558.0, 80.0], [429.0, 81.0]], 'Tel.:+91-80-66214444,2254 4444', 0.9282006283601125], [[[428.0, 78.0], [517.0, 78.0], [517.0, 91.0], [428.0, 91.0]], 'Fax: +91-80-6621 4242.', 0.9088793694972992], [[[429.0, 89.0], [540.0, 89.0], [540.0, 102.0], [429.0, 102.0]], 'care.bng@fortishealthcare.com', 0.929301203324877], [[[429.0, 99.0], [559.0, 99.0], [559.0, 112.0], [429.0, 112.0]], 'CIN No.U93000DL2009PLC222166', 0.954205005296639], [[[241.0, 143.0], [399.0, 143.0], [399.0, 156.0], [241.0, 156.0]], 'DEPARTMENTOFCARDIOLOGY', 0.9958724189888347], [[[78.

In [10]:
# Each OCR entry is [box corners, text, score] (see the printed result above).
# Only the recognised text is sent to the LLM: the coordinates and scores add
# noise to the prompt and the converter is documented to take plain text.
ocr_lines_rapidOCR = [entry[1] for entry in (result_RapidOCR or [])]
if not ocr_lines_rapidOCR:
    raise ValueError("RapidOCR returned no text; nothing to convert to JSON")
ocr_text_rapidOCR = "\n".join(ocr_lines_rapidOCR)

# Keep the JSON string and the parsed dict under separate names so that one
# variable does not change type within the cell.
json_str_rapidOCR = convert_text_to_json_with_llm(ocr_text_rapidOCR)
json_output_rapidOCR = json.loads(json_str_rapidOCR)
print(json_output_rapidOCR)

{'Patient Information': {'Name': '', 'Age': '', 'Sex': '', 'Disease': []}, 'Medical History': {'Allergies': [{'Type': '', 'Description': ''}], 'Diabetes': [], 'ITP': [], 'Other Conditions': []}, 'Medications': {'Prescribed Medications': [], 'Over-the-Counter Medications': []}, 'Physical Examination': {'Vital Signs': [{'Parameter': '', 'Value': ''}], 'Body Mass Index (BMI)': '', 'Blood Pressure': ''}, 'Medical Tests and Results': {'Lab Tests': [], 'Imaging Studies': [], 'Other Tests': []}, 'Discharge Summary': {'Date of Discharge': '', 'Reason for Discharge': '', 'Follow-up Instructions': []}}


In [11]:
# Display the raw OCR result. In the output below each entry holds four corner
# points, the recognised text, and a float score (presumably the confidence).
result_RapidOCR

[[[[427.0, 32.0], [551.0, 31.0], [551.0, 47.0], [427.0, 48.0]],
  'Fortis Hospitals Limited',
  0.928359697262446],
 [[[64.0, 47.0], [170.0, 51.0], [169.0, 88.0], [63.0, 84.0]],
  'Fortis',
  0.9813376267751058],
 [[[426.0, 45.0], [564.0, 40.0], [564.0, 59.0], [427.0, 64.0]],
  '154/9.Baeagof3',
  0.7397212577717645],
 [[[428.0, 55.0], [547.0, 55.0], [547.0, 71.0], [428.0, 71.0]],
  'Opp.M-B.Bengaluru-560076',
  0.878893873343865],
 [[[429.0, 67.0], [558.0, 67.0], [558.0, 80.0], [429.0, 81.0]],
  'Tel.:+91-80-66214444,2254 4444',
  0.9282006283601125],
 [[[428.0, 78.0], [517.0, 78.0], [517.0, 91.0], [428.0, 91.0]],
  'Fax: +91-80-6621 4242.',
  0.9088793694972992],
 [[[429.0, 89.0], [540.0, 89.0], [540.0, 102.0], [429.0, 102.0]],
  'care.bng@fortishealthcare.com',
  0.929301203324877],
 [[[429.0, 99.0], [559.0, 99.0], [559.0, 112.0], [429.0, 112.0]],
  'CIN No.U93000DL2009PLC222166',
  0.954205005296639],
 [[[241.0, 143.0], [399.0, 143.0], [399.0, 156.0], [241.0, 156.0]],
  'DEPARTMENT

In [12]:
# Display the parsed JSON (a dict produced by json.loads in the earlier cell)
# using the notebook's pretty-printed repr.
json_output_rapidOCR

{'Patient Information': {'Name': '', 'Age': '', 'Sex': '', 'Disease': []},
 'Medical History': {'Allergies': [{'Type': '', 'Description': ''}],
  'Diabetes': [],
  'ITP': [],
  'Other Conditions': []},
 'Medications': {'Prescribed Medications': [],
  'Over-the-Counter Medications': []},
 'Physical Examination': {'Vital Signs': [{'Parameter': '', 'Value': ''}],
  'Body Mass Index (BMI)': '',
  'Blood Pressure': ''},
 'Medical Tests and Results': {'Lab Tests': [],
  'Imaging Studies': [],
  'Other Tests': []},
 'Discharge Summary': {'Date of Discharge': '',
  'Reason for Discharge': '',
  'Follow-up Instructions': []}}

#### RapidOCR is not giving good results here: the JSON extracted from its output contains only empty fields

In [20]:
# https://noedgeai.com/

## Work with the API