<a href="https://colab.research.google.com/github/abhiramsb/DocAssistant/blob/main/OCR_Abhiram.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# prompt: Use OpenAI's ChatGPT to perform OCR on a medical prescription and display the result as a table

!pip install openai pandas

import openai
import pandas as pd
from google.colab import files

# Read the OpenAI API key from the OPENAI_API_KEY environment variable so the
# secret does not have to be saved inside the notebook. If the variable is
# unset or empty, fall back to the placeholder, which can still be replaced
# by hand with your actual OpenAI API key.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY") or "YOUR_API_KEY"
# Sample prescription image (remote URL). extract_prescription_info() opens
# its argument as a local file, and image_path is reassigned to the uploaded
# file's name further down in this cell, so this value is a reference only.
image_path = 'https://www.researchgate.net/publication/345830022/figure/fig4/AS:957640029003789@1605330583881/Sample-prescription-used-as-input-to-the-GUI-developed-in-the-present-work.png'
def extract_prescription_info(image_path):
  """
  Extracts information from a medical prescription image using OpenAI's GPT.

  The image is read from disk, base64-encoded into a data URL and sent to the
  chat-completions endpoint together with a prompt asking for a table with
  the columns 'Drug Name', 'Dosage', 'Frequency', 'Route' and 'Refills'. The
  text of the first choice is then parsed as tab-separated data.

  Args:
    image_path: Path to the image file.

  Returns:
    A pandas DataFrame containing the extracted information, or None if any
    step fails (the error is printed rather than raised).
  """
  # Imported locally because this cell's import list does not provide them.
  import base64
  import io
  import mimetypes

  try:
    # Read and encode the image first so the file handle is closed before the
    # (potentially slow) network request is made.
    with open(image_path, "rb") as image_file:
      image_base64 = base64.b64encode(image_file.read()).decode('utf-8')

    # Label the data URL with the real image type (e.g. image/png); fall back
    # to JPEG when the type cannot be guessed from the file name.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"

    # openai>=1.0 exposes the endpoint as openai.chat.completions.create;
    # older releases only provide openai.ChatCompletion.create.
    if hasattr(openai, "chat"):
      create_completion = openai.chat.completions.create
    else:
      create_completion = openai.ChatCompletion.create

    # NOTE(review): "gpt-4-vision-preview" is a preview model name and may no
    # longer be served -- confirm and switch to a current vision model.
    response = create_completion(
      model="gpt-4-vision-preview",
      messages=[
        {
          "role": "user",
          "content": [
            {
              "type": "text",
              "text": "Extract the information from this medical prescription. Return it as a table with the following columns: 'Drug Name', 'Dosage', 'Frequency', 'Route', 'Refills'"
            },
            {
              "type": "image_url",
              "image_url": {
                "url": f"data:{mime_type};base64," + image_base64
              }
            }
          ]
        }
      ],
      max_tokens=500,
      temperature=0
    )

    # Extract the table from the response
    table_text = response.choices[0].message.content

    # Convert the table text to a DataFrame.
    # NOTE(review): this assumes the model answers with tab-separated text; a
    # Markdown-style table would end up in a single column -- verify against
    # real responses.
    df = pd.read_table(io.StringIO(table_text), sep='\t')
    return df
  except Exception as e:
    # Best-effort notebook helper: report the problem and signal failure with
    # None, which the calling cell checks for.
    print(f"An error occurred: {e}")
    return None

# Upload the image file
uploaded = files.upload()
# Take the first uploaded file name, or None when nothing was uploaded
# (indexing an empty list of keys would raise IndexError).
image_path = next(iter(uploaded), None)

# Extract information from the image (skipped when there is no file)
df = extract_prescription_info(image_path) if image_path is not None else None

# Display the result
if image_path is None:
  print("No file was uploaded.")
elif df is not None:
  print(df)
else:
  print("Failed to extract information.")


In [None]:
# prompt: Use Gemini instead of OpenAI for the above task

import base64
import pandas as pd
from google.colab import files
from langchain.llms import Gemini

!pip install langchain

# Read the Gemini API key from the GEMINI_API_KEY environment variable so the
# secret does not have to be saved inside the notebook. If the variable is
# unset or empty, fall back to the placeholder, which can still be replaced
# by hand with your actual Gemini API key.
import os

gemini_api_key = os.environ.get("GEMINI_API_KEY") or "YOUR_GEMINI_API_KEY"

def extract_prescription_info(image_path):
  """
  Extracts information from a medical prescription image using Gemini.

  The image is read from disk, base64-encoded and embedded as a data URL in
  a text prompt asking for a table with the columns 'Drug Name', 'Dosage',
  'Frequency', 'Route' and 'Refills'. The model's reply is then parsed as
  tab-separated data.

  Args:
    image_path: Path to the image file.

  Returns:
    A pandas DataFrame containing the extracted information, or None if any
    step fails (the error is printed rather than raised).
  """
  # Imported locally because this cell's import list does not provide them.
  import io
  import mimetypes

  try:
    # Read and encode the image first so the file handle is closed before the
    # model is called.
    with open(image_path, "rb") as image_file:
      image_base64 = base64.b64encode(image_file.read()).decode('utf-8')

    # Label the data URL with the real image type (e.g. image/png); fall back
    # to JPEG when the type cannot be guessed from the file name.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"

    # NOTE(review): confirm that `Gemini` is actually exported by
    # langchain.llms in the installed version -- TODO verify the import.
    llm = Gemini(model_name="gemini-pro",
                 temperature=0,
                 max_tokens=500,
                 api_key=gemini_api_key)

    # NOTE(review): the image is passed as base64 text inside the prompt;
    # presumably a text-only model will not decode it as an image -- verify.
    prompt = f"""Extract the information from this medical prescription. Return it as a table with the following columns: 'Drug Name', 'Dosage', 'Frequency', 'Route', 'Refills'

      Image: data:{mime_type};base64,{image_base64}"""

    response = llm(prompt)

    # Extract the table from the response
    # Assuming the response contains a tabular format that can be parsed by pd.read_table
    # You might need to adjust this based on the actual response format
    df = pd.read_table(io.StringIO(response), sep='\t')
    return df
  except Exception as e:
    # Best-effort notebook helper: report the problem and signal failure with
    # None, which the calling cell checks for.
    print(f"An error occurred: {e}")
    return None

# Upload the image file
uploaded = files.upload()
# Take the first uploaded file name, or None when nothing was uploaded
# (indexing an empty list of keys would raise IndexError).
image_path = next(iter(uploaded), None)

# Extract information from the image (skipped when there is no file)
df = extract_prescription_info(image_path) if image_path is not None else None

# Display the result
if image_path is None:
  print("No file was uploaded.")
elif df is not None:
  print(df)
else:
  print("Failed to extract information.")
