In [None]:
# Install Tesseract OCR
!apt-get install -y tesseract-ocr
!pip install pytesseract opencv-python textblob numpy matplotlib



In [None]:
import cv2
import pytesseract
import numpy as np
import matplotlib.pyplot as plt
from textblob import TextBlob


In [None]:
pip install easyocr
pip install torch torchvision torchaudio
pip install fuzzywuzzy

In [None]:
import cv2
import pytesseract
import numpy as np
import re
from fuzzywuzzy import process
from PIL import Image
import spacy
from google.colab.patches import cv2_imshow

# Load a pre-trained medical Named Entity Recognition (NER) model.
# "en_core_med7_lg" may need to be installed separately; if loading fails for
# any reason, fall back to the general-purpose English model.
try:
    nlp = spacy.load("en_core_med7_lg")
except Exception as load_error:
    # `except Exception` (not a bare `except:`) keeps the best-effort fallback
    # but lets KeyboardInterrupt/SystemExit propagate, and the reason is
    # reported instead of being silently discarded.
    print(f"Warning: could not load 'en_core_med7_lg' ({load_error}); "
          "falling back to 'en_core_web_sm'.")
    # NOTE(review): the general model is not expected to emit "DRUG" entities,
    # so the NER step in identify_drug will likely contribute nothing — confirm.
    nlp = spacy.load("en_core_web_sm")

# Reference vocabulary of known drug names; OCR output is fuzzy-matched
# against these entries.
drug_list = [
    "Aspirin",
    "Paracetamol",
    "Azithromycin",
    "Ibuprofen",
    "Metformin",
    "Amoxicillin",
    "Oflozest OZ",
    "Azenac-MR",
    "Andial",
    "Zofer",
    "Phenytoin Sodium",
]

def preprocess_image(image_path):
    """Load an image as grayscale and binarize it for OCR.

    The image goes through a contrast/brightness adjustment, a Gaussian blur,
    Otsu thresholding and a morphological closing. The result is also shown
    with ``cv2_imshow`` as a visual debugging aid.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The binarized image produced by the final ``cv2.morphologyEx`` call.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``image_path``.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports a missing or unreadable file by returning None rather
    # than raising; fail here with a clear message instead of an opaque OpenCV
    # error in the next call.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    # Increase contrast (scale by alpha, shift by beta, saturate to 8-bit)
    image = cv2.convertScaleAbs(image, alpha=2.0, beta=50)

    # Apply Gaussian Blur to reduce noise
    blurred = cv2.GaussianBlur(image, (5, 5), 0)

    # Apply OTSU thresholding (the 0 threshold is ignored; Otsu picks it)
    _, processed_img = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Morphological closing with a 2x2 kernel to remove small noise
    kernel = np.ones((2, 2), np.uint8)
    processed_img = cv2.morphologyEx(processed_img, cv2.MORPH_CLOSE, kernel)

    # Show preprocessed image for debugging
    cv2_imshow(processed_img)

    return processed_img

def extract_text(image_path):
    """Run Tesseract OCR on the preprocessed version of an image.

    Args:
        image_path: Path of the image file; forwarded to ``preprocess_image``.

    Returns:
        The string returned by ``pytesseract.image_to_string``.
    """
    # --oem 1: neural-network OCR engine; --psm 4: treat the page as a single
    # column of text.
    tesseract_flags = r'--oem 1 --psm 4'
    binarized = preprocess_image(image_path)
    ocr_output = pytesseract.image_to_string(binarized, config=tesseract_flags)
    print("Extracted Text:", ocr_output)  # debugging aid
    return ocr_output

def identify_drug(extracted_text, threshold=75):
    """Identify known drug names in OCR-extracted text.

    Two sources of candidates are combined:

    1. Every whitespace-separated word of the text (after stripping
       non-alphanumeric characters) is fuzzy-matched against ``drug_list``.
    2. Entities labelled ``"DRUG"`` by the ``nlp`` pipeline are fuzzy-matched
       against ``drug_list`` as well.

    A candidate is accepted when its best fuzzy score is strictly greater than
    ``threshold``. In both cases the canonical ``drug_list`` entry is recorded,
    so the same drug is not reported twice under different spellings.

    Args:
        extracted_text: Text produced by OCR.
        threshold: Minimum (exclusive) fuzzy-match score for accepting a
            candidate. Defaults to 75, the value previously hard-coded.

    Returns:
        List of matched ``drug_list`` names without duplicates, in order of
        first appearance (word matches first, then NER matches).
    """
    # Nothing to match, or nothing to match against.
    if not extracted_text or not extracted_text.strip() or not drug_list:
        return []

    cleaned_text = re.sub(r'[^a-zA-Z0-9\s]', '', extracted_text)
    identified_drugs = []

    # NOTE(review): very short tokens may score highly against longer names
    # through partial matching — consider a minimum token length; verify.
    for word in cleaned_text.split():
        match, score = process.extractOne(word, drug_list)
        print(f"Word: {word}, Match: {match}, Score: {score}")  # Debugging step
        if score > threshold:
            identified_drugs.append(match)

    # Use Named Entity Recognition (NER) to refine drug identification. The
    # entity text is mapped to its canonical drug_list name instead of being
    # appended verbatim, so it merges cleanly with the word-level matches.
    doc = nlp(extracted_text)
    for ent in doc.ents:
        if ent.label_ != "DRUG":
            continue
        match, score = process.extractOne(ent.text, drug_list)
        if score > threshold:
            identified_drugs.append(match)

    # dict.fromkeys removes duplicates while keeping a deterministic order
    # (list(set(...)) returned an arbitrary order).
    return list(dict.fromkeys(identified_drugs))

# Example usage: OCR a single sample image, then list the known drugs found in
# the recognised text.
# NOTE(review): "/content/..." looks like a Colab working-directory path — the
# image presumably has to be uploaded there before this cell runs; confirm.
image_path = "/content/9.jpg"
# Preprocesses the image (and displays it) before running Tesseract.
extracted_text = extract_text(image_path)
# Fuzzy-matches the OCR text against drug_list and merges NER results.
identified_drugs = identify_drug(extracted_text)
print("Extracted Drugs:", identified_drugs)
