<a href="https://colab.research.google.com/github/War-rack/PillBot/blob/main/Model_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio opencv-python numpy torch torchvision transformers paddleocr nltk symspellpy matplotlib albumentations paddlepaddle pillow

import gradio as gr
import cv2
import numpy as np
import torch
import re
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from paddleocr import PaddleOCR
import nltk
from nltk.corpus import words
from symspellpy import SymSpell
import pkg_resources

# Run inference on the GPU when CUDA is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fetch the NLTK "words" corpus and keep its entries in a set.
# NOTE(review): english_words is not referenced by any function visible in
# this cell -- confirm whether it is still needed.
nltk.download("words")
english_words = set(words.words())

# Spell checker: SymSpell with a maximum dictionary edit distance of 2, loaded
# from the English frequency dictionary file shipped inside the symspellpy
# package (term in column 0, count in column 1).
# NOTE(review): sym_spell is configured here but not used by the OCR functions
# visible in this cell -- confirm where the correction step is applied.
sym_spell = SymSpell(max_dictionary_edit_distance=2)
sym_spell.load_dictionary(pkg_resources.resource_filename("symspellpy", "frequency_dictionary_en_82_765.txt"),
                         term_index=0, count_index=1)

# Lower-case domain vocabulary: drug names plus a few dosage-form words.
MEDICAL_TERMS = {
    'moxigram', 'trehalube', 'fluocinolone', 'acetonide', 'neomycin',
    'beclomethasone', 'clotrimazole', 'miconazole', 'clobetasol', 'propionate',
    'cetirizine', 'tobetsol', 'lx', 'eye', 'drop', 'drops', 'ointment', 'cream'
}

# Register every medical term in the spell checker with a count of 1,000,000;
# the intent is to give these terms high priority among suggestions.
for term in MEDICAL_TERMS:
    sym_spell.create_dictionary_entry(term, 1000000)

# Load the TrOCR processor and model from the
# "microsoft/trocr-base-handwritten" checkpoint and move the model to `device`.
trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").to(device)
# Take the decoder start and padding token ids from the processor's tokenizer
# so that generation uses the same special tokens as the tokenizer.
trocr_model.config.decoder_start_token_id = trocr_processor.tokenizer.cls_token_id
trocr_model.config.pad_token_id = trocr_processor.tokenizer.pad_token_id

# Second OCR engine: PaddleOCR configured for English with the angle
# classifier enabled.
paddle_ocr = PaddleOCR(use_angle_cls=True, lang="en")

# OCR processing functions
def extract_text_vit(image):
    """Transcribe *image* with the TrOCR model and return the decoded text.

    The image is preprocessed by the module-level ``trocr_processor``, the
    resulting pixel tensor is moved to ``device``, and ``trocr_model``
    generates token ids with gradient tracking disabled. Only the first
    decoded sequence is returned, with special tokens stripped.
    """
    encoded = trocr_processor(image, return_tensors="pt")
    inputs = encoded.pixel_values.to(device)

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        token_ids = trocr_model.generate(inputs)

    decoded = trocr_processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]

def extract_text_paddleocr(image):
    """Run PaddleOCR on *image* and return the confident text, space-joined.

    The image is converted to a NumPy array and passed to the module-level
    ``paddle_ocr`` with the angle classifier enabled. Entries from the first
    result list whose score is above 0.5 are kept; an empty string is
    returned when there is no result at all.
    """
    pages = paddle_ocr.ocr(np.array(image), cls=True)
    if pages and pages[0]:
        kept = []
        for entry in pages[0]:
            # entry[1] holds the recognised text followed by its score.
            recognition = entry[1]
            if recognition[1] > 0.5:
                kept.append(recognition[0])
        return " ".join(kept)
    return ""

def extract_medicines(text):
    """Return the sorted list of known medicines mentioned in *text*.

    Matching is case-insensitive and tolerates any run of whitespace between
    the words of a multi-word name. Each medicine is reported once, under its
    canonical spelling, regardless of how it was cased or spaced in the input.
    This matters because the caller concatenates the output of two OCR
    engines, which frequently read the same label with different casing or
    line breaks; deduplicating the raw matches would list such a medicine
    twice.

    Args:
        text: OCR text to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        Alphabetically sorted list of canonical medicine names found.
    """
    canonical_names = (
        "Moxigram LX Eye Drop",
        "Trehalube Eye Drop",
        "Fluocinolone Acetonide",
        "Neomycin",
        "Beclomethasone",
        "Clotrimazole",
        "Miconazole",
        "Clobetasol Propionate",
        "Cetirizine",
    )

    if not text:
        return []

    found_meds = set()
    for name in canonical_names:
        # Whole-word match; words of a name may be separated by any whitespace.
        pattern = r"\b" + r"\s+".join(re.escape(word) for word in name.split()) + r"\b"
        if re.search(pattern, text, flags=re.IGNORECASE):
            found_meds.add(name)
    return sorted(found_meds)

def identify_medicines(image):
    """Run both OCR engines on *image* and list the medicines they mention.

    Args:
        image: The uploaded picture, or ``None`` when the button is clicked
            before anything has been uploaded.

    Returns:
        The medicine names found by ``extract_medicines`` joined by newlines,
        "No medicines identified." when nothing matches, or a prompt to
        upload an image when *image* is ``None``.
    """
    # Guard against an empty upload: without it the OCR processors would be
    # handed None and raise instead of giving the user a readable message.
    if image is None:
        return "No image provided. Please upload an image first."

    vit_text = extract_text_vit(image)
    paddle_text = extract_text_paddleocr(image)
    # Scan the output of both engines together so that a name read correctly
    # by either one is picked up.
    combined_text = f"{vit_text} {paddle_text}"
    medicines = extract_medicines(combined_text)
    return "\n".join(medicines) if medicines else "No medicines identified."

# Gradio interface: a Blocks app with two tabs, one for OCR and one that
# links to the reminder scheduler.
with gr.Blocks() as demo:
    gr.Markdown("# Medicine Identification using OCR")

    # Tab 1: image upload (requested as a PIL image), a textbox for the
    # result, and a button that triggers the OCR pipeline.
    with gr.Tab("Upload Image"):
        image_input = gr.Image(type="pil")
        output_text = gr.Textbox(label="Identified Medicines")
        submit_btn = gr.Button("Process Image")
        # Clicking the button calls identify_medicines with the uploaded
        # image and writes its return value into the textbox.
        submit_btn.click(identify_medicines, inputs=image_input, outputs=output_text)

    # Tab 2: contains only a Markdown hyperlink to the scheduler.
    with gr.Tab("Medicine Reminder Scheduler"):
        gr.Markdown("[Go to Medicine Reminder Scheduler](http://localhost:3001)")  # NOTE(review): link targets localhost:3001, which resolves on the viewer's own machine -- confirm the intended URL

# Start the app; share=True presumably requests a publicly reachable share
# link from Gradio -- confirm this is wanted outside of Colab.
demo.launch(share=True)




[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
Config of the encoder: <class 'transformers.models.vit.modeling_vit.ViTModel'> is overwritten by shared encoder config: ViTConfig {
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "image_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": false,
  "transformers_version": "4.48.3"
}

Config of the decoder: <class 'transformers.models.trocr.modeling_trocr.TrOCRForCausalLM'> is overwritten by shared decoder config: TrOCRConfig {
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_cross_attention": true,
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": 0.0,
  "cross_attenti

[2025/03/07 05:27:02] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/root/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/root/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_length=25, rec_c

