In [1]:
import pytesseract
from PIL import Image

def extract_text_from_image(image_path, lang='ara'):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        image_path: Path of the image to read; handed directly to
            ``Image.open``.
        lang: Tesseract language code forwarded to
            ``pytesseract.image_to_string``. Defaults to ``'ara'``, the value
            this function used to hard-code, so existing callers are
            unaffected.

    Returns:
        The value returned by ``pytesseract.image_to_string`` for the image.
    """
    # Open the image in a ``with`` block so its file handle is released as
    # soon as OCR finishes rather than whenever the object is collected.
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image, lang=lang)


In [2]:
import torch

# Pick the compute device once: CUDA when torch reports a usable GPU,
# otherwise the CPU. Later cells move the model and its inputs onto it.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [3]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Load the tokenizer and the seq2seq model from the same Helsinki-NLP
# "opus-mt-tc-big-ar-en" checkpoint name.
tokenizer_helsinki = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-tc-big-ar-en")
model_helsinki = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-tc-big-ar-en")
# Move the model onto `device` (which must already be defined by an earlier
# cell); translate_helsinki sends its tokenized inputs to the same device.
model_helsinki.to(device)

def translate_helsinki(text):
    """Translate ``text`` with the Helsinki-NLP model loaded in this notebook.

    Args:
        text: Source text to translate, as a single string.

    Returns:
        str: The first generated sequence decoded with special tokens
        skipped, or ``""`` when ``text`` is empty or whitespace-only (the
        model is not invoked in that case).

    Note:
        The tokenizer is called with ``truncation=True``, so over-long input
        is truncated by the tokenizer rather than rejected.
    """
    # OCR can legitimately yield no text. Running generation on a blank input
    # may still produce tokens, which would then be scored downstream as if
    # they were a real translation, so return early instead.
    if not text or not text.strip():
        return ""

    inputs = tokenizer_helsinki(text, return_tensors="pt", truncation=True).to(device)
    translated = model_helsinki.generate(**inputs)
    # generate() returns a batch; only one text was supplied, so take row 0.
    return tokenizer_helsinki.decode(translated[0], skip_special_tokens=True)




In [4]:
# from transformers import AutoTokenizer, AutoModelForCausalLM

# tokenizer_allam = AutoTokenizer.from_pretrained("ALLaM-AI/ALLaM-7B-Instruct-preview")
# model_allam = AutoModelForCausalLM.from_pretrained("ALLaM-AI/ALLaM-7B-Instruct-preview")
# model_allam.to(device)

# def translate_allam(text):
#     prompt = f"Translate the following Arabic text to English:\n\n{text}\n\nEnglish:"
#     inputs = tokenizer_allam(prompt, return_tensors="pt", truncation=True).to(device)
#     output = model_allam.generate(**inputs, max_new_tokens=128)
#     return tokenizer_allam.decode(output[0], skip_special_tokens=True)


In [5]:
# from googletrans import Translator

# translator_google = Translator()

# def translate_google(text):
#     return translator_google.translate(text, src='ar', dest='en').text


In [6]:
import os

import dotenv

# Let python-dotenv populate the process environment first, so the lookup
# below can see a key that is defined in a .env file.
dotenv.load_dotenv()

# Resolves to None when the variable is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

In [7]:
import google.generativeai as genai

# Hand the key to the Gemini client library. GEMINI_API_KEY must already be
# defined by an earlier cell.
genai.configure(api_key=GEMINI_API_KEY)

def translate_gemini(text, model_name='gemini-pro'):
    """Ask a Gemini model to translate Arabic ``text`` into English.

    Args:
        text: Arabic source text; it is embedded verbatim in the prompt.
        model_name: Model name passed to ``genai.GenerativeModel``. Defaults
            to ``'gemini-pro'``, the value this function used to hard-code,
            so existing callers are unaffected.

    Returns:
        str: ``response.text`` with surrounding whitespace stripped, or ``""``
        when ``text`` is empty or whitespace-only (no request is made).
    """
    # Nothing to translate: skip the API round-trip instead of asking the
    # model to "translate" an empty prompt body.
    if not text or not text.strip():
        return ""

    model = genai.GenerativeModel(model_name)
    prompt = f"Translate the following Arabic text to English:\n\n{text}"
    response = model.generate_content(prompt)
    # NOTE(review): `response.text` may raise when the API returns no usable
    # candidate (e.g. a blocked response) - confirm against the library docs.
    return response.text.strip()


In [8]:
from nltk.translate.bleu_score import sentence_bleu
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def bleu_score(candidate, reference):
    """Score ``candidate`` against a single ``reference`` with sentence-level BLEU.

    Both strings are tokenized by splitting on whitespace. Only the two token
    lists are passed to ``sentence_bleu``, so its default weights and
    smoothing behaviour apply.

    Args:
        candidate: Hypothesis text to evaluate.
        reference: Text treated as the one reference translation.

    Returns:
        The value returned by ``sentence_bleu``.
    """
    reference_tokens = reference.split()
    hypothesis_tokens = candidate.split()
    # sentence_bleu expects a list of references; there is exactly one here.
    return sentence_bleu([reference_tokens], hypothesis_tokens)

def cosine_score(candidate, reference):
    """Return the TF-IDF cosine similarity between two texts.

    A fresh ``TfidfVectorizer`` is fitted on just the two texts, and the
    cosine similarity of the resulting two rows is returned.

    Args:
        candidate: First text (the translation being evaluated).
        reference: Second text (the translation it is compared with).

    Returns:
        The similarity taken from ``cosine_similarity``, or ``0.0`` when
        either text is empty or whitespace-only.
    """
    # With nothing to tokenize in either text the vectorizer cannot build a
    # vocabulary and raises ValueError; a blank text shares no terms with
    # anything, so report zero similarity instead of crashing the pipeline.
    if not (candidate or "").strip() or not (reference or "").strip():
        return 0.0

    tfidf = TfidfVectorizer().fit_transform([candidate, reference])
    # Row 0 is the candidate, row 1 the reference; slicing keeps each 2-D.
    return cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]


In [9]:
def full_pipeline(image_path):
    """OCR an image, translate the text with each enabled backend, and score it.

    The Gemini translation serves as the reference: the Helsinki output is
    compared against it with ``bleu_score`` and ``cosine_score``.

    Args:
        image_path: Path of the image handed to ``extract_text_from_image``.

    Returns:
        dict: A ``"Helsinki"`` entry holding ``"translation"``, ``"BLEU"`` and
        ``"Cosine"``, followed by a ``"Gemini (Reference)"`` entry holding
        only ``"translation"``.
    """
    source_text = extract_text_from_image(image_path)

    helsinki_output = translate_helsinki(source_text)
    # Backends currently switched off:
    # allam_output = translate_allam(source_text)
    # google_output = translate_google(source_text)
    reference_output = translate_gemini(source_text)

    helsinki_entry = {"translation": helsinki_output}
    helsinki_entry["BLEU"] = bleu_score(helsinki_output, reference_output)
    helsinki_entry["Cosine"] = cosine_score(helsinki_output, reference_output)

    report = {"Helsinki": helsinki_entry}
    # report["ALLaM"] = {
    #     "translation": allam_output,
    #     "BLEU": bleu_score(allam_output, reference_output),
    #     "Cosine": cosine_score(allam_output, reference_output),
    # }
    # report["Google Translate"] = {
    #     "translation": google_output,
    #     "BLEU": bleu_score(google_output, reference_output),
    #     "Cosine": cosine_score(google_output, reference_output),
    # }
    report["Gemini (Reference)"] = {"translation": reference_output}
    return report


In [11]:
from pathlib import Path
from pprint import pprint

# Build the path with pathlib so the separator is right on every OS; the
# previous r"data\pic_8.jpeg" literal only resolves on Windows.
# Requires Tesseract's Arabic language data (ara.traineddata) to be installed;
# without it pytesseract raises TesseractError.
result = full_pipeline(str(Path("data") / "pic_8.jpeg"))

# pprint was imported for this: show the nested result dict readably.
pprint(result)


TesseractError: (1, 'Error opening data file C:\\Program Files\\Tesseract-OCR/tessdata/ara.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'ara\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')

In [2]:
import os

import dotenv
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold  # imported but not used in this cell

# Configure the Gemini client. The key is read under GEMINI_API_KEY, the name
# the key-loading cell of this notebook uses, after loading .env the same way
# that cell does. GEMINI_KEY is still honoured so environments that already
# set it keep working. If neither is set, None is passed through and any
# resulting library error is reported by the except branch below.
dotenv.load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY") or os.getenv("GEMINI_KEY"))

try:
    # List available models to debug
    print("Available Gemini Models:")
    for m in genai.list_models():
        if 'gemini' in m.name:
            print(f"- {m.name}")

    # NOTE(review): if 'gemini-pro' is not among the names printed above,
    # replace it with one that is.
    gemini_model = genai.GenerativeModel('gemini-pro')

    # Verify model can generate content (the response itself is not inspected)
    test_response = gemini_model.generate_content("Test")
    print("✓ Gemini model verified working")

except Exception as e:
    # Deliberate best-effort setup: report the failure and leave a None
    # sentinel so later cells can check whether Gemini is available.
    print(f"Gemini initialization error: {e}")
    gemini_model = None

Available Gemini Models:
Gemini initialization error: 
  No API_KEY or ADC found. Please either:
    - Set the `GOOGLE_API_KEY` environment variable.
    - Manually pass the key with `genai.configure(api_key=my_api_key)`.
    - Or set up Application Default Credentials, see https://ai.google.dev/gemini-api/docs/oauth for more information.


In [3]:
from pathlib import Path

import pytesseract
from PIL import Image

# Path to the Tesseract executable (if not in your system's PATH)
# On Windows, it might be: pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Path to your image. Built with pathlib instead of a plain 'hindi\W...'
# literal: "\W" is not a valid string escape (newer Python versions warn
# about it), and a hard-coded backslash separator only works on Windows.
image_path = Path('hindi') / 'WhatsApp Image 2025-06-23 at 15.46.28.jpeg'

try:
    # Open the image using Pillow; the with-block closes the file once OCR is done
    with Image.open(image_path) as image:
        # Use pytesseract to do OCR on the image, specifying the Hindi language
        text = pytesseract.image_to_string(image, lang='hin')

    # Print the extracted text
    print("Detected Text:")
    print(text)

except FileNotFoundError:
    print(f"Error: The file '{image_path}' was not found.")
except Exception as e:
    # Any other failure (e.g. an OCR error) is reported rather than raised.
    print(f"An error occurred: {e}")

Detected Text:
शा लक
_ कटिमिकर्नि किसके जिकेचिय
वीन्टदेस्टकब्लि दिकि फ्र््रत्किन्दाब्य उडर्टा |

2
2 2 स््य
शु्
2 2 प्ि

स्िन्युत टरक मकर सैला भी ऋनटिडिस


